├── .gitignore ├── .pre-commit-config.yaml ├── .style.yapf ├── Fluid ├── developer's_guide_for_Fluid │ ├── Developer's_Guide_to_Paddle_Fluid.md │ ├── Developer's_Guide_to_Paddle_Fluid.pdf │ └── images │ │ ├── 1.png │ │ ├── 2.png │ │ ├── 3.png │ │ ├── 4.png │ │ ├── LoDTensor.png │ │ ├── compile_run_time.png │ │ ├── executor.png │ │ ├── fluid-compiler.png │ │ ├── fluid_examples.png │ │ ├── fluid_module_1.png │ │ ├── fluid_module_2.png │ │ ├── graph_construction_example_all.png │ │ ├── layer.png │ │ ├── operator1.png │ │ ├── operator2.png │ │ ├── place.png │ │ ├── print_fluid_program.png │ │ ├── program_desc1.png │ │ ├── program_desc2.png │ │ ├── raw_input.png │ │ ├── scope_variable_tensor.png │ │ ├── sorted_input.png │ │ ├── transpiler.png │ │ └── user_interface.png └── nmt_on_fluid │ ├── NMT on fluid.md │ └── images │ ├── 1.png │ ├── 2.png │ ├── 3.png │ ├── 4.png │ ├── attention.png │ ├── raw_input.png │ ├── sorted_input.png │ └── user_interface.png ├── README.md ├── TeXNotes ├── .gitignore ├── 00_templates │ ├── assignment │ │ ├── contents.tex │ │ ├── figures │ │ │ └── figure1.png │ │ ├── main.tex │ │ ├── references.bib │ │ └── structure.tex │ └── slides │ │ ├── slides.tex │ │ └── structure.tex ├── Efficient_attention_and_RNNs │ ├── contents │ │ ├── linear_rnn.tex │ │ ├── loop_bounds.tex │ │ ├── miscellany.tex │ │ ├── parallel_rnn.tex │ │ ├── scan.tex │ │ ├── ssm.tex │ │ └── stacked_rnns.tex │ ├── define_language.tex │ ├── figures │ │ ├── SSM-overview.pdf │ │ ├── attention-train.pdf │ │ ├── attention.pdf │ │ ├── cond_branchs.pdf │ │ ├── figures.pptx │ │ ├── mamba-mixer.pdf │ │ ├── mamba-model.pdf │ │ ├── mamba-ssm.pdf │ │ ├── rnn_layer1.pdf │ │ ├── rnn_layer2.pdf │ │ ├── scan_step.pdf │ │ ├── signal_flow_structure_of_stacked_rnn.pdf │ │ ├── stacked_rnns1.pdf │ │ └── stacked_rnns2.pdf │ ├── main.pdf │ ├── main.tex │ ├── references.bib │ └── structure.tex ├── Flash_Attention │ ├── contents │ │ ├── CTA_offset.tex │ │ ├── IO_complexity.tex │ │ ├── flash_attention.tex │ │ ├── fuse_reduce.tex │ │ └── online_softmax.tex │ ├── figures │ │ ├── attention_offset-a.pdf │ │ ├── attention_offset-b.pdf │ │ ├── fuse_reduce.pdf │ │ └── logsoftmax_expression_tree.pdf │ ├── main.pdf │ ├── main.tex │ ├── references.bib │ └── structure.tex ├── Formalize_Flash_Attention │ ├── FlashAttention_formalization.pdf │ ├── contents │ │ ├── Background.tex │ │ ├── backward.tex │ │ ├── beyond_flash_attention.tex │ │ ├── fused_chained_map_and_then_aggregate.tex │ │ ├── map_and_then_aggreage.tex │ │ └── welford_algorithm.tex │ ├── figures │ │ ├── Transformer-block.pdf │ │ ├── Transformer-block.png │ │ ├── attention.pptx │ │ ├── attention_expression_tree1.pdf │ │ ├── attention_expression_tree2.pdf │ │ ├── attention_expression_tree3.pdf │ │ ├── fused_mha.pdf │ │ ├── fused_transformer_block.pdf │ │ ├── map_and_aggregate.pdf │ │ ├── multi-scale-attn1.pdf │ │ ├── multi-scale-attn2.pdf │ │ └── transformer.png │ ├── main.pdf │ ├── main.tex │ ├── references.bib │ └── structure.tex ├── LLM_inference │ ├── contents │ │ ├── background.tex │ │ ├── bytetransformer.tex │ │ ├── deepspeed-inference.tex │ │ └── flexgen.tex │ ├── images │ │ ├── BERT-performance-breakdown.pdf │ │ ├── ILP.png │ │ ├── LLM-inference-graph.png │ │ ├── LLM.xlsx │ │ ├── MHA-variable-seq-length.pdf │ │ ├── SBI-GEMM.pdf │ │ ├── block-schedule-algorithm.png │ │ ├── block-schedule.pdf │ │ ├── byte-transformer-overview2.pdf │ │ ├── bytetransformer-padding-free-input-batch.pdf │ │ ├── bytetransformer_overview.pdf │ │ ├── different-batch-size-in-pipeline-parallelism.png │ 
│ ├── fused_transformer_block-deepspeed-inference.pdf │ │ ├── grouped-gemm.pdf │ │ ├── grouped-mha.pdf │ │ ├── llm-inference.pdf │ │ ├── llm-inference.pptx │ │ ├── logo.jpeg │ │ ├── memory-system.pdf │ │ ├── pipeline-schedule-deep-speed-inference.png │ │ ├── two-stages-in-llm-inference.pdf │ │ ├── variable-length-mha.pdf │ │ └── zero-padding-algorithm.pdf │ ├── llm_inference.pdf │ ├── llm_inference.tex │ └── references.bib └── Parallel_Execution_of_DO_Loops │ ├── basics.tex │ ├── example.tex │ ├── hyperplane.tex │ ├── hypertheorem.tex │ ├── main.pdf │ ├── main.tex │ ├── optimalpi.tex │ ├── references.bib │ └── structure.tex ├── code_reading ├── README.md └── pet │ ├── README.md │ ├── basics.md │ ├── clang_pulgin.md │ ├── extract_scop.md │ └── tree2scop.md ├── engineering_a_compiler └── scanner.md ├── julia_learning_notes ├── Basics │ ├── 01_JPL_FQ.md │ ├── 02_JPL_gpu_computing.md │ ├── 03_JPL_metaprogramming.md │ ├── Generated_function.ipynb │ ├── Good_references.md │ ├── Julia_ASTs.ipynb │ ├── Macro.ipynb │ ├── Tips.ipynb │ ├── Type_inference │ │ ├── README.md │ │ └── Type_inference.pdf │ ├── Types.ipynb │ ├── WhyJulia │ │ ├── README.md │ │ └── whyJulia.pdf │ ├── broadcast.ipynb │ └── images │ │ └── gpu_julia_packages.png ├── CodeSnippets │ ├── UnionAll_types.ipynb │ ├── assignment_and_mutation.ipynb │ ├── infer_type_parameter.ipynb │ ├── modules.ipynb │ ├── parameteric_types.ipynb │ └── scope.ipynb ├── Flux │ ├── Flux_implementation.md │ ├── Test_user_interface.ipynb │ └── char_rnn_example.ipynb ├── IRTools │ ├── Meta.ipynb │ └── _methods_by_ftype.ipynb ├── README.md ├── Zygote │ ├── CodesStudy1.ipynb │ ├── CodesStudy2.ipynb │ └── code_snippets │ │ ├── hello_world_test.jl │ │ └── program_representation_in_Julia.ipynb └── experiments │ ├── README.md │ └── lstm_test │ ├── README.md │ ├── naive_cpu_test │ ├── README.md │ ├── Recurrent │ │ ├── common.jl │ │ ├── lstm.jl │ │ └── recurrent.jl │ └── cpu_test.jl │ └── naive_gpu_test │ ├── Recurrent │ ├── lstm.jl │ └── recurrent.jl │ └── gpu_test.jl ├── notes_for_tf_pt ├── compile-pt.md └── compile-tf.md ├── paper_notes ├── Diffusion │ └── README.md ├── README.md ├── Template.md ├── array-programming-model │ └── README.md ├── auto-diff │ ├── BP_and_implicit_function_theorem │ │ └── README.md │ ├── Dynamic_Automatic_Differentiation_of_GPU_Broadcast_Kernels.md │ ├── SCT_AD │ │ ├── Differentiating_SSA_form_program.md │ │ └── README.md │ ├── brief_introduction_to_AD.md │ ├── images │ │ ├── expression_graph.png │ │ ├── mix_forward_and_reverse_mode_AD.png │ │ └── multidimensional_dual_numbers.png │ └── tape_based_ad.md ├── compiler-stuffs │ ├── CFG-optimizations │ │ ├── Control-Flow-Optimization.md │ │ ├── README.md │ │ └── images │ │ │ └── discussion-on-cfg.png │ ├── Code-optimizations │ │ ├── dmxpy-optimizations.ipynb │ │ ├── dmxpy.py │ │ ├── images │ │ │ ├── dmxpy.png │ │ │ ├── dmxpy.pptx │ │ │ ├── excerpt-from-dmxpy-in-linpack.png │ │ │ ├── manual-optimizations-for-dmxpy.png │ │ │ ├── simple-version-of-dmxpy.png │ │ │ └── strength-reduction.png │ │ ├── introduction-to-optimizations.md │ │ └── local-optimizatioins.md │ ├── Compiler_and_Interpreter.pdf │ ├── Dependence_analysis │ │ ├── README.md │ │ └── dependence_abstraction │ │ │ ├── contents.tex │ │ │ ├── main.tex │ │ │ ├── references.bib │ │ │ └── structure.tex │ ├── Intermediate-Representations │ │ ├── IR.md │ │ └── images │ │ │ ├── AST-example.png │ │ │ ├── DAG-example.png │ │ │ ├── IR-level-of-abstraction.png │ │ │ ├── an-simple-example-of-dependency-graph.png │ │ │ ├── dependence-graph.png │ │ 
│ ├── different-levels-of-abstraction-for-an-array-subscript-reference.png │ │ │ ├── naming-leads-to-different-translations.png │ │ │ ├── one-address-code.png │ │ │ └── three-address-code.png │ ├── Loop_analysis │ │ ├── Denpendence_analysis │ │ │ ├── Basics │ │ │ │ ├── README.md │ │ │ │ ├── Terminology.md │ │ │ │ └── images │ │ │ │ │ ├── a_single_rnn.png │ │ │ │ │ ├── bi_directional_rnn.png │ │ │ │ │ ├── bi_directional_rnn.pptx │ │ │ │ │ ├── different_dependences.png │ │ │ │ │ ├── dilated_rnn.png │ │ │ │ │ ├── example-01.png │ │ │ │ │ ├── grid_rnn.png │ │ │ │ │ ├── polyhedral_representation.png │ │ │ │ │ ├── stack_rnn.png │ │ │ │ │ ├── wh.png │ │ │ │ │ └── workflow.png │ │ │ └── dependence_analysis.pdf │ │ ├── Polyhedral_representation_in_Pet │ │ │ ├── references.bib │ │ │ ├── section1.tex │ │ │ ├── slides.tex │ │ │ └── structure.tex │ │ ├── The_parallel_execution_of_do_loops.pdf │ │ └── auto-vectorization │ │ │ ├── README.md │ │ │ └── auto-vectorizing-TensorFlow-graphs.md │ ├── Polyhedral_compilatioin │ │ ├── A_Performance_Vocabulary_for_Affine_Loop_Transformations │ │ │ ├── contents.tex │ │ │ ├── main.tex │ │ │ ├── references.bib │ │ │ └── structure.tex │ │ ├── A_decopled_approach_to_high-level_loop_optimization.md │ │ ├── A_decoupled_approach_to_high-level_loop_optimization │ │ │ └── mathematical_foundations_of_polyhedra.md │ │ ├── Array_regions_analyses_and_applications │ │ │ └── Array_regions_analyses_and_applications.md │ │ ├── Data_dependence_and_PIP │ │ │ ├── contents.tex │ │ │ ├── main.tex │ │ │ ├── references.bib │ │ │ └── structure.tex │ │ ├── Mathmatical_foundations │ │ │ └── concepts.md │ │ ├── More_Legal_Transformations_for_Locality │ │ │ ├── contents.tex │ │ │ ├── images │ │ │ │ ├── SCoP_decomposition.png │ │ │ │ ├── a_skewing_transformation.png │ │ │ │ └── farkas_lemma.png │ │ │ ├── main.pdf │ │ │ ├── main.tex │ │ │ ├── references.bib │ │ │ └── structure.tex │ │ ├── Polyhedral_representation │ │ │ ├── 2d_puls_1.tex │ │ │ ├── main.tex │ │ │ ├── references.bib │ │ │ ├── schedule_tree.tex │ │ │ └── structure.tex │ │ ├── README.md │ │ ├── Tiramisu │ │ │ ├── README.md │ │ │ └── tiramisu_overview.png │ │ ├── images │ │ │ ├── introduction-01.png │ │ │ ├── introduction-02.png │ │ │ ├── introduction-03.png │ │ │ └── introduction-04.png │ │ ├── isl │ │ │ ├── contents.tex │ │ │ ├── main.pdf │ │ │ ├── main.tex │ │ │ ├── presburger_sets_and_relations.tex │ │ │ ├── pw_quasi_affine.tex │ │ │ ├── references.bib │ │ │ ├── sets_and_maps.tex │ │ │ └── structure.tex │ │ ├── mathematical_foundations_of_polyhedra.md │ │ ├── polyhedral_background_01.pdf │ │ ├── polyhedral_background_02.pdf │ │ └── polyhedral_background_03.pdf │ ├── README.md │ └── intermediate-code-generation.md ├── data_processing_systems │ ├── CIEL.md │ ├── README.md │ └── figures │ │ ├── ciel_cluster_architecture.png │ │ └── dynamic-task-graph.png ├── dataflow-architectures │ ├── Advances_in_dataflow_programming_languages.md │ ├── Dataflow_computers_their_history_and_future.md │ ├── README.md │ ├── Scheduled_dataflow.md │ ├── images │ │ ├── Manchester-dynamic-dataflow-machine.png │ │ ├── dataflow-accumulator.png │ │ ├── dataflow-graph-1.png │ │ ├── ill-formed_multi-rate-dataflow.png │ │ ├── img1.png │ │ ├── img2.png │ │ ├── paper-screenshot-1.png │ │ ├── periodic_admissible_schedule.png │ │ ├── solve_G.q.png │ │ ├── static-dataflow-architecture.png │ │ ├── topology_matrix.png │ │ └── two-input_add_actor_and_two-output_duplicate_actor.png │ └── synchronous_dataflow.md ├── dl-compiler │ ├── Glow │ │ ├── Glow.md │ │ ├── Glow.pdf 
│ │ └── images │ │ │ └── low-level-glow-ir.png │ ├── MLIR │ │ ├── MLIR.md │ │ ├── README.md │ │ ├── swift_for_tensorflow.md │ │ └── swift_for_tensorflow.pdf │ ├── README.md │ ├── TVM │ │ ├── Relay.md │ │ ├── TVM.md │ │ └── TVM.pdf │ ├── XLA │ │ └── XLA.md │ ├── figures │ │ └── sm-and-sub-core-of-volta.png │ └── fusion │ │ └── README.md ├── dl-models │ ├── attention_simplification │ │ └── README.md │ ├── beyond_transformer │ │ └── README.md │ ├── miscellanea │ │ ├── Geometric_deep_learning.md │ │ └── README.md │ ├── nlp │ │ ├── RNN-modeling │ │ │ ├── CW-RNN │ │ │ │ ├── A_Clockwork_RNN.md │ │ │ │ └── A_Clockwork_RNN.pdf │ │ │ ├── GridLSTM │ │ │ │ ├── GridLSTM.md │ │ │ │ └── GridLSTM.pdf │ │ │ ├── HM-LSTM │ │ │ │ ├── Hierarchical_multiscale_RNN.md │ │ │ │ └── Hierarchical_multiscale_RNN.pdf │ │ │ ├── How_Much_Attention_Do_You_Need.md │ │ │ ├── MD-LSTM │ │ │ │ ├── MD-LSTM.md │ │ │ │ └── MD-LSTM.pdf │ │ │ ├── Mogrifier-LSTM │ │ │ │ ├── contents.tex │ │ │ │ ├── images │ │ │ │ │ └── MogrifierLSTM.png │ │ │ │ ├── main.pdf │ │ │ │ ├── main.tex │ │ │ │ ├── references.bib │ │ │ │ └── structure.tex │ │ │ ├── Neural_Speed_Reading_via_Skim_RNN │ │ │ │ └── Neural_Speed_Reading_via_Skim_RNN.md │ │ │ ├── ON-LSTM │ │ │ │ ├── ON-LSTM.md │ │ │ │ └── ON-LSTM.pdf │ │ │ ├── Quasi-Recurrent_neural_network │ │ │ │ ├── Quasi-Recurrent_neural_network.md │ │ │ │ └── Quasi-Recurrent_neural_network.pdf │ │ │ ├── README.md │ │ │ ├── RNN_Variants_Slides_190820 │ │ │ │ ├── RNN_Variants.md │ │ │ │ ├── RNN_Variants.pdf │ │ │ │ └── images │ │ │ │ │ ├── CudnnLSTM.png │ │ │ │ │ └── Recurrent_neural_network_unfold.svg.png │ │ │ ├── Sliced_Recurrent_Neural_Networks │ │ │ │ ├── Sliced_Recurrent_Neural_Networks.md │ │ │ │ └── images │ │ │ │ │ └── SRNN.png │ │ │ ├── The_Unreasonable_Effectiveness_of_the_Forget_Gate │ │ │ │ ├── The_Unreasonable_Effectiveness_of_the_Forget_Gate.md │ │ │ │ └── The_Unreasonable_Effectiveness_of_the_Forget_Gate.pdf │ │ │ ├── Training_RNNs_as_Fast_as_CNNs │ │ │ │ ├── Training_RNNs_as_Fast_as_CNNs.md │ │ │ │ └── Training_RNNs_as_Fast_as_CNNs.pdf │ │ │ ├── Transformer │ │ │ │ ├── README.md │ │ │ │ ├── README.pdf │ │ │ │ └── images │ │ │ │ │ └── QK.png │ │ │ ├── WaveRNN.md │ │ │ └── images │ │ │ │ ├── 2d_lstm_1.png │ │ │ │ ├── 3D-GridLSTM.png │ │ │ │ ├── CWRNN.png │ │ │ │ ├── CWRNN.pptx │ │ │ │ ├── DilatedRNN.png │ │ │ │ ├── DilatedRNN1.png │ │ │ │ ├── GridLSTM-NMT.png │ │ │ │ ├── HM-LSTM-pre-activation.png │ │ │ │ ├── LSTM_equation.png │ │ │ │ ├── active_modules.png │ │ │ │ ├── active_modules.pptx │ │ │ │ ├── boundary_state.png │ │ │ │ ├── boundary_state.pptx │ │ │ │ ├── hardsigmoid.png │ │ │ │ ├── hm-lstm-cell-update.png │ │ │ │ ├── hm-lstm-output-hidden.png │ │ │ │ ├── multi-dimensioanl-rnn.png │ │ │ │ ├── multi-dimensional-multi-directional-context.png │ │ │ │ ├── wh.png │ │ │ │ └── wh.pptx │ │ ├── pre-training │ │ │ ├── ALBERT.md │ │ │ ├── BERT.md │ │ │ ├── ELMo.md │ │ │ ├── GPT.md │ │ │ ├── README.md │ │ │ ├── ULM-FiT.md │ │ │ ├── XLNet.md │ │ │ ├── images │ │ │ │ ├── ELMo.png │ │ │ │ ├── GPT-auxiliary-training-object.png │ │ │ │ ├── STLR-figure.png │ │ │ │ ├── ULM-FiT-STLR.png │ │ │ │ ├── biLM-ELMo.png │ │ │ │ ├── dataset-ULM-FiT.png │ │ │ │ ├── elmo-vectors.png │ │ │ │ ├── example-language-inference.jpg │ │ │ │ ├── example-of-auxiliary-prediction-taks.png │ │ │ │ ├── highlight-bert-LM1.png │ │ │ │ ├── highlight-bert-LM2.png │ │ │ │ ├── highlight-bert-LM3.png │ │ │ │ ├── highlight-bert-input.png │ │ │ │ ├── highway.png │ │ │ │ ├── highway2.png │ │ │ │ ├── how-bert-comes-out.png │ │ │ │ ├── 
input-of-bert.png │ │ │ │ ├── intro.png │ │ │ │ ├── task-specific-input-transformation.png │ │ │ │ └── transformer-block.png │ │ │ ├── learning-language-representation-slides.md │ │ │ └── learning-language-representation-slides.pdf │ │ └── x-former │ │ │ └── README.md │ ├── structured_state_space_models │ │ ├── README.md │ │ └── maba │ │ │ └── README.md │ └── vision │ │ ├── README.md │ │ ├── ResNeXt │ │ └── README.md │ │ ├── SSD │ │ ├── SSD.md │ │ └── images │ │ │ ├── SSD.png │ │ │ └── SSD2.png │ │ ├── SqueezeNet │ │ └── README.md │ │ └── Xception │ │ └── README.md ├── dl-systems │ ├── A_computational_model_for_TensorFlow.md │ ├── AutoGraph.md │ ├── Beyond_Data_and_Model_Parallelism_for_Deep_Neural_Networks.md │ ├── Cavs_An_Efficient_Runtime_System_for_Dynamic_Neural_Networks.md │ ├── JANUS.md │ ├── JAX.md │ ├── Machine_Learning_Systems_are_Stuck_in_a_Rut.md │ ├── Pydron.md │ ├── TensorFlow_Eager.md │ ├── images │ │ ├── Pydron.png │ │ ├── SSA_translation.png │ │ ├── functioin_call_translation.png │ │ ├── tf_eager_01.png │ │ ├── tf_eager_02.png │ │ └── tf_eager_03.png │ └── tf-cfg-design │ │ ├── Deep_learning_with_dynamic_computation_graphs.md │ │ ├── Dynamic_Control_Flow_in_Large-Scale_Machine_Learning.md │ │ └── tf-while-op-impl.md ├── dl-workload-optimizations │ ├── DeepCPU.md │ ├── Optimizing_RNN_performance │ │ ├── Optimizing_RNN_performance.md │ │ ├── Optimizing_RNN_performance.pdf │ │ └── images │ │ │ ├── multiple_layer_optimization.png │ │ │ ├── pic1.png │ │ │ ├── pic2.png │ │ │ ├── single_cell_optimization.png │ │ │ ├── single_layer_optimization.png │ │ │ └── starting_point.png │ ├── README.md │ └── ShuffleNet_v2.md ├── generalization-of-neural-network │ ├── README.md │ ├── rendered │ │ ├── A_Bayesian_Perspective_on_Generalization_and_Stochastic_Gradient_Descent.pdf │ │ ├── Bayesian_Model_Comparison.pdf │ │ ├── Highly_Scalable_Deep_Learning_Training_System_with_Mixed-Precision.pdf │ │ ├── Large_Batch_Training_of_Convolutional_Networks.pdf │ │ ├── On_Large-Batch_Training_for_Deep_Learning.pdf │ │ └── Train_Longer_Generalize_Better.pdf │ └── sources │ │ ├── A_Bayesian_Perspective_on_Generalization_and_Stochastic_Gradient_Descent.md │ │ ├── Accurate_Large_Minibatch_SGD.md │ │ ├── Bayesian_Model_Comparison.md │ │ ├── Highly_Scalable_Deep_Learning_Training_System_with_Mixed-Precision.md │ │ ├── Large_Batch_Training_of_Convolutional_Networks.md │ │ ├── On_Large-Batch_Training_for_Deep_Learning.md │ │ ├── Train_Longer_Generalize_Better.md │ │ └── images │ │ ├── f1.png │ │ ├── f2.png │ │ ├── fig1.png │ │ ├── figure1.png │ │ ├── figure2.png │ │ ├── insert_bn_after_pool5.png │ │ ├── mixed_precision_with_LARS.png │ │ ├── network_configuration.png │ │ ├── scalability.png │ │ ├── sharpness_metric.png │ │ ├── sharpness_of_minimizers_1.png │ │ ├── sharpness_of_minimizers_2.png │ │ └── warmup_experiments.png ├── large-language-models │ ├── GPT │ │ ├── GPT-models.md │ │ ├── README.md │ │ └── figures │ │ │ ├── Full_GPT_architecture.png │ │ │ └── GPT-3-model-size.png │ ├── README.md │ ├── fast-attention │ │ ├── Flash-Attention.pdf │ │ └── README.md │ ├── llm_inference.pdf │ ├── transformer-optimizations │ │ ├── FlexGen.md │ │ ├── README.md │ │ ├── RMS_layernorm.md │ │ ├── figures │ │ │ ├── block-schedule-with-overlap.png │ │ │ ├── computeation-graph-of-llm-inference.png │ │ │ ├── pre-post-layer-normalization-in-transformer.png │ │ │ └── two-different-schedules.png │ │ └── welford_algorithm_and_layer_norm.md │ └── whisper │ │ ├── README.md │ │ └── figures │ │ ├── whisper-model-size.png │ │ └── 
whisper_overview.png ├── leading-edge-ai │ ├── Capsules │ │ ├── Dynamic_Routing_between_Capsule.md │ │ └── README.md │ ├── README.md │ └── RIM.md ├── miscellanea │ ├── Neural_Ordinary_Differential_Equations.md │ └── README.md ├── ml-with-discrete-variables │ ├── README.md │ └── Straight-throughEstimator.md ├── normalization-in-NN │ ├── L2_Regularization_versus_Batch_and_Weight_Normalization │ │ └── L2_Regularization_versus_Batch_and_Weight_Normalization.md │ ├── Layer_Normalization │ │ ├── layer_normalization.md │ │ └── layer_normalization.pdf │ ├── README.md │ ├── Weight_Normalization │ │ ├── weight_normalization.md │ │ └── weight_normalization.pdf │ └── optimization │ │ ├── Hessian_and_DeepLearning_Optimizaiton.md │ │ └── Hessian_and_DeepLearning_Optimizaiton.pdf ├── parallel-computing │ ├── IRs │ │ ├── lift.md │ │ └── nova.md │ ├── Nesl │ │ ├── VCODE.md │ │ ├── images │ │ │ └── nesl-1.png │ │ └── nesl.md │ ├── README.md │ ├── data_parallel_language │ │ └── README.md │ ├── execution_model │ │ ├── ActorModel.md │ │ ├── CSP.md │ │ └── MessagePassing.md │ └── programming_model │ │ ├── DMLL.md │ │ ├── README.md │ │ ├── collection_orientated_languages.md │ │ └── images │ │ ├── DMLL-comparison.png │ │ ├── co-ori-lang-1.png │ │ ├── mimd.gif │ │ ├── simd.gif │ │ └── vcode-instruction.png ├── partial_aggregation │ ├── README.md │ └── figures │ │ ├── decomposable_function_1.png │ │ ├── decomposable_function_2.png │ │ ├── decomposable_function_3.png │ │ ├── decomposable_function_4.png │ │ ├── execution_plan_1.png │ │ └── execution_plan_2.png ├── programming-language │ ├── About_programming_language.md │ ├── Glossary │ │ ├── README.md │ │ ├── basic_concepts.md │ │ ├── images │ │ │ └── 1920px-Tree_edges.svg.png │ │ ├── program_analysis.md │ │ ├── programming_paradigm.md │ │ └── type.md │ ├── Nominative_and_structure_type.md │ ├── PL_Design.md │ ├── ProgrammingParadigms.md │ ├── README.md │ ├── SSA │ │ ├── README.md │ │ ├── SSA.md │ │ ├── Simple_and_Efficient_Construction_of_Static_Single_Assignment_Form.md │ │ └── images │ │ │ ├── SSA_example1.1.png │ │ │ ├── SSA_example1.2.png │ │ │ └── SSA_example1.3.png │ ├── Types.md │ └── abstract_binding_tree.md ├── tensor_operations │ ├── README.md │ ├── concepts.tex │ ├── constructs.tex │ ├── images │ │ ├── mm_example.png │ │ ├── nested_tensorarray.png │ │ ├── tensor.png │ │ └── transformer.png │ ├── ir.tex │ ├── item_access.tex │ ├── main.pdf │ ├── main.tex │ ├── nn.tex │ ├── optimization.tex │ ├── shape_operation.tex │ ├── structure.tex │ ├── tensorarray_creation.tex │ ├── transformer.tex │ └── vectorization.tex └── type-systems │ ├── README.md │ ├── basic-concepts.md │ └── notations │ ├── README.md │ ├── bussproofs.sty │ ├── contents │ └── kinding.tex │ ├── formal-grammar.sty │ ├── main.pdf │ ├── main.tex │ └── structure.tex ├── reinforcement_learning ├── README.md └── basic_concepts │ ├── basic_concepts_about_reinforcement_learning.pdf │ └── basic_concepts_about_reinforcement_learning.ppt ├── text_generation_for_gitchat ├── README.md ├── pic │ ├── pic1.one-hot.png │ ├── pic10.highway.png │ ├── pic11.generate_text_from_language_model.png │ ├── pic12.encoder_decoder.png │ ├── pic13.neural_turing_machine.png │ ├── pic2.word_embedding.png │ ├── pic3.rnn.png │ ├── pic4.bp_through_all_nodes.png │ ├── pic5.bp_through_shortcut.png │ ├── pic6.lstm.png │ ├── pic7.gru.png │ ├── pic8.deep_rnn.png │ └── pic9.residual_block.png ├── text_generation.pdf └── trans_2_html.sh └── tiled_efficient_attention ├── README.md ├── README.pdf ├── figures ├── cal_p.png ├── 
chunk_form_parallelism.png ├── chunk_recurrent.png ├── first_kv.png ├── fused_chunk_gla_fwd_kernel.png ├── fwd_decay_cumsum.png ├── fwd_inner_chunk.png ├── gated_linear_attention.pptx ├── gated_linear_attention_layer.png ├── gla_data_accessed.png ├── gla_equation.png ├── last_decay.png └── ~$gated_linear_attention.pptx ├── main.py └── model ├── __init__.py ├── chunk.py ├── chunk_fuse.py ├── chunk_util.py ├── configuration.py ├── gla.py ├── naive.py ├── recurrent_fuse.py └── utils.py /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | - repo: https://github.com/pre-commit/mirrors-yapf.git 2 | sha: v0.16.0 3 | hooks: 4 | - id: yapf 5 | files: \.py$ 6 | - repo: https://github.com/pre-commit/pre-commit-hooks 7 | sha: a11d9314b22d8f8c7556443875b731ef05965464 8 | hooks: 9 | - id: check-merge-conflict 10 | - id: check-symlinks 11 | - id: detect-private-key 12 | files: (?!.*paddle)^.*$ 13 | - id: end-of-file-fixer 14 | files: \.md$ 15 | - id: trailing-whitespace 16 | files: \.md$ 17 | - repo: https://github.com/Lucas-C/pre-commit-hooks 18 | sha: v1.0.1 19 | hooks: 20 | - id: forbid-crlf 21 | files: \.md$ 22 | - id: remove-crlf 23 | files: \.md$ 24 | - id: forbid-tabs 25 | files: \.md$ 26 | - id: remove-tabs 27 | files: \.md$ 28 | -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style = pep8 3 | column_limit = 80 4 | -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/Developer's_Guide_to_Paddle_Fluid.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/Developer's_Guide_to_Paddle_Fluid.pdf -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/1.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/2.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/3.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/4.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/LoDTensor.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/LoDTensor.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/compile_run_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/compile_run_time.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/executor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/executor.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/fluid-compiler.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/fluid-compiler.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/fluid_examples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/fluid_examples.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/fluid_module_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/fluid_module_1.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/fluid_module_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/fluid_module_2.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/graph_construction_example_all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/graph_construction_example_all.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/layer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/layer.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/operator1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/operator1.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/operator2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/operator2.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/place.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/place.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/print_fluid_program.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/print_fluid_program.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/program_desc1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/program_desc1.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/program_desc2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/program_desc2.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/raw_input.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/raw_input.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/scope_variable_tensor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/scope_variable_tensor.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/sorted_input.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/sorted_input.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/transpiler.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/transpiler.png -------------------------------------------------------------------------------- /Fluid/developer's_guide_for_Fluid/images/user_interface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/developer's_guide_for_Fluid/images/user_interface.png -------------------------------------------------------------------------------- /Fluid/nmt_on_fluid/images/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/nmt_on_fluid/images/1.png -------------------------------------------------------------------------------- /Fluid/nmt_on_fluid/images/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/nmt_on_fluid/images/2.png -------------------------------------------------------------------------------- /Fluid/nmt_on_fluid/images/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/nmt_on_fluid/images/3.png -------------------------------------------------------------------------------- /Fluid/nmt_on_fluid/images/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/nmt_on_fluid/images/4.png -------------------------------------------------------------------------------- /Fluid/nmt_on_fluid/images/attention.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/nmt_on_fluid/images/attention.png -------------------------------------------------------------------------------- /Fluid/nmt_on_fluid/images/raw_input.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/nmt_on_fluid/images/raw_input.png -------------------------------------------------------------------------------- /Fluid/nmt_on_fluid/images/sorted_input.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/nmt_on_fluid/images/sorted_input.png -------------------------------------------------------------------------------- /Fluid/nmt_on_fluid/images/user_interface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/Fluid/nmt_on_fluid/images/user_interface.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | My learning notes. 
2 | 
--------------------------------------------------------------------------------
/TeXNotes/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/.gitignore
--------------------------------------------------------------------------------
/TeXNotes/00_templates/assignment/contents.tex:
--------------------------------------------------------------------------------
1 | Moving data from global memory to shared memory and moving data from shared memory to the register file always feel subtly different.
2 | 
3 | We first define \colorbox{hl}{``copy''}: it moves data $D$ completely from place $A$ to place $B$, where it is stored as $D'$. $D'$ may be a permutation of $D$; that is, the elements of $D'$ correspond one-to-one to the elements of $D$, the element count is unchanged, but the order may differ.
4 | 
5 | Global memory is off-chip storage, while shared memory, cache, and the RF are on-chip storage; the capacity of the latter is always smaller than that of the former.
6 | 
7 | Given an AccessMap, translating it completely into an implementation is not just a copy macro kernel problem. It involves (1) partitioning the data (using the capacity to decide the size of the data block handled by one spatial execution); (2) the copy itself; and (3) the ordering of the executions that are repeated over time.
8 | 
9 | Thinking about it naively, a computation involves two ingredients: data and multiple threads. For data we always provide two views, a logical one and a physical one:
10 | \begin{enumerate}
11 | \setlength{\itemsep}{-0.1cm}
12 | \item the high-dimensional, array-like \colorbox{hl}{logical view}; it can be addressed with \textcolor{blue}{high-dimensional logical indices}, improves programmability, and isolates the implementation choices that are strongly tied to the hardware
13 | \item the \colorbox{hl}{physical view}
14 | \item a Layout is the mapping function between logical high-dimensional indices and physical addresses
15 | \end{enumerate}
16 | 
17 | \begin{figure}[h]
18 | \centering
19 | \includegraphics[width=0.8\textwidth]{figures/shared_2_rf_with_ldmatrix.pdf}
20 | \caption{Loading data from shared memory into the register file with the ldmatrix instruction.}
21 | \end{figure}
22 | 
23 | When ldmatrix loads data from shared memory into each thread's local registers, the 32 threads of a warp form a $2 \times 2$ arrangement of thread groups, with 8 threads in each group;
24 | \textcolor{red}{every thread must pass a shared memory pointer when ldmatrix is invoked}, and each thread then reads 128 consecutive bits from shared memory. One ldmatrix execution reads at most a $16 \times 16$ half-precision matrix.
25 | 
26 | In the implementation, every thread has to compute the correct pointer offset of the shared memory location it is going to read.
27 | 
28 | In one ldmatrix execution the 32 threads together read a $16 \times 16$ $2D$ tile, so the per-thread data tile is $1 \times 8$; if the results of $n$ ldmatrix executions are all kept in thread-local registers, the per-thread data block has size $1 \times \left( n \times 8 \right)$.
29 | 
30 | So we configure the target layout as $(1, n*8)$.
31 | 
32 | \newpage
33 | 
34 | \begin{enumerate}
35 | \setlength{\itemsep}{-0.1cm}
36 | \item The first nesting level: one shared memory data block is turned into $(m, n)$ invocations of copy\_2d\_tile\_s2r over time, issued with for loops; each invocation handles one small tile $\mathcal{T}$.
37 | \item The second nesting level: a single copy\_2d\_tile\_s2r.
38 | \end{enumerate}
--------------------------------------------------------------------------------
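A minimal CUDA sketch of the per-thread offset computation that the contents.tex notes above describe. It assumes a row-major 16x16 half-precision tile in shared memory, an sm_75-or-newer GPU, and a top-left/top-right/bottom-left/bottom-right assignment of the four 8x8 sub-matrices to the four 8-thread groups; the name copy_2d_tile_s2r is taken from the notes, but the signature below is an assumption, not the repository's actual interface.

#include <cstdint>
#include <cuda_fp16.h>

// One warp cooperatively loads a row-major 16x16 half tile from shared memory
// with a single ldmatrix.x4. Every thread supplies the address of one
// 8-element (128-bit) row segment and receives four 32-bit registers holding
// its fragment of the tile in the standard MMA fragment layout.
__device__ void copy_2d_tile_s2r(const __half* smem_tile,  // 16x16, row-major
                                 uint32_t (&dst)[4]) {
    const int lane = threadIdx.x % 32;
    // The 32 threads form a 2x2 arrangement of 8-thread groups; group g
    // addresses the 8 rows of one 8x8 sub-matrix of the 16x16 tile.
    const int group = lane / 8;                    // which 8x8 sub-matrix: 0..3
    const int row = (group / 2) * 8 + (lane % 8);  // row inside the 16x16 tile
    const int col = (group % 2) * 8;               // first column of the segment
    const __half* src = smem_tile + row * 16 + col;

    // Convert the generic pointer to a 32-bit shared-memory address and issue
    // ldmatrix; threads 0-7, 8-15, 16-23, 24-31 provide the addresses for
    // sub-matrices 0, 1, 2, 3 respectively.
    uint32_t saddr = static_cast<uint32_t>(__cvta_generic_to_shared(src));
    asm volatile(
        "ldmatrix.sync.aligned.m8n8.x4.shared.b16 {%0, %1, %2, %3}, [%4];\n"
        : "=r"(dst[0]), "=r"(dst[1]), "=r"(dst[2]), "=r"(dst[3])
        : "r"(saddr));
}

An enclosing loop over the (m, n) tile coordinates of the first nesting level would advance smem_tile to the next 16x16 block before each call, which is exactly the repeated-execution ordering discussed in the notes.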
/TeXNotes/00_templates/assignment/figures/figure1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/00_templates/assignment/figures/figure1.png
--------------------------------------------------------------------------------
/TeXNotes/00_templates/assignment/references.bib:
--------------------------------------------------------------------------------
1 | @article{lamport1974parallel,
2 |   title={The parallel execution of do loops},
3 |   author={Lamport, Leslie},
4 |   journal={Communications of the ACM},
5 |   volume={17},
6 |   number={2},
7 |   pages={83--93},
8 |   year={1974},
9 |   publisher={ACM New York, NY, USA}
10 | }
11 | 
--------------------------------------------------------------------------------
/TeXNotes/00_templates/slides/slides.tex:
--------------------------------------------------------------------------------
1 | \documentclass {beamer}
2 | 
3 | \input {structure.tex}
4 | 
5 | \title[] {Ying's Slides}
6 | \subtitle{A short story}
7 | \author {Ying Cao}
8 | \institute {}
9 | \date {\today}
10 | 
11 | \begin {document}
12 | 
13 | \AtBeginSection[]
14 | {
15 | \begin{frame}
16 | \frametitle{Table of Contents}
17 | \tableofcontents[currentsection]
18 | \end{frame}
19 | }
20 | 
21 | \begin {frame}
22 | \titlepage
23 | \end {frame}
24 | 
25 | \section {Section 1}
26 | 
27 | \begin {frame} {Title frame 1}
28 | 
29 | In this slide, some important text will be
30 | \alert{highlighted} because it's important.
31 | Please, don't abuse it.
32 | 
33 | \begin {block} {Block blue}
34 | \begin {itemize}
35 | \item Item 1
36 | \item Item 2
37 | \item Item 3
38 | \end {itemize}
39 | \end {block}
40 | \end {frame}
41 | 
42 | \section {Section 2}
43 | \begin {frame} {Title Frame 2}
44 | \[ \mathrm{support}(X \to Y) = p(X \cup Y) = \frac{n(X \cup Y)}{N} \]
45 | \end {frame}
46 | 
47 | \end {document}
--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/contents/miscellany.tex:
--------------------------------------------------------------------------------
1 | If there exists an invertible matrix $P$ such that the following equality holds for a matrix $A$:
2 | $$A = PDP^{-1}$$
3 | 
4 | then $A$ and $D$ are called similar matrices, written $A \sim D$, and powers of $A$ can be obtained from powers of $D$:
5 | 
6 | $$A^{m} = (PDP^{-1})^{m} = (PDP^{-1})(PDP^{-1})\dots(PDP^{-1}) = PD^{m}P^{-1}$$
7 | 
8 | \textcolor{red}{If we can show that $D$ is a very simple matrix, e.g.\ a diagonal matrix, then powers of $A$ become very easy to compute}.
9 | However, a general matrix is not necessarily diagonalizable over the real numbers, whereas almost every matrix is diagonalizable over the complex numbers\cite{lru-kexue}.
10 | Hence $A$ can always be written as:
11 | 
12 | \begin{align*}
13 | A = P\Lambda P^{-1}, \qquad A^{m} = P\Lambda^{m} P^{-1}
14 | \end{align*}
--------------------------------------------------------------------------------
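A small worked instance of the identity in miscellany.tex above; the concrete matrix is an illustrative assumption, not taken from the notes. For a diagonalizable $2 \times 2$ matrix the $m$-th power follows directly from the diagonal factor:

\begin{align*}
A &= \begin{pmatrix} 2 & 1 \\ 0 & 3 \end{pmatrix}
   = P \Lambda P^{-1}, &
P &= \begin{pmatrix} 1 & 1 \\ 0 & 1 \end{pmatrix}, \quad
\Lambda = \begin{pmatrix} 2 & 0 \\ 0 & 3 \end{pmatrix}, \\
A^{m} &= P \Lambda^{m} P^{-1}
 = \begin{pmatrix} 2^{m} & 3^{m} - 2^{m} \\ 0 & 3^{m} \end{pmatrix},
\end{align*}

which reproduces $A$ at $m = 1$ and gives, for example, $A^{2} = \begin{pmatrix} 4 & 5 \\ 0 & 9 \end{pmatrix}$.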
/TeXNotes/Efficient_attention_and_RNNs/contents/parallel_rnn.tex:
--------------------------------------------------------------------------------
1 | \subsection{Representative models}
2 | 
3 | \subsubsection{RWKV\cite{peng2023rwkv}}
4 | 
5 | \subsubsection{LRU (Linear Recurrent Unit)\cite{orvieto2023resurrecting}}
--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/SSM-overview.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/SSM-overview.pdf
--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/attention-train.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/attention-train.pdf
--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/attention.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/attention.pdf
--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/cond_branchs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/cond_branchs.pdf
--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/figures.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/figures.pptx
--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/mamba-mixer.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/mamba-mixer.pdf
--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/mamba-model.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/mamba-model.pdf
--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/mamba-ssm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/mamba-ssm.pdf
--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/rnn_layer1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/rnn_layer1.pdf
--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/rnn_layer2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/rnn_layer2.pdf
--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/scan_step.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/scan_step.pdf
--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/signal_flow_structure_of_stacked_rnn.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/signal_flow_structure_of_stacked_rnn.pdf
--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/stacked_rnns1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/stacked_rnns1.pdf
--------------------------------------------------------------------------------
/TeXNotes/Efficient_attention_and_RNNs/figures/stacked_rnns2.pdf:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/figures/stacked_rnns2.pdf -------------------------------------------------------------------------------- /TeXNotes/Efficient_attention_and_RNNs/main.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Efficient_attention_and_RNNs/main.pdf -------------------------------------------------------------------------------- /TeXNotes/Efficient_attention_and_RNNs/main.tex: -------------------------------------------------------------------------------- 1 | %!TEX program = xelatex 2 | \documentclass[UTF8]{ctexart} 3 | 4 | \input{structure.tex} 5 | \input{define_language.tex} 6 | 7 | \title{RNNs and scan} 8 | 9 | \begin{document} 10 | 11 | \tableofcontents 12 | \thispagestyle{empty} 13 | \newpage 14 | \setcounter{page}{1} 15 | 16 | \bibliographystyle{plain} 17 | 18 | \noindent 19 | \linespread{1.2} 20 | \selectfont 21 | \setlength{\topskip}{0ex} 22 | \setlength{\parskip}{1ex} 23 | \setlength{\lineskip}{1em} 24 | 25 | \section{Transformer和线性RNN} 26 | \input{contents/linear_rnn.tex} 27 | 28 | \section{SSM(state-space model)} 29 | \input{contents/ssm.tex} 30 | 31 | \newpage 32 | \section{并行RNN} 33 | \input{contents/parallel_rnn.tex} 34 | 35 | \newpage 36 | \section{General non-linear recurrence 的并行计算问题} 37 | \input{contents/stacked_rnns.tex} 38 | \input{contents/loop_bounds.tex} 39 | 40 | \begin{appendices} 41 | \input{contents/miscellany.tex} 42 | \end{appendices} 43 | 44 | \newpage 45 | \bibliography{references.bib} 46 | \end{document} 47 | -------------------------------------------------------------------------------- /TeXNotes/Flash_Attention/figures/attention_offset-a.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Flash_Attention/figures/attention_offset-a.pdf -------------------------------------------------------------------------------- /TeXNotes/Flash_Attention/figures/attention_offset-b.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Flash_Attention/figures/attention_offset-b.pdf -------------------------------------------------------------------------------- /TeXNotes/Flash_Attention/figures/fuse_reduce.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Flash_Attention/figures/fuse_reduce.pdf -------------------------------------------------------------------------------- /TeXNotes/Flash_Attention/figures/logsoftmax_expression_tree.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Flash_Attention/figures/logsoftmax_expression_tree.pdf -------------------------------------------------------------------------------- /TeXNotes/Flash_Attention/main.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Flash_Attention/main.pdf -------------------------------------------------------------------------------- /TeXNotes/Flash_Attention/main.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | 3 | \input{structure.tex} % Include the file specifying the document structure and custom commands 4 | \title{Flash Attention} 5 | % \author{Ying Cao} 6 | % \date{\today} 7 | 8 | \begin{document} 9 | \bibliographystyle{plain} 10 | 11 | % \maketitle % Print the title 12 | % \tableofcontents 13 | 14 | \noindent 15 | \linespread{1.2} 16 | \selectfont 17 | \setlength{\topskip}{0ex} 18 | \setlength{\parskip}{1ex} 19 | \setlength{\lineskip}{1em} 20 | 21 | %--------------------------------------------------------------- 22 | % unnumbered section 23 | %--------------------------------------------------------------- 24 | 25 | \section{Flash Attention} 26 | \input{contents/flash_attention.tex} 27 | 28 | \newpage 29 | \section{Online Normalized Softmax} 30 | \input{contents/online_softmax.tex} 31 | 32 | \newpage 33 | \section{CTA Offset} 34 | \input{contents/CTA_offset.tex} 35 | 36 | \newpage 37 | \section{I/O Complexity Analysis} 38 | \input{contents/IO_complexity.tex} 39 | 40 | \newpage 41 | \section{Fuse Consecutive Aggregations} 42 | \input{contents/fuse_reduce.tex} 43 | 44 | \bibliography{references.bib} 45 | \end{document} 46 | -------------------------------------------------------------------------------- /TeXNotes/Flash_Attention/references.bib: -------------------------------------------------------------------------------- 1 | @article{DBLP:journals/corr/abs-2112-05682, 2 | author = {Markus N. 
Rabe and 3 | Charles Staats}, 4 | title = {\href{https://arxiv.org/pdf/2112.05682.pdf}{Self-attention Does Not Need O(n\({}^{\mbox{2}}\)) Memory}}, 5 | journal = {CoRR}, 6 | volume = {abs/2112.05682}, 7 | year = {2021}, 8 | url = {https://arxiv.org/abs/2112.05682}, 9 | eprinttype = {arXiv}, 10 | eprint = {2112.05682}, 11 | timestamp = {Tue, 14 Dec 2021 14:21:31 +0100}, 12 | biburl = {https://dblp.org/rec/journals/corr/abs-2112-05682.bib}, 13 | bibsource = {dblp computer science bibliography, https://dblp.org} 14 | } 15 | 16 | @misc{lse-trick, 17 | author = {Gregory Gundersen}, 18 | year = {2020}, 19 | howpublished = "\url{https://gregorygundersen.com/blog/2020/02/09/log-sum-exp/}", 20 | urldate = {February 9, 2020}, 21 | title = {The Log-Sum-Exp Trick}, 22 | note = "[Online; accessed 17-April-2023]" 23 | } -------------------------------------------------------------------------------- /TeXNotes/Formalize_Flash_Attention/FlashAttention_formalization.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/FlashAttention_formalization.pdf -------------------------------------------------------------------------------- /TeXNotes/Formalize_Flash_Attention/figures/Transformer-block.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/Transformer-block.pdf -------------------------------------------------------------------------------- /TeXNotes/Formalize_Flash_Attention/figures/Transformer-block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/Transformer-block.png -------------------------------------------------------------------------------- /TeXNotes/Formalize_Flash_Attention/figures/attention.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/attention.pptx -------------------------------------------------------------------------------- /TeXNotes/Formalize_Flash_Attention/figures/attention_expression_tree1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/attention_expression_tree1.pdf -------------------------------------------------------------------------------- /TeXNotes/Formalize_Flash_Attention/figures/attention_expression_tree2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/attention_expression_tree2.pdf -------------------------------------------------------------------------------- /TeXNotes/Formalize_Flash_Attention/figures/attention_expression_tree3.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/attention_expression_tree3.pdf -------------------------------------------------------------------------------- /TeXNotes/Formalize_Flash_Attention/figures/fused_mha.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/fused_mha.pdf -------------------------------------------------------------------------------- /TeXNotes/Formalize_Flash_Attention/figures/fused_transformer_block.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/fused_transformer_block.pdf -------------------------------------------------------------------------------- /TeXNotes/Formalize_Flash_Attention/figures/map_and_aggregate.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/map_and_aggregate.pdf -------------------------------------------------------------------------------- /TeXNotes/Formalize_Flash_Attention/figures/multi-scale-attn1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/multi-scale-attn1.pdf -------------------------------------------------------------------------------- /TeXNotes/Formalize_Flash_Attention/figures/multi-scale-attn2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/multi-scale-attn2.pdf -------------------------------------------------------------------------------- /TeXNotes/Formalize_Flash_Attention/figures/transformer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/figures/transformer.png -------------------------------------------------------------------------------- /TeXNotes/Formalize_Flash_Attention/main.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Formalize_Flash_Attention/main.pdf -------------------------------------------------------------------------------- /TeXNotes/Formalize_Flash_Attention/main.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | 3 | \input{structure.tex} % Include the file specifying the document structure and custom commands 4 | \title{Flash Attention} 5 | % \author{Ying Cao} 6 | % \date{\today} 7 | 8 | \begin{document} 9 | \bibliographystyle{plain} 10 | 11 | \maketitle % Print the title 12 | \tableofcontents 13 | 14 | \clearpage 15 | \noindent 16 | \linespread{1.2} 17 | \selectfont 18 | \setlength{\topskip}{0ex} 19 | \setlength{\parskip}{1ex} 20 | \setlength{\lineskip}{1em} 
21 | 22 | %--------------------------------------------------------------- 23 | % unnumbered section 24 | %--------------------------------------------------------------- 25 | 26 | \noindent $::$ is read as "have a type of". 27 | 28 | \noindent $\rightarrow$ is read as "maps to". 29 | 30 | \section{Background: The Computational Process of Reduce and Map}\label{sec1} 31 | \input{contents/background.tex} 32 | 33 | \section{A Generalized \textit{Broadcast-and-then-Aggregate} Operation} 34 | \input{contents/map_and_then_aggreage.tex} 35 | 36 | \section{Block Execution of a Chain of \textit{Broadcast-and-then-Aggregate}} 37 | \input{contents/fused_chained_map_and_then_aggregate.tex} 38 | 39 | \section{The Transformer Block} 40 | \input{contents/beyond_flash_attention.tex} 41 | \clearpage 42 | \begin{appendices} 43 | \input{contents/welford_algorithm.tex} 44 | \input{contents/backward.tex} 45 | \end{appendices} 46 | 47 | \clearpage 48 | \bibliographystyle{abbrv} 49 | \bibliography{references.bib} 50 | 51 | \end{document} -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/BERT-performance-breakdown.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/BERT-performance-breakdown.pdf -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/ILP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/ILP.png -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/LLM-inference-graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/LLM-inference-graph.png -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/LLM.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/LLM.xlsx -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/MHA-variable-seq-length.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/MHA-variable-seq-length.pdf -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/SBI-GEMM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/SBI-GEMM.pdf -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/block-schedule-algorithm.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/block-schedule-algorithm.png -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/block-schedule.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/block-schedule.pdf -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/byte-transformer-overview2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/byte-transformer-overview2.pdf -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/bytetransformer-padding-free-input-batch.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/bytetransformer-padding-free-input-batch.pdf -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/bytetransformer_overview.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/bytetransformer_overview.pdf -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/different-batch-size-in-pipeline-parallelism.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/different-batch-size-in-pipeline-parallelism.png -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/fused_transformer_block-deepspeed-inference.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/fused_transformer_block-deepspeed-inference.pdf -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/grouped-gemm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/grouped-gemm.pdf -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/grouped-mha.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/grouped-mha.pdf -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/llm-inference.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/llm-inference.pdf -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/llm-inference.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/llm-inference.pptx -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/logo.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/logo.jpeg -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/memory-system.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/memory-system.pdf -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/pipeline-schedule-deep-speed-inference.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/pipeline-schedule-deep-speed-inference.png -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/two-stages-in-llm-inference.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/two-stages-in-llm-inference.pdf -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/variable-length-mha.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/variable-length-mha.pdf -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/images/zero-padding-algorithm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/images/zero-padding-algorithm.pdf -------------------------------------------------------------------------------- /TeXNotes/LLM_inference/llm_inference.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/LLM_inference/llm_inference.pdf -------------------------------------------------------------------------------- /TeXNotes/Parallel_Execution_of_DO_Loops/example.tex: -------------------------------------------------------------------------------- 1 | This is the implementation of Grid LSTM. 2 | 3 | \begin{lstlisting}[language=Python] 4 | # data parallelism: iterate over samples in a batch. 
5 | for sample_id in range(0, batch_size, 1): 6 | x = src_array_batch[sample_id] 7 | y = trg_array_batch[sample_id] 8 | 9 | src_length = x.size()[0] 10 | trg_length = y.size()[0] 11 | 12 | # dim 1: stack Grid LSTM Cell to form depth. 13 | for d in range(0, depth, 1): 14 | # dim 2: iterate over source sequence length. 15 | for i in range(1, src_length + 1, 1): 16 | # dim 3: iterate over target sequence length. 17 | for j in range(1, trg_length + 1, 1): 18 | cell_x = cells[d][0] 19 | cell_y = cells[d][1] 20 | 21 | output_d = outputs[sample_id][d] 22 | 23 | if d == 0: 24 | x_t = x[i - 1, :].view(1, input_dim) 25 | y_t = y[j - 1, :].view(1, input_dim) 26 | else: 27 | x_t = outputs[sample_id][d - 1][i][j][0][0] 28 | y_t = outputs[sample_id][d - 1][i][j][1][0] 29 | states_x = output_d[i][j - 1][0] 30 | states_y = output_d[i - 1][j][1] 31 | 32 | h_x_prev, c_x_prev = states_x 33 | h_y_prev, c_y_prev = states_y 34 | 35 | h = torch.cat((h_x_prev, h_y_prev), dim=1) 36 | h_x, c_x = cell_x(x_t, (h, c_x_prev)) 37 | h_y, c_y = cell_y(y_t, (h, c_y_prev)) 38 | 39 | output_d[i][j][0].append(h_x) # hidden for direction x 40 | output_d[i][j][0].append(c_x) # cell for direction x 41 | 42 | output_d[i][j][1].append(h_y) # hidden for direction y 43 | output_d[i][j][1].append(c_y) # cell for direction y 44 | 45 | \end{lstlisting} 46 | -------------------------------------------------------------------------------- /TeXNotes/Parallel_Execution_of_DO_Loops/main.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/TeXNotes/Parallel_Execution_of_DO_Loops/main.pdf -------------------------------------------------------------------------------- /TeXNotes/Parallel_Execution_of_DO_Loops/main.tex: -------------------------------------------------------------------------------- 1 | \documentclass {article} 2 | 3 | \input{structure.tex} % Include the file specifying the document structure and custom commands 4 | \title{Note for 5 | \textit{\href{https://www.microsoft.com/en-us/research/wp-content/uploads/2016/12/The-Parallel-Execution-of-DO-Loops.pdf} 6 | {The Parallel Execution of DO Loops}}} 7 | \author{Ying Cao} 8 | \date{\today} 9 | 10 | \begin{document} 11 | 12 | \maketitle 13 | \tableofcontents 14 | 15 | \section{Basics} 16 | \input{basics.tex} 17 | 18 | \section{The hyperplane method} 19 | \input{hyperplane.tex} 20 | 21 | { 22 | \small 23 | \raggedright 24 | \bibliographystyle{ieeetr} 25 | % or, abbrv, acm, alpha, apalike, ieeetr, plain, siam, unsrt 26 | \begin{spacing}{1} 27 | \bibliography{references.bib} 28 | \end{spacing} 29 | } 30 | \end{document} 31 | -------------------------------------------------------------------------------- /TeXNotes/Parallel_Execution_of_DO_Loops/references.bib: -------------------------------------------------------------------------------- 1 | @article{lamport1974parallel, 2 | title={The parallel execution of do loops}, 3 | author={Lamport, Leslie}, 4 | journal={Communications of the ACM}, 5 | volume={17}, 6 | number={2}, 7 | pages={83--93}, 8 | year={1974}, 9 | publisher={ACM New York, NY, USA} 10 | } 11 | @Misc{EliBendersky, 12 | howpublished = {\url{https://eli.thegreenplace.net/2018/affine-transformations/}}, 13 | note = {Accessed Feburary 4, 2020}, 14 | title = {Affine transformations}, 15 | author = {Eli Bendersky} 16 | } 17 | @Misc{Diophantinewiki, 18 | howpublished = {\url{https://en.wikipedia.org/wiki/Diophantine_equation}}, 19 | note = {Accessed 
Feburary 4, 2020}, 20 | title = {Diophantine equation}, 21 | author = {} 22 | } 23 | @inproceedings{irigoin1988supernode, 24 | title={Supernode partitioning}, 25 | author={Irigoin, Fran{\c{c}}ois and Triolet, Remi}, 26 | booktitle={Proceedings of the 15th ACM SIGPLAN-SIGACT symposium on Principles of programming languages}, 27 | pages={319--329}, 28 | year={1988} 29 | } 30 | @article{wolf1991loop, 31 | title={A loop transformation theory and an algorithm to maximize parallelism}, 32 | author={Wolf, Michael E and Lam, Monica S}, 33 | journal={IEEE Transactions on Parallel \& Distributed Systems}, 34 | number={4}, 35 | pages={452--471}, 36 | year={1991}, 37 | publisher={IEEE} 38 | } 39 | @book{mordell1969diophantine, 40 | title={Diophantine equations}, 41 | author={Mordell, Louis Joel}, 42 | year={1969}, 43 | publisher={Academic Press} 44 | } 45 | -------------------------------------------------------------------------------- /code_reading/README.md: -------------------------------------------------------------------------------- 1 | [TBD] 2 | -------------------------------------------------------------------------------- /code_reading/pet/README.md: -------------------------------------------------------------------------------- 1 | - [Clang AST](clang_pulgin.md) 2 | - [basics](basics.md) 3 | - [extract SCoP](extract_scop.md) 4 | - [tree to SCoP](tree2scop.md) 5 | -------------------------------------------------------------------------------- /code_reading/pet/clang_pulgin.md: -------------------------------------------------------------------------------- 1 | See [this example](https://github.com/llvm/llvm-project/blob/master/clang/examples/PrintFunctionNames/PrintFunctionNames.cpp) to implement a Clang plugin. 2 | 3 | # References 4 | 5 | 1. [How to write RecursiveASTVisitor based ASTFrontendActions](https://clang.llvm.org/docs/RAVFrontendAction.html) 6 | 1. [Clang tutorial part 1: introduction](https://kevinaboos.wordpress.com/2013/07/23/clang-tutorial-part-i-introduction/) 7 | 1. [Clang Tutorial Part II: LibTooling Example](https://kevinaboos.wordpress.com/2013/07/23/clang-tutorial-part-i-introduction/) 8 | -------------------------------------------------------------------------------- /engineering_a_compiler/scanner.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | - [Scanner](#scanner) 4 | 5 | 6 | 7 | # Scanner 8 | -------------------------------------------------------------------------------- /julia_learning_notes/Basics/03_JPL_metaprogramming.md: -------------------------------------------------------------------------------- 1 | # [Metaprogramming](https://docs.julialang.org/en/release-0.4/manual/metaprogramming/) 2 | 3 | * Julia represents its own code as a data structure of the language itself. 4 | * allow sophisticated code generation without extra build steps 5 | * allow true Lisp-style macros _**operating at the level of abstract syntax trees**_. 6 | * powerful [reflection](https://en.wikipedia.org/wiki/Reflection_%28computer_programming%29) capabilities 7 | 8 | ## Program representation 9 | 10 | * every Julia program starts life as a string 11 | * parse (I understand this function as lexical analysis) each string into an object called an expression, represented by the Julia's type `Expr`. 12 | * `Expr` objects contain three parts: 13 | 1. `Symbol` 14 | * In the context of an expression, symbols are used to indicate access to variables. 
15 | * when an expression is evaluated, a symbol is replaced with the value bound to that symbol in the appropriate scope. 16 | 2. the expression arguments 17 | 3. the expression result type 18 | -------------------------------------------------------------------------------- /julia_learning_notes/Basics/Good_references.md: -------------------------------------------------------------------------------- 1 | 1. [The Julia Language Challenge](https://nextjournal.com/sdanisch/the-julia-challenge) 2 | 1. [The Julia Challenge in C++](https://medium.com/@wolfv/the-julia-challenge-in-c-21272d36c002) 3 | -------------------------------------------------------------------------------- /julia_learning_notes/Basics/Type_inference/README.md: -------------------------------------------------------------------------------- 1 | # Inference Convergence Algorithm 2 | 3 | 1. [Inference Convergence](https://juliacomputing.com/blog/2016/04/04/inference-convergence.html) 4 | 1. [Inference Convergence Algorithm in Julia - Revisited](https://juliacomputing.com/blog/2017/05/15/inference-converage2.html) 5 | 1. [Notes](Type_inference.pdf) 6 | -------------------------------------------------------------------------------- /julia_learning_notes/Basics/Type_inference/Type_inference.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/julia_learning_notes/Basics/Type_inference/Type_inference.pdf -------------------------------------------------------------------------------- /julia_learning_notes/Basics/WhyJulia/README.md: -------------------------------------------------------------------------------- 1 | 1. [Why Julia](https://ucidatascienceinitiative.github.io/IntroToJulia/Html/WhyJulia) 2 | 1. [Notes](whyJulia.pdf) 3 | 4 | --- 5 | 6 | ### What is [Type-stable](https://docs.julialang.org/en/v1.2-dev/manual/faq/#man-type-stability-1) 7 | 8 | _**The type of the output is predictable from the types of the inputs.**_ In particular, it means that the type of the output cannot vary depending on the values of the inputs. 9 | 10 | The following codes are type-unstable: 11 | 12 | ```julia 13 | function unstable(flag::Bool) 14 | if flag 15 | return 1 16 | else 17 | return 1.0 18 | end 19 | end 20 | ``` 21 | 22 | _**Julia can't predict the return type of function that is type unstable at compile-time, making generation of fast machine code difficult.**_ 23 | -------------------------------------------------------------------------------- /julia_learning_notes/Basics/WhyJulia/whyJulia.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/julia_learning_notes/Basics/WhyJulia/whyJulia.pdf -------------------------------------------------------------------------------- /julia_learning_notes/Basics/images/gpu_julia_packages.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/julia_learning_notes/Basics/images/gpu_julia_packages.png -------------------------------------------------------------------------------- /julia_learning_notes/README.md: -------------------------------------------------------------------------------- 1 | Learning notes of the Julia programming language. 
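The type-stability note in the WhyJulia summary above is easiest to appreciate next to a counterpart that is type stable. Below is a minimal sketch, assuming nothing beyond base Julia; `stable` is an illustrative name rather than a function from the referenced notes:

```julia
using InteractiveUtils  # provides @code_warntype outside the REPL

# Type-stable counterpart of the `unstable` example: every branch returns a
# Float64, so the return type is predictable from the argument types alone.
function stable(flag::Bool)
    if flag
        return 1.0
    else
        return 0.0
    end
end

@code_warntype stable(true)  # infers a concrete return type (Float64)
# Running the same macro on `unstable` reports Union{Float64, Int64} instead.
```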
2 | -------------------------------------------------------------------------------- /julia_learning_notes/Zygote/code_snippets/hello_world_test.jl: -------------------------------------------------------------------------------- 1 | xs = [fill(1.1, 3), fill(2.2, 3)]; 2 | 3 | # using Pkg 4 | # Pkg.activate(".") 5 | 6 | # zero(x::Array{Float64,1}) = [zero(x) for x in x] 7 | # 8 | # function case1(xs) 9 | # h = xs[1][1] 10 | # # sum(h) 11 | # end 12 | # 13 | # Zygote.gradient(case1, xs) 14 | 15 | 16 | using Pkg 17 | Pkg.activate(".") 18 | using Zygote 19 | 20 | function case1(xs) 21 | h = xs[1] 22 | # for i in 2:length(xs) 23 | # h = h .* xs[i] 24 | # end 25 | sum(h) 26 | end 27 | @show case1(xs) 28 | 29 | Zygote.gradient(case1, xs) 30 | -------------------------------------------------------------------------------- /julia_learning_notes/experiments/README.md: -------------------------------------------------------------------------------- 1 | For test only. The codes are not optimized and ugly. 2 | -------------------------------------------------------------------------------- /julia_learning_notes/experiments/lstm_test/README.md: -------------------------------------------------------------------------------- 1 | Extremely naive implementation. Codes are not optimized. They are ugly and redundant. 2 | For Test ONLY. 3 | -------------------------------------------------------------------------------- /julia_learning_notes/experiments/lstm_test/naive_cpu_test/README.md: -------------------------------------------------------------------------------- 1 | The codes are only tested under Julia v0.7-beta. 2 | 3 | - [`@code_lowered`](docs/code_lowered.txt) 4 | - [`@code_typed`](docs/code_typed.txt) 5 | - [`@code_llvm`](docs/code_llvm.txt) 6 | - [`@code_native`](docs/code_native.txt) 7 | -------------------------------------------------------------------------------- /julia_learning_notes/experiments/lstm_test/naive_cpu_test/Recurrent/common.jl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env julia 2 | 3 | abstract type NN 4 | end 5 | 6 | mutable struct Param 7 | n::Integer # input size 8 | d::Integer # output size 9 | w::AbstractArray{AbstractFloat} # learnable weight matrix 10 | dw::AbstractArray{AbstractFloat} # gradients of learnable weight matrix 11 | 12 | Param(n::Integer) = new(n, n, randn(n, n), randn(n, n)) 13 | Param(n::Integer, d::Integer) = new(n, d, randn(n, d), randn(n, d)) 14 | Param(n::Integer, d::Integer, w::Array, dw::Array) = new(n, d, w, dw) 15 | end 16 | 17 | randParam(n::Integer, d::Integer, std::Real=0.1) = Param( 18 | n, d, randn(n, d) * std, zeros(n, d)) 19 | onesParam(n::Integer, d::Integer) = Param(n, d, ones(n, d), zeros(n, d)) 20 | 21 | function softmax(m::Param) 22 | out = Param(m.n, m.d) 23 | maxval = maximum(m.w, 2) 24 | out.w .= exp.(m.w .- maxval) 25 | out.w ./= sum(out.w, 2) 26 | return out 27 | end 28 | 29 | σ(x) = 1.0 / (1.0 + exp(-x)) 30 | -------------------------------------------------------------------------------- /julia_learning_notes/experiments/lstm_test/naive_cpu_test/Recurrent/recurrent.jl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env julia 2 | module Recurrent 3 | 4 | export Param, randParam, onesParam, softmax, sigmoid 5 | export LSTMCell, LSTM_forward 6 | 7 | include("common.jl") 8 | include("lstm.jl") 9 | 10 | end # module 11 | -------------------------------------------------------------------------------- 
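The `Recurrent` module above exports `LSTMCell` and `LSTM_forward`, both defined in `lstm.jl`, which is not reproduced in this listing. The following is a minimal, self-contained sketch of what one LSTM step computes, reusing the `σ` helper from `common.jl`; the names `SimpleCell` and `lstm_step` and the gate/weight layout are illustrative assumptions, not the module's actual API:

```julia
# Illustrative single-step LSTM in plain Julia; not the LSTMCell/LSTM_forward
# defined in lstm.jl.
σ(x) = 1.0 / (1.0 + exp(-x))  # same helper as in common.jl

struct SimpleCell
    Wx::Matrix{Float64}  # (input_dim, 4 * hidden_dim): input-to-gate weights
    Wh::Matrix{Float64}  # (hidden_dim, 4 * hidden_dim): hidden-to-gate weights
    b::Matrix{Float64}   # (1, 4 * hidden_dim): gate biases
end

SimpleCell(n::Int, d::Int) =
    SimpleCell(0.1 .* randn(n, 4d), 0.1 .* randn(d, 4d), zeros(1, 4d))

# One time step: x_t is (1, input_dim); h and c are (1, hidden_dim).
function lstm_step(cell::SimpleCell, x_t, h, c)
    d = size(cell.Wh, 1)
    g = x_t * cell.Wx .+ h * cell.Wh .+ cell.b  # all four gate pre-activations
    i = σ.(g[:, 1:d])          # input gate
    f = σ.(g[:, d+1:2d])       # forget gate
    o = σ.(g[:, 2d+1:3d])      # output gate
    u = tanh.(g[:, 3d+1:4d])   # candidate cell update
    c_new = f .* c .+ i .* u
    h_new = o .* tanh.(c_new)
    return h_new, c_new
end
```

A driver in the spirit of `LSTM_forward` would loop over the rows of the input sequence, carrying `(h, c)` from one `lstm_step` call to the next, which is what the `cpu_test.jl` script below appears to exercise.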
/julia_learning_notes/experiments/lstm_test/naive_cpu_test/cpu_test.jl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env julia 2 | include("Recurrent/recurrent.jl") 3 | using .Recurrent 4 | 5 | srand(1) 6 | 7 | const batch_size = 2 8 | const seq_len = 3 9 | const input_dim = 4 10 | const hidden_dim = 4 11 | 12 | rand_inputs = randn(batch_size * seq_len, input_dim) 13 | 14 | lstm_cell = LSTMCell(input_dim, hidden_dim) 15 | cell_state, hidden_states = LSTM_forward(rand_inputs, lstm_cell, 16 | input_dim, hidden_dim, seq_len) 17 | 18 | println("cell states :") 19 | display(cell_state) 20 | 21 | println("\nhidden states :") 22 | display(hidden_states) 23 | -------------------------------------------------------------------------------- /julia_learning_notes/experiments/lstm_test/naive_gpu_test/Recurrent/recurrent.jl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env julia 2 | module Recurrent 3 | 4 | using CuArrays 5 | 6 | include("lstm.jl") 7 | 8 | export LSTMCell 9 | export σ, lstm_forward 10 | 11 | 12 | end # module 13 | -------------------------------------------------------------------------------- /julia_learning_notes/experiments/lstm_test/naive_gpu_test/gpu_test.jl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env julia 2 | include("Recurrent/recurrent.jl") 3 | 4 | using .Recurrent 5 | using CuArrays: CuArray 6 | 7 | srand(1) 8 | 9 | const batch_size = 512 10 | const seq_len = 10 11 | const input_dim = 512 12 | const hidden_dim = 512 13 | 14 | rand_inputs_d = CuArray(randn(batch_size * seq_len, input_dim)) 15 | lstm_cell = LSTMCell(input_dim, hidden_dim) 16 | 17 | 18 | for i = 1 : 10 19 | cell_states, hidden_states = lstm_forward(rand_inputs_d, lstm_cell, seq_len) 20 | 21 | # println("cell states : ") 22 | # display(cell_states) 23 | 24 | # println("hidden_states : ") 25 | # display(hidden_states) 26 | end 27 | -------------------------------------------------------------------------------- /paper_notes/Diffusion/README.md: -------------------------------------------------------------------------------- 1 | 2 | 1. "Controlling Text-to-Image Diffusion by Orthogonal Finetuning"[[PDF]](https://arxiv.org/pdf/2306.07280.pdf) 3 | 1. LoRA 4 | 1. 
ControlNet -------------------------------------------------------------------------------- /paper_notes/Template.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | - [Title](#title) 4 | - [My Takeaways and Some Thoughts](#my-takeaways-and-some-thoughts) 5 | - [Overall](#overall) 6 | - [Problem Proposed](#problem-proposed) 7 | - [Goal](#goal) 8 | - [Approach](#approach) 9 | - [Evaluation Methods in this Paper](#evaluation-methods-in-this-paper) 10 | - [Related Works Recommended for Further Reading](#related-works-recommended-for-further-reading) 11 | - [Detail](#detail) 12 | - [Chanllenges](#chanllenges) 13 | - [Proposed Solutions](#proposed-solutions) 14 | - [Reference](#reference) 15 | 16 | 17 | 18 | # Title 19 | 20 | [link](https://arxiv.org/pdf/1812.01329.pdf) 21 | 22 | ## My Takeaways and Some Thoughts 23 | 24 | ## Overall 25 | 26 | ### Problem Proposed 27 | 28 | ### Goal 29 | 30 | ### Approach 31 | 32 | ### Evaluation Methods in this Paper 33 | 34 | ## Related Works Recommended for Further Reading 35 | 36 | ## Detail 37 | 38 | ### Chanllenges 39 | 40 | ### Proposed Solutions 41 | 42 | ## Reference 43 | -------------------------------------------------------------------------------- /paper_notes/auto-diff/BP_and_implicit_function_theorem/README.md: -------------------------------------------------------------------------------- 1 | # Backpropagation is not Just the Chain-Rule 2 | 3 | ## Backprop and the Adjoint Method 4 | 5 | 6 | 7 | ## Reference 8 | 9 | 1. [Backprop is not just the chain rule](https://timvieira.github.io/blog/post/2017/08/18/backprop-is-not-just-the-chain-rule/) 10 | 1. [A new trick for calculating Jacobian vector products](https://j-towns.github.io/2017/06/12/A-new-trick.html) 11 | 1. [Mechanics of Lagrangians](http://www.argmin.net/2016/05/31/mechanics-of-lagrangians/) 12 | 1. [Mates of Costate](http://www.argmin.net/2016/05/18/mates-of-costate/) 13 | 1. [Black-box optimization](https://timvieira.github.io/blog/post/2018/03/16/black-box-optimization/) 14 | -------------------------------------------------------------------------------- /paper_notes/auto-diff/SCT_AD/README.md: -------------------------------------------------------------------------------- 1 | 1. [Tapenade](Tapenade.md) 2 | - Project webpage: https://www-sop.inria.fr/tropics/tapenade.html 3 | - Paper: [The Tapenade Automatic Differentiation tool: principles, model, and specification](https://hal.inria.fr/hal-00913983/document) 4 | - [Slides](http://www-sop.inria.fr/tropics/Laurent.Hascoet/slidesLesHouches.pdf) 5 | 1. [Myia](Automatic_Differentiation_in_Myia.md) 6 | - Github Project: https://github.com/mila-udem/myia 7 | - Paper: [Automatic Differentiation in Myia](https://openreview.net/pdf?id=S1hcluzAb) 8 | 1. [Tagent](Tagent.md) 9 | - Github Project: https://github.com/google/tangent 10 | - [Tangent: Source-to-Source Debuggable Derivatives](https://ai.googleblog.com/2017/11/tangent-source-to-source-debuggable.html) 11 | 1. [JAX](JAX.md) 12 | - Github Project: https://github.com/google/jax 13 | - [Compiling machine learning programs via high-level tracing](https://www.sysml.cc/doc/146.pdf) 14 | 1. [Zygote](Differentiating_SSA_form_program.md) 15 | - Github Project: https://github.com/FluxML/Zygote.jl 16 | - Paper: [Don't Unroll Adjoint: Differentiating SSA-Form Programs](https://arxiv.org/pdf/1810.07951.pdf) 17 | 18 | 1. 
[DLVM: A modern compiler infrastructure for deep learning systems with adjoint code generation in a domain-specific IR](https://arxiv.org/pdf/1711.03016.pdf) 19 | 20 | --- 21 | 22 | ### Some Related Research Work 23 | 24 | 1. [JANUS Fast and Flexible Deep Learning via Symbolic Graph Execution of Imperative Programs](JANUS.md) 25 | -------------------------------------------------------------------------------- /paper_notes/auto-diff/images/expression_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/auto-diff/images/expression_graph.png -------------------------------------------------------------------------------- /paper_notes/auto-diff/images/mix_forward_and_reverse_mode_AD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/auto-diff/images/mix_forward_and_reverse_mode_AD.png -------------------------------------------------------------------------------- /paper_notes/auto-diff/images/multidimensional_dual_numbers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/auto-diff/images/multidimensional_dual_numbers.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/CFG-optimizations/README.md: -------------------------------------------------------------------------------- 1 | # Reading List 2 | 3 | - [ ] [Optimizing control flow in loops using interval and dependence analysis](https://www.ics.uci.edu/~givargis/pubs/J18.pdf) 4 | - [ ] [Control Flow Analysis Dragon Book Section 8.4](http://www.cs.ecu.edu/karl/5220/spr16/Notes/Optimization/controlflow.html) 5 | - [ ] [Assignment 2: Control Flow Optimization](http://aggregate.org/OC/s18a2.html) 6 | - [ ] [Lecture 4: Control Flow Optimization, COS 598C - Advanced Compilers](https://www.cs.princeton.edu/courses/archive/spr04/cos598C/lectures/04-ControlFlow-3x1.pdf) 7 | -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/CFG-optimizations/images/discussion-on-cfg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/CFG-optimizations/images/discussion-on-cfg.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Code-optimizations/images/dmxpy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Code-optimizations/images/dmxpy.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Code-optimizations/images/dmxpy.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Code-optimizations/images/dmxpy.pptx -------------------------------------------------------------------------------- 
/paper_notes/compiler-stuffs/Code-optimizations/images/excerpt-from-dmxpy-in-linpack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Code-optimizations/images/excerpt-from-dmxpy-in-linpack.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Code-optimizations/images/manual-optimizations-for-dmxpy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Code-optimizations/images/manual-optimizations-for-dmxpy.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Code-optimizations/images/simple-version-of-dmxpy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Code-optimizations/images/simple-version-of-dmxpy.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Code-optimizations/images/strength-reduction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Code-optimizations/images/strength-reduction.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Code-optimizations/local-optimizatioins.md: -------------------------------------------------------------------------------- 1 | # Local Optimizations 2 | 3 | Programmers will protest that they do not write code that contains redundant expressions. In practice, redundancy elimination finds many opportunities. ***Translation from source code to IR elaborates many details, such as address calculations, and introduces redundant expressions***. 4 | 5 | ## Local Value Numbering (LVN) 6 | 7 | ***Local value numbering*** is one of the oldest and most powerful redundancy-elimination techniques.
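A minimal sketch of how local value numbering finds such redundancy within a single basic block is given below; the three-address instruction format and all names are illustrative, not taken from any particular compiler (commutativity and operand redefinition are ignored for brevity):

```julia
# Toy local value numbering over one basic block of three-address code.
# Each instruction is a tuple (op, lhs, rhs, dest).
function local_value_numbering(block)
    vn = Dict{Any,Int}()           # operand or expression key -> value number
    holder = Dict{Tuple,String}()  # expression key -> variable holding its value
    fresh = 0
    getvn(x) = haskey(vn, x) ? vn[x] : (vn[x] = (fresh += 1))

    out = Any[]
    for (op, lhs, rhs, dest) in block
        key = (op, getvn(lhs), getvn(rhs))
        if haskey(holder, key)
            # The value was computed earlier in the block: rewrite as a copy.
            push!(out, (:copy, holder[key], nothing, dest))
            vn[dest] = vn[key]
        else
            push!(out, (op, lhs, rhs, dest))
            vn[dest] = getvn(key)
            holder[key] = dest
        end
    end
    return out
end

# The second `a + b` is recognized as redundant and becomes a copy of t1.
block = [(:+, "a", "b", "t1"), (:+, "a", "b", "t2"), (:*, "t1", "c", "t3")]
local_value_numbering(block)
```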
8 | -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Compiler_and_Interpreter.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Compiler_and_Interpreter.pdf -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Dependence_analysis/README.md: -------------------------------------------------------------------------------- 1 | TBD 2 | -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Dependence_analysis/dependence_abstraction/contents.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Dependence_analysis/dependence_abstraction/contents.tex -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Dependence_analysis/dependence_abstraction/main.tex: -------------------------------------------------------------------------------- 1 | \documentclass {article} 2 | 3 | \input{structure.tex} % Include the file specifying the document structure and custom commands 4 | \title{Dependence Abstraction} 5 | \author{Ying Cao} 6 | \date{\today} 7 | 8 | \begin{document} 9 | 10 | \maketitle 11 | \tableofcontents 12 | \newpage 13 | \input{contents.tex} 14 | 15 | { 16 | \small 17 | \raggedright 18 | \bibliographystyle{ieeetr} 19 | % or, abbrv, acm, alpha, apalike, ieeetr, plain, siam, unsrt 20 | \begin{spacing}{1} 21 | \bibliography{references.bib} 22 | \end{spacing} 23 | } 24 | \end{document} 25 | -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Dependence_analysis/dependence_abstraction/references.bib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Dependence_analysis/dependence_abstraction/references.bib -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Intermediate-Representations/images/AST-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Intermediate-Representations/images/AST-example.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Intermediate-Representations/images/DAG-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Intermediate-Representations/images/DAG-example.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Intermediate-Representations/images/IR-level-of-abstraction.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Intermediate-Representations/images/IR-level-of-abstraction.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Intermediate-Representations/images/an-simple-example-of-dependency-graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Intermediate-Representations/images/an-simple-example-of-dependency-graph.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Intermediate-Representations/images/dependence-graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Intermediate-Representations/images/dependence-graph.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Intermediate-Representations/images/different-levels-of-abstraction-for-an-array-subscript-reference.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Intermediate-Representations/images/different-levels-of-abstraction-for-an-array-subscript-reference.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Intermediate-Representations/images/naming-leads-to-different-translations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Intermediate-Representations/images/naming-leads-to-different-translations.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Intermediate-Representations/images/one-address-code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Intermediate-Representations/images/one-address-code.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Intermediate-Representations/images/three-address-code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Intermediate-Representations/images/three-address-code.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/Terminology.md: -------------------------------------------------------------------------------- 1 | # Schedule 2 | 3 | # Polyhedra/Polyhedron 4 | 5 | # Dependence system 6 | 7 | # Lexicographic order 8 | 9 | If $R_1$ is a definition and $R_2$ a use, lexico-positive points of $\Pi$ correspond to data dependences from $S_1$ to $S_2$ (from write to read), while lexico-negative points correspond to anti dependences from $S_2$ to 
$S_1$ (from read to write). 10 | 11 | # Legality of Unimodular Transformations 12 | 13 | # Fully permutabiliy 14 | 15 | # Wavefront transformation 16 | -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/a_single_rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/a_single_rnn.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/bi_directional_rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/bi_directional_rnn.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/bi_directional_rnn.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/bi_directional_rnn.pptx -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/different_dependences.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/different_dependences.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/dilated_rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/dilated_rnn.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/example-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/example-01.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/grid_rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/grid_rnn.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/polyhedral_representation.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/polyhedral_representation.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/stack_rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/stack_rnn.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/wh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/wh.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/Basics/images/workflow.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/dependence_analysis.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/Denpendence_analysis/dependence_analysis.pdf -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Loop_analysis/Polyhedral_representation_in_Pet/references.bib: -------------------------------------------------------------------------------- 1 | @article{lamport1974parallel, 2 | title={The parallel execution of do loops}, 3 | author={Lamport, Leslie}, 4 | journal={Communications of the ACM}, 5 | volume={17}, 6 | number={2}, 7 | pages={83--93}, 8 | year={1974}, 9 | publisher={ACM New York, NY, USA} 10 | } 11 | @article{wolf1991loop, 12 | title={A loop transformation theory and an algorithm to maximize parallelism}, 13 | author={Wolf, Michael E and Lam, Monica S}, 14 | journal={IEEE Transactions on Parallel \& Distributed Systems}, 15 | number={4}, 16 | pages={452--471}, 17 | year={1991}, 18 | publisher={IEEE} 19 | } 20 | @book{banerjee2013loop, 21 | title={Loop parallelization}, 22 | author={Banerjee, Utpal}, 23 | year={2013}, 24 | publisher={Springer Science \& Business Media} 25 | } 26 | -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Loop_analysis/Polyhedral_representation_in_Pet/section1.tex: -------------------------------------------------------------------------------- 1 | \begin{frame}{Definitions and Notations in isl} 2 | \begin{block}{Named Integer Tuples} 3 | A \textit{named integer tuple} consists of an identifier (name) and a sequence of integer values. 
4 | The identifier may be omitted and the sequence of integers may have a zero length. 5 | \end{block} 6 | 7 | \item \textbf{Notation}: 8 | 9 | \end{frame} 10 | -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Loop_analysis/Polyhedral_representation_in_Pet/slides.tex: -------------------------------------------------------------------------------- 1 | \documentclass[10pt,aspectratio=43,mathserif]{beamer} 2 | 3 | \input{structure.tex} 4 | 5 | \title[] {Represent a Polyhedral Model using \textit{isl}} 6 | \author{Ying Cao} 7 | \institute {} 8 | \date{\today} 9 | 10 | \begin {document} 11 | % \begin{changemargin}{0.2cm}{0.2cm} 12 | 13 | \begin {frame} 14 | \titlepage 15 | \end {frame} 16 | 17 | % \AtBeginSection[] 18 | % { 19 | % \begin{frame} 20 | % \frametitle{Outlines} 21 | % \tableofcontents[ 22 | % %currentsection, 23 | % % currentsubsection, 24 | % hideallsubsections, 25 | % sectionstyle= hide, % show 26 | % % subsectionstyle=shaded % show 27 | % ] 28 | % \end{frame} 29 | % } 30 | 31 | \section{Definitions and notations in isl} 32 | \input{section1.tex} 33 | 34 | % \section{Issues, Next Plans} 35 | % \input{plans.tex} 36 | 37 | \section{References} 38 | \begin{frame}[allowframebreaks]{References} 39 | \bibliographystyle{ieeetr} 40 | \bibliography{references.bib} 41 | \end{frame} 42 | 43 | % \end{changemargin} 44 | \end {document} 45 | -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Loop_analysis/The_parallel_execution_of_do_loops.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/The_parallel_execution_of_do_loops.pdf -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Loop_analysis/auto-vectorization/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Loop_analysis/auto-vectorization/README.md -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/A_Performance_Vocabulary_for_Affine_Loop_Transformations/contents.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/A_Performance_Vocabulary_for_Affine_Loop_Transformations/contents.tex -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/A_Performance_Vocabulary_for_Affine_Loop_Transformations/main.tex: -------------------------------------------------------------------------------- 1 | \documentclass {article} 2 | 3 | \input{structure.tex} % Include the file specifying the document structure and custom commands 4 | \title{Note for 5 | \textit{\href{https://arxiv.org/pdf/1811.06043.pdf}{A performance vocabulary for affine loop transformations}}} 6 | \author{Ying Cao} 7 | \date{\today} 8 | 9 | \begin{document} 10 | 11 | \maketitle 12 | \tableofcontents 13 | \newpage 14 | \input{contents.tex} 15 | 16 | { 17 | \small 18 | \raggedright 19 | \bibliographystyle{ieeetr} 20 | % or, abbrv, acm, 
alpha, apalike, ieeetr, plain, siam, unsrt 21 | \begin{spacing}{1} 22 | \bibliography{references.bib} 23 | \end{spacing} 24 | } 25 | \end{document} 26 | -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/A_Performance_Vocabulary_for_Affine_Loop_Transformations/references.bib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/A_Performance_Vocabulary_for_Affine_Loop_Transformations/references.bib -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/Data_dependence_and_PIP/contents.tex: -------------------------------------------------------------------------------- 1 | \section{Formulation of the data dependence problem} 2 | 3 | \section{PIP and its output} 4 | 5 | \section{What can we do when we have complete and accurate data dependence?} 6 | -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/Data_dependence_and_PIP/main.tex: -------------------------------------------------------------------------------- 1 | \documentclass {article} 2 | 3 | \input{structure.tex} % Include the file specifying the document structure and custom commands 4 | \title{Data dependence and PIP} 5 | \author{Ying Cao} 6 | \date{\today} 7 | 8 | \begin{document} 9 | 10 | \maketitle 11 | \tableofcontents 12 | \newpage 13 | \input{contents.tex} 14 | 15 | { 16 | \small 17 | \raggedright 18 | \bibliographystyle{ieeetr} 19 | % or, abbrv, acm, alpha, apalike, ieeetr, plain, siam, unsrt 20 | \begin{spacing}{1} 21 | \bibliography{references.bib} 22 | \end{spacing} 23 | } 24 | \end{document} 25 | -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/Data_dependence_and_PIP/references.bib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/Data_dependence_and_PIP/references.bib -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/Mathmatical_foundations/concepts.md: -------------------------------------------------------------------------------- 1 | # [Paritally Ordered Set]() 2 | 3 | A _**partially ordered set**_ (_**also poset**_) formalizes and generalizes the intuitive concept of an _ordering_, _sequencing_, or _arrangement_ of the elements of a set. A poset consists of _**a set together with a binary relation**_ indicating that, for certain pairs of elements in the set, one of the elements precedes the other in the ordering. The _**relation itself is called a "partial order"**_. 4 | 5 | The word partial in the names "partial order" and "partially ordered set" is used as an indication that _**not every pair of elements needs to be comparable**_. That is, there may be pairs of elements for which neither element precedes the other in the poset. Partial orders thus generalize total orders, in which every pair is comparable. 
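A concrete case: order the positive integers by divisibility, so that a precedes b exactly when a divides b. The minimal sketch below, assuming nothing beyond base Julia, shows one comparable pair and one incomparable pair, which is precisely why this order is partial rather than total:

```julia
# Divisibility is reflexive, antisymmetric, and transitive on the positive
# integers, but it does not make every pair comparable.
divides(a, b) = b % a == 0
comparable(a, b) = divides(a, b) || divides(b, a)

comparable(2, 8)  # true:  2 divides 8
comparable(2, 3)  # false: neither divides the other, so 2 and 3 are incomparable
```

For this poset the greatest lower bound and least upper bound of two numbers are their gcd and lcm, which is exactly the lattice example discussed next.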
6 | 7 | # [Lattice](https://en.wikipedia.org/wiki/Lattice_(order)) 8 | 9 | A lattice consists of a partially ordered set in which every two elements have a unique supremum (also called a least upper bound or join) and a unique infimum (also called a greatest lower bound or meet). 10 | 11 | An example is given by the natural numbers, partially ordered by divisibility, for which the unique supremum is the least common multiple and the unique infimum is the greatest common divisor. 12 | -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/More_Legal_Transformations_for_Locality/images/SCoP_decomposition.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/More_Legal_Transformations_for_Locality/images/SCoP_decomposition.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/More_Legal_Transformations_for_Locality/images/a_skewing_transformation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/More_Legal_Transformations_for_Locality/images/a_skewing_transformation.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/More_Legal_Transformations_for_Locality/images/farkas_lemma.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/More_Legal_Transformations_for_Locality/images/farkas_lemma.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/More_Legal_Transformations_for_Locality/main.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/More_Legal_Transformations_for_Locality/main.pdf -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/More_Legal_Transformations_for_Locality/main.tex: -------------------------------------------------------------------------------- 1 | \documentclass {article} 2 | 3 | \input{structure.tex} % Include the file specifying the document structure and custom commands 4 | \title{Note for 5 | \textit{\href{https://hal.inria.fr/inria-00001056/document}{More Legal Transformations for Locality}}} 6 | \author{Ying Cao} 7 | \date{\today} 8 | 9 | \begin{document} 10 | 11 | \maketitle 12 | \tableofcontents 13 | \newpage 14 | \input{contents.tex} 15 | 16 | { 17 | \small 18 | \raggedright 19 | \bibliographystyle{ieeetr} 20 | % or, abbrv, acm, alpha, apalike, ieeetr, plain, siam, unsrt 21 | \begin{spacing}{1} 22 | \bibliography{references.bib} 23 | \end{spacing} 24 | } 25 | \end{document} 26 | -------------------------------------------------------------------------------- 
/paper_notes/compiler-stuffs/Polyhedral_compilatioin/More_Legal_Transformations_for_Locality/references.bib: -------------------------------------------------------------------------------- 1 | @Misc{Bas12, 2 | author={Bastoul, C\'{e}dric}, 3 | title={\textit{Contributions to High-Level Program Optimization}. {H}abilitation {T}hesis. {P}aris-{S}ud {U}niversity, {F}rance}, 4 | month=dec, 5 | year=2012, 6 | } 7 | 8 | @PhdThesis{TBas, 9 | author={Bastoul, C\'{e}dric}, 10 | title={Improving Data Locality in Static Control Programs}, 11 | school={University Paris 6, Pierre et Marie Curie, France}, 12 | month=dec, 13 | year=2004, 14 | } 15 | 16 | @article{xue1997tiling, 17 | title={\href{http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.26.9719&rep=rep1&type=pdf}{On tiling as a loop transformation}}, 18 | author={Xue, Jingling}, 19 | journal={Parallel Processing Letters}, 20 | volume={7}, 21 | number={04}, 22 | pages={409--424}, 23 | year={1997}, 24 | publisher={World Scientific} 25 | } 26 | @inproceedings{griebl1998code, 27 | title={\href{https://www.infosun.fim.uni-passau.de/publications/docs/GLW98pact.pdf}{Code generation in the polytope model}}, 28 | author={Griebl, Martin and Lengauer, Christian and Wetzel, Sabine}, 29 | booktitle={Proceedings. 1998 International Conference on Parallel Architectures and Compilation Techniques (Cat. No. 98EX192)}, 30 | pages={106--111}, 31 | year={1998}, 32 | organization={IEEE} 33 | } 34 | @inproceedings{lengauer1993loop, 35 | title={\href{http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.29.8716&rep=rep1&type=pdf}{Loop parallelization in the polytope model}}, 36 | author={Lengauer, Christian}, 37 | booktitle={International Conference on Concurrency Theory}, 38 | pages={398--416}, 39 | year={1993}, 40 | organization={Springer} 41 | } 42 | -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/Polyhedral_representation/2d_puls_1.tex: -------------------------------------------------------------------------------- 1 | \section{the $2\mathbf{d}+1$ representation} 2 | 3 | Refer to paper \cite{girbal2006semi}\cite{vasilache2007scalable}\cite{bastoul2011openscop} for details. 4 | 5 | \subsection {Classical loop transformations} 6 | 7 | \begin{enumerate} 8 | \item only modify the iteration domain but do not affect the order in which 9 | statement instances are executed or the way arrays are accessed; 10 | \begin{enumerate} 11 | \item loop unrolling 12 | \item strip-mining 13 | \end{enumerate} 14 | \item modify both the iteration domain and the schedule; 15 | \begin{enumerate} 16 | \item tiling: a combination of strip-mining and loop interchange; 17 | \end{enumerate} 18 | \item modify the schedule only; 19 | \begin{enumerate} 20 | \item shifting/pipelining 21 | \end{enumerate} 22 | \item modify the array subscripts; 23 | \begin{enumerate} 24 | \item privatization 25 | \end{enumerate} 26 | \item only modify the array declarations (data layout); 27 | \begin{enumerate} 28 | \item padding 29 | \end{enumerate} 30 | \end{enumerate} 31 | 32 | \subsection{Polyhedral model} 33 | 34 | The polyhedral representation is a semantics-based representation instead of 35 | a syntax-based representation.
It clearly separates the four different types of 36 | actions performed by program transformations: 37 | 38 | \begin{enumerate} 39 | \item modification of the iteration domain (loop bounds and strides); 40 | \item modification of the schedule of each individual statement; 41 | \item modification of access functions (array subscripts); 42 | \item modification of the data layout (array declarations). 43 | \end{enumerate} 44 | 45 | Loop transformations are expressed as "syntax-free" function compositions. 46 | 47 | Arbitrarily complex compositions of classical transformations can be captured 48 | in one single transformation step of the polyhedral model. 49 | -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/Polyhedral_representation/main.tex: -------------------------------------------------------------------------------- 1 | \documentclass {article} 2 | 3 | \input{structure.tex} % Include the file specifying the document structure and custom commands 4 | \title{\href{https://link.springer.com/content/pdf/10.1007/s10766-006-0012-3.pdf}{Semi-Automatic Composition of Loop Transformations for Deep Parallelism and Memory Hierarchies}} 5 | \author{Ying Cao} 6 | \date{\today} 7 | 8 | \begin{document} 9 | 10 | \maketitle 11 | \tableofcontents 12 | \newpage 13 | \input{2d_puls_1.tex} 14 | \input{schedule_tree.tex} 15 | 16 | { 17 | \small 18 | \raggedright 19 | \bibliographystyle{ieeetr} 20 | % or, abbrv, acm, alpha, apalike, ieeetr, plain, siam, unsrt 21 | \begin{spacing}{1} 22 | \bibliography{references.bib} 23 | \end{spacing} 24 | } 25 | \end{document} 26 | -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/Polyhedral_representation/schedule_tree.tex: -------------------------------------------------------------------------------- 1 | \section{Schedule Trees} 2 | Refer to paper \cite{grosser2014decoupled} \cite{verdoolaege2014schedule} for details. 3 | -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/README.md: -------------------------------------------------------------------------------- 1 | # Polyhedral Compilation 2 | 3 | ## Polyhedral Program Representation 4 | 5 | 1. Bastoul, Cédric, et al. "[Putting polyhedral loop transformations to work](https://hal.inria.fr/file/index/docid/71681/filename/RR-4902.pdf)." International Workshop on Languages and Compilers for Parallel Computing. Springer, Berlin, Heidelberg, 2003. 6 | - Clan project: http://icps.u-strasbg.fr/~bastoul/development/clan/#DL 7 | - [Clan - a polyhedral representation extractor for high level programs](http://icps.u-strasbg.fr/~bastoul/development/clan/docs/clan.html) 8 | 9 | >_This is the paper for Clan, a tool that extracts the polyhedral program representation from C programs._ 10 | > 11 | >_This paper answers the question of what a polyhedral program representation looks like and helps build a quick understanding of, and intuition for, the basic concepts and workflow of polyhedral compilation._ 12 | > 13 | >_The paper goes through the basic concepts and the whole workflow, with the polyhedral program representation as its focus, and lists the inputs, outputs, requirements and formulations of each step without digging into the whys and hows._ 14 | 15 | 1. Verdoolaege, Sven, and Tobias Grosser. "[Polyhedral extraction tool](https://www.grosser.es/publications/grosser-2012-Polyhedral-Extraction-Tool-IMPACT.pdf)."
Second International Workshop on Polyhedral Compilation Techniques (IMPACT’12), Paris, France. 2012. 16 | - Pet project: https://github.com/Meinersbur/pet 17 | - The doctoral thesis of Pet's author. Chapter 9 of: Grosser, Tobias. [A decoupled approach to high-level loop optimization: tile shapes, polyhedral building blocks and low-level compilers](https://tel.archives-ouvertes.fr/tel-01144563/document). Diss. 2014. 18 | 19 | 1. [Polyhedral Process Networks](https://www.semanticscholar.org/paper/Polyhedral-Process-Networks-Verdoolaege/e8f64c573a680cddb6ede148c1778b94afb70830) 20 | -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/Tiramisu/tiramisu_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/Tiramisu/tiramisu_overview.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/images/introduction-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/images/introduction-01.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/images/introduction-02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/images/introduction-02.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/images/introduction-03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/images/introduction-03.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/images/introduction-04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/images/introduction-04.png -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/isl/contents.tex: -------------------------------------------------------------------------------- 1 | Refer to this tutorial\cite{verdoolaege2016presburger} for the first-hand knowledge. 2 | 3 | Concepts are highlighted in \textcolor{vr}{violet red}, and its corresponding isl implementation is highlighted in \textcolor{og}{oliver green}. 
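% Added illustration (mine, not from the tutorial): a tiny example of the kind of objects the sections below manipulate.
For the loop nest \texttt{for (i = 0; i < N; ++i) for (j = 0; j <= i; ++j) S(i, j);}, the iteration
domain of statement $S$ is the Presburger set
\[ \{\, S(i, j) \;:\; 0 \le i < N \wedge 0 \le j \le i \,\}, \]
which in isl's textual notation reads roughly as \texttt{[N] -> \{ S[i, j] : 0 <= i < N and 0 <= j <= i \}}.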
4 | 5 | \input{sets_and_maps.tex} 6 | \input{presburger_sets_and_relations.tex} 7 | \input{pw_quasi_affine.tex} 8 | -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/isl/main.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/isl/main.pdf -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/isl/main.tex: -------------------------------------------------------------------------------- 1 | \documentclass {article} 2 | 3 | \input{structure.tex} % Include the file specifying the document structure and custom commands 4 | \title{\textit{isl} Basics} 5 | \author{Ying Cao} 6 | \date{\today} 7 | 8 | \begin{document} 9 | 10 | \maketitle 11 | \tableofcontents 12 | \newpage 13 | \input{contents.tex} 14 | 15 | { 16 | \small 17 | \raggedright 18 | \bibliographystyle{ieeetr} 19 | % or, abbrv, acm, alpha, apalike, ieeetr, plain, siam, unsrt 20 | \begin{spacing}{1} 21 | \bibliography{references.bib} 22 | \end{spacing} 23 | } 24 | \end{document} 25 | -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/isl/references.bib: -------------------------------------------------------------------------------- 1 | @article{verdoolaege2016presburger, 2 | title={\href{https://lirias.kuleuven.be/retrieve/361209}{Presburger formulas and polyhedral compilation}}, 3 | author={Verdoolaege, Sven}, 4 | year={2016} 5 | } 6 | -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/polyhedral_background_01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/polyhedral_background_01.pdf -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/polyhedral_background_02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/polyhedral_background_02.pdf -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/Polyhedral_compilatioin/polyhedral_background_03.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/Polyhedral_compilatioin/polyhedral_background_03.pdf -------------------------------------------------------------------------------- /paper_notes/compiler-stuffs/intermediate-code-generation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/compiler-stuffs/intermediate-code-generation.md -------------------------------------------------------------------------------- /paper_notes/data_processing_systems/README.md: 
-------------------------------------------------------------------------------- 1 | 1. Murray D G, Schwarzkopf M, Smowton C, et al. [Ciel: a universal execution engine for distributed data-flow computing](https://web.kaust.edu.sa/Faculty/MarcoCanini/classes/CS345/S19/papers/ciel.pdf)[C]//Proc. 8th ACM/USENIX Symposium on Networked Systems Design and Implementation. 2011: 113-126. 2 | 1. Malewicz, Grzegorz, et al. "[Pregel: a system for large-scale graph processing](https://www.researchgate.net/profile/James-Dehnert/publication/221257383_Pregel_A_system_for_large-scale_graph_processing/links/00b7d537c615821fa4000000/Pregel-A-system-for-large-scale-graph-processing.pdf)." Proceedings of the 2010 ACM SIGMOD International Conference on Management of data. 2010. 3 | 1. Rocklin M. [Dask: Parallel computation with blocked algorithms and task scheduling](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.825.5314&rep=rep1&type=pdf)[C]//Proceedings of the 14th python in science conference. Austin, TX: SciPy, 2015, 126. 4 | 1. Power R, Li J. [Piccolo: Building Fast, Distributed Programs with Partitioned Tables](https://static.usenix.org/events/osdi10/tech/full_papers/Power.pdf)[C]//OSDI. 2010, 10: 293-306. 5 | -------------------------------------------------------------------------------- /paper_notes/data_processing_systems/figures/ciel_cluster_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/data_processing_systems/figures/ciel_cluster_architecture.png -------------------------------------------------------------------------------- /paper_notes/data_processing_systems/figures/dynamic-task-graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/data_processing_systems/figures/dynamic-task-graph.png -------------------------------------------------------------------------------- /paper_notes/dataflow-architectures/README.md: -------------------------------------------------------------------------------- 1 | # Reading List 2 | 3 | ## Dataflow architecutre 4 | 5 | - [ ] [Timely Dataflow: A model](https://static.googleusercontent.com/media/research.google.com/zh-CN//pubs/archive/43546.pdf) 6 | - [ ] [Dataflow computers: their history and future](https://csrl.cse.unt.edu/kavi/Research/encyclopedia-dataflow.pdf) 7 | - [ ] [Reducing control overhead in dataflow architectures](http://arcade.cs.columbia.edu/ws-pact06.pdf) 8 | 9 | ## Scheduled Dataflow 10 | 11 | - [ ] [Scheduled dataflow: execution paradigm, architecture, and performance evaluation](https://www.researchgate.net/profile/Roberto_Giorgi/publication/3044386_Scheduled_Dataflow_Execution_paradigm_architecture_and_performance_evaluation/links/0912f50c049bbceb3c000000/Scheduled-Dataflow-Execution-paradigm-architecture-and-performance-evaluation.pdf?origin=publication_detail) 12 | 13 | ## Some slides 14 | 15 | - [ ] [Dataflow architectures](https://homes.cs.washington.edu/~kstrauss/presentations/df-class.pdf) 16 | - [ ] [Computer Architecture: Dataflow (Part I)](https://www.archive.ece.cmu.edu/~ece740/f13/lib/exe/fetch.php?media=onur-740-fall13-module5.2.1-dataflow-part1.pdf) 17 | 18 | ## Miscellanea 19 | 20 | 1. [mueller's publicatioins](https://arcb.csc.ncsu.edu/~mueller/publications.html#mueller91) 21 | 1. 
[ECE 4530 Hardware/Software Codesign](https://schaumont.dyn.wpi.edu/ece4530f19/) 22 | 1. [dMazeRunner: Executing Perfectly Nested Loops on Dataflow Accelerators](https://dl.acm.org/doi/pdf/10.1145/3358198) 23 | -------------------------------------------------------------------------------- /paper_notes/dataflow-architectures/images/Manchester-dynamic-dataflow-machine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/Manchester-dynamic-dataflow-machine.png -------------------------------------------------------------------------------- /paper_notes/dataflow-architectures/images/dataflow-accumulator.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/dataflow-accumulator.png -------------------------------------------------------------------------------- /paper_notes/dataflow-architectures/images/dataflow-graph-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/dataflow-graph-1.png -------------------------------------------------------------------------------- /paper_notes/dataflow-architectures/images/ill-formed_multi-rate-dataflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/ill-formed_multi-rate-dataflow.png -------------------------------------------------------------------------------- /paper_notes/dataflow-architectures/images/img1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/img1.png -------------------------------------------------------------------------------- /paper_notes/dataflow-architectures/images/img2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/img2.png -------------------------------------------------------------------------------- /paper_notes/dataflow-architectures/images/paper-screenshot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/paper-screenshot-1.png -------------------------------------------------------------------------------- /paper_notes/dataflow-architectures/images/periodic_admissible_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/periodic_admissible_schedule.png -------------------------------------------------------------------------------- /paper_notes/dataflow-architectures/images/solve_G.q.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/solve_G.q.png -------------------------------------------------------------------------------- /paper_notes/dataflow-architectures/images/static-dataflow-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/static-dataflow-architecture.png -------------------------------------------------------------------------------- /paper_notes/dataflow-architectures/images/topology_matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/topology_matrix.png -------------------------------------------------------------------------------- /paper_notes/dataflow-architectures/images/two-input_add_actor_and_two-output_duplicate_actor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dataflow-architectures/images/two-input_add_actor_and_two-output_duplicate_actor.png -------------------------------------------------------------------------------- /paper_notes/dl-compiler/Glow/Glow.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-compiler/Glow/Glow.pdf -------------------------------------------------------------------------------- /paper_notes/dl-compiler/Glow/images/low-level-glow-ir.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-compiler/Glow/images/low-level-glow-ir.png -------------------------------------------------------------------------------- /paper_notes/dl-compiler/MLIR/MLIR.md: -------------------------------------------------------------------------------- 1 | # MLIR 2 | 3 | "flow-sensitive" type checking. 4 | 5 | 6 | # Reference 7 | 8 | 1. [Multi-Level Intermediate Representation Overview](https://github.com/tensorflow/mlir#multi-level-intermediate-representation-overview) 9 | 1. [MLIR Tutorial: Building a Compiler with MLIR](https://llvm.org/devmtg/2019-04/slides/Tutorial-AminiVasilacheZinenko-MLIR.pdf) 10 | 1. [A discussion about MLIR from the TVM community](https://discuss.tvm.ai/t/google-lasted-work-mlir-primer/1721/2). 11 | 1. [A Reddit discussion about MLIR](https://www.reddit.com/r/ProgrammingLanguages/comments/at0alm/mlir_primer_a_compiler_infrastructure_for_the_end/) 12 | 1. [2019 EuroLLVM Developers’ Meeting: “MLIR: Multi-Level Intermediate Representation”](https://www.youtube.com/watch?v=qzljG6DKgic&feature=youtu.be) 13 | -------------------------------------------------------------------------------- /paper_notes/dl-compiler/MLIR/README.md: -------------------------------------------------------------------------------- 1 | # References 2 | 3 | 1. [The github project](https://github.com/tensorflow/mlir) 4 | 1. Tow vedios on Youtube 5 | 1. 
[2019 EuroLLVM Developers’ Meeting: Mehdi & Vasilache & Zinenko “Building a Compiler with MLIR”](https://www.youtube.com/watch?v=cyICUIZ56wQ) 6 | -------------------------------------------------------------------------------- /paper_notes/dl-compiler/MLIR/swift_for_tensorflow.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-compiler/MLIR/swift_for_tensorflow.pdf -------------------------------------------------------------------------------- /paper_notes/dl-compiler/TVM/TVM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-compiler/TVM/TVM.pdf -------------------------------------------------------------------------------- /paper_notes/dl-compiler/XLA/XLA.md: -------------------------------------------------------------------------------- 1 | # XLA: The TensorFlow compiler framework 2 | 3 | * [TensorFlow, Compiled!](https://autodiff-workshop.github.io/slides/JeffDean.pdf) 4 | 5 | ## Goals 6 | 7 | 1. Improved execution speed. 8 | 1. Improved tensor buffer memory usage. 9 | 1. Make the performance of compositions of low-level ops match that of hand-written fused implementations. 10 | 1. Improved mobile footprint. Eliminate the TensorFlow runtime. 11 | 1. Improved portability. 12 | * It should be relatively easy to write a new back-end for novel hardware. 13 | 14 | ## XLA 15 | 16 | * The semantics of operations are _**high level**_. This preserves enough information to allow sophisticated scheduling and optimization. 17 | 18 | ![](https://www.tensorflow.org/images/how-does-xla-work.png) 19 | 20 | * XLA program = static, decomposed TF ops 21 | * math-looking _**primitive ops**_ 22 | * make _**macro-ops by composition**_ 23 | 24 | ### A key question: why write every new macro-op? 25 | 26 | * Why write every new macro-op in C++? 27 | * Why can't we compose new operators out of existing TF ops? 28 | 29 | ### Compilation benefits 30 | 31 | 1. Eliminates op dispatch overhead. 32 | 1. Fuses ops. 33 | * reduce memory access 34 | 1. Memory usage analysis 35 | * reuse memory 36 | * update in-place 37 | 1. Models to executables: reduce executable size by generating what you need. 38 | -------------------------------------------------------------------------------- /paper_notes/dl-compiler/figures/sm-and-sub-core-of-volta.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-compiler/figures/sm-and-sub-core-of-volta.png -------------------------------------------------------------------------------- /paper_notes/dl-models/beyond_transformer/README.md: -------------------------------------------------------------------------------- 1 | 1. [VanillaNet: the Power of Minimalism in 2 | Deep Learning](https://arxiv.org/pdf/2305.12972.pdf) 3 | 1.
[RWKV: Reinventing RNNs for the Transformer Era](https://arxiv.org/abs/2305.13048) 4 | -------------------------------------------------------------------------------- /paper_notes/dl-models/miscellanea/Geometric_deep_learning.md: -------------------------------------------------------------------------------- 1 | # [Geometric Deep Learning: Grids, Groups, Graphs, Geodesics, and Gauges](https://arxiv.org/pdf/2104.13478.pdf) 2 | -------------------------------------------------------------------------------- /paper_notes/dl-models/miscellanea/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/miscellanea/README.md -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/CW-RNN/A_Clockwork_RNN.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/CW-RNN/A_Clockwork_RNN.pdf -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/GridLSTM/GridLSTM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/GridLSTM/GridLSTM.pdf -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/HM-LSTM/Hierarchical_multiscale_RNN.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/HM-LSTM/Hierarchical_multiscale_RNN.pdf -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/How_Much_Attention_Do_You_Need.md: -------------------------------------------------------------------------------- 1 | # [How Much Attention Do You Need](http://aclweb.org/anthology/P18-1167) 2 | 3 | 1. The performance of recurrent and convolutional models can come very close to Transformer performance by borrowing concepts from the Transformer architecture while not using self-attention. 4 | 1. Self-attention is much more important for the encoder side than for the decoder side. 5 | * On the encoder side, self-attention can be replaced by an RNN or CNN without a loss in performance in most settings. 6 | * One surprising experimental result is that even a model without any target-side self-attention performs well. 7 | 1. Source attention on lower encoder layers brings no additional benefit. 8 | 1. The largest gains come from multiple attention mechanisms and residual feed-forward layers.
9 | -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/MD-LSTM/MD-LSTM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/MD-LSTM/MD-LSTM.pdf -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/Mogrifier-LSTM/images/MogrifierLSTM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/Mogrifier-LSTM/images/MogrifierLSTM.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/Mogrifier-LSTM/main.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/Mogrifier-LSTM/main.pdf -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/Mogrifier-LSTM/main.tex: -------------------------------------------------------------------------------- 1 | \documentclass {article} 2 | 3 | \input{structure.tex} % Include the file specifying the document structure and custom commands 4 | \title{Note for 5 | \textit{\href{https://arxiv.org/abs/1909.01792}{Mogrifier LSTM}}} 6 | \author{Ying Cao} 7 | \date{\today} 8 | 9 | \begin{document} 10 | 11 | \maketitle 12 | \tableofcontents 13 | 14 | \begin{info}[Codes information] 15 | 16 | \begin{itemize} 17 | \item Currently, the authors of this paper only release their \href{https://github.com/RMichaelSwan/MogrifierLSTM}{experimental codes} 18 | on the github. 19 | \item The final codes are not released yet. When the codes is available, it should 20 | be at \href{https://github.com/deepmind/lamb}{https://github.com/deepmind/lamb}. 21 | \end{itemize} 22 | \end{info} 23 | 24 | \input{contents.tex} 25 | 26 | { 27 | \small 28 | \raggedright 29 | \bibliographystyle{ieeetr} 30 | % or, abbrv, acm, alpha, apalike, ieeetr, plain, siam, unsrt 31 | \begin{spacing}{1} 32 | \bibliography{references.bib} 33 | \end{spacing} 34 | } 35 | \end{document} 36 | -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/Mogrifier-LSTM/references.bib: -------------------------------------------------------------------------------- 1 | @inproceedings{foerster2017input, 2 | title={Input switched affine networks: an RNN architecture designed for interpretability}, 3 | author={Foerster, Jakob N and Gilmer, Justin and Sohl-Dickstein, Jascha and Chorowski, Jan and Sussillo, David}, 4 | booktitle={Proceedings of the 34th International Conference on Machine Learning-Volume 70}, 5 | pages={1136--1145}, 6 | year={2017}, 7 | organization={JMLR. 
org} 8 | } 9 | @article{ha2016hypernetworks, 10 | title={Hypernetworks}, 11 | author={Ha, David and Dai, Andrew and Le, Quoc V}, 12 | journal={arXiv preprint arXiv:1609.09106}, 13 | year={2016} 14 | } 15 | @article{krause2016multiplicative, 16 | title={Multiplicative LSTM for sequence modelling}, 17 | author={Krause, Ben and Lu, Liang and Murray, Iain and Renals, Steve}, 18 | journal={arXiv preprint arXiv:1609.07959}, 19 | year={2016} 20 | } 21 | -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/Neural_Speed_Reading_via_Skim_RNN/Neural_Speed_Reading_via_Skim_RNN.md: -------------------------------------------------------------------------------- 1 | [The Gumbel-Max Trick for Discrete Distributions](https://hips.seas.harvard.edu/blog/2013/04/06/the-gumbel-max-trick-for-discrete-distributions/) 2 | 3 | [Categorical Variational Autoencoders using Gumbel-Softmax](https://blog.evjang.com/2016/11/tutorial-categorical-variational.html) 4 | -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/ON-LSTM/ON-LSTM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/ON-LSTM/ON-LSTM.pdf -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/Quasi-Recurrent_neural_network/Quasi-Recurrent_neural_network.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/Quasi-Recurrent_neural_network/Quasi-Recurrent_neural_network.pdf -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/RNN_Variants_Slides_190820/RNN_Variants.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/RNN_Variants_Slides_190820/RNN_Variants.pdf -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/RNN_Variants_Slides_190820/images/CudnnLSTM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/RNN_Variants_Slides_190820/images/CudnnLSTM.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/RNN_Variants_Slides_190820/images/Recurrent_neural_network_unfold.svg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/RNN_Variants_Slides_190820/images/Recurrent_neural_network_unfold.svg.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/Sliced_Recurrent_Neural_Networks/Sliced_Recurrent_Neural_Networks.md: -------------------------------------------------------------------------------- 1 | # [Sliced Recurrent Neural 
Networks](https://arxiv.org/abs/1807.02291) 2 | 3 | ## Model Structure 4 | 5 | SRNN has two hyperparameters: 6 | 7 | 1. the slice number $n$ 8 | 1. the number of slicing times $k$ 9 | 10 | The input sequence is $X = [x_1, x_2, \ldots, x_T]$, whose length is $T$. 11 | 12 | 1. Slice $X$ into $n$ subsequences of equal length. 13 | 1. Repeat the above process $k$ times until a pre-defined minimum sequence length is obtained. 14 | 1. Apply an RNN to each subsequence (see the sketch below).
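Below is a toy Python sketch (mine, not the authors' code) of the slicing scheme just described: the sequence is cut into `n` equal parts, the cut is repeated `k` times, and a placeholder recurrence stands in for the RNN that is applied to each subsequence and to the resulting higher-level states.

```python
# Toy sketch of SRNN slicing (illustrative only; not the paper's implementation).

def slice_sequence(x, n, k):
    """Recursively cut x into n equal-length pieces, k times.

    Any remainder is dropped for brevity (the paper pads instead).
    """
    if k == 0 or len(x) < n:
        return x
    step = len(x) // n
    return [slice_sequence(x[i * step:(i + 1) * step], n, k - 1) for i in range(n)]


def simple_rnn(seq, state=0.0):
    """Placeholder recurrence standing in for h_t = f(h_{t-1}, x_t)."""
    for token in seq:
        state = 0.5 * state + token
    return state


def srnn(x, n=2, k=2):
    """Run the placeholder RNN bottom-up over the sliced sequence."""
    def reduce(node):
        if node and isinstance(node[0], list):
            # Inner node: first reduce every child, then run the RNN over
            # the children's final states.
            return simple_rnn([reduce(child) for child in node])
        return simple_rnn(node)  # leaf subsequence
    return reduce(slice_sequence(x, n, k))


if __name__ == "__main__":
    print(srnn([float(i) for i in range(16)], n=2, k=2))
```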

17 | 18 |

19 | 20 | ## My Comments 21 | 22 | Personally, I don't think this work is interesting, for the following reasons: 23 | 24 | 1. SRNN cannot be applied to sequence labeling tasks, and how to apply it to sequence-to-sequence models is unclear and has not been studied. 25 | * It is only evaluated on text classification (sentiment classification). Text classification is a simple task in the NLP field; sometimes it does not require "understanding the semantics of the language" (a good sentiment analysis model does need to understand the semantics, which is also the core challenge in NLP; whether SRNN shows any advantage in modeling semantics requires more careful evaluation). The model can achieve high accuracy by overfitting or by capturing statistical regularities of the training data. 26 | * SRNN cannot even be directly used in an RNN language model. 27 | * The evaluation is insufficient. 28 | 1. SRNN cannot be stacked into multiple layers, which are very important in RNN modeling. If there is only one RNN unit, the state transition between the current and previous state is shallow. 29 | 1. SRNN is not novel. The way it works closely resembles the recursive neural networks proposed by Socher several years ago. I don't think it makes new contributions, and the work is not convincing. 30 | -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/Sliced_Recurrent_Neural_Networks/images/SRNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/Sliced_Recurrent_Neural_Networks/images/SRNN.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/The_Unreasonable_Effectiveness_of_the_Forget_Gate/The_Unreasonable_Effectiveness_of_the_Forget_Gate.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/The_Unreasonable_Effectiveness_of_the_Forget_Gate/The_Unreasonable_Effectiveness_of_the_Forget_Gate.pdf -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/Training_RNNs_as_Fast_as_CNNs/Training_RNNs_as_Fast_as_CNNs.md: -------------------------------------------------------------------------------- 1 | # [Training RNNs as Fast as CNNs](https://arxiv.org/abs/1709.02755) 2 | 3 | ## Model 4 | 5 | ### Main motivations 6 | 7 | 1. _**process the input at each step independently of the other inputs.**_ 8 | 1.
_**do the recurrent combination with relatively lightweight computation (element-wise operations that can be fused into a single kernel function call).**_ 9 | 10 | ### Equations of the Simple Recurrent Unit (SRU) 11 | 12 | * _**linear transformation of the input**_ 13 | $$ \mathbf{\tilde{x}}_t = \mathbf{W}\mathbf{x}_t $$ 14 | 15 | * _**forget gate**_ 16 | 17 | $$ \mathbf{f}_t = \sigma(\mathbf{W}_f\mathbf{x}_t + \mathbf{b}_f) $$ 18 | 19 | * _**reset gate**_ 20 | $$ \mathbf{r}_t = \sigma(\mathbf{W}_r\mathbf{x}_t + \mathbf{b}_r) $$ 21 | 22 | * _**internal state**_ 23 | 24 | $$ \mathbf{c}_t = \mathbf{f}_t \odot \mathbf{c}_{t - 1} + (\mathbf{1} - \mathbf{f}_t) \odot \mathbf{\tilde{x}}_t $$ 25 | 26 | * _**output state**_ 27 | 28 | $$ \mathbf{h}_t = \mathbf{r}_t \odot g(\mathbf{c}_t) + (\mathbf{1} - \mathbf{r}_t) \odot \mathbf{x}_t $$ 29 | -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/Training_RNNs_as_Fast_as_CNNs/Training_RNNs_as_Fast_as_CNNs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/Training_RNNs_as_Fast_as_CNNs/Training_RNNs_as_Fast_as_CNNs.pdf -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/Transformer/README.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/Transformer/README.pdf -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/Transformer/images/QK.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/Transformer/images/QK.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/WaveRNN.md: -------------------------------------------------------------------------------- 1 | # [Efficient Neural Audio Synthesis](https://arxiv.org/pdf/1802.08435.pdf) 2 | -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/images/2d_lstm_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/2d_lstm_1.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/images/3D-GridLSTM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/3D-GridLSTM.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/images/CWRNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/CWRNN.png
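Returning to the SRU equations in `Training_RNNs_as_Fast_as_CNNs.md` above, the sketch below is a minimal numpy rendering of that recurrence (my own illustration, not the paper's released code). It assumes `g` is `tanh` and that the input and hidden sizes are equal so the highway term `x_t` can be added directly; all names are made up for the example.

```python
# Minimal numpy sketch of the SRU recurrence above (illustrative only).
import numpy as np


def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))


def sru_layer(x, W, W_f, b_f, W_r, b_r):
    """x: (seq_len, d); weight matrices: (d, d); biases: (d,)."""
    # The three matrix products depend only on the input, so they can be
    # computed for all time steps at once; only cheap element-wise work
    # remains inside the sequential loop, which is the point of the SRU.
    x_tilde = x @ W
    f = sigmoid(x @ W_f + b_f)
    r = sigmoid(x @ W_r + b_r)

    c = np.zeros(x.shape[1])
    h = np.empty_like(x)
    for t in range(x.shape[0]):
        c = f[t] * c + (1.0 - f[t]) * x_tilde[t]        # internal state
        h[t] = r[t] * np.tanh(c) + (1.0 - r[t]) * x[t]  # output state (g = tanh)
    return h


if __name__ == "__main__":
    rng = np.random.default_rng(0)
    d, T = 4, 5
    W, W_f, W_r = (rng.normal(size=(d, d)) for _ in range(3))
    b_f = b_r = np.zeros(d)
    print(sru_layer(rng.normal(size=(T, d)), W, W_f, b_f, W_r, b_r).shape)
```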
-------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/images/CWRNN.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/CWRNN.pptx -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/images/DilatedRNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/DilatedRNN.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/images/DilatedRNN1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/DilatedRNN1.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/images/GridLSTM-NMT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/GridLSTM-NMT.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/images/HM-LSTM-pre-activation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/HM-LSTM-pre-activation.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/images/LSTM_equation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/LSTM_equation.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/images/active_modules.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/active_modules.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/images/active_modules.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/active_modules.pptx -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/images/boundary_state.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/boundary_state.png 
-------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/images/boundary_state.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/boundary_state.pptx -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/images/hardsigmoid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/hardsigmoid.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/images/hm-lstm-cell-update.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/hm-lstm-cell-update.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/images/hm-lstm-output-hidden.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/hm-lstm-output-hidden.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/images/multi-dimensioanl-rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/multi-dimensioanl-rnn.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/images/multi-dimensional-multi-directional-context.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/multi-dimensional-multi-directional-context.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/images/wh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/wh.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/RNN-modeling/images/wh.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/RNN-modeling/images/wh.pptx -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/ALBERT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/ALBERT.md 
-------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/XLNet.md: -------------------------------------------------------------------------------- 1 | # [XLNet: Generalized Autoregressive Pretraining for Language Understanding](https://arxiv.org/abs/1906.08237) 2 | -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/images/ELMo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/ELMo.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/images/GPT-auxiliary-training-object.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/GPT-auxiliary-training-object.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/images/STLR-figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/STLR-figure.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/images/ULM-FiT-STLR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/ULM-FiT-STLR.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/images/biLM-ELMo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/biLM-ELMo.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/images/dataset-ULM-FiT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/dataset-ULM-FiT.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/images/elmo-vectors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/elmo-vectors.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/images/example-language-inference.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/example-language-inference.jpg 
-------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/images/example-of-auxiliary-prediction-taks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/example-of-auxiliary-prediction-taks.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/images/highlight-bert-LM1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/highlight-bert-LM1.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/images/highlight-bert-LM2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/highlight-bert-LM2.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/images/highlight-bert-LM3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/highlight-bert-LM3.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/images/highlight-bert-input.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/highlight-bert-input.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/images/highway.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/highway.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/images/highway2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/highway2.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/images/how-bert-comes-out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/how-bert-comes-out.png -------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/images/input-of-bert.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/input-of-bert.png
-------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/images/intro.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/intro.png
-------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/images/task-specific-input-transformation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/task-specific-input-transformation.png
-------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/images/transformer-block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/images/transformer-block.png
-------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/pre-training/learning-language-representation-slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/nlp/pre-training/learning-language-representation-slides.pdf
-------------------------------------------------------------------------------- /paper_notes/dl-models/nlp/x-former/README.md: -------------------------------------------------------------------------------- 1 | 1. [The Transformer Family](https://lilianweng.github.io/lil-log/2020/04/07/the-transformer-family.html) 2 |
-------------------------------------------------------------------------------- /paper_notes/dl-models/structured_state_space_models/README.md: -------------------------------------------------------------------------------- 1 | # Papers 2 | 3 | 1. [Hungry Hungry Hippos: Towards Language Modeling with State Space Models](https://arxiv.org/abs/2212.14052) 4 | 1. [Efficiently Modeling Long Sequences with Structured State Spaces](https://arxiv.org/abs/2111.00396) 5 | 6 | # Blogs 7 | 8 | 1. [Structured State Spaces for Sequence Modeling (S4)](https://hazyresearch.stanford.edu/blog/2022-01-14-s4-1) 9 | 1. [Simplifying S4](https://hazyresearch.stanford.edu/blog/2022-06-11-simplifying-s4) 10 | 1. [The Annotated S4](https://srush.github.io/annotated-s4/) 11 | 1. [H3: Language Modeling with State Space Models and (Almost) No Attention](https://hazyresearch.stanford.edu/blog/2023-01-20-h3) 12 | 13 | 1. Simplified State Space Layers for Sequence Modeling (S5): [[Zhihu]](https://zhuanlan.zhihu.com/p/616238788) 14 | 1. [Mamba: an epic upgrade to S4](https://zhuanlan.zhihu.com/p/661237120) 15 | 1.
[After VQ-ing the Keys, the Transformer's complexity becomes linear](https://kexue.fm/archives/9844)
-------------------------------------------------------------------------------- /paper_notes/dl-models/structured_state_space_models/maba/README.md: -------------------------------------------------------------------------------- 1 | # Mamba 2 | 3 | A fundamental problem in sequence modeling is compressing the context into a state. LTI (Linear Time Invariant) systems are systems whose parameters do not depend on the input; both CNN and RNN models can be regarded as LTI. **The success of attention arguably suggests that the system's dynamics should be data dependent.** When computing attention, the tokens of the Q, K, and V sequences interact with one another. 4 | 5 | An RNN compresses the context into a fixed-length state; in contrast, attention does not compress the context at all. In autoregressive prediction, compressing the context into a fixed-size state is what makes an RNN efficient in both time and space, whereas attention keeps the entire context uncompressed, which is inefficient in both computation and memory. 6 | 7 | The effectiveness of an RNN model depends on how well the context is compressed. 8 |
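Below is a minimal NumPy sketch of this contrast (my own illustration, not from the paper; the matrices `A`, `B`, the sizes, and all names are arbitrary): the RNN folds the whole history into one fixed-size state, while attention keeps every past token and revisits all of them for each new query.

```python
import numpy as np

d = 8          # state/hidden size: constant, independent of sequence length
T = 100        # sequence length
xs = np.random.randn(T, d)

# RNN-style: the whole history is compressed into one fixed-size state h.
# Memory stays O(d) and each step costs O(d^2), no matter how large T gets.
A = 0.1 * np.random.randn(d, d)
B = 0.1 * np.random.randn(d, d)
h = np.zeros(d)
for x in xs:
    h = np.tanh(A @ h + B @ x)    # next state depends only on (previous state, current token)

# Attention-style (single query, no projections, for brevity): nothing is compressed.
# Every past token is kept, so memory grows with T and each new token costs O(T * d).
q = xs[-1]
scores = xs @ q / np.sqrt(d)      # compare the query against all T cached tokens
weights = np.exp(scores - scores.max())
weights /= weights.sum()
context = weights @ xs            # weighted sum over the full, uncompressed context

print(h.shape, context.shape)     # both are (d,), but the cost of producing them differs
```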
9 | # Reference 10 | 11 | 1. Mamba: Linear-Time Sequence Modeling with Selective State Spaces: [[paper]](https://arxiv.org/pdf/2312.00752.pdf) [[codes]](https://github.com/state-spaces/mamba) [[open review]](https://openreview.net/forum?id=AL1fq05o7H) 12 | 1. [Transformer Quality in Linear Time](https://arxiv.org/pdf/2202.10447.pdf) 13 | 1. [Mamba - a replacement for Transformers?](https://www.youtube.com/watch?v=ouF-H35atOY) 14 | 1. [Legendre Memory Units: Continuous-Time Representation in Recurrent Neural Networks](https://proceedings.neurips.cc/paper/2019/file/952285b9b7e7a1be5aa7849f32ffff05-Paper.pdf) 15 | 1. [Combining recurrent, convolutional, and continuous-time models with linear state-space layers](https://arxiv.org/pdf/2110.13985.pdf) 16 | 17 | A few RNN-related works: 18 | 19 | 1. Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention [[YouTube video]](https://www.youtube.com/watch?v=hAooAOFRsYc) 20 | 1. Retentive Network: A Successor to Transformer for Large Language Models [[YouTube video]](https://www.youtube.com/watch?v=ec56a8wmfRk) 21 | 1. RWKV: Reinventing RNNs for the Transformer Era [[YouTube video]](https://www.youtube.com/watch?v=x8pW19wKfXQ)
-------------------------------------------------------------------------------- /paper_notes/dl-models/vision/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/vision/README.md
-------------------------------------------------------------------------------- /paper_notes/dl-models/vision/ResNeXt/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/vision/ResNeXt/README.md
-------------------------------------------------------------------------------- /paper_notes/dl-models/vision/SSD/images/SSD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/vision/SSD/images/SSD.png
-------------------------------------------------------------------------------- /paper_notes/dl-models/vision/SSD/images/SSD2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-models/vision/SSD/images/SSD2.png
-------------------------------------------------------------------------------- /paper_notes/dl-models/vision/SqueezeNet/README.md: -------------------------------------------------------------------------------- 1 | 2 | Identify a CNN architecture with fewer parameters, but equivalent accuracy. 3 | 4 | ## Related works 5 | 6 | * CNN microarchitecture. 7 | 8 | * With the trend of designing very deep CNNs, it becomes cumbersome to manually select filter dimensions for each layer. 9 |
-------------------------------------------------------------------------------- /paper_notes/dl-models/vision/Xception/README.md: -------------------------------------------------------------------------------- 1 | #### Background 2 | 3 | - LeNet-style models 4 | - simple stacks of convolutions for feature extraction and max-pooling operations for spatial sub-sampling. 5 | - refined into AlexNet 6 | - convolution operations were being repeated multiple times in between max-pooling. 7 | - this style of network goes deeper, giving a refined version: VGG 8 | - a new style of architecture: the Inception architecture 9 | - inspired by [network-in-network](https://arxiv.org/abs/1312.4400) 10 | - architectures: 11 | - GoogLeNet : InceptionV1 12 | - InceptionV2 : [Batch normalization: Accelerating 13 | deep network training by reducing internal covariate shift](https://arxiv.org/abs/1502.03167) 14 | - InceptionV3 : [Rethinking the inception architecture for computer vision](https://arxiv.org/abs/1512.00567) 15 | - Inception-ResNet : [Inception-v4, 16 | inception-resnet and the impact of residual connections on 17 | learning](https://arxiv.org/abs/1602.07261) 18 | 19 | #### Inception hypothesis 20 | 21 | > cross-channel correlations and spatial correlations are sufficiently decoupled that it is 22 | preferable not to map them jointly.
23 | 24 | - A convolution layer attempts to learn filters in a 3D space, with 2 spatial dimensions (width and height) and a channel dimension 25 | - thus a single convolution kernel is tasked with **simultaneously mapping cross-channel correlations and spatial correlations**. 26 | - The idea is to make this process easier and more efficient by explicitly factoring it into a series of operations that would **independently** look at (1) cross-channel correlations and (2) spatial correlations. 27 |
-------------------------------------------------------------------------------- /paper_notes/dl-systems/A_computational_model_for_TensorFlow.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | - [A Computational Model for TensorFlow](#a-computational-model-for-tensorflow) 4 | - [My Takeaways](#my-takeaways) 5 | - [References](#references) 6 | 7 | 8 | 9 | # A Computational Model for TensorFlow 10 | 11 | [link](http://delivery.acm.org/10.1145/3090000/3088527/pldiws17mapl-maplmainid2.pdf?ip=167.220.233.19&id=3088527&acc=OA&key=9625D833D637755D%2E9625D833D637755D%2E49016EFF332EB890%2E5945DC2EABF3343C&__acm__=1562203775_b4b8c2917e3b8a8dc79240c123a435f0) 12 | 13 | # My Takeaways 14 | 15 | 1. TensorFlow's design has its roots in _**a long line of work on dataflow systems that includes much deeper and harder results**_. See references [1](#References) and [2](#References). 16 | 17 | # References 18 | 19 | 1. [A relational model of non-deterministic dataflow](https://www.cl.cam.ac.uk/~gw104/journalbib.pdf) 20 | 1. A fully abstract trace model for dataflow and asynchronous networks 21 |
-------------------------------------------------------------------------------- /paper_notes/dl-systems/Beyond_Data_and_Model_Parallelism_for_Deep_Neural_Networks.md: -------------------------------------------------------------------------------- 1 | # [Beyond Data and Model Parallelism for Deep Neural Networks](https://www.sysml.cc/doc/2019/16.pdf) 2 |
-------------------------------------------------------------------------------- /paper_notes/dl-systems/Cavs_An_Efficient_Runtime_System_for_Dynamic_Neural_Networks.md: -------------------------------------------------------------------------------- 1 | # [Cavs: An Efficient Runtime System for Dynamic Neural Networks](https://www.usenix.org/system/files/conference/atc18/atc18-xu-shizhen.pdf) 2 |
-------------------------------------------------------------------------------- /paper_notes/dl-systems/images/Pydron.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-systems/images/Pydron.png
-------------------------------------------------------------------------------- /paper_notes/dl-systems/images/SSA_translation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-systems/images/SSA_translation.png
-------------------------------------------------------------------------------- /paper_notes/dl-systems/images/functioin_call_translation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-systems/images/functioin_call_translation.png --------------------------------------------------------------------------------
/paper_notes/dl-systems/images/tf_eager_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-systems/images/tf_eager_01.png
-------------------------------------------------------------------------------- /paper_notes/dl-systems/images/tf_eager_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-systems/images/tf_eager_02.png
-------------------------------------------------------------------------------- /paper_notes/dl-systems/images/tf_eager_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-systems/images/tf_eager_03.png
-------------------------------------------------------------------------------- /paper_notes/dl-systems/tf-cfg-design/Deep_learning_with_dynamic_computation_graphs.md: -------------------------------------------------------------------------------- 1 | # [Deep Learning with Dynamic Computation Graphs](https://arxiv.org/abs/1702.02181) 2 | 3 | ## Problem proposed in this paper 4 | 5 | Create a single static graph that emulates dynamic computation graphs of arbitrary shape and size. 6 | 7 | ## Dynamic batching 8 | 9 | TF Fold is a high-level library that provides: 10 | - _**compositional blocks**_ (sub-graphs), which simplify the creation of dynamic graph models. 11 | - _**batch-wise**_ parallel implementations for a variety of models. 12 | 13 | This paper proposes the dynamic batching approach. 14 | 15 | 1. The dynamic batching algorithm is implemented as graph rewriting. 16 | 1. Inputs are described as computation graphs. Dynamic batching takes DAGs as its input. 17 | 1. Scheduling is done on sub-graphs, not individual operations. 18 | 1. Batch both computation and input data: 19 | - Nodes (operations) with the same height are independent and can be batched together. 20 | - `gather`, `concatenate`, etc. are inserted to collect input data. Correspondingly, `scatter`, `split`, etc. are inserted in the gradient computation. 21 | 1. Use `tf.while_op` to iterate over the depth, which depends on the input data. 22 |
-------------------------------------------------------------------------------- /paper_notes/dl-systems/tf-cfg-design/tf-while-op-impl.md: -------------------------------------------------------------------------------- 1 | Two flavours of control flow constructs: 2 | 3 | 1. functional ops 4 | 1. low-level primitives 5 | 6 | 7 | 8 | # Reference 9 | 10 | 1.
[Update in TF 2.0: Functional while_loop](https://github.com/tensorflow/community/blob/master/rfcs/20180821-differentiable-functional-while.md) 11 | -------------------------------------------------------------------------------- /paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/Optimizing_RNN_performance.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/Optimizing_RNN_performance.pdf -------------------------------------------------------------------------------- /paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/multiple_layer_optimization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/multiple_layer_optimization.png -------------------------------------------------------------------------------- /paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/pic1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/pic1.png -------------------------------------------------------------------------------- /paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/pic2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/pic2.png -------------------------------------------------------------------------------- /paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/single_cell_optimization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/single_cell_optimization.png -------------------------------------------------------------------------------- /paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/single_layer_optimization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/single_layer_optimization.png -------------------------------------------------------------------------------- /paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/starting_point.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-workload-optimizations/Optimizing_RNN_performance/images/starting_point.png -------------------------------------------------------------------------------- /paper_notes/dl-workload-optimizations/README.md: -------------------------------------------------------------------------------- 1 | * Computation 2 | * [Cudnn RNN 
optimization](https://devblogs.nvidia.com/optimizing-recurrent-neural-networks-cudnn-5/) 3 | * [Mixed precision training](https://github.com/NVIDIA/OpenSeq2Seq): needs device-level support, and may only work on NVIDIA Volta GPUs 4 | * [Persistent RNN](http://proceedings.mlr.press/v48/diamos16.pdf) 5 | * Memory Access 6 | 7 | _**Usually the above two goals are achieved by optimized kernel implementations.**_ 8 | 9 | * Communication 10 | * optimize TensorFlow's original implementation 11 | * [Baidu's allreduce for TensorFlow](https://github.com/baidu-research/tensorflow-allreduce/compare/allreduce-patch-1.0) 12 | * [Uber's Horovod](https://github.com/uber/horovod) 13 | * quantized gradients 14 | * Operator scheduling 15 | * overlap computation and memory copy 16 | * synchronization overhead of the synchronous SGD algorithm 17 | 18 | --- 19 | 20 | For RNN models: how to train _**very large and deep models for very long sequences**_ on one GPU efficiently. 21 |
-------------------------------------------------------------------------------- /paper_notes/dl-workload-optimizations/ShuffleNet_v2.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/dl-workload-optimizations/ShuffleNet_v2.md
-------------------------------------------------------------------------------- /paper_notes/generalization-of-neural-network/README.md: -------------------------------------------------------------------------------- 1 | ### Numeric/experimental investigations 2 | 3 | - Wilson D R, Martinez T R. The general inefficiency of batch training for gradient descent learning[J]. Neural Networks, 2003, 16(10): 1429-1451. 4 | - Keskar N S, Mudigere D, Nocedal J, et al. [On large-batch training for deep learning: Generalization gap and sharp minima](rendered/On_Large-Batch_Training_for_Deep_Learning.pdf)[J]. arXiv preprint arXiv:1609.04836, 2016. 5 | 6 | ### Theoretical Analysis 7 | 8 | > Background Knowledge: [Bayesian model comparison](rendered/Bayesian_Model_Comparison.pdf) 9 | 10 | - Smith S L, Le Q V. [A Bayesian perspective on generalization and stochastic gradient descent](rendered/A_Bayesian_Perspective_on_Generalization_and_Stochastic_Gradient_Descent.pdf)[C]//Proceedings of Second workshop on Bayesian Deep Learning (NIPS 2017). 2017. 11 | - Hoffer E, Hubara I, Soudry D. [Train longer, generalize better: closing the generalization gap in large batch training of neural networks](rendered/Train_Longer_Generalize_Better.pdf)[C]//Advances in Neural Information Processing Systems. 2017: 1729-1739. 12 | 13 | ### Proposed Solutions 14 | 15 | - [Large batch training of convolutional networks](rendered/Large_Batch_Training_of_Convolutional_Networks.pdf) 16 | - Devarakonda A, Naumov M, Garland M. AdaBatch: Adaptive Batch Sizes for Training Deep Neural Networks[J]. arXiv preprint arXiv:1712.02029, 2017. 17 | - Masters D, Luschi C. Revisiting Small Batch Training for Deep Neural Networks[J]. arXiv preprint arXiv:1804.07612, 2018.
18 | - [Highly Scalable Deep Learning Training System with Mixed-Precision: Training ImageNet in Four Minutes](rendered/Highly_Scalable_Deep_Learning_Training_System_with_Mixed-Precision.pdf) 19 | -------------------------------------------------------------------------------- /paper_notes/generalization-of-neural-network/rendered/A_Bayesian_Perspective_on_Generalization_and_Stochastic_Gradient_Descent.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/rendered/A_Bayesian_Perspective_on_Generalization_and_Stochastic_Gradient_Descent.pdf -------------------------------------------------------------------------------- /paper_notes/generalization-of-neural-network/rendered/Bayesian_Model_Comparison.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/rendered/Bayesian_Model_Comparison.pdf -------------------------------------------------------------------------------- /paper_notes/generalization-of-neural-network/rendered/Highly_Scalable_Deep_Learning_Training_System_with_Mixed-Precision.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/rendered/Highly_Scalable_Deep_Learning_Training_System_with_Mixed-Precision.pdf -------------------------------------------------------------------------------- /paper_notes/generalization-of-neural-network/rendered/Large_Batch_Training_of_Convolutional_Networks.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/rendered/Large_Batch_Training_of_Convolutional_Networks.pdf -------------------------------------------------------------------------------- /paper_notes/generalization-of-neural-network/rendered/On_Large-Batch_Training_for_Deep_Learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/rendered/On_Large-Batch_Training_for_Deep_Learning.pdf -------------------------------------------------------------------------------- /paper_notes/generalization-of-neural-network/rendered/Train_Longer_Generalize_Better.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/rendered/Train_Longer_Generalize_Better.pdf -------------------------------------------------------------------------------- /paper_notes/generalization-of-neural-network/sources/Accurate_Large_Minibatch_SGD.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/Accurate_Large_Minibatch_SGD.md -------------------------------------------------------------------------------- 
/paper_notes/generalization-of-neural-network/sources/images/f1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/f1.png -------------------------------------------------------------------------------- /paper_notes/generalization-of-neural-network/sources/images/f2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/f2.png -------------------------------------------------------------------------------- /paper_notes/generalization-of-neural-network/sources/images/fig1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/fig1.png -------------------------------------------------------------------------------- /paper_notes/generalization-of-neural-network/sources/images/figure1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/figure1.png -------------------------------------------------------------------------------- /paper_notes/generalization-of-neural-network/sources/images/figure2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/figure2.png -------------------------------------------------------------------------------- /paper_notes/generalization-of-neural-network/sources/images/insert_bn_after_pool5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/insert_bn_after_pool5.png -------------------------------------------------------------------------------- /paper_notes/generalization-of-neural-network/sources/images/mixed_precision_with_LARS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/mixed_precision_with_LARS.png -------------------------------------------------------------------------------- /paper_notes/generalization-of-neural-network/sources/images/network_configuration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/network_configuration.png -------------------------------------------------------------------------------- /paper_notes/generalization-of-neural-network/sources/images/scalability.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/scalability.png -------------------------------------------------------------------------------- /paper_notes/generalization-of-neural-network/sources/images/sharpness_metric.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/sharpness_metric.png -------------------------------------------------------------------------------- /paper_notes/generalization-of-neural-network/sources/images/sharpness_of_minimizers_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/sharpness_of_minimizers_1.png -------------------------------------------------------------------------------- /paper_notes/generalization-of-neural-network/sources/images/sharpness_of_minimizers_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/sharpness_of_minimizers_2.png -------------------------------------------------------------------------------- /paper_notes/generalization-of-neural-network/sources/images/warmup_experiments.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/generalization-of-neural-network/sources/images/warmup_experiments.png -------------------------------------------------------------------------------- /paper_notes/large-language-models/GPT/GPT-models.md: -------------------------------------------------------------------------------- 1 |

2 | Fig. The original GPT model. (from [wiki page](https://en.wikipedia.org/wiki/Generative_pre-trained_transformer)) 3 | 4 | (Figure: GPT-3 model sizes.)
10 | 11 | |Model|Context Window Length| 12 | |:--|:--| 13 | |GPT|512| 14 | |GPT-2|1024| 15 | |GPT-3|2048| 16 | |GPT-4|[8000](https://help.openai.com/en/articles/7127966-what-is-the-difference-between-the-gpt-4-models)|
-------------------------------------------------------------------------------- /paper_notes/large-language-models/GPT/figures/Full_GPT_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/large-language-models/GPT/figures/Full_GPT_architecture.png
-------------------------------------------------------------------------------- /paper_notes/large-language-models/GPT/figures/GPT-3-model-size.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/large-language-models/GPT/figures/GPT-3-model-size.png
-------------------------------------------------------------------------------- /paper_notes/large-language-models/README.md: -------------------------------------------------------------------------------- 1 | This directory records my study of LLM-related topics and a survey of related work, organized along the following threads: 2 | 3 | Model research (model architectures and training practice): 4 | 1. GPT 5 | 2. ViT 6 | 3. Diffusion 7 | 8 | Systems for LLMs: 9 | 1. Systems research related to LLMs. 10 | 11 | 12 | # Related Projects 13 | 14 | 1. **LLAMA download**: [llama-dl](https://github.com/shawwn/llama-dl) 15 | 1. "**LLaMA**: Open and Efficient Foundation Language Models" [[PDF]](https://arxiv.org/abs/2302.13971v1) [[github]](https://github.com/facebookresearch/llama) 16 | 1. "**Alpaca**: A Strong, Replicable Instruction-Following Model" [[PDF]](https://arxiv.org/pdf/2303.16199.pdf) [[github]](https://github.com/replicate/cog_stanford_alpaca) [[website]](https://crfm.stanford.edu/2023/03/13/alpaca.html) 17 | 1. **Alpaca-LoRA**: [[github]](https://github.com/tloen/alpaca-lora#-alpaca-lora) 18 | 1. **VisualGLM-6B**: [[github]](https://github.com/THUDM/VisualGLM-6B) 19 | 1. **Falcon LLM**: [huggingface](https://huggingface.co/tiiuae)
-------------------------------------------------------------------------------- /paper_notes/large-language-models/fast-attention/Flash-Attention.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/large-language-models/fast-attention/Flash-Attention.pdf
-------------------------------------------------------------------------------- /paper_notes/large-language-models/fast-attention/README.md: -------------------------------------------------------------------------------- 1 | # Reading List 2 | 3 | 1. "xFormers: A modular and hackable Transformer modelling library" [[codes]](https://github.com/facebookresearch/xformers) 4 | 1. "Simple Local Attentions Remain Competitive for Long-Context Tasks" [[PDF]](https://arxiv.org/pdf/2112.07210.pdf) 5 | 1. "Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention" [[PDF]](https://arxiv.org/pdf/2006.16236.pdf) 6 | 1. "Linformer: Self-Attention with Linear Complexity" [[PDF]](https://arxiv.org/abs/2006.04768) 7 | 1.
"Reformer: The Efficient Transformer"[[PDF]](https://arxiv.org/abs/2001.04451) -------------------------------------------------------------------------------- /paper_notes/large-language-models/llm_inference.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/large-language-models/llm_inference.pdf -------------------------------------------------------------------------------- /paper_notes/large-language-models/transformer-optimizations/RMS_layernorm.md: -------------------------------------------------------------------------------- 1 | [Root Mean Square Layer Normalization](https://arxiv.org/pdf/1910.07467.pdf) [[**code**]](https://github.com/bzhangGo/rmsnorm) 2 | 3 | -------------------------------------------------------------------------------- /paper_notes/large-language-models/transformer-optimizations/figures/block-schedule-with-overlap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/large-language-models/transformer-optimizations/figures/block-schedule-with-overlap.png -------------------------------------------------------------------------------- /paper_notes/large-language-models/transformer-optimizations/figures/computeation-graph-of-llm-inference.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/large-language-models/transformer-optimizations/figures/computeation-graph-of-llm-inference.png -------------------------------------------------------------------------------- /paper_notes/large-language-models/transformer-optimizations/figures/pre-post-layer-normalization-in-transformer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/large-language-models/transformer-optimizations/figures/pre-post-layer-normalization-in-transformer.png -------------------------------------------------------------------------------- /paper_notes/large-language-models/transformer-optimizations/figures/two-different-schedules.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/large-language-models/transformer-optimizations/figures/two-different-schedules.png -------------------------------------------------------------------------------- /paper_notes/large-language-models/whisper/figures/whisper-model-size.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/large-language-models/whisper/figures/whisper-model-size.png -------------------------------------------------------------------------------- /paper_notes/large-language-models/whisper/figures/whisper_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/large-language-models/whisper/figures/whisper_overview.png 
-------------------------------------------------------------------------------- /paper_notes/leading-edge-ai/Capsules/Dynamic_Routing_between_Capsule.md: -------------------------------------------------------------------------------- 1 | # [Dynamic Routing Between Capsules](https://arxiv.org/pdf/1710.09829.pdf) 2 | 3 | ## Overview 4 | 5 | This work is inspired by introspection of human vision: human vision ignores irrelevant details by using a carefully determined sequence of fixation points. 6 | 7 | 1. This paper designs a _**multi-layer visual system**_: a parse tree-like structure on _**each fixation**_. 8 | - For a single fixation, a parse tree is carved (what's this?) out of a fixed _**multilayer**_ neural network. 9 | - Each group 10 | 1. _**Ignores**_ the issue of how these single-fixation parse trees are coordinated over multiple fixations. 11 |
-------------------------------------------------------------------------------- /paper_notes/leading-edge-ai/Capsules/README.md: -------------------------------------------------------------------------------- 1 | # Reading List 2 |
-------------------------------------------------------------------------------- /paper_notes/leading-edge-ai/README.md: -------------------------------------------------------------------------------- 1 | # Reading List 2 | 3 | 1. [Recurrent Independent Mechanisms](https://arxiv.org/abs/1909.10893) 4 | - [Reddit discussion](https://www.reddit.com/r/MachineLearning/comments/dak4ym/r_recurrent_independent_mechanisms/) 5 |
-------------------------------------------------------------------------------- /paper_notes/leading-edge-ai/RIM.md: -------------------------------------------------------------------------------- 1 | # RIM 2 | 3 | ## My take-aways 4 | 5 | ## Motivation 6 | 7 | ## Challenges 8 | 9 | ## Approach 10 | 11 | ## Evaluation 12 | 13 | # Reference 14 | 15 | 1. [Recurrent Independent Mechanisms](https://arxiv.org/abs/1909.10893) 16 |
-------------------------------------------------------------------------------- /paper_notes/miscellanea/Neural_Ordinary_Differential_Equations.md: -------------------------------------------------------------------------------- 1 | # Neural Ordinary Differential Equations 2 | 3 | --- 4 | 5 | ## Background 6 | 7 | ### What is an ordinary differential equation (ODE)? 8 | 9 | An ODE is an equation that involves some ordinary derivatives; "ordinary" here is as opposed to partial derivatives of a function. 10 | * For example, if we know $\frac{dx}{dt}(t) = \cos t$, then what is the function $x(t)$? 11 | * In general, the basic principle to solve an ODE is _**always integration**_. 12 | 13 | [An introduction to ordinary differential equations](https://mathinsight.org/ordinary_differential_equation_introduction) 14 | 15 | --- 16 | 17 | ### Input and output of an ODE solver? 18 | 19 | ### The adjoint sensitivity method 20 | * http://math.mit.edu/~stevenj/18.336/adjoint.pdf 21 | * A blog about the [Adjoint Sensitivity Method](https://advancedoptimizationatharvard.wordpress.com/2014/03/02/adjoint-sensitivity-method/) 22 | 23 | ## References 24 | 25 | 1. [DiffEqFlux.jl – A Julia Library for Neural Differential Equations](https://julialang.org/blog/2019/01/fluxdiffeq) 26 |
-------------------------------------------------------------------------------- /paper_notes/miscellanea/README.md: -------------------------------------------------------------------------------- 1 | Backup of unclassified notes.
2 | -------------------------------------------------------------------------------- /paper_notes/ml-with-discrete-variables/README.md: -------------------------------------------------------------------------------- 1 | # Reading List 2 | 3 | - [ ] [Categorical Reparameterization with Gumbel-Softmax](https://openreview.net/pdf?id=rkE3y85ee) 4 | - [The Gumbel-Max Trick for Discrete Distributions](https://lips.cs.princeton.edu/the-gumbel-max-trick-for-discrete-distributions/) 5 | - [The Humble Gumbel Distribution](http://amid.fish/humble-gumbel) 6 | - [ ] [Binarized neural networks: Training deep neural networks with weights and activations constrained to+ 1 or-1](https://arxiv.org/abs/1602.02830) 7 | - [ ] [Strategic attentive writer for learning macro-actions](https://pdfs.semanticscholar.org/c3dd/2bf141c1371398e29ad37ced18bee34e1766.pdf) 8 | - [ ] [Estimating or Propagating Gradients Through Stochastic Neurons for Conditional Computation](https://arxiv.org/pdf/1308.3432.pdf) 9 | - [ ] [Gradient Estimation Using Stochastic Computation Graphs](https://arxiv.org/pdf/1506.05254.pdf) 10 | - [ ] [Estimating or Propagating Gradients Through Stochastic Neurons]() 11 | 12 | ## Some blog 13 | 14 | 1. [Neural Networks gone wild! They can sample from discrete distributions now](http://anotherdatum.com/gumbel-gan.html) 15 | -------------------------------------------------------------------------------- /paper_notes/ml-with-discrete-variables/Straight-throughEstimator.md: -------------------------------------------------------------------------------- 1 | # STE 2 | 3 | 4 | # Reference 5 | 6 | 1. [Understanding Straight-Through Estimator in Training Activation Quantized Neural Nets](https://arxiv.org/abs/1903.05662) 7 | 1. [Understanding Straight-Through Estimator in Training Activation Quantized Neural Nets](https://openreview.net/forum?id=Skh4jRcKQ) [ICLR 2019] 8 | -------------------------------------------------------------------------------- /paper_notes/normalization-in-NN/L2_Regularization_versus_Batch_and_Weight_Normalization/L2_Regularization_versus_Batch_and_Weight_Normalization.md: -------------------------------------------------------------------------------- 1 | - weight scale invariance in normalization method 2 | -------------------------------------------------------------------------------- /paper_notes/normalization-in-NN/Layer_Normalization/layer_normalization.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/normalization-in-NN/Layer_Normalization/layer_normalization.pdf -------------------------------------------------------------------------------- /paper_notes/normalization-in-NN/README.md: -------------------------------------------------------------------------------- 1 | - [Layer Normalization](Layer_Normalization/layer_normalization.pdf) 2 | - [Weight Normalizaton](Weight_Normalization/weight_normalization.pdf) 3 | - A good blog to explain why batch normalization works intuitively: [An Intuitive Explanation of Why Batch Normalization Really Works]( http://mlexplained.com/2018/01/10/an-intuitive-explanation-of-why-batch-normalization-really-works-normalization-in-deep-learning-part-1/) 4 | 5 | >_Batch normalization makes the mean and variance of the activations of each layer independent from the values themselves. 
This means that the magnitude of the higher order interactions are going to be suppressed, allowing larger learning rates to be used._ 6 | -------------------------------------------------------------------------------- /paper_notes/normalization-in-NN/Weight_Normalization/weight_normalization.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/normalization-in-NN/Weight_Normalization/weight_normalization.pdf -------------------------------------------------------------------------------- /paper_notes/normalization-in-NN/optimization/Hessian_and_DeepLearning_Optimizaiton.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/normalization-in-NN/optimization/Hessian_and_DeepLearning_Optimizaiton.pdf -------------------------------------------------------------------------------- /paper_notes/parallel-computing/IRs/lift.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | - [LIFT](#lift) 4 | - [Intermediate language](#intermediate-language) 5 | - [1. Algorithmic Patterns](#1-algorithmic-patterns) 6 | - [2. Data Layout Patterns](#2-data-layout-patterns) 7 | - [3. Parallel Patterns](#3-parallel-patterns) 8 | - [4. Address Space Patterns](#4-address-space-patterns) 9 | - [5. Vectorize Pattern](#5-vectorize-pattern) 10 | - [Reference](#reference) 11 | 12 | 13 | 14 | # LIFT 15 | 16 | - High-level languages based on parallel patterns capture rich information about the algorithmic structure of programs. 17 | - The foundation of the Lift IL is lambda calculus which formalizes the reasoning about functions, their **composition**, **nesting** and application. 18 | 19 | ## Intermediate language 20 | 21 | The Lift IL expresses program as compositions and nesting of functions which operate on **arrays**. 22 | 23 | ### 1. Algorithmic Patterns 24 | 25 | 1. mapSeq 26 | 1. reduceSeq 27 | 1. iterate 28 | 29 | ### 2. Data Layout Patterns 30 | 31 | 1. split 32 | 1. join 33 | 1. gather 34 | 1. scatter 35 | 1. zip 36 | 1. slide 37 | 38 | ### 3. Parallel Patterns 39 | 40 | 1. mapGlb 41 | 1. mapWrg 42 | 1. mapLcl 43 | 44 | ### 4. Address Space Patterns 45 | 46 | 1. toGlobal 47 | 1. toLocal 48 | 1. toPrivate 49 | 50 | ### 5. Vectorize Pattern 51 | 52 | 1. asVector 53 | 1. asScalar 54 | 1. mapVec 55 | 56 | # Reference 57 | 58 | 1. Steuwer, Michel, Toomas Remmelg, and Christophe Dubach. "[Lift: a functional data-parallel IR for high-performance GPU code generation](https://eprints.gla.ac.uk/146596/1/146596.pdf)." 2017 IEEE/ACM International Symposium on Code Generation and Optimization (CGO). IEEE, 2017. 59 | -------------------------------------------------------------------------------- /paper_notes/parallel-computing/IRs/nova.md: -------------------------------------------------------------------------------- 1 | 2 | - [Nova](#nova) 3 | - [1. Built-in parallel operations](#1--built-in-parallel-operations) 4 | - [Reference](#reference) 5 | 6 | 10 | 11 | 12 | # Nova 13 | 14 | ## 1. Built-in parallel operations 15 | 16 | |Operation| 17 | |--| 18 | |map| 19 | |reduce| 20 | |scan| 21 | |permute| 22 | |gather| 23 | |slice| 24 | |filter| 25 | 26 | # Reference 27 | 28 | 1. Collins, Alexander, et al. 
"[NOVA: A functional language for data parallelism](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.666.8678&rep=rep1&type=pdf)." Proceedings of ACM SIGPLAN International Workshop on Libraries, Languages, and Compilers for Array Programming. 2014. 29 | -------------------------------------------------------------------------------- /paper_notes/parallel-computing/Nesl/VCODE.md: -------------------------------------------------------------------------------- 1 | 2 | - [VCODE](#vcode) 3 | - [VCODE instructions](#vcode-instructions) 4 | - [Reference](#reference) 5 | 6 | 10 | 11 | 12 | # VCODE 13 | 14 | ## VCODE instructions 15 | 16 |

17 | (Figure: the VCODE instruction set.) 18 |
19 | 20 | # Reference 21 | 22 | 1. Blelloch, Guy E., and Siddhartha Chatterjee. "[VCODE: A data-parallel intermediate language](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.47.6593&rep=rep1&type=pdf)." Proceedings Frontiers of Massively Parallel Computation. 1990. 23 | -------------------------------------------------------------------------------- /paper_notes/parallel-computing/Nesl/images/nesl-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/parallel-computing/Nesl/images/nesl-1.png -------------------------------------------------------------------------------- /paper_notes/parallel-computing/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | - [Reading list](#reading-list) 4 | - [Parallel programming model](#parallel-programming-model) 5 | - [Papers](#papers) 6 | - [Others](#others) 7 | 8 | 9 | 10 | # Reading list 11 | 12 | ## Parallel programming model 13 | 14 | ### Papers 15 | 16 | 1. Sipelstein J M, Blelloch G E. [Collection-oriented languages](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.56.836&rep=rep1&type=pdf)[J]. Proceedings of the IEEE, 1991, 79(4): 504-523. 17 | 18 | ### Others 19 | 20 | 1. [Introduction to Parallel Computing Tutorial](https://hpc.llnl.gov/training/tutorials/introduction-parallel-computing-tutorial) 21 | 1. [A Library of Parallel Algorithms](https://www.cs.cmu.edu/~scandal/nesl/algorithms.html) 22 | 1. [NESL: A Parallel Programming Language](https://www.cs.cmu.edu/~scandal/nesl.html) 23 | 1. [CME 323: Distributed Algorithms and Optimization](https://stanford.edu/~rezab/dao/) 24 | - https://github.com/lamastex/scalable-data-science/blob/master/read/daosu.pdf 25 | 26 | 1. https://github.com/serge-sans-paille/pythran 27 | 28 | --- 29 | 30 | 1. [Introduction to Parallel Computing](https://computing.llnl.gov/tutorials/parallel_comp/) 31 | 1. [Message Passing and the Actor Model](http://dist-prog-book.com/chapter/3/message-passing.html) 32 | 33 | --- 34 | 35 | 1. [Scheduling For Efficient Large-Scale Machine Learning Training](https://www.youtube.com/watch?v=_rAkFBE-ItE) 36 | -------------------------------------------------------------------------------- /paper_notes/parallel-computing/data_parallel_language/README.md: -------------------------------------------------------------------------------- 1 | 1. Larus J. [C**: A large-grain, object-oriented, data-parallel programming language](https://minds.wisconsin.edu/bitstream/handle/1793/59682/TR1126.pdf?sequence=1)[C]//International Workshop on Languages and Compilers for Parallel Computing. Springer, Berlin, Heidelberg, 1992: 326-341. 
2 | -------------------------------------------------------------------------------- /paper_notes/parallel-computing/execution_model/ActorModel.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/parallel-computing/execution_model/ActorModel.md -------------------------------------------------------------------------------- /paper_notes/parallel-computing/execution_model/CSP.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/parallel-computing/execution_model/CSP.md -------------------------------------------------------------------------------- /paper_notes/parallel-computing/execution_model/MessagePassing.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/parallel-computing/execution_model/MessagePassing.md -------------------------------------------------------------------------------- /paper_notes/parallel-computing/programming_model/DMLL.md: -------------------------------------------------------------------------------- 1 | # DMLL 2 | 3 |

6 | 7 | # Reference 8 | 9 | 1. Brown, Kevin J., et al. "[Have abstraction and eat performance, too: Optimized heterogeneous computing with parallel patterns](https://dawn.cs.stanford.edu/pubs/abstraction-cgo2016.pdf)." 2016 IEEE/ACM International Symposium on Code Generation and Optimization (CGO). IEEE, 2016. 10 | -------------------------------------------------------------------------------- /paper_notes/parallel-computing/programming_model/images/DMLL-comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/parallel-computing/programming_model/images/DMLL-comparison.png -------------------------------------------------------------------------------- /paper_notes/parallel-computing/programming_model/images/co-ori-lang-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/parallel-computing/programming_model/images/co-ori-lang-1.png -------------------------------------------------------------------------------- /paper_notes/parallel-computing/programming_model/images/mimd.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/parallel-computing/programming_model/images/mimd.gif -------------------------------------------------------------------------------- /paper_notes/parallel-computing/programming_model/images/simd.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/parallel-computing/programming_model/images/simd.gif -------------------------------------------------------------------------------- /paper_notes/parallel-computing/programming_model/images/vcode-instruction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/parallel-computing/programming_model/images/vcode-instruction.png -------------------------------------------------------------------------------- /paper_notes/partial_aggregation/README.md: -------------------------------------------------------------------------------- 1 | ## References 2 | 3 | 1. Yu, Yuan, Pradeep Kumar Gunda, and Michael Isard. "[Distributed aggregation for data-parallel computing: interfaces and implementations](http://www.michaelisard.com/pubs/sosp-035-yu.pdf)." Proceedings of the ACM SIGOPS 22nd symposium on Operating systems principles. 2009. 4 | 1. Liu, Chang, et al. "[Automating distributed partial aggregation](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/main-8.pdf)." Proceedings of the ACM Symposium on Cloud Computing. 2014. 5 | ## Decomposable functions 6 | 7 |

8 | (Figures: definitions of decomposable and associative-decomposable functions.) 9 |
13 | 14 | 1. If an aggregation computation can be represented as a set of ***associative-decomposable*** functions followed by some final processing, then it can be split up in such a way that the query plan in Figure 2 below can be applied. 15 | 1. If the computation is instead formed from ***decomposable*** functions followed by final processing then the plan from Figure 2 can be applied, but without any intermediate aggregation stages. 16 | 1. If the computation is ***not decomposable*** then the plan from Figure 1 is required. 17 | 18 |
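A minimal Python sketch of the associative-decomposable case (my own illustration, not from the papers above; `initial_reduce`, `combine`, and `final_reduce` are made-up names): the mean is not itself associative, but it decomposes into an associative partial aggregate (sum, count) plus a final step, which is exactly what permits the intermediate aggregation stages in the plan of Figure 2.

```python
from functools import reduce

def initial_reduce(partition):
    """Map one partition of raw records to a partial aggregate."""
    return (sum(partition), len(partition))

def combine(a, b):
    """Associative merge of two partial aggregates (the intermediate aggregation stage)."""
    return (a[0] + b[0], a[1] + b[1])

def final_reduce(agg):
    """Turn the merged partial aggregate into the final answer."""
    total, count = agg
    return total / count

partitions = [[1, 2, 3], [4, 5], [6, 7, 8, 9]]         # data spread across machines
partials = [initial_reduce(p) for p in partitions]      # runs locally on each partition
mean = final_reduce(reduce(combine, partials))          # partials can be merged in any grouping
assert mean == sum(range(1, 10)) / 9
```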

19 | (Figure 1 and Figure 2: the two execution plans referenced above.)
-------------------------------------------------------------------------------- /paper_notes/partial_aggregation/figures/decomposable_function_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/partial_aggregation/figures/decomposable_function_1.png
-------------------------------------------------------------------------------- /paper_notes/partial_aggregation/figures/decomposable_function_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/partial_aggregation/figures/decomposable_function_2.png
-------------------------------------------------------------------------------- /paper_notes/partial_aggregation/figures/decomposable_function_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/partial_aggregation/figures/decomposable_function_3.png
-------------------------------------------------------------------------------- /paper_notes/partial_aggregation/figures/decomposable_function_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/partial_aggregation/figures/decomposable_function_4.png
-------------------------------------------------------------------------------- /paper_notes/partial_aggregation/figures/execution_plan_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/partial_aggregation/figures/execution_plan_1.png
-------------------------------------------------------------------------------- /paper_notes/partial_aggregation/figures/execution_plan_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/partial_aggregation/figures/execution_plan_2.png
-------------------------------------------------------------------------------- /paper_notes/programming-language/Glossary/images/1920px-Tree_edges.svg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/programming-language/Glossary/images/1920px-Tree_edges.svg.png
-------------------------------------------------------------------------------- /paper_notes/programming-language/Glossary/program_analysis.md: -------------------------------------------------------------------------------- 1 | # [Back Edges](https://stackoverflow.com/questions/44494426/back-edges-in-a-graph/44494705) 2 | 3 | Given a DFS tree of a graph, a Back Edge is an edge that connects a vertex to a vertex that is discovered before its parent. 4 | 5 |
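A tiny runnable sketch of how back edges are usually detected (my own illustration; the function name and the example graph are made up): during a DFS, an edge whose target is still on the DFS stack — i.e. an ancestor in the DFS tree — is a back edge.

```python
def find_back_edges(graph, root):
    """DFS that reports edges pointing back to a vertex still on the DFS stack
    (an ancestor in the DFS tree) -- the back edges."""
    WHITE, GRAY, BLACK = 0, 1, 2           # unvisited / on the DFS stack / finished
    color = {v: WHITE for v in graph}
    back_edges = []

    def dfs(u):
        color[u] = GRAY
        for v in graph[u]:
            if color[v] == WHITE:
                dfs(v)                      # tree edge
            elif color[v] == GRAY:
                back_edges.append((u, v))   # back edge: v is an ancestor of u
        color[u] = BLACK

    dfs(root)
    return back_edges

# 1 -> 2 -> 3 -> 1 forms a cycle, so (3, 1) is the back edge of this DFS tree.
cfg = {1: [2], 2: [3], 3: [1, 4], 4: []}
print(find_back_edges(cfg, 1))   # [(3, 1)]
```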

6 | 7 |

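As a small, self-contained illustration (my own sketch, not part of the linked answer), back edges can be found with a DFS that tracks which vertices are still on the recursion stack; an edge pointing into such a vertex is a back edge:

```python
# DFS back-edge detection for a directed graph given as an adjacency dict.
# An edge (u, v) is a back edge iff v is an ancestor of u in the DFS tree,
# i.e. v is still on the DFS stack (colored GRAY) when (u, v) is examined.
def back_edges(graph):
    WHITE, GRAY, BLACK = 0, 1, 2  # unvisited / on the stack / finished
    color = {v: WHITE for v in graph}
    found = []

    def dfs(u):
        color[u] = GRAY
        for v in graph.get(u, []):
            if color[v] == GRAY:      # v is an ancestor still being explored
                found.append((u, v))
            elif color[v] == WHITE:
                dfs(v)
        color[u] = BLACK

    for v in graph:
        if color[v] == WHITE:
            dfs(v)
    return found


g = {"a": ["b"], "b": ["c"], "c": ["a", "d"], "d": []}
print(back_edges(g))  # [('c', 'a')] -- the edge that closes the cycle
```

In a directed graph the presence of a back edge is equivalent to the presence of a cycle, which is why this check is the standard building block of cycle detection.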
8 | 9 | # [Domination Relationship](https://en.wikipedia.org/wiki/Control_flow_graph) 10 | 11 | - A block M **dominates** a block N if every path from the entry that reaches block N has to pass through block M. 12 | - The *entry block* (through which control enters into the flow graph) **dominates** all blocks. 13 | - In the reverse direction, block M **postdominates** block N if every path from N to the exit has to pass through block M. 14 | - The *exit block* (through which all control flow leaves) **postdominates** all blocks. 15 | 16 | # [Use-define chain / Definition-use chain](https://en.wikipedia.org/wiki/Use-define_chain) 17 | 18 | * A Use-Definition Chain (UD Chain) is a data structure that consists of a use, U, of a variable, and all the definitions, D, of that variable that can reach that use without any other intervening definitions. 19 | 20 | * A counterpart of a UD Chain is a Definition-Use Chain (DU Chain), which consists of a definition, D, of a variable and all the uses, U, reachable from that definition without any other intervening definitions. 21 | 22 | Both UD and DU chains are created by using a form of static code analysis known as data flow analysis. 23 | 24 | Knowing the use-def and def-use chains for a program or subprogram is a prerequisite for many compiler optimizations, including constant propagation and common subexpression elimination. 25 | -------------------------------------------------------------------------------- /paper_notes/programming-language/Glossary/programming_paradigm.md: -------------------------------------------------------------------------------- 1 | # [Generic Programming](https://en.wikipedia.org/wiki/Generic_programming) 2 | 3 | Generic programming describes a programming paradigm whereby: 4 | 5 | 1. Fundamental requirements on types are abstracted from across concrete examples of algorithms and data structures and formalized as concepts. 6 | 1. Generic functions are implemented in terms of these concepts, typically using language genericity mechanisms, for example: 7 | 8 | - parametric polymorphism in ML, Scala, Haskell and Julia 9 | - templates in C++ and D 10 | - parameterized types in the influential 1994 book Design Patterns. 11 | 12 | # [Programming Language Support for Genericity](https://en.wikipedia.org/wiki/Generic_programming) 13 | 14 | Genericity is implemented and supported differently in various programming languages. 15 | 16 | 1. [Forth](https://en.wikipedia.org/wiki/Forth_(programming_language)): exposes the behaviour of its compiler and therefore naturally offers genericity capabilities. 17 | 18 | # [Polymorphism](https://en.wikipedia.org/wiki/Polymorphism_(computer_science)) 19 | 20 | The provision of a single interface to entities of different types or the use of a single symbol to represent multiple different types. 21 | 22 | - **Ad hoc polymorphism**: defines a common interface for an arbitrary set of individually specified types. 23 | - function overloading or operator overloading 24 | - **Parametric polymorphism**: when one or more types are not specified by name but by abstract symbols that can represent any type. 25 | - **Subtyping** (also called subtype polymorphism or inclusion polymorphism): when a name denotes instances of many different classes related by some common superclass. 26 | 27 | _**Because Julia is a dynamically typed language and doesn't need to make all type decisions at compile time, many traditional difficulties encountered in static parametric type systems can be handled relatively easily**_.
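The three kinds of polymorphism listed above can be illustrated in a few lines of Python (a rough sketch of my own; Python resolves everything at run time, so the parametric case is only a typing-level annotation here):

```python
# Ad hoc, parametric, and subtype polymorphism, sketched in typed Python.
from functools import singledispatch
from typing import Sequence, TypeVar


# Ad hoc polymorphism: one interface, separately specified per type
# (the dynamic-dispatch analogue of function overloading).
@singledispatch
def describe(x) -> str:
    return f"something: {x!r}"


@describe.register
def _(x: int) -> str:
    return f"an integer: {x}"


@describe.register
def _(x: str) -> str:
    return f"a string: {x}"


# Parametric polymorphism: the element type is an abstract symbol T that
# the body never inspects.
T = TypeVar("T")


def first(xs: Sequence[T]) -> T:
    return xs[0]


# Subtype polymorphism: one name denotes instances of classes related by a
# common superclass.
class Shape:
    def area(self) -> float:
        raise NotImplementedError


class Square(Shape):
    def __init__(self, side: float):
        self.side = side

    def area(self) -> float:
        return self.side * self.side


def total_area(shapes: Sequence[Shape]) -> float:
    return sum(s.area() for s in shapes)


print(describe(3), describe("hi"))
print(first([10, 20, 30]))
print(total_area([Square(2.0), Square(3.0)]))  # 13.0
```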
28 | -------------------------------------------------------------------------------- /paper_notes/programming-language/Glossary/type.md: -------------------------------------------------------------------------------- 1 | # [Nominal Typing / Nominal Subtyping](https://en.wikipedia.org/wiki/Nominal_type_system) 2 | 3 | **Nominal typing** means that two variables are type-compatible if and only if their declarations name the same type. 4 | 5 | **Nominal subtyping** means that one type is a subtype of another if and only if it is explicitly declared to be so in its definition. 6 | 7 | ## [Abstract Type](https://en.wikipedia.org/wiki/Abstract_type) 8 | 9 | 1. An abstract type is a type _**in a nominative type system**_ that cannot be instantiated directly. 10 | 1. A type that is not abstract – which can be instantiated – is called a concrete type. 11 | 1. Every instance of an abstract type is an instance of some concrete subtype. Abstract types are also known as existential types. 12 | 13 | # [Boxing](https://en.wikipedia.org/wiki/Object_type_%28object-oriented_programming%29#Boxing) 14 | 15 | Boxing is the process of placing a primitive type within an object so that the primitive can be used as a reference object. 16 | 17 | * Repeated boxing and unboxing of objects can have a severe performance impact, because boxing dynamically allocates new objects and unboxing (if the boxed value is no longer used) then makes them eligible for garbage collection. 18 | * The boxed object is always a copy of the value object, and is _**usually immutable**_. 19 | 20 | ## [Autoboxing](https://en.wikipedia.org/wiki/Object_type_%28object-oriented_programming%29#Autoboxing) 21 | 22 | Autoboxing is the term for getting a reference type out of a value type just through type conversion (either implicit or explicit). The compiler automatically supplies the extra source code that creates the object. 
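A short Python sketch of my own (not taken from the linked articles) contrasting the nominal-subtyping and abstract-type definitions at the top of this note; the class names are illustrative only:

```python
# Nominal subtyping and abstract (non-instantiable) types in Python.
import json
from abc import ABC, abstractmethod


class Serializer(ABC):              # abstract type: cannot be instantiated directly
    @abstractmethod
    def dumps(self, obj) -> str:
        ...


class JsonSerializer(Serializer):   # subtype because it is *declared* to be one
    def dumps(self, obj) -> str:
        return json.dumps(obj)


class LooksLikeSerializer:          # same shape, but no declared relationship
    def dumps(self, obj) -> str:
        return str(obj)


print(issubclass(JsonSerializer, Serializer))       # True: the relation is declared
print(issubclass(LooksLikeSerializer, Serializer))  # False: a nominal system ignores shape
# Serializer()  # would raise TypeError: only concrete subtypes such as
#               # JsonSerializer can be instantiated
```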
23 | -------------------------------------------------------------------------------- /paper_notes/programming-language/Nominative_and_structure_type.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/programming-language/Nominative_and_structure_type.md -------------------------------------------------------------------------------- /paper_notes/programming-language/PL_Design.md: -------------------------------------------------------------------------------- 1 | # [Boxing](https://en.wikipedia.org/wiki/Object_type_%28object-oriented_programming%29#Boxing) 2 | -------------------------------------------------------------------------------- /paper_notes/programming-language/README.md: -------------------------------------------------------------------------------- 1 | [TBD] 2 | -------------------------------------------------------------------------------- /paper_notes/programming-language/SSA/README.md: -------------------------------------------------------------------------------- 1 | [TBD] 2 | -------------------------------------------------------------------------------- /paper_notes/programming-language/SSA/Simple_and_Efficient_Construction_of_Static_Single_Assignment_Form.md: -------------------------------------------------------------------------------- 1 | # Simple and Efficient Construction of Static Single Assignment Form 2 | -------------------------------------------------------------------------------- /paper_notes/programming-language/SSA/images/SSA_example1.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/programming-language/SSA/images/SSA_example1.1.png -------------------------------------------------------------------------------- /paper_notes/programming-language/SSA/images/SSA_example1.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/programming-language/SSA/images/SSA_example1.2.png -------------------------------------------------------------------------------- /paper_notes/programming-language/SSA/images/SSA_example1.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/programming-language/SSA/images/SSA_example1.3.png -------------------------------------------------------------------------------- /paper_notes/programming-language/abstract_binding_tree.md: -------------------------------------------------------------------------------- 1 | # Abstract Binding Tree 2 | 3 | 4 | # Reference 5 | 6 | 1. 
[Understanding typing judgments](https://www.hedonisticlearning.com/posts/understanding-typing-judgments.html) -------------------------------------------------------------------------------- /paper_notes/tensor_operations/images/mm_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/tensor_operations/images/mm_example.png -------------------------------------------------------------------------------- /paper_notes/tensor_operations/images/nested_tensorarray.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/tensor_operations/images/nested_tensorarray.png -------------------------------------------------------------------------------- /paper_notes/tensor_operations/images/tensor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/tensor_operations/images/tensor.png -------------------------------------------------------------------------------- /paper_notes/tensor_operations/images/transformer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/tensor_operations/images/transformer.png -------------------------------------------------------------------------------- /paper_notes/tensor_operations/ir.tex: -------------------------------------------------------------------------------- 1 | \section{Lower to the internal representation} 2 | -------------------------------------------------------------------------------- /paper_notes/tensor_operations/main.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/tensor_operations/main.pdf -------------------------------------------------------------------------------- /paper_notes/tensor_operations/main.tex: -------------------------------------------------------------------------------- 1 | \documentclass {article} 2 | 3 | \input{structure.tex} % Include the file specifying the document structure and custom commands 4 | \title{Concepts, Constructs and the Design Rationale} 5 | %\author{Ying Cao} 6 | \date{\today} 7 | 8 | \begin{document} 9 | 10 | \maketitle 11 | \tableofcontents 12 | \newpage 13 | \input{concepts.tex} 14 | \input{shape_operation.tex} 15 | \input{tensorarray_creation.tex} 16 | \input{item_access.tex} 17 | \input{constructs.tex} 18 | \input{nn.tex} 19 | \input{vectorization.tex} 20 | %\input{ir.tex} 21 | %\input{optimization.tex} 22 | \input{transformer.tex} 23 | \end{document} 24 | -------------------------------------------------------------------------------- /paper_notes/tensor_operations/nn.tex: -------------------------------------------------------------------------------- 1 | \section{Neural network specialized tensor, tensor array operations} 2 | 3 | \subsection{\textbf{\textit{embedding}}} 4 | 5 | Embedding is parallel slicing. 
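As a concrete reading of the claim above (an illustrative NumPy sketch added here, not part of the original definition; the formal signature and shape function follow below): each index in $\mathbf{X}$ selects one slice of $\mathbf{Y}$ along \textit{dim}, and the selected slices are stacked along a new leading axis.

\begin{lstlisting}[language=Python]
# Illustrative only: embedding as parallel slicing.
import numpy as np

def embedding(X, Y, dim=0):
    # stack slice(Y, X[i], dim) for every i; hence
    # S(Z) = (S(X)[0],) + del(S(Y), dim)
    return np.stack([np.take(Y, int(x), axis=dim) for x in X])

Y = np.arange(12).reshape(4, 3)   # a 4 x 3 lookup table
X = np.array([2, 0, 2])           # indices
Z = embedding(X, Y, dim=0)        # Z.shape == (3, 3)
\end{lstlisting}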
6 | 7 | \begin{lstlisting}[language=Python] 8 | embedding(X:Vector[int], Y:Tensor[T], dim:int) -> Tensor[T] 9 | \end{lstlisting} 10 | 11 | \begin{enumerate} 12 | 13 | \item shape function: $S(\mathbf{Z}) = \Gamma (S(\mathbf{X}), S(\mathbf{Y}), \text{dim})$ 14 | 15 | \begin{equation*} 16 | \begin{aligned} 17 | S(\mathbf{Z}) &= \Gamma (S(\mathbf{X}), S(\mathbf{Y}), \text{dim}) \\ 18 | & = (S(\mathbf{X})[0]) + \text{del}(S(\mathbf{Y}), \text{dim}) 19 | \end{aligned} 20 | \end{equation*} 21 | 22 | \item computation: 23 | 24 | \begin{equation*} 25 | \begin{aligned} 26 | &\mathbf{Z} = \text{reshape}(\mathbf{Z}, (N, -1)) \\ 27 | &\textbf{foreach} \quad (i, x) \quad \textbf{in} \quad \mathbf{X}_{N} \\ 28 | &\quad \quad \mathbf{Z}[i] = \text{slice}(\mathbf{Y}, \mathbf{X}[i], \text{dim}, \textit{keep\_dim}=\text{false}) \\ 29 | &\mathbf{Z} = \text{reshape}(\mathbf{Z}, (N)+\text{del}(S(\mathbf{Y}), \text{dim})) \\ 30 | \end{aligned} 31 | \end{equation*} 32 | 33 | \begin{itemize} 34 | \item iteration domain: $0 \le i \le \text{size}(\mathbf{X}, 0) - 1$ 35 | \item access function: 36 | \begin{enumerate} 37 | \item $f_1: \{ i \rightarrow \mathbf{Z}[i] \}$ 38 | \item $f_2: $ [TBD] 39 | \item $f_3: \{i \rightarrow \mathbf{X}[i]\}$ 40 | \end{enumerate} 41 | \end{itemize} 42 | 43 | \item differentiation rule: 44 | \end{enumerate} 45 | -------------------------------------------------------------------------------- /paper_notes/tensor_operations/optimization.tex: -------------------------------------------------------------------------------- 1 | \section{General Settings of important optimizations} 2 | -------------------------------------------------------------------------------- /paper_notes/tensor_operations/tensorarray_creation.tex: -------------------------------------------------------------------------------- 1 | \section{TensorArray creation} 2 | 3 | 4 | \subsection{Conversion from Tensor} 5 | 6 | \subsubsection{\textbf{\textit{slices}}} 7 | 8 | \begin{lstlisting}[language=Python] 9 | slices(X:Tensor, dim:int) -> TensorArray[Tensor] 10 | \end{lstlisting} 11 | 12 | \subsubsection{\textbf{\textit{chunks}}} 13 | 14 | \subsection{\textbf\textit{Construction from Looping}} 15 | -------------------------------------------------------------------------------- /paper_notes/tensor_operations/vectorization.tex: -------------------------------------------------------------------------------- 1 | \section{Optimization capability as an explicit interface} 2 | 3 | \subsection{Vectorized function: \textit{\textbf{vectorize}}} 4 | -------------------------------------------------------------------------------- /paper_notes/type-systems/README.md: -------------------------------------------------------------------------------- 1 | # Unclassified References 2 | 3 | 1. [Understanding typing judgments](https://www.hedonisticlearning.com/posts/understanding-typing-judgments.html) 4 | 1. [Advanced Programming Languages Spring 2019](https://www.cs.cornell.edu/courses/cs6110/2019sp/schedule.html) 5 | 1. [Examples: Well-formed types](https://www.cs.tufts.edu/comp/105-2020f/slide-cache/slides-5c42e15842b87e76e1275a6931e00e3c.pdf) 6 | 1. [A Tutorial on Type Theory, Foundations of Programming Languages, and Formal Verification](http://jgaltidor.github.io/typetheory_paper.pdf) 7 | 1. [Type-preserving compilation via dependently typed syntax in Agda](https://www.cse.chalmers.se/~abela/talkTYPES2020.pdf) 8 | 1. 
[A Type System for Well-Founded Recursion](https://people.mpi-sws.org/~dreyer/papers/recursion/tr/main.pdf) -------------------------------------------------------------------------------- /paper_notes/type-systems/notations/README.md: -------------------------------------------------------------------------------- 1 | [TBD] 2 | -------------------------------------------------------------------------------- /paper_notes/type-systems/notations/main.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/paper_notes/type-systems/notations/main.pdf -------------------------------------------------------------------------------- /paper_notes/type-systems/notations/main.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | 3 | \input{structure.tex} 4 | \title{Notations} 5 | 6 | \begin{document} 7 | 8 | % \maketitle % Print the title 9 | % \tableofcontents 10 | 11 | \noindent 12 | \linespread{1.2} 13 | \selectfont 14 | \setlength{\topskip}{0ex} 15 | \setlength{\parskip}{1ex} 16 | \setlength{\lineskip}{1em} 17 | 18 | \section{Terms, Types and Kinds} 19 | \input{contents/kinding.tex} 20 | 21 | \end{document} 22 | -------------------------------------------------------------------------------- /reinforcement_learning/README.md: -------------------------------------------------------------------------------- 1 | [TBD] 2 | -------------------------------------------------------------------------------- /reinforcement_learning/basic_concepts/basic_concepts_about_reinforcement_learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/reinforcement_learning/basic_concepts/basic_concepts_about_reinforcement_learning.pdf -------------------------------------------------------------------------------- /reinforcement_learning/basic_concepts/basic_concepts_about_reinforcement_learning.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/reinforcement_learning/basic_concepts/basic_concepts_about_reinforcement_learning.ppt -------------------------------------------------------------------------------- /text_generation_for_gitchat/pic/pic1.one-hot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic1.one-hot.png -------------------------------------------------------------------------------- /text_generation_for_gitchat/pic/pic10.highway.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic10.highway.png -------------------------------------------------------------------------------- /text_generation_for_gitchat/pic/pic11.generate_text_from_language_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic11.generate_text_from_language_model.png 
-------------------------------------------------------------------------------- /text_generation_for_gitchat/pic/pic12.encoder_decoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic12.encoder_decoder.png -------------------------------------------------------------------------------- /text_generation_for_gitchat/pic/pic13.neural_turing_machine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic13.neural_turing_machine.png -------------------------------------------------------------------------------- /text_generation_for_gitchat/pic/pic2.word_embedding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic2.word_embedding.png -------------------------------------------------------------------------------- /text_generation_for_gitchat/pic/pic3.rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic3.rnn.png -------------------------------------------------------------------------------- /text_generation_for_gitchat/pic/pic4.bp_through_all_nodes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic4.bp_through_all_nodes.png -------------------------------------------------------------------------------- /text_generation_for_gitchat/pic/pic5.bp_through_shortcut.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic5.bp_through_shortcut.png -------------------------------------------------------------------------------- /text_generation_for_gitchat/pic/pic6.lstm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic6.lstm.png -------------------------------------------------------------------------------- /text_generation_for_gitchat/pic/pic7.gru.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic7.gru.png -------------------------------------------------------------------------------- /text_generation_for_gitchat/pic/pic8.deep_rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic8.deep_rnn.png -------------------------------------------------------------------------------- /text_generation_for_gitchat/pic/pic9.residual_block.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/pic/pic9.residual_block.png -------------------------------------------------------------------------------- /text_generation_for_gitchat/text_generation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/text_generation_for_gitchat/text_generation.pdf -------------------------------------------------------------------------------- /text_generation_for_gitchat/trans_2_html.sh: -------------------------------------------------------------------------------- 1 | pandoc README.md -t html -s -o text_generation.html --mathjax=https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML 2 | 3 | -------------------------------------------------------------------------------- /tiled_efficient_attention/README.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/README.pdf -------------------------------------------------------------------------------- /tiled_efficient_attention/figures/cal_p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/cal_p.png -------------------------------------------------------------------------------- /tiled_efficient_attention/figures/chunk_form_parallelism.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/chunk_form_parallelism.png -------------------------------------------------------------------------------- /tiled_efficient_attention/figures/chunk_recurrent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/chunk_recurrent.png -------------------------------------------------------------------------------- /tiled_efficient_attention/figures/first_kv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/first_kv.png -------------------------------------------------------------------------------- /tiled_efficient_attention/figures/fused_chunk_gla_fwd_kernel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/fused_chunk_gla_fwd_kernel.png -------------------------------------------------------------------------------- /tiled_efficient_attention/figures/fwd_decay_cumsum.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/fwd_decay_cumsum.png -------------------------------------------------------------------------------- 
/tiled_efficient_attention/figures/fwd_inner_chunk.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/fwd_inner_chunk.png -------------------------------------------------------------------------------- /tiled_efficient_attention/figures/gated_linear_attention.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/gated_linear_attention.pptx -------------------------------------------------------------------------------- /tiled_efficient_attention/figures/gated_linear_attention_layer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/gated_linear_attention_layer.png -------------------------------------------------------------------------------- /tiled_efficient_attention/figures/gla_data_accessed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/gla_data_accessed.png -------------------------------------------------------------------------------- /tiled_efficient_attention/figures/gla_equation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/gla_equation.png -------------------------------------------------------------------------------- /tiled_efficient_attention/figures/last_decay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcy-seso/LearningNotes/6b63c89e2580ae1503dee7a0b0d3456566281334/tiled_efficient_attention/figures/last_decay.png -------------------------------------------------------------------------------- /tiled_efficient_attention/figures/~$gated_linear_attention.pptx: -------------------------------------------------------------------------------- 1 | Ying Cao Ying Cao -------------------------------------------------------------------------------- /tiled_efficient_attention/main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from model import GatedLinearAttention, GLAConfig 4 | 5 | if __name__ == "__main__": 6 | 7 | batch, num_head, length, hidden = 32, 4, 2048, 2048 8 | 9 | config = GLAConfig(d_model=hidden, n_head=num_head) 10 | print(config) 11 | 12 | GLA = GatedLinearAttention(config, 13 | mode="fused_chunk").cuda().to(torch.bfloat16) 14 | 15 | x = torch.randn((batch, length, hidden), 16 | dtype=torch.bfloat16, 17 | device="cuda", 18 | requires_grad=False) 19 | 20 | y, _ = GLA(x) 21 | print(y.shape) 22 | -------------------------------------------------------------------------------- /tiled_efficient_attention/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .configuration import GLAConfig 2 | from .gla import GatedLinearAttention 3 | 4 | __all__ = [ 5 | 'GLAConfig', 6 | 'GatedLinearAttention', 7 | ] 8 | 
--------------------------------------------------------------------------------