├── .flake8 ├── .gitignore ├── README.md ├── autopipe ├── .gitignore ├── README.md ├── TechnicalOverview_old.md ├── __init__.py ├── analysis │ ├── __init__.py │ ├── analysis_utils.py │ ├── asgd_analysis.py │ ├── deprecated_theoretical.py │ ├── pipedream_complexity.py │ ├── pipeline_partition_analysis.py │ ├── profile_pipeline_stages.py │ ├── profile_replica.py │ └── ssgd_analysis.py ├── autopipe │ ├── __init__.py │ ├── api.py │ ├── cache_utils.py │ ├── compiler │ │ ├── __init__.py │ │ ├── compile_normal_model_function.py │ │ ├── compile_partitioned_model.py │ │ ├── create_pipeline_configuration.py │ │ ├── partition_forward_method.py │ │ ├── partition_init_method.py │ │ ├── state_methods.py │ │ └── utils.py │ ├── model_partitioning │ │ ├── __init__.py │ │ ├── acyclic │ │ │ ├── __init__.py │ │ │ ├── acyclic_partitioning.py │ │ │ ├── data_structures.py │ │ │ └── gpa.py │ │ ├── async_pipeline.py │ │ ├── heuristics.py │ │ ├── metis │ │ │ ├── __init__.py │ │ │ ├── metis_partitioning.py │ │ │ └── post_process.py │ │ ├── mixed_pipe │ │ │ ├── __init__.py │ │ │ ├── assignment.py │ │ │ ├── by_prefix.py │ │ │ ├── centers.py │ │ │ ├── check_cycles.py │ │ │ ├── coarsening.py │ │ │ ├── detect_p_rep.py │ │ │ ├── heap_dict.py │ │ │ ├── partition_mixed_pipe_v1_clusters.py │ │ │ ├── partition_mixed_pipe_v2.py │ │ │ ├── post_process.py │ │ │ ├── refine.py │ │ │ ├── systematic_block_ratio_creation.py │ │ │ └── test_coarsen_prefix.py │ │ ├── pipedream │ │ │ ├── __init__.py │ │ │ ├── pipedream_partition_no_hir.py │ │ │ └── pipedream_partition_no_hir_fixed.py │ │ ├── stage_to_device.py │ │ └── utils.py │ ├── model_profiling │ │ ├── __init__.py │ │ ├── control_flow_graph.py │ │ ├── graph_executor.py │ │ ├── infer_is_contiguous.py │ │ ├── infer_req_grad.py │ │ ├── network_profiler.py │ │ ├── profiler.py │ │ └── tracer.py │ ├── union_find.py │ └── utils.py ├── cmd_parser.py ├── download │ ├── download_cifar.py │ ├── download_glue.py │ ├── download_wikitext.py │ └── make_squad.sh ├── environment.yml ├── misc │ ├── display_partitioning_stats.py │ └── megatron_11b │ │ └── dict.txt ├── partition.py ├── partitioning_scripts │ ├── __init__.py │ └── partition_scripts_utils.py ├── py_sbatch.sh └── tasks │ ├── __init__.py │ ├── bert_squad.py │ ├── cep.py │ ├── dummy_t5.py │ ├── functional_model.py │ ├── glue.py │ ├── gpt2.py │ ├── megatron.py │ ├── new_t5.py │ ├── partitioning_task.py │ ├── t5.py │ ├── transformers_utils.py │ └── vision.py ├── docs ├── AcceleratingMixedPipeWithCudaMPS.md ├── MPI.md ├── MiscOptimizations.md ├── NewModels.md ├── PipeDebug.md ├── PipeList.md └── PitfallsKnownIssuesAndTODOs.md ├── models ├── __init__.py ├── new_t5_example │ ├── README.md │ ├── __init__.py │ ├── check_conversion.py │ ├── check_partitioned.py │ ├── convert_none.py │ ├── eval_new_t5.py │ └── modeling_t5.py ├── normal │ ├── NLP_models │ │ ├── __init__.py │ │ ├── modeling_bert.py │ │ ├── modeling_bert_4_1_converted.py │ │ ├── modeling_bert_old.py │ │ ├── modeling_ctrl.py │ │ ├── modeling_ctrl_tied_weights.py │ │ ├── modeling_gpt2.py │ │ ├── modeling_gpt2_tied_weights.py │ │ ├── modeling_roberta.py │ │ ├── modeling_t5.py │ │ ├── modeling_t5_tied_weights.py │ │ ├── modeling_utils_converted.py │ │ ├── stateless.py │ │ └── utils.py │ ├── __init__.py │ ├── cep.py │ ├── dummy.py │ ├── split_linear.py │ └── vision_models │ │ ├── AlexNet.py │ │ ├── DenseNet.py │ │ ├── GoogleNet.py │ │ ├── Inception.py │ │ ├── LeNet.py │ │ ├── ResNet.py │ │ ├── SqueezeNet.py │ │ ├── UNet.py │ │ ├── VGG.py │ │ ├── WideResNet.py │ │ ├── WideResNet_GN.py │ │ ├── 
__init__.py │ │ └── amoebaNet │ │ ├── __init__.py │ │ ├── genotype.py │ │ └── utils.py └── partitioned │ ├── __init__.py │ ├── bert_base_uncaseds_384_2p_bw12_async_pipedream.py │ ├── bert_base_uncaseds_384_2p_bw12_pipedream.py │ ├── bert_large_uncased_squad_8p.py │ ├── bert_large_uncased_whole_word_maskings_384_2p_bw12_async_pipedream.py │ ├── bert_large_uncased_whole_word_maskings_384_2p_bw12_pipedream.py │ ├── bert_large_uncased_whole_word_maskings_384_4p_bw12_async_pipedream.py │ ├── bert_large_uncased_whole_word_maskings_384_4p_bw12_pipedream.py │ ├── bert_large_uncased_whole_word_maskings_384_8p_bw12_async_pipedream.py │ ├── bert_large_uncased_whole_word_maskings_384_8p_bw12_pipedream.py │ ├── cep_netN50_C20000_4p_bw12_metis.py │ ├── layer_graph_t5_3b_tied_lmheads_320_8_8p_bw12_async_squad1_mpipe.py │ ├── layer_graph_t5_3b_tied_lmheads_320_8_8p_bw12_squad1_pipedream.py │ ├── layer_graph_t5_3b_tied_lmheads_512_4_8p_bw12_async_squad1_mpipe.py │ ├── layer_graph_t5_3b_tied_lmheads_512_4_8p_bw12_squad1_pipedream.py │ ├── layer_graph_t5_3b_tied_lmheads_64_4_8p_bw12_async_squad1_mpipe.py │ ├── layer_graph_t5_3b_tied_lmheads_64_4_8p_bw12_squad1_pipedream.py │ ├── old_gpt2xl_8p_untied.py │ ├── op_t5_3b_tied_lmheads_320_8_8p_bw12_async_squad1_mpipe.py │ ├── op_t5_3b_tied_lmheads_512_4_8p_bw12_async_squad1_mpipe.py │ ├── op_t5_3b_tied_lmheads_64_4_8p_bw12_async_squad1_mpipe.py │ ├── t5_3b_tied_lmheads_320_8_8p_bw12_async_squad1_mpipe.py │ ├── t5_3b_tied_lmheads_320_8_8p_bw12_squad1.py │ ├── t5_3b_tied_lmheads_320_8_8p_bw12_squad1_pipedream.py │ ├── t5_3b_tied_lmheads_320_8_8p_bw12_squad1_virtual_stages.py │ ├── t5_3b_tied_lmheads_512_4_8p_bw12_async_squad1_mpipe.py │ ├── t5_3b_tied_lmheads_512_4_8p_bw12_async_squad1_mpipe_L32.py │ ├── t5_3b_tied_lmheads_512_4_8p_bw12_squad1_acyclic.py │ ├── t5_3b_tied_lmheads_512_4_8p_bw12_squad1_pipedream.py │ ├── t5_3b_tied_lmheads_512_4_8p_bw12_squad1_virtual_stages.py │ ├── t5_3b_tied_lmheads_64_4_8p_bw12_async_squad1_mpipe.py │ ├── t5_3b_tied_lmheads_64_4_8p_bw12_squad1.py │ ├── t5_3b_tied_lmheads_64_4_8p_bw12_squad1_acyclic.py │ ├── t5_3b_tied_lmheads_64_4_8p_bw12_squad1_pipedream.py │ ├── t5_3b_tied_lmheads_64_4_8p_bw12_squad1_virtual_stages.py │ ├── t5_small_tied_lmhead_4p_bw12_async_squad1.py │ ├── t5_small_tied_lmheads_512_4_3p_bw12_squad1_virtual_stages.py │ ├── vit_base_patch16_384_in21k_imagenet_384c384_8p_bw12_gpipe_acyclic.py │ ├── vit_large_patch32_384_in21k_cifar100_384c384_8p_bw12_async_acyclic.py │ ├── vit_large_patch32_384_in21k_imagenet_384c384_8p_bw12_async_acyclic.py │ ├── vit_large_patch32_384_in21k_imagenet_384c384_8p_bw12_gpipe_acyclic.py │ ├── wrn_16x4_c10_p2.py │ └── wrn_28x10_c100_dr03_gnc32_4p_bw12_pipedream.py ├── pipe ├── .gitignore ├── README.md ├── __init__.py ├── configs │ ├── __init__.py │ ├── all_options.json │ ├── bert │ │ ├── squad │ │ │ ├── bert_base_uncased_2p │ │ │ │ ├── hetprofiling │ │ │ │ │ ├── common.json │ │ │ │ │ └── stale.json │ │ │ │ └── pipedream │ │ │ │ │ ├── common.json │ │ │ │ │ └── stale.json │ │ │ ├── bert_large_uncased_wmm │ │ │ │ ├── aggmsnag.json │ │ │ │ ├── common.json │ │ │ │ ├── gpipe.json │ │ │ │ ├── msnag.json │ │ │ │ ├── stale.json │ │ │ │ └── ws_msnag_ga.json │ │ │ ├── bert_large_uncased_wwm_2m │ │ │ │ ├── common.json │ │ │ │ └── stale.json │ │ │ ├── bert_large_uncased_wwm_2p │ │ │ │ ├── aggmsnag.json │ │ │ │ ├── common.json │ │ │ │ ├── ftpipe.json │ │ │ │ ├── gpipe.json │ │ │ │ ├── pipedream.json │ │ │ │ └── stale.json │ │ │ ├── bert_large_uncased_wwm_4p │ │ │ │ ├── aggmsnag.json │ │ │ │ ├── 
common.json │ │ │ │ ├── ftpipe.json │ │ │ │ ├── gpipe.json │ │ │ │ ├── pipedream.json │ │ │ │ └── stale.json │ │ │ └── bert_large_uncased_wwm_8p │ │ │ │ ├── aggmsnag.json │ │ │ │ ├── common.json │ │ │ │ ├── ftpipe.json │ │ │ │ ├── ftpipe_layer.json │ │ │ │ ├── gpipe.json │ │ │ │ ├── pipedream.json │ │ │ │ └── stale.json │ │ └── squad2 │ │ │ └── bert_large_uncased_wmm │ │ │ ├── aggmsnag.json │ │ │ ├── common.json │ │ │ ├── gpipe.json │ │ │ ├── msnag.json │ │ │ ├── stale.json │ │ │ ├── ws.json │ │ │ └── ws_msnag_ga.json │ ├── cep │ │ └── common.json │ ├── cv │ │ ├── cifar10 │ │ │ ├── common.json │ │ │ └── wrn_16x4_c10_p2 │ │ │ │ ├── EXAMPLE.md │ │ │ │ └── stale_nr.json │ │ ├── cifar100 │ │ │ └── wrn28x10 │ │ │ │ ├── README.md │ │ │ │ ├── common.json │ │ │ │ ├── msnag.json │ │ │ │ ├── msnag_optimizer.json │ │ │ │ ├── msnag_ws.json │ │ │ │ ├── no_recomputation │ │ │ │ ├── msnag_nr.json │ │ │ │ ├── msnag_ws.json │ │ │ │ ├── norecomp.json │ │ │ │ ├── stale_nr.json │ │ │ │ ├── ws.json │ │ │ │ ├── ws_ga.json │ │ │ │ └── ws_msnag_ga.json │ │ │ │ ├── stale.json │ │ │ │ ├── stale_optimizer.json │ │ │ │ ├── ws.json │ │ │ │ ├── ws_ga.json │ │ │ │ ├── ws_msnag_ga.json │ │ │ │ └── ws_msnag_ga_jfl.json │ │ └── imagenet │ │ │ └── weight_stashing_msnag_gap_aware.json │ ├── dummy.json │ ├── lm │ │ └── wt2 │ │ │ ├── gpt2 │ │ │ └── tied │ │ │ │ ├── common.json │ │ │ │ ├── gpipe.json │ │ │ │ ├── msnag.json │ │ │ │ ├── msnag_ws.json │ │ │ │ ├── seq.json │ │ │ │ ├── stale.json │ │ │ │ ├── ws.json │ │ │ │ ├── ws_ga.json │ │ │ │ ├── ws_msnag_ga.json │ │ │ │ └── ws_msnag_ga_jfl.json │ │ │ └── gpt2xl │ │ │ ├── tied │ │ │ ├── common.json │ │ │ ├── gpipe.json │ │ │ ├── msnag.json │ │ │ ├── msnag_ws.json │ │ │ ├── seq.json │ │ │ ├── stale.json │ │ │ ├── ws.json │ │ │ ├── ws_msnag_ga.json │ │ │ └── ws_msnag_ga_jfl.json │ │ │ ├── untied │ │ │ ├── aggmsnag.json │ │ │ ├── common.json │ │ │ ├── gpipe.json │ │ │ ├── msnag.json │ │ │ ├── msnag_ws.json │ │ │ ├── seq.json │ │ │ ├── stale.json │ │ │ ├── ws.json │ │ │ ├── ws_msnag_ga.json │ │ │ └── ws_msnag_ga_jfl.json │ │ │ └── untied_s512 │ │ │ ├── common.json │ │ │ └── stale.json │ ├── parse_json_config.py │ ├── python_configs │ │ ├── __init__.py │ │ └── configs.py │ ├── t5 │ │ ├── new_t5_exp │ │ │ ├── mpipe │ │ │ │ ├── boolq │ │ │ │ │ ├── common_layer_graph.json │ │ │ │ │ ├── common_op_graph.json │ │ │ │ │ ├── gpipe_layer_graph.json │ │ │ │ │ ├── gpipe_op_graph.json │ │ │ │ │ ├── stale_layer_graph.json │ │ │ │ │ └── stale_op_graph.json │ │ │ │ ├── multirc │ │ │ │ │ ├── common_layer_graph.json │ │ │ │ │ ├── common_op_graph.json │ │ │ │ │ ├── gpipe_layer_graph.json │ │ │ │ │ ├── gpipe_op_graph.json │ │ │ │ │ ├── stale_layer_graph.json │ │ │ │ │ └── stale_op_graph.json │ │ │ │ ├── rte │ │ │ │ │ ├── common_layer_graph.json │ │ │ │ │ ├── common_op_graph.json │ │ │ │ │ ├── gpipe_layer_graph.json │ │ │ │ │ ├── gpipe_op_graph.json │ │ │ │ │ ├── stale_layer_graph.json │ │ │ │ │ └── stale_op_graph.json │ │ │ │ └── wic │ │ │ │ │ ├── common_layer_graph.json │ │ │ │ │ ├── common_op_graph.json │ │ │ │ │ ├── gpipe_layer_graph.json │ │ │ │ │ ├── gpipe_op_graph.json │ │ │ │ │ ├── stale_layer_graph.json │ │ │ │ │ └── stale_op_graph.json │ │ │ ├── seq │ │ │ │ ├── boolq │ │ │ │ │ ├── common.json │ │ │ │ │ ├── gpipe_new.json │ │ │ │ │ └── pipedream_stale.json │ │ │ │ ├── multirc │ │ │ │ │ ├── common.json │ │ │ │ │ ├── gpipe_new.json │ │ │ │ │ └── pipedream_stale.json │ │ │ │ ├── rte │ │ │ │ │ ├── common.json │ │ │ │ │ ├── gpipe_new.json │ │ │ │ │ └── pipedream_stale.json │ │ │ │ └── wic │ │ │ │ │ 
├── common.json │ │ │ │ │ ├── gpipe_new.json │ │ │ │ │ └── pipedream_stale.json │ │ │ └── seq_op_graph │ │ │ │ ├── boolq │ │ │ │ ├── common.json │ │ │ │ ├── gpipe_new.json │ │ │ │ └── pipedream_stale.json │ │ │ │ ├── multirc │ │ │ │ ├── common.json │ │ │ │ ├── gpipe_new.json │ │ │ │ └── pipedream_stale.json │ │ │ │ ├── rte │ │ │ │ ├── common.json │ │ │ │ ├── gpipe_new.json │ │ │ │ └── pipedream_stale.json │ │ │ │ └── wic │ │ │ │ ├── common.json │ │ │ │ ├── gpipe_new.json │ │ │ │ └── pipedream_stale.json │ │ ├── t5_3b_p8 │ │ │ ├── seq │ │ │ │ ├── boolq │ │ │ │ │ ├── common.json │ │ │ │ │ ├── gpipe.json │ │ │ │ │ ├── gpipe_new.json │ │ │ │ │ ├── pipedream_stale.json │ │ │ │ │ └── stale.json │ │ │ │ ├── cola │ │ │ │ │ ├── common.json │ │ │ │ │ ├── gpipe.json │ │ │ │ │ ├── seq.json │ │ │ │ │ └── stale.json │ │ │ │ ├── multirc │ │ │ │ │ ├── common.json │ │ │ │ │ ├── gpipe.json │ │ │ │ │ ├── gpipe_new.json │ │ │ │ │ ├── pipedream_stale.json │ │ │ │ │ ├── seq.json │ │ │ │ │ └── stale.json │ │ │ │ ├── rte │ │ │ │ │ ├── common.json │ │ │ │ │ ├── gpipe.json │ │ │ │ │ ├── gpipe_new.json │ │ │ │ │ ├── pipedream_stale.json │ │ │ │ │ ├── seq.json │ │ │ │ │ └── stale.json │ │ │ │ ├── rte_super_glue │ │ │ │ │ ├── common.json │ │ │ │ │ ├── gpipe.json │ │ │ │ │ ├── gpipe_new.json │ │ │ │ │ ├── pipedream_stale.json │ │ │ │ │ ├── seq.json │ │ │ │ │ └── stale.json │ │ │ │ └── wic │ │ │ │ │ ├── common.json │ │ │ │ │ ├── gpipe.json │ │ │ │ │ ├── gpipe_new.json │ │ │ │ │ ├── pipedream_stale.json │ │ │ │ │ └── stale.json │ │ │ └── virtual_stages │ │ │ │ ├── boolq │ │ │ │ ├── common.json │ │ │ │ ├── gpipe.json │ │ │ │ ├── gpipe_2.json │ │ │ │ ├── stale.json │ │ │ │ └── vs_stale.json │ │ │ │ ├── multirc │ │ │ │ ├── common.json │ │ │ │ ├── gpipe.json │ │ │ │ └── stale.json │ │ │ │ ├── rte │ │ │ │ ├── common.json │ │ │ │ ├── gpipe.json │ │ │ │ └── stale.json │ │ │ │ ├── superglue_rte │ │ │ │ ├── common.json │ │ │ │ ├── gpipe.json │ │ │ │ └── stale.json │ │ │ │ └── wic │ │ │ │ ├── common.json │ │ │ │ ├── gpipe.json │ │ │ │ └── stale.json │ │ ├── t5_base │ │ │ └── seq │ │ │ │ └── boolq │ │ │ │ ├── common.json │ │ │ │ ├── gpipe.json │ │ │ │ ├── gpipe_new.json │ │ │ │ ├── pipedream_stale.json │ │ │ │ └── stale.json │ │ ├── t5_mpipe │ │ │ ├── L=32 │ │ │ │ ├── common.json │ │ │ │ └── stale.json │ │ │ ├── boolq │ │ │ │ ├── common.json │ │ │ │ ├── gpipe.json │ │ │ │ └── stale.json │ │ │ ├── common.json │ │ │ ├── multirc │ │ │ │ ├── common.json │ │ │ │ ├── gpipe.json │ │ │ │ └── stale.json │ │ │ ├── rte │ │ │ │ ├── common.json │ │ │ │ ├── gpipe.json │ │ │ │ └── stale.json │ │ │ ├── stale.json │ │ │ └── wic │ │ │ │ ├── common.json │ │ │ │ ├── gpipe.json │ │ │ │ └── stale.json │ │ └── t5_small │ │ │ ├── README.md │ │ │ ├── adafactor │ │ │ ├── common.json │ │ │ └── stale.json │ │ │ ├── common.json │ │ │ ├── rte │ │ │ ├── common.json │ │ │ └── stale.json │ │ │ └── stale.json │ └── vit │ │ ├── cifar100_384.json │ │ ├── cifar10_384.json │ │ ├── cv.json │ │ ├── cv_dcgn_global.json │ │ ├── cv_dcgn_local.json │ │ ├── cv_dcgn_local_prop.json │ │ ├── imagenet_384.json │ │ ├── tst_gpipe.json │ │ ├── tst_gpipe_adafactor_cifar100.json │ │ ├── tst_gpipe_cifar100.json │ │ ├── tst_gpipe_dcgn_global.json │ │ ├── tst_gpipe_dcgn_global_cifar100.json │ │ ├── tst_gpipe_dcgn_local.json │ │ ├── tst_gpipe_dcgn_local_prop_cifar100.json │ │ ├── tst_stale.json │ │ ├── vit_base_patch16_384_in21k_imagenet_384c384_8p_bw12_gpipe_acyclic.json │ │ ├── vit_large_patch32_384_in21k_imagenet_384c384_8p_bw12_async_acyclic.json │ │ └── 
vit_large_patch32_384_in21k_imagenet_384c384_8p_bw12_gpipe_acyclic.json ├── data │ ├── __init__.py │ ├── cep.py │ ├── cv.py │ ├── datasets.py │ ├── download │ │ ├── __init__.py │ │ ├── download_datasets.py │ │ └── download_glue_data.py │ ├── from_args_and_kw.py │ ├── glue.py │ ├── hardcoded_dirs.py │ ├── lm.py │ ├── packing.py │ ├── squad.py │ ├── t5 │ │ ├── __init__.py │ │ ├── analyze_t5_packing.py │ │ ├── new_t5_tfds_eval.py │ │ ├── preproc.py │ │ ├── t5_tfds.py │ │ └── t5_tfds_eval.py │ └── vit.py ├── env_utils │ ├── create_env_new_server_new.sh │ ├── deprecated │ │ ├── Makefile │ │ ├── create_env_new_server.sh │ │ ├── deprecated_create_env.sh │ │ ├── old_environment_mpi1.yml │ │ └── old_environment_mpi2.yml │ ├── docker │ │ ├── .gitignore │ │ ├── Dockerfile │ │ ├── Dockerfile_from_source │ │ ├── ompi-recipe │ │ │ ├── Dockerfile │ │ │ ├── ompi-cuda │ │ │ │ ├── build.sh │ │ │ │ └── meta.yaml │ │ │ └── ompi_conda.sh │ │ └── pytorch_from_source.sh │ ├── env_add_to_build_from_source.yml │ ├── env_without_mpi.yml │ └── jupyter-lab.sh ├── eval.py ├── experiments │ ├── __init__.py │ ├── analysis │ │ ├── __init__.py │ │ ├── all_results_to_df.py │ │ ├── gen_plots.py │ │ ├── get_results.py │ │ ├── plot.py │ │ ├── plot_sns.py │ │ └── rectangles.ipynb │ ├── experiments.py │ └── t5 │ │ ├── __init__.py │ │ ├── annotation_plot.py │ │ ├── ftpipe_plots.py │ │ ├── l_study.ipynb │ │ ├── nets.ipynb │ │ └── tta_barplot.py ├── main.py ├── misc │ ├── _full_example.py │ ├── deepspeed.py │ ├── filelogger.py │ ├── libmpi.py │ ├── many_isend.py │ ├── mem_on_cpu.py │ ├── mesh_failed_runs │ │ ├── git_log_head.txt │ │ ├── run.sh │ │ ├── run_3b_failed.sh │ │ └── run_3b_failed_recom_omm.sh │ ├── p2p_bw_mat.sh │ ├── print_partition_layers_scopes.py │ ├── replicate_experiments_deprecated.py │ ├── rungrid_bert_squad.py │ ├── sanity_check.py │ ├── test_mpi │ │ ├── README.md │ │ ├── nodes.txt │ │ ├── pytorch_distributed.py │ │ └── pytorch_distributed_cuda_aware.py │ ├── to_partition.sh │ ├── transformers │ │ ├── TODO.md │ │ ├── analyze_res.py │ │ ├── bert-large │ │ │ ├── run.sh │ │ │ └── run_squad.py │ │ ├── run_language_modeling.py │ │ └── run_lm.sh │ ├── tst_ibroadcast.py │ └── tst_isend.py ├── models │ ├── __init__.py │ ├── load_pipeline_weights_to_hf.py │ ├── naive_block_model_parallel.py │ ├── parse_config.py │ ├── registery │ │ ├── __init__.py │ │ ├── cep.py │ │ ├── cv.py │ │ ├── dummy.py │ │ ├── hf.py │ │ ├── model_handler.py │ │ └── vit.py │ ├── simple_partitioning_config.py │ ├── t5_for_generation.py │ ├── transformers_cfg.py │ ├── transformers_utils.py │ └── vit_np_to_pytorch.py ├── optimizers │ ├── __init__.py │ ├── adafactor.py │ ├── adam.py │ ├── adam_record.py │ ├── adam_record_max_lr.py │ ├── adamw.py │ ├── adamw_record.py │ ├── adamw_record_without_step.py │ ├── lr_scheduler.py │ ├── required.py │ ├── sgd.py │ ├── sutskever_modified_sgd.py │ └── utils.py ├── pipeline │ ├── __init__.py │ ├── communication │ │ ├── __init__.py │ │ ├── buffer.py │ │ ├── buffered_comm.py │ │ ├── common_simple_comm.py │ │ ├── grouper.py │ │ ├── interface.py │ │ ├── multiprocessing.py │ │ ├── p2p.py │ │ ├── tags.py │ │ └── wrapper.py │ ├── data_propagation │ │ ├── __init__.py │ │ ├── automatic_prop.py │ │ ├── automatic_prop_non_contig.py │ │ ├── cv_target_prop.py │ │ └── interface.py │ ├── distributed_clip_grad_norm.py │ ├── dp_sim │ │ ├── __init__.py │ │ ├── convert.py │ │ └── simulated_dp_batchnorm.py │ ├── gap_aware │ │ ├── __init__.py │ │ ├── adam_gap_aware.py │ │ ├── adam_gap_aware_max_lr.py │ │ ├── adamw_gap_aware.py │ │ ├── 
gap_aware_hook.py │ │ ├── interface.py │ │ └── sgd_gap_aware.py │ ├── monkey_patch │ │ ├── __init__.py │ │ ├── dummy_forward_monkey_patcher.py │ │ ├── find_modules.py │ │ ├── patch.py │ │ └── utils.py │ ├── partition.py │ ├── partition_manager.py │ ├── replace_inplace.py │ ├── rng_stasher.py │ ├── trainers │ │ ├── __init__.py │ │ ├── bert_squad_trainer.py │ │ ├── cep_trainer.py │ │ ├── cv_trainer.py │ │ ├── gap_aware_trainer.py │ │ ├── glue_trainer.py │ │ ├── grad_norm │ │ │ ├── __init__.py │ │ │ ├── global_grad_norm.py │ │ │ ├── local_grad_norm.py │ │ │ └── local_grad_norm_prop.py │ │ ├── interface.py │ │ ├── lm_trainer.py │ │ ├── statistics │ │ │ ├── __init__.py │ │ │ ├── cv.py │ │ │ ├── gap.py │ │ │ ├── glue.py │ │ │ ├── interface.py │ │ │ ├── lm.py │ │ │ ├── squad.py │ │ │ └── utils.py │ │ ├── t5_trainer.py │ │ └── utils.py │ ├── true_weights_storage.py │ ├── util.py │ ├── weight_prediction │ │ ├── README.md │ │ ├── __init__.py │ │ ├── adafactor.py │ │ ├── adam.py │ │ ├── adamw.py │ │ ├── cow_dict.py │ │ ├── interface.py │ │ ├── sched_aware.py │ │ ├── sgd.py │ │ ├── sgd_wd.py │ │ └── sym_pred_optimizers │ │ │ ├── __init__.py │ │ │ ├── auto_lambdify.py │ │ │ └── sympy_optimizer.py │ ├── weight_stashing │ │ ├── __init__.py │ │ └── weight_stashing.py │ └── work_schedulers │ │ ├── __init__.py │ │ ├── analysis.py │ │ └── schedulers.py ├── prepare_pipeline.py ├── run │ ├── __init__.py │ ├── gpu_queue.py │ └── helper.py └── train.py ├── t5_used_scripts_example ├── run_experiments.sh ├── run_op_vs_layer_exp_wic.sh ├── to_partition_mpipe_layergraph_t5_3b_boolq_multirc.sh ├── to_partition_mpipe_layergraph_t5_3b_rte.sh ├── to_partition_mpipe_layergraph_t5_3b_wic.sh ├── to_partition_mpipe_t5_3b_opgraph_boolq_multirc.sh ├── to_partition_mpipe_t5_3b_opgraph_rte.sh ├── to_partition_mpipe_t5_3b_opgraph_wic.sh ├── to_partition_mpipe_t5_base.sh ├── to_partition_spipe_OP_t5_3b_boolq_multirc.sh ├── to_partition_spipe_OP_t5_3b_rte.sh ├── to_partition_spipe_OP_t5_3b_wic.sh ├── to_partition_spipe_t5_3b_boolq_multirc.sh ├── to_partition_spipe_t5_3b_rte.sh ├── to_partition_spipe_t5_3b_wic.sh ├── to_partition_spipe_t5_base.sh ├── to_run.sh └── to_run_again_wic.sh └── tests ├── __init__.py └── test_our_vit_convert.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ; select = B,C,E,F,P,T4,W,B9 3 | max-line-length = 120 4 | # C408 ignored because we like the dict keyword argument syntax 5 | # E501 is not flexible enough, we're using B950 instead 6 | # ; ignore = 7 | # ; E203,E305,E402,E501,E721,E741,F403,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303, 8 | # ; # these ignores are from flake8-bugbear; please fix! 9 | # ; B007,B008, 10 | # ; # these ignores are from flake8-comprehensions; please fix! 
11 | # ; C400,C401,C402,C403,C404,C405,C407,C411,C413,C414,C415 12 | per-file-ignores = __init__.py: F401 13 | exclude = *.pyi,.git,configs,cpp -------------------------------------------------------------------------------- /autopipe/.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/* 2 | .ipynb_checkpoints/* 3 | */__pycache__/* 4 | ideas/* 5 | .idea/ 6 | .pytest_cache/** 7 | *.pyc 8 | experiment/stanford_car_dataset_images_in_224x224/* 9 | experiments/stanford-car-dataset-by-classes-folder-224/* 10 | playground.py 11 | *.log 12 | glue_data/** 13 | wikitext-2-raw/** 14 | papers/* 15 | .mypy_cache/ 16 | 17 | old/ 18 | squad1/* 19 | squad2/* 20 | cached_train_bert* 21 | results/ 22 | py_sbatch.sh 23 | glue_ds.* 24 | original_acyclic_partitioning/* 25 | dynamic_acyclic_partitioning/* 26 | datasets/* 27 | megatron/data/* 28 | megatron/megatron_11b.tar.gz 29 | -------------------------------------------------------------------------------- /autopipe/README.md: -------------------------------------------------------------------------------- 1 | # Partitioning 2 | Readme WIP. 3 | 4 | Available algorithms under `autopipe.autopipe.model_partitioning`: 5 | - `mpipe` (mixed-pipe) 6 | - `pipedream` 7 | - `metis` 8 | - `acyclic` 9 | 10 | ## Pitfalls 11 | Sometimes ops are traced with `training=True`, so replace them, e.g.: 12 | 13 | ```bash 14 | sed "s/training=True/training=self.training/" op_* | grep training= 15 | ``` -------------------------------------------------------------------------------- /autopipe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saareliad/FTPipe/986014df3bb985595f22139ab355a48ca00dc12f/autopipe/__init__.py -------------------------------------------------------------------------------- /autopipe/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_partition_analysis import run_analysis 2 | from .analysis_utils import convert_to_analysis_format -------------------------------------------------------------------------------- /autopipe/analysis/pipedream_complexity.py: -------------------------------------------------------------------------------- 1 | """Since PipeDream does an exhaustive search, 2 | for small graphs (no mixed-pipe, a single hardware hierarchy level) 3 | it may be better to just use it 4 | (however, it models communication incorrectly). 5 | Its complexity is (simplified from PipeDream's paper) 6 | L*N^3*m^2 7 | 8 | N - graph nodes (operations/layers) 9 | m - GPUs per level 10 | L - number of levels 11 | 12 | """ 13 | 14 | def pipedream_extimated_time(N, m, L=1): 15 | # compute a multiplicative factor from ResNet-50, PipeDream's largest reported network (8 seconds) 16 | baseline_complexity = 709789824 # resnet (N=177, m=8, L=2) 17 | baseline_seconds = 8 18 | 19 | complexity = L * N**3 * m**2 20 | estimated_time = baseline_seconds * (complexity / baseline_complexity) 21 | 22 | return estimated_time 23 | -------------------------------------------------------------------------------- /autopipe/analysis/profile_replica.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from autopipe.autopipe import move_tensors 4 | from autopipe.autopipe.utils import flatten 5 | 6 | 7 | def cuda_computation_times(model, inputs): 8 | """ measure forward/backward time of a partition on the GPU 9 | """ 10 | if not isinstance(inputs, (tuple, list, dict)): 11 |
inputs = (inputs,) 12 | 13 | 14 | model.cuda() 15 | # now we move inputs to GPU 16 | inputs = move_tensors(inputs, 'cuda') 17 | start = torch.cuda.Event(enable_timing=True) 18 | end = torch.cuda.Event(enable_timing=True) 19 | 20 | torch.cuda.synchronize(device='cuda') 21 | start.record() 22 | if isinstance(inputs, (tuple,list)): 23 | outputs = model(*inputs) 24 | elif isinstance(inputs, dict): 25 | outputs = model(**inputs) 26 | else: 27 | raise NotImplementedError(str(type(inputs))) 28 | 29 | # TODO: can generate targets beforehand to use cross_entropy... 30 | # TODO: replace randn_like with pre-generated tensors 31 | # loss = sum((F.cross_entropy(o, torch.randn_like(o)) for o in filter( 32 | # lambda t: isinstance(t, torch.Tensor) and t.requires_grad, 33 | # flatten(outputs)))) 34 | 35 | loss = sum((o.norm() for o in filter( 36 | lambda t: isinstance(t, torch.Tensor) and t.requires_grad, 37 | flatten(outputs)))) # FIXME: just use real loss. 38 | loss.backward() 39 | end.record() 40 | torch.cuda.synchronize(device='cuda') 41 | fb_time = (start.elapsed_time(end)) 42 | 43 | return fb_time -------------------------------------------------------------------------------- /autopipe/analysis/ssgd_analysis.py: -------------------------------------------------------------------------------- 1 | """FIXME: DEPRECATED, not accurate, probably incorrect""" 2 | import math 3 | 4 | from .profile_replica import cuda_computation_times 5 | 6 | 7 | # NOTE: can do a similar analysis for ZeRO (1,2,3), 8 | # (multiply communication by x1.5 according to what they claim) 9 | 10 | 11 | def run_analysis(sample, model, n_workers, bw_GBps=12, verbose=True): 12 | send_mb = sum([(p.nelement() * p.element_size()) 13 | for p in model.parameters()]) / 1e6 14 | 15 | single_send_time = send_mb / bw_GBps 16 | 17 | # FIXME: this is not correct at all, 18 | # because we can do it with reduce-broadcast 19 | num_sends = n_workers * math.log2(n_workers) 20 | 21 | total_send_time = num_sends * single_send_time 22 | 23 | comp_time = cuda_computation_times(model, sample) 24 | 25 | # NOTE: this is a very naive analysis; 26 | # from PyTorch >1.3 they overlap communication with computation 27 | # (gaining around +30% speedup). 28 | utilization = comp_time / (comp_time + total_send_time) 29 | 30 | expected_speedup = utilization * n_workers 31 | 32 | # TODO: print something...
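# For reference (an illustrative note, not used in the estimate above): a
# bandwidth-optimal ring all-reduce, one way to realize the reduce-broadcast
# mentioned in the FIXME, sends roughly 2 * (n_workers - 1) / n_workers
# model-sizes per worker, overlapped across workers, i.e.
# ring_total_send_time = 2 * (n_workers - 1) / n_workers * single_send_time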
33 | 34 | d = dict(n_workers=n_workers, 35 | send_mb=send_mb, 36 | single_send_time=single_send_time, 37 | num_sends=num_sends, 38 | total_send_time=total_send_time, 39 | comp_time=comp_time, 40 | utilization=utilization, 41 | expected_speedup=expected_speedup) 42 | 43 | return expected_speedup, d 44 | -------------------------------------------------------------------------------- /autopipe/autopipe/__init__.py: -------------------------------------------------------------------------------- 1 | from .cache_utils import compute_and_cache, compute_and_maybe_cache, PickleCache, GraphCache 2 | from .compiler import compile_partitioned_model 3 | from .model_partitioning import metis_partition, acyclic_partition, partition_2dbin_pack, partition_mpipe, \ 4 | analyze_n_clusters, \ 5 | get_weight_functions 6 | from .model_partitioning.async_pipeline import partition_and_match_weights_until_last_partition_is_with_no_recomputation 7 | from .model_profiling import Graph, Node, profile_network, GraphProfiler, trace_module, NodeWeightFunction, \ 8 | EdgeWeightFunction 9 | from .model_profiling.graph_executor import execute_graph, pre_hook_factory, post_hook_factory 10 | from .model_profiling.infer_req_grad import infer_req_grad 11 | from .utils import move_tensors, ExecTimes, FullExecTimes 12 | 13 | 14 | -------------------------------------------------------------------------------- /autopipe/autopipe/compiler/__init__.py: -------------------------------------------------------------------------------- 1 | from .compile_partitioned_model import compile_partitioned_model 2 | -------------------------------------------------------------------------------- /autopipe/autopipe/compiler/compile_normal_model_function.py: -------------------------------------------------------------------------------- 1 | """ This file should be responsible for compiling normal model function""" 2 | 3 | -------------------------------------------------------------------------------- /autopipe/autopipe/model_partitioning/__init__.py: -------------------------------------------------------------------------------- 1 | from .metis import metis_partition 2 | from .acyclic import acyclic_partition 3 | from .mixed_pipe import partition_2dbin_pack, analyze_n_clusters, partition_mpipe 4 | from .heuristics import get_weight_functions 5 | from . 
import utils 6 | 7 | __all__ = ["acyclic_partition", "metis_partition", "partition_2dbin_pack", "partition_mpipe", "analyze_n_clusters", 8 | "get_weight_functions"] 9 | -------------------------------------------------------------------------------- /autopipe/autopipe/model_partitioning/acyclic/__init__.py: -------------------------------------------------------------------------------- 1 | from .acyclic_partitioning import ALGORITHM, acyclic_partition, Objective, META_ALGORITH, Constraint 2 | from .data_structures import QuotientGraph 3 | -------------------------------------------------------------------------------- /autopipe/autopipe/model_partitioning/metis/__init__.py: -------------------------------------------------------------------------------- 1 | from .metis_partitioning import metis_partition 2 | -------------------------------------------------------------------------------- /autopipe/autopipe/model_partitioning/mixed_pipe/__init__.py: -------------------------------------------------------------------------------- 1 | from .partition_mixed_pipe_v1_clusters import partition_2dbin_pack, analyze_n_clusters 2 | from .partition_mixed_pipe_v2 import partition_mpipe 3 | -------------------------------------------------------------------------------- /autopipe/autopipe/model_partitioning/pipedream/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saareliad/FTPipe/986014df3bb985595f22139ab355a48ca00dc12f/autopipe/autopipe/model_partitioning/pipedream/__init__.py -------------------------------------------------------------------------------- /autopipe/autopipe/model_profiling/__init__.py: -------------------------------------------------------------------------------- 1 | from .control_flow_graph import Graph, NodeTypes, Node, NodeWeightFunction, EdgeWeightFunction 2 | from .graph_executor import execute_graph, PostHook, PreHook 3 | from .network_profiler import profile_network 4 | from .profiler import GraphProfiler 5 | from .tracer import trace_module, register_new_traced_function, used_namespaces, register_new_explicit_untraced_function 6 | -------------------------------------------------------------------------------- /autopipe/autopipe/model_profiling/infer_is_contiguous.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Dict, Any 2 | 3 | import torch 4 | 5 | from .control_flow_graph import Node, Graph 6 | from .graph_executor import execute_graph, pre_hook_factory, post_hook_factory 7 | from ..utils import nested_map, detach_tensors 8 | 9 | 10 | def infer_is_contiguous(graph: Graph, model: torch.nn.Module, args=None, kwargs=None): 11 | if args is None: 12 | args = () 13 | if kwargs is None: 14 | kwargs = dict() 15 | 16 | with torch.no_grad(): 17 | visitor = Visitor() 18 | execute_graph(model, graph, model_args=args, model_kwargs=kwargs, pre_hook=pre_hook_factory(visitor.prehook), 19 | post_hook=post_hook_factory(visitor.posthook)) 20 | 21 | 22 | class Visitor(): 23 | def prehook(self, node: Node, function: Callable, args: tuple, kwargs: Dict): 24 | for n, a in zip(node.args, args): 25 | # the or statement should not be necessary 26 | n.is_contiguous = n.is_contiguous or Visitor.is_contiguous(a) 27 | 28 | for n, kws in node.kwargs.items(): 29 | v = kwargs[kws[0]] 30 | # the or statement should not be necessary 31 | n.is_contiguous = n.is_contiguous or Visitor.is_contiguous(v) 32 | 33 | return detach_tensors(args), 
detach_tensors(kwargs) 34 | 35 | def posthook(self, node: Node, function: Callable, args: tuple, kwargs: Dict, outputs: Any): 36 | node.is_contiguous = Visitor.is_contiguous(outputs) 37 | 38 | return detach_tensors(outputs) 39 | 40 | @staticmethod 41 | def is_contiguous(ts): 42 | def f(t): 43 | if isinstance(t, torch.Tensor): 44 | return t.is_contiguous() 45 | return False 46 | 47 | return nested_map(f, ts) 48 | -------------------------------------------------------------------------------- /autopipe/autopipe/model_profiling/infer_req_grad.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Dict, Any 2 | 3 | import torch 4 | 5 | from .control_flow_graph import Node, Graph 6 | from .graph_executor import execute_graph, pre_hook_factory, post_hook_factory 7 | from ..utils import nested_map, detach_tensors 8 | 9 | 10 | def infer_req_grad(graph: Graph, model: torch.nn.Module, args=None, kwargs=None): 11 | if args is None: 12 | args = () 13 | if kwargs is None: 14 | kwargs = dict() 15 | 16 | with torch.enable_grad(): 17 | visitor = Visitor() 18 | execute_graph(model, graph, model_args=args, model_kwargs=kwargs, pre_hook=pre_hook_factory(visitor.prehook), 19 | post_hook=post_hook_factory(visitor.posthook)) 20 | 21 | 22 | class Visitor(): 23 | def prehook(self, node: Node, function: Callable, args: tuple, kwargs: Dict): 24 | for n, a in zip(node.args, args): 25 | # the or statement should not be necessary 26 | n.req_grad = n.req_grad or Visitor.req_grad(a) 27 | 28 | for n, kws in node.kwargs.items(): 29 | v = kwargs[kws[0]] 30 | # the or statement should not be necessary 31 | n.req_grad = n.req_grad or Visitor.req_grad(v) 32 | 33 | return detach_tensors(args), detach_tensors(kwargs) 34 | 35 | def posthook(self, node: Node, function: Callable, args: tuple, kwargs: Dict, outputs: Any): 36 | node.req_grad = Visitor.req_grad(outputs) 37 | 38 | return detach_tensors(outputs) 39 | 40 | @staticmethod 41 | def req_grad(ts): 42 | def f(t): 43 | if isinstance(t, torch.Tensor): 44 | return t.requires_grad 45 | return False 46 | 47 | return nested_map(f, ts) 48 | -------------------------------------------------------------------------------- /autopipe/download/download_cifar.py: -------------------------------------------------------------------------------- 1 | from torchvision.datasets import CIFAR10, CIFAR100 2 | 3 | 4 | if __name__ == "__main__": 5 | CIFAR100(root="", download=True, train=True) 6 | CIFAR100(root="", download=True, train=False) 7 | 8 | CIFAR10(root="", download=True, train=True) 9 | CIFAR10(root="", download=True, train=False) -------------------------------------------------------------------------------- /autopipe/download/make_squad.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | if [ ! -d squad1 ] ; then 3 | mkdir squad1 4 | cd squad1 || exit 1 5 | wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json 6 | wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json 7 | wget https://github.com/allenai/bi-att-flow/blob/master/squad/evaluate-v1.1.py 8 | cd .. 9 | 10 | fi 11 | 12 | if [ ! -d squad2 ] ; then 13 | mkdir squad2 14 | cd squad2 || exit 1 15 | wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json 16 | wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json 17 | curl https://worksheets.codalab.org/rest/bundles/0x6b567e1cf2e041ec80d7098f031c5c9e/contents/blob/ > evaluate-v2.0.py 18 | cd .. 
19 | fi 20 | -------------------------------------------------------------------------------- /autopipe/environment.yml: -------------------------------------------------------------------------------- 1 | name: partitioning 2 | channels: 3 | - pytorch 4 | - defaults 5 | - conda-forge 6 | dependencies: 7 | - python=3.8 8 | - pytorch=1.6.0 9 | - torchvision=0.7.0 10 | - graphviz 11 | - python-graphviz 12 | - networkx 13 | - tqdm 14 | - scikit-learn 15 | - pip 16 | - cython 17 | - pip: 18 | - git+https://github.com/networkx/networkx-metis.git 19 | - transformers>2.9.1 20 | - datasets 21 | -------------------------------------------------------------------------------- /autopipe/partitioning_scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saareliad/FTPipe/986014df3bb985595f22139ab355a48ca00dc12f/autopipe/partitioning_scripts/__init__.py -------------------------------------------------------------------------------- /autopipe/py_sbatch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### 4 | # py_sbatch.sh 5 | # 6 | # This script runs python from within our conda env as a slurm batch job. 7 | # All arguments passed to this script are passed directly to the python 8 | # interpreter. 9 | # 10 | 11 | ### 12 | # Example usage: 13 | # 14 | # Running the prepare-submission command from main.py as a batch job 15 | # ./py_sbatch.sh main.py prepare-submission --id 123456789 16 | # 17 | # Running all notebooks without preparing a submission 18 | # ./py_sbatch.sh main.py run-nb *.ipynb 19 | # 20 | # Running any other python script myscript.py with arguments 21 | # ./py_sbatch.sh myscript.py --arg1 --arg2=val2 22 | # 23 | 24 | ### 25 | # Parameters for sbatch 26 | # 27 | NODE=rishon3 28 | NUM_CORES=16 29 | NUM_GPUS=8 30 | JOB_NAME="jobname" 31 | MAIL_USER="username@campus.technion.ac.il" 32 | MAIL_TYPE=ALL # Valid values are NONE, BEGIN, END, FAIL, REQUEUE, ALL 33 | 34 | ### 35 | # Conda parameters 36 | # 37 | CONDA_HOME=$HOME/miniconda3 38 | CONDA_ENV=base 39 | 40 | sbatch \ 41 | -w $NODE \ 42 | -c $NUM_CORES \ 43 | --gres=gpu:$NUM_GPUS \ 44 | --job-name $JOB_NAME \ 45 | --mail-user $MAIL_USER \ 46 | --mail-type $MAIL_TYPE \ 47 | -o '%x_%j.out' \ 48 | < torch.nn.Module: 33 | return FunctionalModel() 34 | 35 | 36 | def get_input(self, args, analysis=False): 37 | if analysis: 38 | return torch.randn( args.analysis_batch_size ,_MODEL_DIM) 39 | 40 | return torch.randn(args.partitioning_batch_size, _MODEL_DIM) 41 | 42 | 43 | register_task("functional_model", ParsePartitioningT5Opts, DumTFunctionalModelPartitioner) 44 | -------------------------------------------------------------------------------- /autopipe/tasks/partitioning_task.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Dict 3 | 4 | import torch 5 | 6 | 7 | class PartitioningTask(ABC): 8 | 9 | def __init__(self, args) -> None: 10 | pass 11 | 12 | @property 13 | @abstractmethod 14 | def batch_dim(self) -> int: 15 | pass 16 | 17 | @abstractmethod 18 | def get_model(self, args) -> torch.nn.Module: 19 | pass 20 | 21 | @abstractmethod 22 | def get_input(self, args, analysis=False): 23 | pass 24 | 25 | # TODO maybe we want to always register operator.is and operator.is_not as untraced 26 | def register_functions(self): 27 | """ register explicit_traced/untraced_functions 28 | 29 | for example if we wish to trace 
math.log and not trace operator.is 30 | 31 | then it should be done here 32 | """ 33 | 34 | def update_analysis_kwargs(self, args, config, analysis_kwargs: Dict) -> Dict: 35 | """enable modifications of the analysis_kwargs which are passed to run_analysis; 36 | for example, set stages_on_same_gpu for gpt2 stateless 37 | """ 38 | return analysis_kwargs 39 | 40 | def post_partitioning(self, args, graph, analysis_result, summary): 41 | """ hook which is called after the partitioning process is done""" 42 | -------------------------------------------------------------------------------- /docs/AcceleratingMixedPipeWithCudaMPS.md: -------------------------------------------------------------------------------- 1 | ### Accelerating mixed pipe with CUDA MPS 2 | As $UID, run the following commands: 3 | ``` 4 | ulimit -n 16384 5 | 6 | # export CUDA_VISIBLE_DEVICES=0 # Select GPU 0. 7 | export CUDA_MPS_PIPE_DIRECTORY=/tmp/nvidia-mps # Select a location that's 8 | accessible to the given $UID 9 | export CUDA_MPS_LOG_DIRECTORY=/tmp/nvidia-log # Select a location that's 10 | accessible to the given $UID 11 | nvidia-cuda-mps-control -d # Start daemon in background 12 | ``` 13 | 14 | To shut down: 15 | ```bash 16 | echo quit | nvidia-cuda-mps-control 17 | ``` -------------------------------------------------------------------------------- /docs/MPI.md: -------------------------------------------------------------------------------- 1 | ## Running with MPI 2 | 3 | - [running with mpi](https://www.open-mpi.org/faq/?category=running) 4 | - especially see [mpi-env-vars](https://www.open-mpi.org/faq/?category=running#mpi-environmental-variables). 5 | 6 | -------------------------------------------------------------------------------- /docs/MiscOptimizations.md: -------------------------------------------------------------------------------- 1 | 2 | ## Misc 3 | 4 | ### Communication Matrix Embedding 5 | - Communication Matrix Embedding with CUDA P2P samples (15% BW improvement for the pipeline). Can use this [script](/misc/p2p_bw_mat.sh). 6 | 7 | ### Binding to nodes 8 | Binding to CPUs which are closer to the GPUs can improve performance. 9 | - use node 0: `numactl --cpunodebind=0` (requirement: sudo apt install numactl) 10 | - checking this: either `lstopo` or `lspci -vvv | less`. 11 | 12 | ### Check your PyTorch build 13 | - To see how PyTorch is compiled, use 14 | ``` 15 | torch.__config__.show() 16 | ``` 17 | -------------------------------------------------------------------------------- /docs/PipeDebug.md: -------------------------------------------------------------------------------- 1 | ## Debugging 2 | 3 | - [debugging mpi python applications with vscode](https://gist.github.com/asroy/ca018117e5dbbf53569b696a8c89204f) 4 | 5 | - Debugging works only when data loading is on the main thread (`num_data_workers=0`).
6 | - Run the same thing with the `--debug` flag, then wait for attachment: 7 | 8 | > > ```bash 9 | > > mpirun -np 2 python main.py --debug 10 | > > ``` 11 | 12 | - If you debug CUDA, you may want to get an accurate trace with: 13 | 14 | > > ```bash 15 | > > CUDA_LAUNCH_BLOCKING=1 mpirun -np 2 python main.py --debug 16 | > > ``` 17 | 18 | - Before you debug, you may want to check whether the error is CUDA-specific and does not reproduce on CPU. 19 | 20 | -------------------------------------------------------------------------------- /docs/PipeList.md: -------------------------------------------------------------------------------- 1 | 2 | ## Available pipes 3 | 4 | Although our [publication](https://www.usenix.org/system/files/atc21-eliad.pdf) refers mainly to 1-2 pipeline approaches for fine-tuning giant models on commodity hardware (mainly, the Pareto frontiers for the discussed setting), the framework we implemented (quite a while before the publication) supports training models of all sizes, for which, of course, different sweet spots apply. 5 | 6 | We implemented many pipeline optimization algorithms to study the tradeoffs of DNN training with asynchronous pipeline-parallelism. 7 | 8 | The following pipeline configurations are available: 9 | 10 | 11 | 12 | - `stale`: no staleness mitigation. 13 | 14 | 15 | - weight prediction (`wp`): {`msnag`, `aggmsnag`} 16 | - supported for the {`sgd`, `adam`, `adamw`} optimizers 17 | - `msnag` is momentum-based weight prediction (see the sketch at the end of this page) 18 | - `aggmsnag` adapts momentum-based weight prediction to gradient accumulation 19 | 20 | - recomputation 21 | - See Table 1 in the [FTPipe paper](https://www.usenix.org/system/files/atc21-eliad.pdf) for the effect on stale pipelines 22 | - no recomputation (`nr` or `norecomp`) 23 | 24 | - weight stashing (`ws`) 25 | 26 | - [Gap Aware](https://arxiv.org/pdf/1909.10802.pdf) staleness mitigation (`ga`) 27 | - for {`sgd`, `adam`, `adamw`} optimizers 28 | - scheduler-aware prediction: making the weight prediction aware of the scheduler. 29 | - gradient aggregation in the pipeline (`step_every`) 30 | 31 | - combinations of mostly all of the above: {`wp`, `ws`, `ga`} 32 | 33 | Note: weight prediction is often called `msnag` in the code. 34 | 35 | 36 | ### Fully-synchronous 37 | 38 | - `gpipe` 39 | - DistributedDataParallel (DDP): SSGD 40 | - Sequential (`seq`): naive inter-layer model parallelism (multi-GPU) 41 | - and, of course, a single GPU for small models. 42 | 43 | 44 | Note: Tied weights are handled (decorated) per use-case.
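To make the `msnag` idea concrete, here is a minimal sketch of momentum-based weight prediction for SGD, under the assumption that each future stale step moves a parameter by roughly `-lr * momentum * buffer`. The helper name `predict_weights` is made up for this illustration; the repo's actual implementations live under `pipe/pipeline/weight_prediction/`.

```python
import torch


def predict_weights(optimizer: torch.optim.SGD, staleness: int):
    """Illustrative sketch only: extrapolate weights `staleness` steps ahead
    using the current momentum buffers. The runtime also restores the true
    weights afterwards (see pipe/pipeline/true_weights_storage.py).
    """
    with torch.no_grad():
        for group in optimizer.param_groups:
            lr, momentum = group["lr"], group["momentum"]
            for p in group["params"]:
                buf = optimizer.state[p].get("momentum_buffer")
                if buf is not None:
                    # approximate each of the `staleness` future updates by
                    # the current momentum direction: p -= lr * momentum * buf
                    p.add_(buf, alpha=-lr * momentum * staleness)
```

Weight stashing (`ws`) is the complementary mechanism: instead of predicting the future weights used for the forward pass, it stores the exact weights that were used, so the delayed backward pass runs with matching parameters.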
-------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saareliad/FTPipe/986014df3bb985595f22139ab355a48ca00dc12f/models/__init__.py -------------------------------------------------------------------------------- /models/new_t5_example/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saareliad/FTPipe/986014df3bb985595f22139ab355a48ca00dc12f/models/new_t5_example/__init__.py -------------------------------------------------------------------------------- /models/new_t5_example/convert_none.py: -------------------------------------------------------------------------------- 1 | from autopipe.autopipe.utils import convert_none_checks 2 | 3 | # run from current dir 4 | if __name__ == '__main__': 5 | # convert_none_checks(input_file="pipe/misc/new_t5/modeling_t5.py", output_file="pipe/misc/new_t5/modeling_t5.py") 6 | convert_none_checks(input_file="modeling_t5.py", output_file="modeling_t5.py") -------------------------------------------------------------------------------- /models/new_t5_example/eval_new_t5.py: -------------------------------------------------------------------------------- 1 | # TODO: make it generic... 2 | from pipe.models.load_pipeline_weights_to_hf import HFLoader 3 | from transformers import AutoModel, AutoConfig, AutoTokenizer, T5ForConditionalGeneration 4 | 5 | 6 | class NewT5HFLoader(HFLoader): 7 | def __init__(self, hf_transformers_model_class=T5ForConditionalGeneration): 8 | super().__init__( 9 | hf_transformers_model_class=hf_transformers_model_class) 10 | 11 | def substitue_state_dict_keys_back_to_original(self, training_state_dict): 12 | # TODO: training_state_dict is the original state dict used in our training.
13 | d = dict() 14 | 15 | for k, v in training_state_dict.items(): 16 | # we modified keys from prefix.block.N.layer.M.suffix into prefix.N.M.suffix 17 | # this regex substitution performs the reverse transformation 18 | # new_key = re.sub(r'([0-9]+.)([0-9]+.)', r'block.\1layer.\2', k) 19 | d[k] = v 20 | 21 | # in case we load weights from the tied model 22 | if "shared_embed_weight" in d: 23 | w = d.pop("shared_embed_weight") 24 | d['shared.weight'] = d['encoder.embed_tokens.weight'] = d[ 25 | 'decoder.embed_tokens.weight'] = w 26 | return d 27 | -------------------------------------------------------------------------------- /models/normal/NLP_models/__init__.py: -------------------------------------------------------------------------------- 1 | # from .modeling_bert import (BertPreTrainedModel, BertModel, BertForPreTraining, 2 | # BertForMaskedLM, BertForNextSentencePrediction, 3 | # BertForSequenceClassification, 4 | # BertForMultipleChoice, BertForTokenClassification, 5 | # BertForQuestionAnswering, load_tf_weights_in_bert, 6 | # ) 7 | # from .modeling_ctrl import CTRLModel, CTRLLMHeadModel 8 | # from .modeling_ctrl_tied_weights import CTRLModel as StatelessCTRLModel, CTRLLMHeadModel as StatelessCTRLLMHeadModel 9 | # from .modeling_gpt2 import (GPT2PreTrainedModel, GPT2Model, GPT2LMHeadModel, 10 | # GPT2DoubleHeadsModel) 11 | # from .modeling_gpt2_tied_weights import (GPT2Model as StatelessGPT2Model, 12 | # GPT2LMHeadModel as StatelessGPT2LMHeadModel, 13 | # GPT2DoubleHeadsModel as StatelessGPT2DoubleHeadsModel) 14 | -------------------------------------------------------------------------------- /models/normal/__init__.py: -------------------------------------------------------------------------------- 1 | # from .vision_models import * 2 | # from .NLP_models import GPT2LMHeadModel, GPT2Model,CTRLLMHeadModel,CTRLModel 3 | # from .NLP_models import StatelessGPT2LMHeadModel, StatelessGPT2Model,StatelessCTRLLMHeadModel,StatelessCTRLModel 4 | # from .NLP_models import (BertModel, BertForPreTraining, 5 | # BertForMaskedLM, BertForNextSentencePrediction, 6 | # BertForSequenceClassification, 7 | # BertForMultipleChoice, BertForTokenClassification, 8 | # BertForQuestionAnswering) 9 | -------------------------------------------------------------------------------- /models/normal/dummy.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | import torch.nn as nn 4 | 5 | 6 | class Dummy(nn.Module): 7 | def __init__(self): 8 | super(Dummy, self).__init__() 9 | self.l0 = nn.Linear(100, 100) 10 | self.l1 = nn.Linear(100, 100) 11 | self.l2 = nn.Linear(100, 100) 12 | self.l3 = nn.Linear(100, 100) 13 | 14 | def forward(self, x): 15 | output2 = self.l0(x) 16 | t0 = self.l1(x) 17 | t1 = self.l2(t0) 18 | output0, output1 = self.l3(t1) 19 | 20 | return output1, output0, output2 21 | 22 | 23 | class Stage0(nn.Module): 24 | def __init__(self, layers, tensors): 25 | super(Stage0, self).__init__() 26 | assert 'Dummy/Linear[l0]' in layers 27 | self.l = layers['Dummy/Linear[l0]'] 28 | assert isinstance(self.l, nn.Linear) 29 | 30 | def forward(self, x): 31 | return (self.l(x),) 32 | 33 | 34 | class Stage1(nn.Module): 35 | def __init__(self, layers, tensors): 36 | super(Stage1, self).__init__() 37 | assert 'Dummy/Linear[l1]' in layers 38 | self.l = layers['Dummy/Linear[l1]'] 39 | assert isinstance(self.l, nn.Linear) 40 | 41 | def forward(self, x): 42 | return (self.l(x),) 43 | 44 | 45 | class Stage2(nn.Module): 46 | def 
__init__(self, layers, tensors): 47 | super(Stage2, self).__init__() 48 | assert 'Dummy/Linear[l2]' in layers 49 | self.l = layers['Dummy/Linear[l2]'] 50 | assert isinstance(self.l, nn.Linear) 51 | 52 | def forward(self, x): 53 | return (self.l(x),) 54 | 55 | 56 | class Stage3(nn.Module): 57 | def __init__(self, layers, tensors): 58 | super(Stage3, self).__init__() 59 | assert 'Dummy/Linear[l3]' in layers 60 | self.l = layers['Dummy/Linear[l3]'] 61 | assert isinstance(self.l, nn.Linear) 62 | 63 | def forward(self, x): 64 | x = self.l(x) 65 | return (x, x + 1) 66 | -------------------------------------------------------------------------------- /models/normal/vision_models/LeNet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | __all__ = ["LeNet"] 4 | 5 | 6 | class LeNet(nn.Module): 7 | def __init__(self, num_classes=1000): 8 | super(LeNet, self).__init__() 9 | self.conv1 = nn.Conv2d(3, 6, kernel_size=5) 10 | self.conv2 = nn.Conv2d(6, 16, kernel_size=5) 11 | self.fc1 = nn.Linear(16*5*5, 120) 12 | self.fc2 = nn.Linear(120, 84) 13 | self.fc3 = nn.Linear(84, num_classes) 14 | self.relu1 = nn.ReLU() 15 | self.relu2 = nn.ReLU() 16 | self.relu3 = nn.ReLU() 17 | self.relu4 = nn.ReLU() 18 | self.max_pool2d1 = nn.MaxPool2d(2) 19 | self.max_pool2d2 = nn.MaxPool2d(2) 20 | 21 | def forward(self, x): 22 | x = self.relu1(self.conv1(x)) 23 | x = self.max_pool2d1(x) 24 | x = self.relu2(self.conv2(x)) 25 | x = self.max_pool2d2(x) 26 | x = x.view(x.size(0), -1) 27 | x = self.relu3(self.fc1(x)) 28 | x = self.relu4(self.fc2(x)) 29 | x = self.fc3(x) 30 | return x 31 | -------------------------------------------------------------------------------- /models/normal/vision_models/__init__.py: -------------------------------------------------------------------------------- 1 | from .AlexNet import alexnet 2 | from .VGG import vgg11, vgg11_bn, vgg13, vgg13_bn, vgg16, vgg16_bn, vgg19, vgg19_bn 3 | from .ResNet import resnet101, resnet152, resnet18, resnet34, resnet50, resnext101_32x8d, resnext50_32x4d 4 | from .LeNet import LeNet 5 | from .DenseNet import densenet121, densenet161, densenet169, densenet201 6 | from .GoogleNet import GoogLeNet 7 | from .WideResNet import WideResNet 8 | from .Inception import inception_v3 9 | from .SqueezeNet import squeezenet1_0, squeezenet1_1 10 | from .amoebaNet import amoebanetd 11 | from .UNet import UNet 12 | -------------------------------------------------------------------------------- /models/partitioned/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saareliad/FTPipe/986014df3bb985595f22139ab355a48ca00dc12f/models/partitioned/__init__.py -------------------------------------------------------------------------------- /pipe/README.md: -------------------------------------------------------------------------------- 1 | # FTPipe 2 | 3 | Pipeline Runtime 4 | 5 | ```bash 6 | python -m pipe.main ... # train models (+eval) (+preprocess) 7 | ``` 8 | 9 | Do use `--help` and the examples to explore. 10 | 11 | ## Get the data 12 | ```bash 13 | python pipe/data/download/download_datasets.py 14 | ``` 15 | Data for T5 tasks is obtained by using the `--mode preproc` 16 | cmd option. 17 | ## Run 18 | 19 | ### Choose a config 20 | See [configs](configs/) for config examples.
21 | 22 | To choose a specific config, add it to the command line: 23 | 24 | ```bash 25 | mpirun -np 2 python -m pipe.main --config $PATH_TO_CONFIG 26 | ``` 27 | Without it, it will run the [dummy config](configs/dummy.json) (created for dev usage). 28 | 29 | ### Preprocess 30 | If data preprocessing is needed, run the selected config with: 31 | ```bash 32 | python -m pipe.main --mode preproc --config $PATH_TO_CONFIG ... 33 | ``` 34 | 35 | ### MPI 36 | 37 | CUDA-aware OpenMPI: 38 | 39 | ```bash 40 | mpirun -np 2 python -m pipe.main --config $PATH_TO_CONFIG 41 | ``` 42 | 43 | ### Multiprocessing 44 | A PoC runtime which can be used with the `--mode mp` cmd option. 45 | 46 | It is supposed to work only for very simple straight pipelines (mostly torchvision models and ViT), and is BUGGY when the configuration gets more exotic (e.g., tied weights). 47 | ```bash 48 | python -m pipe.main --nprocs 2 --mode mp --config $PATH_TO_CONFIG 49 | ``` 50 | -------------------------------------------------------------------------------- /pipe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saareliad/FTPipe/986014df3bb985595f22139ab355a48ca00dc12f/pipe/__init__.py -------------------------------------------------------------------------------- /pipe/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saareliad/FTPipe/986014df3bb985595f22139ab355a48ca00dc12f/pipe/configs/__init__.py -------------------------------------------------------------------------------- /pipe/configs/bert/squad/bert_base_uncased_2p/hetprofiling/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false 5 | } 6 | -------------------------------------------------------------------------------- /pipe/configs/bert/squad/bert_base_uncased_2p/pipedream/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false 5 | } 6 | -------------------------------------------------------------------------------- /pipe/configs/bert/squad/bert_large_uncased_wmm/aggmsnag.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_prediction": { 5 | "type": "aggmsnag", 6 | "args": { 7 | "pred_mem": "clone", 8 | "nag_with_predictor": false, 9 | "sched_aware": true 10 | } 11 | }, 12 | "weight_stashing": false 13 | } 14 | -------------------------------------------------------------------------------- /pipe/configs/bert/squad/bert_large_uncased_wmm/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 8, 6 | "train_batches_limit": -1, 7 | "bs_train": 3, 8 | "bs_test": 3 9 | } 10 | -------------------------------------------------------------------------------- /pipe/configs/bert/squad/bert_large_uncased_wmm/msnag.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_prediction": { 5 |
"type": "msnag", 6 | "args": { 7 | "pred_mem": "clone", 8 | "nag_with_predictor": false, 9 | "sched_aware": true 10 | } 11 | }, 12 | "weight_stashing": false 13 | } 14 | -------------------------------------------------------------------------------- /pipe/configs/bert/squad/bert_large_uncased_wmm/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false 5 | } 6 | -------------------------------------------------------------------------------- /pipe/configs/bert/squad/bert_large_uncased_wmm/ws_msnag_ga.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_prediction": { 5 | "type": "msnag", 6 | "args": { 7 | "pred_mem": "clone", 8 | "nag_with_predictor": false, 9 | "sched_aware": true 10 | } 11 | }, 12 | "gap_aware": { 13 | "type": "adam", 14 | "policy": "all_except_last", 15 | "args": { 16 | } 17 | }, 18 | "weight_stashing": true 19 | } 20 | -------------------------------------------------------------------------------- /pipe/configs/bert/squad/bert_large_uncased_wwm_2m/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false 5 | } 6 | -------------------------------------------------------------------------------- /pipe/configs/bert/squad/bert_large_uncased_wwm_2p/aggmsnag.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "ftpipe.json", 3 | "base_config_path_is_relative": true, 4 | "weight_prediction": { 5 | "type": "aggmsnag", 6 | "args": { 7 | "pred_mem": "clone", 8 | "nag_with_predictor": false, 9 | "sched_aware": true 10 | } 11 | }, 12 | "weight_stashing": false 13 | } 14 | -------------------------------------------------------------------------------- /pipe/configs/bert/squad/bert_large_uncased_wwm_2p/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 12, 6 | "train_batches_limit": -1, 7 | "bs_train": 2, 8 | "bs_test": 1 9 | } 10 | -------------------------------------------------------------------------------- /pipe/configs/bert/squad/bert_large_uncased_wwm_2p/pipedream.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": true 5 | } 6 | -------------------------------------------------------------------------------- /pipe/configs/bert/squad/bert_large_uncased_wwm_2p/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "ftpipe.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false 5 | } -------------------------------------------------------------------------------- /pipe/configs/bert/squad/bert_large_uncased_wwm_4p/aggmsnag.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "ftpipe.json", 3 | "base_config_path_is_relative": true, 4 | "weight_prediction": { 5 | "type": "aggmsnag", 6 | "args": { 7 | "pred_mem": 
"clone", 8 | "nag_with_predictor": false, 9 | "sched_aware": true 10 | } 11 | }, 12 | "weight_stashing": false 13 | } 14 | -------------------------------------------------------------------------------- /pipe/configs/bert/squad/bert_large_uncased_wwm_4p/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 12, 6 | "train_batches_limit": -1, 7 | "bs_train": 2, 8 | "bs_test": 1 9 | } 10 | -------------------------------------------------------------------------------- /pipe/configs/bert/squad/bert_large_uncased_wwm_4p/pipedream.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": true 5 | } 6 | -------------------------------------------------------------------------------- /pipe/configs/bert/squad/bert_large_uncased_wwm_4p/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "ftpipe.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false 5 | } -------------------------------------------------------------------------------- /pipe/configs/bert/squad/bert_large_uncased_wwm_8p/aggmsnag.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "ftpipe.json", 3 | "base_config_path_is_relative": true, 4 | "weight_prediction": { 5 | "type": "aggmsnag", 6 | "args": { 7 | "pred_mem": "clone", 8 | "nag_with_predictor": false, 9 | "sched_aware": true 10 | } 11 | }, 12 | "weight_stashing": false 13 | } 14 | -------------------------------------------------------------------------------- /pipe/configs/bert/squad/bert_large_uncased_wwm_8p/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 12, 6 | "train_batches_limit": -1, 7 | "bs_train": 2, 8 | "bs_test": 1 9 | } 10 | -------------------------------------------------------------------------------- /pipe/configs/bert/squad/bert_large_uncased_wwm_8p/pipedream.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": true 5 | } 6 | -------------------------------------------------------------------------------- /pipe/configs/bert/squad/bert_large_uncased_wwm_8p/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "ftpipe.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false 5 | } -------------------------------------------------------------------------------- /pipe/configs/bert/squad2/bert_large_uncased_wmm/aggmsnag.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_prediction": { 5 | "type": "aggmsnag", 6 | "args": { 7 | "pred_mem": "clone", 8 | "nag_with_predictor": false, 9 | "sched_aware": true 10 | } 11 | }, 12 | "weight_stashing": false 13 | } 14 | -------------------------------------------------------------------------------- 
/pipe/configs/bert/squad2/bert_large_uncased_wmm/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 8, 6 | "train_batches_limit": -1, 7 | "bs_train": 3, 8 | "bs_test": 3 9 | } 10 | -------------------------------------------------------------------------------- /pipe/configs/bert/squad2/bert_large_uncased_wmm/msnag.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_prediction": { 5 | "type": "msnag", 6 | "args": { 7 | "pred_mem": "clone", 8 | "nag_with_predictor": false, 9 | "sched_aware": true 10 | } 11 | }, 12 | "weight_stashing": false 13 | } 14 | -------------------------------------------------------------------------------- /pipe/configs/bert/squad2/bert_large_uncased_wmm/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false 5 | } 6 | -------------------------------------------------------------------------------- /pipe/configs/bert/squad2/bert_large_uncased_wmm/ws.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": true 5 | } 6 | -------------------------------------------------------------------------------- /pipe/configs/bert/squad2/bert_large_uncased_wmm/ws_msnag_ga.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_prediction": { 5 | "type": "msnag", 6 | "args": { 7 | "pred_mem": "clone", 8 | "nag_with_predictor": false, 9 | "sched_aware": true 10 | } 11 | }, 12 | "gap_aware": { 13 | "type": "adam", 14 | "policy": "all_except_last", 15 | "args": { 16 | } 17 | }, 18 | "weight_stashing": true 19 | } 20 | -------------------------------------------------------------------------------- /pipe/configs/cep/common.json: -------------------------------------------------------------------------------- 1 | { 2 | "logdir": "logs/cep/", 3 | "data_dir": "/home_local/saareliad/data", 4 | "out_dir": "results/cep", 5 | "auto_file_name": true, 6 | "out_filename": "cep", 7 | "distributed_backend": "mpi", 8 | "model": "cep_netN50_C20000_4p_bw12_metis", 9 | "stage_to_device_map": [1, 0, 1, 3, 2], 10 | "nprocs": 5, 11 | "shuffle_train": false, 12 | "cep_dataset_kwargs": { 13 | "n": 50, 14 | "k": 11, 15 | "max_samples_num": 5120000 16 | }, 17 | "epochs": 240, 18 | "steps": -1, 19 | "dataset": "cep", 20 | "trainer": { 21 | "type": "cep", 22 | "args": { 23 | } 24 | }, 25 | "statistics": "cv", 26 | "step_every": 16, 27 | "bs_train": 32, 28 | "bs_test": 32, 29 | "num_data_workers": 5, 30 | "optimizer": { 31 | "type": "adamw", 32 | "args": { 33 | "lr": 0.001, 34 | "weight_decay": 1e-2 35 | } 36 | }, 37 | "lr_scheduler": { 38 | "type": "get_constant_schedule_with_warmup", 39 | "preproc_args": { 40 | }, 41 | "args": { 42 | "num_warmup_steps": 0, 43 | "last_epoch": -1 44 | } 45 | }, 46 | "seed_from_cmd": false, 47 | "seed": 42, 48 | "bs_train_from_cmd": false, 49 | "bs_test_from_cmd": false, 50 | "num_chunks": 1, 51 | "verbose_comm": false, 52 | "flush_rate": -1, 53 | 
"work_scheduler": "gpipe", 54 | "cudnn_benchmark": true, 55 | "max_buffers": 1, 56 | "keep_buffers_alive": false, 57 | "train_batches_limit": -1, 58 | "log_frequency": 200, 59 | "dont_drop_last": true, 60 | "test_batches_limit": 0, 61 | "save_checkpoints": true, 62 | "checkpoints_save_name_prefix": "cep", 63 | "checkpoints_save_dir": "/nfs_Disk2/cep/smaller/", 64 | "load_model_one_by_one": false, 65 | "save_checkpoint_every_x_steps": 300000 66 | } 67 | -------------------------------------------------------------------------------- /pipe/configs/cv/cifar10/common.json: -------------------------------------------------------------------------------- 1 | { 2 | "logdir": "logs/cv/", 3 | "data_dir": "/home_local/saareliad/data", 4 | "out_dir":"results/cv/cifar10/2p/", 5 | "auto_file_name": true, 6 | "out_filename": "cv", 7 | "statistics": "cv", 8 | "distributed_backend": "mpi", 9 | "model": "wrn_16x4_p2", 10 | "dataset": "cifar10", 11 | "trainer": { 12 | "type": "cv", 13 | "args": { 14 | } 15 | }, 16 | 17 | "optimizer": { 18 | "type": "sgd1", 19 | "args": { 20 | "lr": 0.1, 21 | "weight_decay": 0.0005, 22 | "momentum": 0.9, 23 | "nesterov": false 24 | } 25 | }, 26 | "bs_train": 128, 27 | "bs_test": 200, 28 | "num_data_workers": 6, 29 | "lr_scheduler": { 30 | "type": "get_multi_step_lr_schedule_with_warmup", 31 | "args": { 32 | "num_warmup_steps": 5, 33 | "milestones": [60, 120, 160], 34 | "gamma": 0.2, 35 | "last_epoch": -1 36 | } 37 | }, 38 | "epochs": 205, 39 | "steps": -1, 40 | "seed_from_cmd": true, 41 | "num_chunks": 1, 42 | "verbose_comm": false, 43 | "flush_rate": -1, 44 | "work_scheduler": "1F1B", 45 | "cudnn_benchmark": true, 46 | "max_buffers": 1, 47 | "step_every": 1, 48 | "keep_buffers_alive": true, 49 | "train_batches_limit":-1, 50 | "log_frequency": 100 51 | } 52 | -------------------------------------------------------------------------------- /pipe/configs/cv/cifar10/wrn_16x4_c10_p2/EXAMPLE.md: -------------------------------------------------------------------------------- 1 | # Simplest example 2 | 3 | ## Environment 4 | (Experimental, unchecked) env without MPI. 5 | ```bash 6 | conda env create -f env_utils/env_without_mpi.yml 7 | conda activate nompi 8 | ``` 9 | 10 | (if you see import errors just install the missing packages.) 11 | 12 | ## Data 13 | use 14 | ```bash 15 | python download/datasets/download_datasets.py 16 | ``` 17 | to get several datasets. 
18 | 19 | or execute just the relevant part in Python: 20 | ```python 21 | from torchvision.datasets import CIFAR10 22 | DATA_DIR='/home_local/saareliad/data' # replace with a path of your own 23 | CIFAR10(root=DATA_DIR, download=True, train=True) 24 | CIFAR10(root=DATA_DIR, download=True, train=False) 25 | ``` 26 | 27 | ## Run 28 | (single machine, optimized streams, no MPI build needed) 29 | ```bash 30 | python main.py --mode mp --config configs/cv/cifar10/wrn_16x4_c10_p2/stale_nr.json --seed 42 31 | ``` 32 | 33 | ### Optional: single GPU 34 | 35 | You can run the pipeline on a single GPU by changing 36 | the relevant line in [configs/cv/cifar10/wrn_16x4_c10_p2/stale_nr.json](configs/cv/cifar10/wrn_16x4_c10_p2/stale_nr.json)\ 37 | to 38 | ```json 39 | "stage_to_device_map": [0, 0], 40 | ``` 41 | 42 | ## Model 43 | Simplest PoC partitioning.\ 44 | Auto-generated: [models/partitioned/wrn_16x4_c10_p2.py](models/partitioned/wrn_16x4_c10_p2.py) 45 | The code that handles reading the config is mostly here: 46 | [models/simple_partitioning_config.py](models/simple_partitioning_config.py) -------------------------------------------------------------------------------- /pipe/configs/cv/cifar10/wrn_16x4_c10_p2/stale_nr.json: -------------------------------------------------------------------------------- 1 | { 2 | "logdir": "logs/cv/", 3 | "data_dir": "/home_local/saareliad/data", 4 | "out_dir":"results/cv/cifar10/2p/", 5 | "auto_file_name": true, 6 | "out_filename": "stale_nr", 7 | "statistics": "cv", 8 | "distributed_backend": "mpi", 9 | "model": "wrn_16x4_c10_p2", 10 | "stage_to_device_map": [0, 1], 11 | "nprocs": 2, 12 | "dataset": "cifar10", 13 | "trainer": { 14 | "type": "cv", 15 | "args": { 16 | } 17 | }, 18 | 19 | "optimizer": { 20 | "type": "sgd1", 21 | "args": { 22 | "lr": 0.1, 23 | "weight_decay": 0.0005, 24 | "momentum": 0.9, 25 | "nesterov": true 26 | } 27 | }, 28 | "bs_train": 128, 29 | "bs_test": 200, 30 | "num_data_workers": 6, 31 | "lr_scheduler": { 32 | "type": "get_multi_step_lr_schedule_with_warmup", 33 | "args": { 34 | "num_warmup_steps": 5, 35 | "milestones": [60, 120, 160], 36 | "gamma": 0.2, 37 | "last_epoch": -1 38 | } 39 | }, 40 | "epochs": 205, 41 | "steps": -1, 42 | "seed_from_cmd": true, 43 | "num_chunks": 1, 44 | "verbose_comm": false, 45 | "flush_rate": -1, 46 | "work_scheduler": "1F1B", 47 | "cudnn_benchmark": true, 48 | "max_buffers": 1, 49 | "step_every": 1, 50 | "keep_buffers_alive": true, 51 | "train_batches_limit":-1, 52 | "log_frequency": 100, 53 | "weight_stashing": false, 54 | "no_recomputation": true 55 | } 56 | -------------------------------------------------------------------------------- /pipe/configs/cv/cifar100/wrn28x10/README.md: -------------------------------------------------------------------------------- 1 | # All configs here: 2 | "configs/cv/cifar100/wrn28x10/" 3 | 4 | ## Recomputation 5 | 6 | configs/cv/cifar100/wrn28x10/msnag.json 7 | configs/cv/cifar100/wrn28x10/stale.json 8 | configs/cv/cifar100/wrn28x10/ws_msnag_ga_jfl.json 9 | 10 | configs/cv/cifar100/wrn28x10/msnag_ws.json 11 | configs/cv/cifar100/wrn28x10/ws_ga.json 12 | configs/cv/cifar100/wrn28x10/ws_msnag_ga.json 13 | configs/cv/cifar100/wrn28x10/ws.json 14 | 15 | 16 | ## No Recomputation 17 | 18 | configs/cv/cifar100/wrn28x10/no_recomputation/msnag_nr.json 19 | configs/cv/cifar100/wrn28x10/no_recomputation/stale_nr.json 20 | 21 | configs/cv/cifar100/wrn28x10/no_recomputation/msnag_ws.json 22 | configs/cv/cifar100/wrn28x10/no_recomputation/ws_ga.json 23 |
configs/cv/cifar100/wrn28x10/no_recomputation/ws_msnag_ga.json 24 | configs/cv/cifar100/wrn28x10/no_recomputation/ws.json 25 | 26 | # Estimated Time 27 | 28 | total_configs_we_want = (3 + 4 + 2) = 9 29 | 30 | seeds = 5 31 | 32 | estimated_time_per_config = 3 hours 33 | 34 | total time: 35 | 36 | 9 configs * 5 seeds = 45 runs; 45 * 3 = 135 hours = 5.625 days 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /pipe/configs/cv/cifar100/wrn28x10/common.json: -------------------------------------------------------------------------------- 1 | { 2 | "logdir": "logs/cv/", 3 | "data_dir": "/home_local/saareliad/data", 4 | "out_dir":"results/cv/", 5 | "auto_file_name": true, 6 | "out_filename": "cv", 7 | "statistics": "cv", 8 | "distributed_backend": "mpi", 9 | "model": "wrn_28x10_c100_dr03_gnc32_4p_bw12_pipedream", 10 | "dataset": "cifar100", 11 | "trainer": { 12 | "type": "cv", 13 | "args": { 14 | } 15 | }, 16 | 17 | "optimizer": { 18 | "type": "sgd1", 19 | "args": { 20 | "lr": 0.1, 21 | "weight_decay": 0.0005, 22 | "momentum": 0.9, 23 | "nesterov": false 24 | } 25 | }, 26 | "bs_train": 128, 27 | "bs_test": 200, 28 | "num_data_workers": 10, 29 | "lr_scheduler": { 30 | "type": "get_multi_step_lr_schedule_with_warmup", 31 | "args": { 32 | "num_warmup_steps": 5, 33 | "milestones": [60, 120, 160], 34 | "gamma": 0.2, 35 | "last_epoch": -1 36 | } 37 | }, 38 | "epochs": 205, 39 | "steps": -1, 40 | "seed_from_cmd": true, 41 | "num_chunks": 1, 42 | "verbose_comm": false, 43 | "flush_rate": -1, 44 | "work_scheduler": "1F1B", 45 | "cudnn_benchmark": true, 46 | "max_buffers": 1, 47 | "step_every": 1, 48 | "keep_buffers_alive": true, 49 | "train_batches_limit":-1, 50 | "log_frequency": 100 51 | } 52 | -------------------------------------------------------------------------------- /pipe/configs/cv/cifar100/wrn28x10/msnag.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": [ 3 | "common.json", 4 | "msnag_optimizer.json" 5 | ], 6 | "base_config_path_is_relative": true, 7 | "weight_stashing": false 8 | } -------------------------------------------------------------------------------- /pipe/configs/cv/cifar100/wrn28x10/msnag_optimizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "optimizer": { 3 | "type": "sgd1", 4 | "args": { 5 | "lr": 0.1, 6 | "weight_decay": 0.0005, 7 | "momentum": 0.9, 8 | "nesterov": false 9 | } 10 | }, 11 | "nesterov_set_for_last_partition": true, 12 | "weight_prediction": { 13 | "type": "aggmsnag", 14 | "args": { 15 | "pred_mem": "clone", 16 | "nag_with_predictor": true 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /pipe/configs/cv/cifar100/wrn28x10/msnag_ws.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": [ 3 | "common.json", 4 | "msnag_optimizer.json" 5 | ], 6 | "base_config_path_is_relative": true, 7 | "weight_stashing": true 8 | } -------------------------------------------------------------------------------- /pipe/configs/cv/cifar100/wrn28x10/no_recomputation/msnag_nr.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": [ 3 | "../common.json", 4 | "../msnag_optimizer.json", 5 | "norecomp.json" 6 | ], 7 | "base_config_path_is_relative": true, 8 | "weight_stashing": false 9 | } --------------------------------------------------------------------------------
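The configs above compose through `base_config_path`: it can name a single base or an ordered list (e.g. `["../common.json", "../msnag_optimizer.json", "norecomp.json"]`), resolved relative to the child file when `base_config_path_is_relative` is true, with later bases and finally the child overriding earlier keys. Below is a minimal sketch of that resolution logic, assuming a shallow key-wise merge; the helper is hypothetical and the actual loader in `pipe/` may differ (e.g., it may deep-merge nested sections such as `"optimizer"`):

```python
import json
import os


def load_config(path):
    """Load a JSON config, recursively folding in its base configs.

    Bases listed earlier are applied first; the child file wins on
    conflicting keys. This is a sketch of the layering convention used
    by the configs above, not the project's actual loader.
    """
    with open(path) as f:
        cfg = json.load(f)
    bases = cfg.pop("base_config_path", None)
    relative = cfg.pop("base_config_path_is_relative", False)
    if bases is None:
        return cfg
    if isinstance(bases, str):
        bases = [bases]
    merged = {}
    for base in bases:
        if relative:
            base = os.path.join(os.path.dirname(path), base)
        merged.update(load_config(base))  # bases may themselves have bases
    merged.update(cfg)  # shallow merge: child keys replace base keys wholesale
    return merged


# e.g., msnag_nr.json above layers norecomp.json over the shared optimizer/common files:
# cfg = load_config("pipe/configs/cv/cifar100/wrn28x10/no_recomputation/msnag_nr.json")
```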
/pipe/configs/cv/cifar100/wrn28x10/no_recomputation/msnag_ws.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": [ 3 | "../common.json", 4 | "../msnag_optimizer.json", 5 | "norecomp.json" 6 | ], 7 | "base_config_path_is_relative": true, 8 | "weight_stashing": true 9 | } -------------------------------------------------------------------------------- /pipe/configs/cv/cifar100/wrn28x10/no_recomputation/norecomp.json: -------------------------------------------------------------------------------- 1 | { 2 | "out_filename": "cv_norecomp", 3 | "no_recomputation": true 4 | } -------------------------------------------------------------------------------- /pipe/configs/cv/cifar100/wrn28x10/no_recomputation/stale_nr.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": [ 3 | "../common.json", 4 | "../stale_optimizer.json", 5 | "norecomp.json" 6 | ], 7 | "base_config_path_is_relative": true, 8 | "weight_stashing": false 9 | } -------------------------------------------------------------------------------- /pipe/configs/cv/cifar100/wrn28x10/no_recomputation/ws.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": [ 3 | "../common.json", 4 | "../stale_optimizer.json", 5 | "norecomp.json" 6 | ], 7 | "base_config_path_is_relative": true, 8 | "weight_stashing": true 9 | } -------------------------------------------------------------------------------- /pipe/configs/cv/cifar100/wrn28x10/no_recomputation/ws_ga.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": [ 3 | "../common.json", 4 | "../stale_optimizer.json", 5 | "norecomp.json" 6 | ], 7 | "base_config_path_is_relative": true, 8 | "gap_aware": { 9 | "type": "sgd1", 10 | "policy": "all_except_last", 11 | "args": { 12 | "big_gamma": 0.999, 13 | "epsilon": 1e-8 14 | } 15 | }, 16 | "weight_stashing": true 17 | } -------------------------------------------------------------------------------- /pipe/configs/cv/cifar100/wrn28x10/no_recomputation/ws_msnag_ga.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "base_config_path": [ 4 | "../common.json", 5 | "../msnag_optimizer.json", 6 | "norecomp.json" 7 | ], 8 | "base_config_path_is_relative": true, 9 | "gap_aware": { 10 | "type": "sgd1", 11 | "policy": "all_except_last", 12 | "args": { 13 | "big_gamma": 0.999, 14 | "epsilon": 1e-8 15 | } 16 | }, 17 | "weight_stashing": true 18 | } -------------------------------------------------------------------------------- /pipe/configs/cv/cifar100/wrn28x10/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": [ 3 | "common.json", 4 | "stale_optimizer.json" 5 | ], 6 | "base_config_path_is_relative": true, 7 | "weight_stashing": false, 8 | "train_batches_limit":-1 9 | 10 | } -------------------------------------------------------------------------------- /pipe/configs/cv/cifar100/wrn28x10/stale_optimizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "optimizer": { 3 | "type": "sgd1", 4 | "args": { 5 | "lr": 0.1, 6 | "weight_decay": 0.0005, 7 | "momentum": 0.9, 8 | "nesterov": true 9 | } 10 | } 11 | } -------------------------------------------------------------------------------- /pipe/configs/cv/cifar100/wrn28x10/ws.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": [ 3 | "common.json", 4 | "stale_optimizer.json" 5 | ], 6 | "base_config_path_is_relative": true, 7 | "weight_stashing": true 8 | } -------------------------------------------------------------------------------- /pipe/configs/cv/cifar100/wrn28x10/ws_ga.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": [ 3 | "common.json", 4 | "stale_optimizer.json" 5 | ], 6 | "base_config_path_is_relative": true, 7 | "gap_aware": { 8 | "type": "sgd1", 9 | "policy": "all_except_last", 10 | "args": { 11 | "big_gamma": 0.999, 12 | "epsilon": 1e-8 13 | } 14 | }, 15 | "weight_stashing": true 16 | } -------------------------------------------------------------------------------- /pipe/configs/cv/cifar100/wrn28x10/ws_msnag_ga.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "base_config_path": [ 4 | "common.json", 5 | "msnag_optimizer.json" 6 | ], 7 | "base_config_path_is_relative": true, 8 | "gap_aware": { 9 | "type": "sgd1", 10 | "policy": "all_except_last", 11 | "args": { 12 | "big_gamma": 0.999, 13 | "epsilon": 1e-8 14 | } 15 | }, 16 | "weight_stashing": true 17 | } -------------------------------------------------------------------------------- /pipe/configs/cv/cifar100/wrn28x10/ws_msnag_ga_jfl.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": [ 3 | "common.json", 4 | "msnag_optimizer.json" 5 | ], 6 | "base_config_path_is_relative": true, 7 | "gap_aware": { 8 | "type": "sgd1", 9 | "policy": "all_except_last", 10 | "args": { 11 | "big_gamma": 0.999, 12 | "epsilon": 1e-8 13 | } 14 | }, 15 | "weight_stashing": true, 16 | "gap_aware_just_loss": true, 17 | "no_recomputation": false 18 | } -------------------------------------------------------------------------------- /pipe/configs/cv/imagenet/weight_stashing_msnag_gap_aware.json: -------------------------------------------------------------------------------- 1 | { 2 | "logdir": "logs/", 3 | "out_dir": "results/imagenet", 4 | "data_dir": "/home_local/saareliad/data/imagenet/", 5 | "auto_file_name": true, 6 | "out_filename": "e1", 7 | "distributed_backend": "mpi", 8 | "statistics": "cv", 9 | "model": "resnet50_imagenet_p8", 10 | "dataset": "imagenet", 11 | "trainer": { 12 | "type": "cv", 13 | "args": { 14 | } 15 | }, 16 | "bs_train": 128, 17 | "bs_test": 128, 18 | "num_data_workers": 10, 19 | "optimizer": { 20 | "type": "sgd1", 21 | "args": { 22 | "lr": 0.1, 23 | "weight_decay": 0.0001, 24 | "momentum": 0.9, 25 | "nesterov": false 26 | } 27 | }, 28 | "lr_scheduler": { 29 | "type": "get_multi_step_lr_schedule_with_warmup", 30 | "args": { 31 | "num_warmup_steps": 5, 32 | "milestones": [30, 60, 90], 33 | "gamma": 0.1, 34 | "last_epoch": -1 35 | } 36 | }, 37 | "weight_prediction": { 38 | "type": "msnag", 39 | "args": { 40 | "pred_mem": "clone", 41 | "nag_with_predictor": true 42 | } 43 | }, 44 | "gap_aware": { 45 | "type": "sgd1", 46 | "policy": "all_except_last", 47 | "args": { 48 | "big_gamma": 0.999, 49 | "epsilon": 1e-8 50 | } 51 | }, 52 | "epochs": 100, 53 | "steps": -1, 54 | "seed": 42, 55 | "num_chunks": 1, 56 | "verbose_comm": false, 57 | "flush_rate": -1, 58 | "weight_stashing": true, 59 | "work_scheduler": "1F1B", 60 | "seed_from_cmd": true, 61 | "nesterov_set_for_last_partition": true, 62 | "no_recomputation": false, 63 | "keep_buffers_alive": true, 64 | 
"max_buffers": 1, 65 | "cudnn_benchmark": true 66 | } 67 | -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2/tied/common.json: -------------------------------------------------------------------------------- 1 | { 2 | "logdir": "logs/debug/", 3 | "data_dir": "/home_local/saareliad/data", 4 | "out_dir": "results/lm/gpt2/tied/", 5 | "auto_file_name": true, 6 | "out_filename": "tied", 7 | "distributed_backend": "mpi", 8 | "model": "gpt2_p4_lm_tied", 9 | "stage_to_device_map": [0, 1, 2, 3, 0], 10 | "model_name_or_path": "gpt2", 11 | "dataset": "wt2", 12 | "statistics": "lm_loss_per_batch", 13 | "trainer": { 14 | "type": "lm", 15 | "args": { 16 | } 17 | }, 18 | "bs_train": 4, 19 | "bs_test": 4, 20 | "train_seq_len": 1024, 21 | "valid_seq_len": 1024, 22 | "test_seq_len": 1024, 23 | "num_data_workers": 10, 24 | "optimizer": { 25 | "type": "adamw", 26 | "args": { 27 | "lr": 5e-5, 28 | "weight_decay": 0, 29 | "eps": 1e-8 30 | } 31 | }, 32 | "lr_scheduler": { 33 | "type": "get_linear_schedule_with_warmup", 34 | "preproc_args": { 35 | "num_training_steps": "epochs_to_steps", 36 | "num_warmup_steps": "epochs_to_steps" 37 | }, 38 | "args": { 39 | "num_warmup_steps": 0, 40 | "num_training_steps": -1, 41 | "last_epoch": -1 42 | } 43 | }, 44 | "epochs": 3, 45 | "steps": -1, 46 | "seed_from_cmd": true, 47 | "num_chunks": 1, 48 | "verbose_comm": false, 49 | "flush_rate": -1, 50 | "work_scheduler": "1F1B", 51 | "cudnn_benchmark": true, 52 | "max_buffers": 1, 53 | "step_every": 1, 54 | "train_batches_limit": -1, 55 | "log_frequency": 20, 56 | "overwrite_cache": true, 57 | "keep_buffers_alive": false, 58 | "dont_drop_last": true, 59 | "stateless_tied": true, 60 | "nprocs": 5 61 | } 62 | -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2/tied/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 4, 6 | "overwrite_cache": false, 7 | "train_batches_limit": -1, 8 | "bs_train": 1, 9 | "bs_test": 4, 10 | "epochs": 3 11 | } 12 | -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2/tied/msnag.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "statistics": "lm_loss_per_batch", 5 | "weight_prediction": { 6 | "type": "msnag", 7 | "args": { 8 | "pred_mem": "clone", 9 | "nag_with_predictor": false, 10 | "sched_aware": true 11 | } 12 | }, 13 | "weight_stashing": false 14 | } -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2/tied/msnag_ws.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "statistics": "lm_loss_per_batch", 5 | "weight_prediction": { 6 | "type": "msnag", 7 | "args": { 8 | "pred_mem": "clone", 9 | "nag_with_predictor": false, 10 | "sched_aware": true 11 | } 12 | }, 13 | "weight_stashing": true 14 | } -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2/tied/seq.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | 
"base_config_path_is_relative": true, 4 | "statistics": "lm_loss_per_batch", 5 | "weight_stashing": false, 6 | "work_scheduler": "SEQ", 7 | "no_recomputation": true, 8 | "out_filename": "seq_tied" 9 | } 10 | -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2/tied/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "statistics": "lm_loss_per_batch", 5 | "weight_stashing": false 6 | } -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2/tied/ws.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "statistics": "lm_loss_per_batch", 5 | "weight_stashing": true 6 | } -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2/tied/ws_ga.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "statistics": "lm_loss_per_batch", 5 | "gap_aware": { 6 | "type": "adamw", 7 | "policy": "all_except_last", 8 | "args": { 9 | } 10 | }, 11 | "weight_stashing": true 12 | } 13 | -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2/tied/ws_msnag_ga.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "statistics": "lm_loss_per_batch", 5 | "weight_prediction": { 6 | "type": "msnag", 7 | "args": { 8 | "pred_mem": "clone", 9 | "nag_with_predictor": false, 10 | "sched_aware": true 11 | } 12 | }, 13 | "gap_aware": { 14 | "type": "adamw", 15 | "policy": "all_except_last", 16 | "args": { 17 | } 18 | }, 19 | "weight_stashing": true 20 | } 21 | -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2/tied/ws_msnag_ga_jfl.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "ws_msnag_ga.json", 3 | "base_config_path_is_relative": true, 4 | "gap_aware_just_loss": true 5 | } -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2xl/tied/common.json: -------------------------------------------------------------------------------- 1 | { 2 | "logdir": "logs/debug/", 3 | "data_dir": "/home_local/saareliad/data", 4 | "out_dir": "results/lm/gpt2xl/tied/", 5 | "statistics": "lm_loss_per_batch", 6 | "auto_file_name": true, 7 | "out_filename": "tied_wd_wa", 8 | "distributed_backend": "mpi", 9 | "model": "gpt2_xl_p8_lm_tied", 10 | "model_name_or_path": "gpt2-xl", 11 | "dataset": "wt2", 12 | "trainer": { 13 | "type": "lm", 14 | "args": { 15 | } 16 | }, 17 | "bs_train": 1, 18 | "bs_test": 1, 19 | "train_seq_len": 1024, 20 | "valid_seq_len": 1024, 21 | "test_seq_len": 1024, 22 | "num_data_workers": 10, 23 | "optimizer": { 24 | "type": "adamw", 25 | "args": { 26 | "lr": 5e-5, 27 | "weight_decay": 0.01, 28 | "eps": 1e-8 29 | } 30 | }, 31 | "lr_scheduler": { 32 | "type": "get_linear_schedule_with_warmup", 33 | "preproc_args": { 34 | "num_training_steps": "epochs_to_steps" 35 | }, 36 | "args": { 37 | "num_warmup_steps": 200, 38 | "num_training_steps": -1, 39 | "last_epoch": 
-1 40 | } 41 | }, 42 | "epochs": 2, 43 | "steps": -1, 44 | "seed_from_cmd": true, 45 | "num_chunks": 1, 46 | "verbose_comm": false, 47 | "flush_rate": -1, 48 | "work_scheduler": "1F1B", 49 | "cudnn_benchmark": true, 50 | "max_buffers": 1, 51 | "step_every": 1, 52 | "train_batches_limit": -1, 53 | "dont_drop_last": true, 54 | "keep_buffers_alive": false, 55 | "log_frequency": 80, 56 | "stateless_tied": true, 57 | "stage_to_device_map": [0, 1, 2, 3, 4, 5, 6, 7, 0], 58 | "overwrite_cache": false 59 | } 60 | -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2xl/tied/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 8, 6 | "overwrite_cache": true, 7 | "train_batches_limit": -1, 8 | "bs_train": 1, 9 | "bs_test": 4, 10 | "epochs": 3 11 | } -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2xl/tied/msnag.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "statistics": "lm_loss_per_batch", 5 | "weight_prediction": { 6 | "type": "msnag", 7 | "args": { 8 | "pred_mem": "clone", 9 | "nag_with_predictor": false, 10 | "sched_aware": true 11 | } 12 | }, 13 | "weight_stashing": false 14 | } 15 | -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2xl/tied/msnag_ws.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "statistics": "lm_loss_per_batch", 5 | "weight_prediction": { 6 | "type": "msnag", 7 | "args": { 8 | "pred_mem": "clone", 9 | "nag_with_predictor": false, 10 | "sched_aware": true 11 | } 12 | }, 13 | "weight_stashing": true 14 | } -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2xl/tied/seq.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "statistics": "lm_loss_per_batch", 5 | "weight_stashing": false, 6 | "work_scheduler": "SEQ", 7 | "no_recomputation": true, 8 | "out_filename": "seq_wd", 9 | "dont_drop_last": true, 10 | "overwrite_cache": true 11 | } 12 | -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2xl/tied/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "statistics": "lm_loss_per_batch", 5 | "weight_stashing": false 6 | } -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2xl/tied/ws.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "statistics": "lm_loss_per_batch", 5 | "weight_stashing": true 6 | } -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2xl/tied/ws_msnag_ga.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 
| "base_config_path_is_relative": true, 4 | "statistics": "lm_loss_per_batch", 5 | "weight_prediction": { 6 | "type": "msnag", 7 | "args": { 8 | "pred_mem": "clone", 9 | "nag_with_predictor": false, 10 | "sched_aware": true 11 | } 12 | }, 13 | "gap_aware": { 14 | "type": "adamw", 15 | "policy": "all_except_last", 16 | "args": { 17 | } 18 | }, 19 | "weight_stashing": true 20 | } 21 | -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2xl/tied/ws_msnag_ga_jfl.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "ws_msnag_ga.json", 3 | "base_config_path_is_relative": true, 4 | "gap_aware_just_loss": true 5 | } -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2xl/untied/aggmsnag.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "statistics": "lm_loss_per_batch", 5 | "weight_prediction": { 6 | "type": "aggmsnag", 7 | "args": { 8 | "pred_mem": "clone", 9 | "nag_with_predictor": false, 10 | "sched_aware": true 11 | } 12 | }, 13 | "weight_stashing": false 14 | } 15 | -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2xl/untied/common.json: -------------------------------------------------------------------------------- 1 | { 2 | "logdir": "logs/debug/", 3 | "data_dir": "/home_local/saareliad/data", 4 | "out_dir": "results/DEBUG1/lm/gpt2xl/untied/wd", 5 | "statistics": "lm_loss_per_batch", 6 | "auto_file_name": true, 7 | "out_filename": "wd", 8 | "distributed_backend": "mpi", 9 | "model": "old_gpt2xl_8p_untied", 10 | "model_name_or_path": "gpt2-xl", 11 | "dataset": "wt2", 12 | "trainer": { 13 | "type": "lm", 14 | "args": { 15 | } 16 | }, 17 | "bs_train": 1, 18 | "bs_test": 1, 19 | "train_seq_len": 1024, 20 | "valid_seq_len": 1024, 21 | "test_seq_len": 1024, 22 | "num_data_workers": 10, 23 | "optimizer": { 24 | "type": "adamw", 25 | "args": { 26 | "lr": 5e-5, 27 | "weight_decay": 0.01, 28 | "eps": 1e-8 29 | } 30 | }, 31 | "lr_scheduler": { 32 | "type": "get_linear_schedule_with_warmup", 33 | "preproc_args": { 34 | "num_training_steps": "epochs_to_steps", 35 | "num_warmup_steps": "epochs_to_steps" 36 | }, 37 | "args": { 38 | "num_warmup_steps": 0, 39 | "num_training_steps": -1, 40 | "last_epoch": -1 41 | } 42 | }, 43 | "epochs": 1, 44 | "steps": -1, 45 | "seed_from_cmd": true, 46 | "num_chunks": 1, 47 | "verbose_comm": false, 48 | "flush_rate": -1, 49 | "work_scheduler": "1F1B", 50 | "cudnn_benchmark": true, 51 | "max_buffers": 1, 52 | "step_every": 1, 53 | "train_batches_limit": -1, 54 | "dont_drop_last": true, 55 | "keep_buffers_alive": false, 56 | "log_frequency": 80, 57 | "overwrite_cache": true 58 | } 59 | -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2xl/untied/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 8, 6 | "overwrite_cache": true, 7 | "train_batches_limit": -1, 8 | "bs_train": 1, 9 | "bs_test": 4, 10 | "epochs": 1 11 | 12 | } 13 | -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2xl/untied/msnag.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "statistics": "lm_loss_per_batch", 5 | "weight_prediction": { 6 | "type": "msnag", 7 | "args": { 8 | "pred_mem": "clone", 9 | "nag_with_predictor": false, 10 | "sched_aware": true 11 | } 12 | }, 13 | "weight_stashing": false 14 | } 15 | -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2xl/untied/msnag_ws.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "statistics": "lm_loss_per_batch", 5 | "weight_prediction": { 6 | "type": "msnag", 7 | "args": { 8 | "pred_mem": "clone", 9 | "nag_with_predictor": false, 10 | "sched_aware": true 11 | } 12 | }, 13 | "weight_stashing": true 14 | } -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2xl/untied/seq.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "statistics": "lm_loss_per_batch", 5 | "weight_stashing": false, 6 | "work_scheduler": "SEQ", 7 | "no_recomputation": true, 8 | "out_filename": "seq_wd", 9 | "dont_drop_last": true, 10 | "overwrite_cache": true 11 | } 12 | -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2xl/untied/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "statistics": "lm_loss_per_batch", 5 | "weight_stashing": false, 6 | "train_batches_limit":-1 7 | } 8 | -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2xl/untied/ws.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "statistics": "lm_loss_per_batch", 5 | "weight_stashing": true 6 | } -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2xl/untied/ws_msnag_ga.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "statistics": "lm_loss_per_batch", 5 | "weight_prediction": { 6 | "type": "msnag", 7 | "args": { 8 | "pred_mem": "clone", 9 | "nag_with_predictor": false, 10 | "sched_aware": true 11 | } 12 | }, 13 | "gap_aware": { 14 | "type": "adamw", 15 | "policy": "all_except_last", 16 | "args": { 17 | } 18 | }, 19 | "weight_stashing": true 20 | } 21 | -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2xl/untied/ws_msnag_ga_jfl.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "ws_msnag_ga.json", 3 | "base_config_path_is_relative": true, 4 | "gap_aware_just_loss": true 5 | } -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2xl/untied_s512/common.json: -------------------------------------------------------------------------------- 1 | { 2 | "logdir": "logs/debug/", 3 | "data_dir": "/home_local/saareliad/data", 4 | "out_dir": 
"results/new_gpt2xl/lm/gpt2xl_b512/untied/wd/wa", 5 | "statistics": "lm_loss_per_batch", 6 | "auto_file_name": true, 7 | "out_filename": "wd", 8 | "distributed_backend": "mpi", 9 | "model": "new_gpt2_xl_tied_lm_p8_seq_512", 10 | "model_name_or_path": "gpt2-xl", 11 | "dataset": "wt2", 12 | "trainer": { 13 | "type": "lm", 14 | "args": { 15 | } 16 | }, 17 | "bs_train": 2, 18 | "bs_test": 2, 19 | "train_seq_len": 512, 20 | "valid_seq_len": 512, 21 | "test_seq_len": 512, 22 | "num_data_workers": 10, 23 | "optimizer": { 24 | "type": "adamw", 25 | "args": { 26 | "lr": 5e-5, 27 | "weight_decay": 0.01, 28 | "eps": 1e-8 29 | } 30 | }, 31 | "lr_scheduler": { 32 | "type": "get_linear_schedule_with_warmup", 33 | "preproc_args": { 34 | "num_training_steps": "epochs_to_steps", 35 | "num_warmup_steps": "ratio_from_num_training_steps" 36 | }, 37 | "args": { 38 | "num_warmup_steps": 0.06, 39 | "num_training_steps": -1, 40 | "last_epoch": -1 41 | } 42 | }, 43 | "epochs": 1, 44 | "steps": -1, 45 | "seed_from_cmd": true, 46 | "num_chunks": 1, 47 | "verbose_comm": false, 48 | "flush_rate": -1, 49 | "work_scheduler": "1F1B", 50 | "cudnn_benchmark": true, 51 | "max_buffers": 1, 52 | "step_every": 8, 53 | "train_batches_limit": -1, 54 | "dont_drop_last": true, 55 | "keep_buffers_alive": false, 56 | "log_frequency": 80, 57 | "overwrite_cache": true 58 | } 59 | -------------------------------------------------------------------------------- /pipe/configs/lm/wt2/gpt2xl/untied_s512/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "statistics": "lm_loss_per_batch", 5 | "weight_stashing": false, 6 | "train_batches_limit":-1 7 | } 8 | -------------------------------------------------------------------------------- /pipe/configs/python_configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saareliad/FTPipe/986014df3bb985595f22139ab355a48ca00dc12f/pipe/configs/python_configs/__init__.py -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/mpipe/boolq/gpipe_layer_graph.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common_layer_graph.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 10, 6 | "bs_train": 2, 7 | "bs_test": 2, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/mnt/qnap/saareliad/mpipe/checkpoints/t5/3b/boolq/gpipe_layer/" 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/mpipe/boolq/gpipe_op_graph.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common_op_graph.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 10, 6 | "bs_train": 2, 7 | "bs_test": 2, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/mnt/qnap/saareliad/mpipe/checkpoints/t5/3b/boolq/gpipe_op/" 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/mpipe/boolq/stale_layer_graph.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common_layer_graph.json", 3 | 
"base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b", 6 | "checkpoints_save_name_prefix": "stale_adafactor", 7 | "checkpoints_save_dir": "/mnt/qnap/saareliad/mpipe/checkpoints/t5/3b/boolq/stale_layer/" 8 | } 9 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/mpipe/boolq/stale_op_graph.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common_op_graph.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b", 6 | "checkpoints_save_name_prefix": "stale_adafactor", 7 | "checkpoints_save_dir": "/mnt/qnap/saareliad/mpipe/checkpoints/t5/3b/boolq/stale_op/" 8 | } 9 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/mpipe/multirc/gpipe_layer_graph.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common_layer_graph.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 8, 6 | "bs_train": 1, 7 | "bs_test": 1, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/mnt/qnap/saareliad/mpipe/checkpoints/t5/3b/multirc/gpipe_layer/" 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/mpipe/multirc/gpipe_op_graph.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common_op_graph.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 8, 6 | "bs_train": 1, 7 | "bs_test": 1, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/mnt/qnap/saareliad/mpipe/checkpoints/t5/3b/multirc/gpipe_op/" 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/mpipe/multirc/stale_layer_graph.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common_layer_graph.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b", 6 | "checkpoints_save_name_prefix": "stale_adafactor", 7 | "checkpoints_save_dir": "/mnt/qnap/saareliad/mpipe/checkpoints/t5/3b/multirc/stale_layer/" 8 | } 9 | 10 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/mpipe/multirc/stale_op_graph.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common_op_graph.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b", 6 | "checkpoints_save_name_prefix": "stale_adafactor", 7 | "checkpoints_save_dir": "/mnt/qnap/saareliad/mpipe/checkpoints/t5/3b/multirc/stale_op/" 8 | } 9 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/mpipe/rte/gpipe_layer_graph.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common_layer_graph.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 10, 6 | "bs_train": 4, 7 | "bs_test": 4, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": 
"/nfs_Disk2/mpipe/checkpoints/t5/3b/rte/gpipe_layer/" 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/mpipe/rte/gpipe_op_graph.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common_op_graph.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 10, 6 | "bs_train": 4, 7 | "bs_test": 4, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/nfs_Disk2/mpipe/checkpoints/t5/3b/rte/gpipe_op/" 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/mpipe/rte/stale_layer_graph.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common_layer_graph.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b", 6 | "checkpoints_save_name_prefix": "stale_adafactor", 7 | "checkpoints_save_dir": "/nfs_Disk2/mpipe/checkpoints/t5/3b/rte/stale_layer/" 8 | } 9 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/mpipe/rte/stale_op_graph.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common_op_graph.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b", 6 | "checkpoints_save_name_prefix": "stale_adafactor", 7 | "checkpoints_save_dir": "/nfs_Disk2/mpipe/checkpoints/t5/3b/rte/stale_op/" 8 | } 9 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/mpipe/wic/gpipe_layer_graph.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common_layer_graph.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 8, 6 | "bs_train": 16, 7 | "bs_test": 16, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/nfs_Disk2/mpipe/checkpoints/t5/3b/wic/gpipe_layer/" 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/mpipe/wic/gpipe_op_graph.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common_op_graph.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 8, 6 | "bs_train": 16, 7 | "bs_test": 16, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/nfs_Disk2/mpipe/checkpoints/t5/3b/wic/gpipe_op/" 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/mpipe/wic/stale_layer_graph.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common_layer_graph.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b", 6 | "checkpoints_save_name_prefix": "stale_adafactor", 7 | "checkpoints_save_dir": "/nfs_Disk2/mpipe/checkpoints/t5/3b/wic/stale_layer/" 8 | } 9 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/mpipe/wic/stale_op_graph.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"base_config_path": "common_op_graph.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b", 6 | "checkpoints_save_name_prefix": "stale_adafactor", 7 | "checkpoints_save_dir": "/nfs_Disk2/mpipe/checkpoints/t5/3b/wic/stale_op/" 8 | } 9 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/seq/boolq/gpipe_new.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 10, 6 | "bs_train": 2, 7 | "bs_test": 2, 8 | "checkpoints_save_name_prefix": "gpipe_new", 9 | "checkpoints_save_dir": "/mnt/qnap/saareliad/no_virtual_stages/checkpoints/t5/3b/boolq/gpipe/", 10 | "save_checkpoints": true, 11 | "epochs": 2, 12 | "steps": -1, 13 | "dont_drop_last": false 14 | } 15 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/seq/boolq/pipedream_stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b", 6 | "dont_drop_last": false, 7 | "checkpoints_save_name_prefix": "stale", 8 | "checkpoints_save_dir": "/mnt/qnap/saareliad/no_virtual_stages/checkpoints/t5/3b/boolq/stale/", 9 | "step_every": 10, 10 | "bs_train": 2 11 | } 12 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/seq/multirc/gpipe_new.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 8, 6 | "bs_train": 1, 7 | "bs_test": 1, 8 | "checkpoints_save_name_prefix": "gpipe_new", 9 | "checkpoints_save_dir": "/mnt/qnap/saareliad/no_virtual_stages/checkpoints/t5/3b/multirc/gpipe/", 10 | "save_checkpoints": false, 11 | "epochs": 1, 12 | "dont_drop_last": false, 13 | "steps": -1 14 | } 15 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/seq/multirc/pipedream_stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b", 6 | "checkpoints_save_name_prefix": "new_stale_seq", 7 | "checkpoints_save_dir": "/mnt/qnap/saareliad/no_virtual_stages/checkpoints/t5/3b/multirc/stale/", 8 | "dont_drop_last": false, 9 | "step_every": 4, 10 | "bs_train": 2 11 | } 12 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/seq/rte/gpipe_new.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 10, 6 | "bs_train": 4, 7 | "bs_test": 4, 8 | "checkpoints_save_name_prefix": "new_gpipe", 9 | "checkpoints_save_dir": "/mnt/qnap/saareliad/saare/checkpoints/no_virtual_stages/checkpoints/t5/3b/rte/gpipe/", 10 | "epochs": 6, 11 | "steps": -1, 12 | "save_checkpoints": false 13 | } 14 | -------------------------------------------------------------------------------- 
/pipe/configs/t5/new_t5_exp/seq/rte/pipedream_stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "checkpoints_save_name_prefix": "stale_adafactor", 6 | "checkpoints_save_dir": "/mnt/qnap/saareliad/no_virtual_stages/checkpoints/t5/3b/rte/stale/", 7 | "step_every": 10, 8 | "bs_train": 4, 9 | "bs_test": 4 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/seq/wic/gpipe_new.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 8, 6 | "bs_train": 16, 7 | "bs_test": 16, 8 | "checkpoints_save_name_prefix": "gpipe_new", 9 | "checkpoints_save_dir": "/mnt/qnap/saareliad/no_virtual_stages/checkpoints/t5/3b/wic/gpipe/", 10 | "save_checkpoints": false, 11 | "epochs": 12, 12 | "steps": -1 13 | } 14 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/seq/wic/pipedream_stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b", 6 | "checkpoints_save_name_prefix": "stale_adafactor", 7 | "checkpoints_save_dir": "/mnt/qnap/saareliad/no_virtual_stages/checkpoints/t5/3b/wic/stale/", 8 | "bs_train": 32, 9 | "step_every": 4 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/seq_op_graph/boolq/gpipe_new.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 10, 6 | "bs_train": 2, 7 | "bs_test": 2, 8 | "checkpoints_save_name_prefix": "gpipe_new", 9 | "checkpoints_save_dir": "/mnt/qnap/saareliad/seq_op/checkpoints/t5/3b/boolq/gpipe/", 10 | "save_checkpoints": true, 11 | "epochs": -1, 12 | "steps": 3200, 13 | "dont_drop_last": false 14 | } 15 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/seq_op_graph/boolq/pipedream_stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b", 6 | "dont_drop_last": false, 7 | "checkpoints_save_name_prefix": "stale", 8 | "checkpoints_save_dir": "/mnt/qnap/saareliad/seq_op/checkpoints/t5/3b/boolq/stale/", 9 | "step_every": 5, 10 | "bs_train": 4 11 | } 12 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/seq_op_graph/multirc/gpipe_new.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 8, 6 | "bs_train": 1, 7 | "bs_test": 1, 8 | "checkpoints_save_name_prefix": "gpipe_new", 9 | "checkpoints_save_dir": "/mnt/qnap/saareliad/seq_op/checkpoints/t5/3b/multirc/gpipe/", 10 | "save_checkpoints": false, 11 | "epochs": 2, 12 | "dont_drop_last": false, 13 | "steps": -1 
14 | } 15 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/seq_op_graph/multirc/pipedream_stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b", 6 | "checkpoints_save_name_prefix": "new_stale_seq", 7 | "checkpoints_save_dir": "/mnt/qnap/saareliad/seq_op/checkpoints/t5/3b/multirc/stale/", 8 | "dont_drop_last": false, 9 | "step_every": 2, 10 | "bs_train": 4 11 | } 12 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/seq_op_graph/rte/gpipe_new.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 10, 6 | "bs_train": 4, 7 | "bs_test": 4, 8 | "checkpoints_save_name_prefix": "new_gpipe", 9 | "checkpoints_save_dir": "/mnt/qnap/saareliad/saare/checkpoints/seq_op/checkpoints/t5/3b/rte/gpipe/", 10 | "epochs": 6, 11 | "steps": -1, 12 | "save_checkpoints": false 13 | } 14 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/seq_op_graph/rte/pipedream_stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "checkpoints_save_name_prefix": "stale_adafactor", 6 | "checkpoints_save_dir": "/mnt/qnap/saareliad/seq_op/checkpoints/t5/3b/rte/stale/", 7 | "step_every": 5, 8 | "bs_train": 8, 9 | "bs_test": 8 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/seq_op_graph/wic/gpipe_new.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 16, 6 | "bs_train": 8, 7 | "bs_test": 8, 8 | "checkpoints_save_name_prefix": "gpipe_new", 9 | "checkpoints_save_dir": "/mnt/qnap/saareliad/seq_op/checkpoints/t5/3b/wic/gpipe/", 10 | "save_checkpoints": false, 11 | "epochs": 12, 12 | "steps": -1 13 | } 14 | -------------------------------------------------------------------------------- /pipe/configs/t5/new_t5_exp/seq_op_graph/wic/pipedream_stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b", 6 | "checkpoints_save_name_prefix": "stale_adafactor", 7 | "checkpoints_save_dir": "/mnt/qnap/saareliad/seq_op/checkpoints/t5/3b/wic/stale/", 8 | "bs_train": 32, 9 | "step_every": 4 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/boolq/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 10, 6 | "bs_train": 2, 7 | "bs_test": 2, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/nfs_Disk2/no_virtual_stages/checkpoints/t5/3b/boolq/gpipe/", 10 | "save_checkpoints": 
false, 11 | "epochs": 2, 12 | "steps": -1 13 | } 14 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/boolq/gpipe_new.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 10, 6 | "bs_train": 2, 7 | "bs_test": 2, 8 | "checkpoints_save_name_prefix": "gpipe_new", 9 | "checkpoints_save_dir": "/nfs_Disk2/no_virtual_stages/checkpoints/t5/3b/boolq/gpipe/", 10 | "model": "t5_3b_tied_lmheads_512_4_8p_bw12_squad1_pipedream", 11 | "save_checkpoints": true, 12 | "epochs": -1, 13 | "steps": 3200, 14 | "dont_drop_last": false 15 | } 16 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/boolq/pipedream_stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b", 6 | "model": "t5_3b_tied_lmheads_512_4_8p_bw12_squad1_pipedream", 7 | "dont_drop_last": true, 8 | "step_every": 5, 9 | "bs_train": 4 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/boolq/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b" 6 | } 7 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/cola/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 8, 6 | "bs_train": 32, 7 | "bs_test": 32, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/nfs_Disk2/saare/checkpoints/t5/3b/cola/" 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/cola/seq.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "SEQ", 6 | "no_recomputation": true, 7 | "out_filename": "seq" 8 | } 9 | 10 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/cola/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false 5 | } 6 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/multirc/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 8, 6 | "bs_train": 1, 7 | "bs_test": 1, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/nfs_Disk2/no_virtual_stages/checkpoints/t5/3b/multirc/gpipe/", 10 | "save_checkpoints": false, 11 | "epochs": 2, 12 | "steps": -1 13 | } 14 | 
-------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/multirc/gpipe_new.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 8, 6 | "bs_train": 1, 7 | "bs_test": 1, 8 | "checkpoints_save_name_prefix": "gpipe_new", 9 | "checkpoints_save_dir": "/nfs_Disk2/no_virtual_stages/checkpoints/t5/3b/multirc/gpipe/", 10 | "model": "t5_3b_tied_lmheads_512_4_8p_bw12_squad1_pipedream", 11 | "save_checkpoints": false, 12 | "epochs": 2, 13 | "steps": -1 14 | } 15 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/multirc/pipedream_stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b", 6 | "checkpoints_save_name_prefix": "new_stale_seq", 7 | "model": "t5_3b_tied_lmheads_512_4_8p_bw12_squad1_pipedream", 8 | "dont_drop_last": true, 9 | "step_every": 2, 10 | "bs_train": 4 11 | } 12 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/multirc/seq.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "SEQ", 6 | "no_recomputation": true, 7 | "bs_train": 1 8 | } 9 | 10 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/multirc/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b" 6 | } 7 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/rte/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 10, 6 | "bs_train": 4, 7 | "bs_test": 4, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/nfs_Disk2/saare/checkpoints/t5/3b/rte/seq/", 10 | "save_checkpoints": false, 11 | "epochs": 12, 12 | "steps": -1 13 | } 14 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/rte/gpipe_new.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 10, 6 | "bs_train": 4, 7 | "bs_test": 4, 8 | "checkpoints_save_name_prefix": "new_gpipe", 9 | "checkpoints_save_dir": "/nfs_Disk2/saare/checkpoints/t5/3b/rte/seq/", 10 | "model": "t5_3b_tied_lmheads_320_8_8p_bw12_squad1_pipedream", 11 | "epochs": 6, 12 | "steps": -1, 13 | "save_checkpoints": false 14 | } 15 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/rte/pipedream_stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 
| "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "model": "t5_3b_tied_lmheads_320_8_8p_bw12_squad1_pipedream", 6 | "step_every": 4, 7 | "bs_train": 10, 8 | "save_checkpoint_every_x_steps": 500 9 | } 10 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/rte/seq.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "SEQ", 6 | "no_recomputation": true, 7 | "out_filename": "seq" 8 | } 9 | 10 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/rte/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false 5 | } 6 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/rte_super_glue/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 10, 6 | "bs_train": 4, 7 | "bs_test": 4, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/nfs_Disk2/saare/checkpoints/t5/3b/super_glue_rte/" 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/rte_super_glue/gpipe_new.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 10, 6 | "bs_train": 4, 7 | "bs_test": 4, 8 | "checkpoints_save_name_prefix": "new_gpipe", 9 | "checkpoints_save_dir": "/nfs_Disk2/saare/checkpoints/t5/3b/super_glue_rte/", 10 | "model": "t5_3b_tied_lmheads_320_8_8p_bw12_squad1_pipedream", 11 | "epochs": 6, 12 | "steps": -1, 13 | "save_checkpoints": false 14 | } 15 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/rte_super_glue/pipedream_stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "model": "t5_3b_tied_lmheads_320_8_8p_bw12_squad1_pipedream", 6 | "step_every": 4, 7 | "bs_train": 10, 8 | "save_checkpoint_every_x_steps": 500 9 | } 10 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/rte_super_glue/seq.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "SEQ", 6 | "no_recomputation": true, 7 | "out_filename": "seq" 8 | } 9 | 10 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/rte_super_glue/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false 5 | } 6 | 
-------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/wic/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 8, 6 | "bs_train": 16, 7 | "bs_test": 16, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/nfs_Disk2/no_virtual_stages/checkpoints/t5/3b/wic/gpipe/", 10 | "save_checkpoints": false, 11 | "epochs": 12, 12 | "steps": -1 13 | } 14 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/wic/gpipe_new.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 8, 6 | "bs_train": 16, 7 | "bs_test": 16, 8 | "checkpoints_save_name_prefix": "gpipe_new", 9 | "checkpoints_save_dir": "/nfs_Disk2/no_virtual_stages/checkpoints/t5/3b/wic/gpipe/", 10 | "save_checkpoints": false, 11 | "epochs": 12, 12 | "steps": -1, 13 | "model": "t5_3b_tied_lmheads_64_4_8p_bw12_squad1_pipedream" 14 | } 15 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/wic/pipedream_stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b", 6 | "model": "t5_3b_tied_lmheads_64_4_8p_bw12_squad1_pipedream", 7 | "bs_train": 64, 8 | "step_every": 2 9 | } 10 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/seq/wic/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b" 6 | } 7 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/virtual_stages/boolq/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 10, 6 | "bs_train": 2, 7 | "bs_test": 2, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/nfs_Disk2/virtual_stages/checkpoints/t5/3b/boolq/gpipe/" 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/virtual_stages/boolq/gpipe_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 10, 6 | "bs_train": 2, 7 | "bs_test": 2, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/nfs_Disk2/virtual_stages/checkpoints/t5/3b/boolq/gpipe/", 10 | "save_checkpoints": false, 11 | "epochs": 2, 12 | "steps": -1 13 | } 14 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/virtual_stages/boolq/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": 
"common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b" 6 | } 7 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/virtual_stages/boolq/vs_stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "virtual_stages_1f1b", 6 | "supremum_staleness": 13 7 | } 8 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/virtual_stages/multirc/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 8, 6 | "bs_train": 1, 7 | "bs_test": 1, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/nfs_Disk2/virtual_stages/checkpoints/t5/3b/boolq/gpipe/" 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/virtual_stages/multirc/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b" 6 | } 7 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/virtual_stages/rte/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 10, 6 | "bs_train": 4, 7 | "bs_test": 4, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/nfs_Disk2/virtual_stages/checkpoints/t5/3b/rte/gpipe/" 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/virtual_stages/rte/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b" 6 | } 7 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/virtual_stages/superglue_rte/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 10, 6 | "bs_train": 4, 7 | "bs_test": 4, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/nfs_Disk2/virtual_stages/checkpoints/t5/3b/gpipe/superglue_rte/" 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/virtual_stages/superglue_rte/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false 5 | } 6 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/virtual_stages/wic/gpipe.json: -------------------------------------------------------------------------------- 1 | { 
2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 8, 6 | "bs_train": 16, 7 | "bs_test": 16, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/nfs_Disk2/virtual_stages/checkpoints/t5/3b/wic/gpipe/" 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_3b_p8/virtual_stages/wic/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b" 6 | } 7 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_base/seq/boolq/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 10, 6 | "bs_train": 2, 7 | "bs_test": 2, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/nfs_Disk2/no_virtual_stages/checkpoints/t5/3b/boolq/gpipe/", 10 | "save_checkpoints": false, 11 | "epochs": 2, 12 | "steps": -1 13 | } 14 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_base/seq/boolq/gpipe_new.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 10, 6 | "bs_train": 2, 7 | "bs_test": 2, 8 | "checkpoints_save_name_prefix": "gpipe_new", 9 | "checkpoints_save_dir": "/nfs_Disk2/no_virtual_stages/checkpoints/t5/base/boolq/gpipe/", 10 | "model": "t5_base_tied_lmheads_512_4_8p_bw12_squad1_pipedream", 11 | "save_checkpoints": true, 12 | "epochs": -1, 13 | "steps": 3200, 14 | "dont_drop_last": true 15 | } 16 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_base/seq/boolq/pipedream_stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b", 6 | "model": "t5_base_tied_lmheads_512_4_8p_bw12_squad1_pipedream", 7 | "dont_drop_last": true, 8 | "step_every": 5, 9 | "bs_train": 4 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_base/seq/boolq/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b" 6 | } 7 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_mpipe/L=32/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b" 6 | } 7 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_mpipe/boolq/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 
| "work_scheduler": "GPIPE", 5 | "step_every": 10, 6 | "bs_train": 2, 7 | "bs_test": 2, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/nfs_Disk2/mpipe/checkpoints/t5/3b/boolq/gpipe/" 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_mpipe/boolq/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b", 6 | "checkpoints_save_name_prefix": "stale_adafactor", 7 | "checkpoints_save_dir": "/nfs_Disk2/mpipe/checkpoints/t5/3b/boolq/stale/" 8 | } 9 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_mpipe/multirc/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 8, 6 | "bs_train": 1, 7 | "bs_test": 1, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/nfs_Disk2/mpipe/checkpoints/t5/3b/boolq/gpipe/" 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_mpipe/multirc/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b", 6 | "checkpoints_save_name_prefix": "stale_adafactor", 7 | "checkpoints_save_dir": "/nfs_Disk2/mpipe/checkpoints/t5/3b/multirc/stale/" 8 | } 9 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_mpipe/rte/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 10, 6 | "bs_train": 4, 7 | "bs_test": 4, 8 | "checkpoints_save_name_prefix": "gpipe_adafactor", 9 | "checkpoints_save_dir": "/nfs_Disk2/mpipe/checkpoints/t5/3b/rte/gpipe/" 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_mpipe/rte/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b", 6 | "checkpoints_save_name_prefix": "stale_adafactor", 7 | "checkpoints_save_dir": "/nfs_Disk2/mpipe/checkpoints/t5/3b/rte/stale/" 8 | } 9 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_mpipe/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b" 6 | } 7 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_mpipe/wic/gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "work_scheduler": "GPIPE", 5 | "step_every": 8, 6 | "bs_train": 16, 7 | "bs_test": 16, 8 | "checkpoints_save_name_prefix": 
"gpipe_adafactor", 9 | "checkpoints_save_dir": "/nfs_Disk2/mpipe/checkpoints/t5/3b/wic/gpipe/" 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_mpipe/wic/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false, 5 | "work_scheduler": "1f1b", 6 | "checkpoints_save_name_prefix": "stale_adafactor", 7 | "checkpoints_save_dir": "/nfs_Disk2/mpipe/checkpoints/t5/3b/wic/stale/" 8 | } 9 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_small/README.md: -------------------------------------------------------------------------------- 1 | # t5-small, TODO: 2 | this whole thing was used as internal test and has deprecated use of 3 | a t5_squad dataset (adapted from huggingface example, which I found inaccurate) -------------------------------------------------------------------------------- /pipe/configs/t5/t5_small/adafactor/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false 5 | } 6 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_small/rte/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false 5 | } 6 | -------------------------------------------------------------------------------- /pipe/configs/t5/t5_small/stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": "common.json", 3 | "base_config_path_is_relative": true, 4 | "weight_stashing": false 5 | } 6 | -------------------------------------------------------------------------------- /pipe/configs/vit/cifar100_384.json: -------------------------------------------------------------------------------- 1 | { 2 | "data_dir": "/home_local/saareliad/data", 3 | "dataset": "cifar100_384", 4 | "epochs": -1 5 | } 6 | -------------------------------------------------------------------------------- /pipe/configs/vit/cifar10_384.json: -------------------------------------------------------------------------------- 1 | { 2 | "data_dir": "/home_local/saareliad/data", 3 | "dataset": "cifar10_384", 4 | "epochs": -1, 5 | "steps": 10000 6 | } -------------------------------------------------------------------------------- /pipe/configs/vit/cv.json: -------------------------------------------------------------------------------- 1 | { 2 | "trainer": { 3 | "type": "cv_per_step_lr_scheduler", 4 | "args": { 5 | } 6 | }, 7 | "statistics": "cv" 8 | } -------------------------------------------------------------------------------- /pipe/configs/vit/cv_dcgn_global.json: -------------------------------------------------------------------------------- 1 | { 2 | "trainer": { 3 | "type": "cv_per_step_lr_scheduler_global_grad_norm", 4 | "args": { 5 | "always_calc_grad_norm": false, 6 | "max_grad_norm": 1.0 7 | } 8 | }, 9 | "statistics": "cv_grad_norm" 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/vit/cv_dcgn_local.json: -------------------------------------------------------------------------------- 1 | { 2 | "trainer": { 3 | 
"type": "cv_per_step_lr_scheduler_local_grad_norm", 4 | "args": { 5 | "always_calc_grad_norm": false, 6 | "max_grad_norm": 1.0 7 | } 8 | }, 9 | "statistics": "cv_grad_norm" 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/vit/cv_dcgn_local_prop.json: -------------------------------------------------------------------------------- 1 | { 2 | "trainer": { 3 | "type": "cv_per_step_lr_scheduler_local_grad_norm_prop", 4 | "args": { 5 | "always_calc_grad_norm": false, 6 | "max_grad_norm": 1.0 7 | } 8 | }, 9 | "statistics": "cv_grad_norm" 10 | } 11 | -------------------------------------------------------------------------------- /pipe/configs/vit/imagenet_384.json: -------------------------------------------------------------------------------- 1 | { 2 | "data_dir": "/home_local/saareliad/data/imagenet/", 3 | "dataset": "imagenet_384", 4 | "epochs": -1, 5 | "steps": 20000 6 | } -------------------------------------------------------------------------------- /pipe/configs/vit/tst_gpipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": [ 3 | "cv.json", 4 | "imagenet_384.json", 5 | "vit_large_patch32_384_in21k_imagenet_384c384_8p_bw12_gpipe_acyclic.json" 6 | ], 7 | "base_config_path_is_relative": true, 8 | "logdir": "logs/vit/imagenet/", 9 | "out_dir": "results/vit/imagenet/", 10 | "auto_file_name": true, 11 | "out_filename": "tst_vit", 12 | "distributed_backend": "mpi", 13 | "bs_train": 32, 14 | "bs_test": 32, 15 | "num_data_workers": 10, 16 | "dont_drop_last": true, 17 | "step_every": 16, 18 | "log_frequency": 2000, 19 | "optimizer": { 20 | "type": "sgd1", 21 | "args": { 22 | "lr": 0.03, 23 | "weight_decay": 0, 24 | "momentum": 0.9, 25 | "nesterov": true 26 | } 27 | }, 28 | "lr_scheduler": { 29 | "type": "get_cosine_schedule_with_warmup", 30 | "preproc_args": { 31 | }, 32 | "args": { 33 | "num_warmup_steps": 500, 34 | "num_training_steps": 20000, 35 | "num_cycles": 0.5, 36 | "last_epoch": -1 37 | } 38 | }, 39 | "weight_stashing": false, 40 | "work_scheduler": "gpipe", 41 | "cudnn_benchmark": true, 42 | "seed_from_cmd": false, 43 | "seed": 42 44 | } -------------------------------------------------------------------------------- /pipe/configs/vit/tst_gpipe_adafactor_cifar100.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": [ 3 | "cv.json", 4 | "cifar100_384.json", 5 | "vit_base_patch16_384_in21k_imagenet_384c384_8p_bw12_gpipe_acyclic.json" 6 | ], 7 | "base_config_path_is_relative": true, 8 | "logdir": "logs/vit/cifar100/adafactor/", 9 | "out_dir": "results/vit/cifar100/", 10 | "auto_file_name": true, 11 | "out_filename": "fast_adafactor", 12 | "distributed_backend": "mpi", 13 | "bs_train": 32, 14 | "bs_test": 32, 15 | "num_data_workers": 10, 16 | "dont_drop_last": false, 17 | "step_every": 16, 18 | "log_frequency": 100000, 19 | "optimizer": { 20 | "type": "adafactor", 21 | "args": { 22 | "lr": 0.03, 23 | "weight_decay": 0, 24 | "beta1": 0.9, 25 | "scale_parameter": true, 26 | "relative_step": false 27 | } 28 | }, 29 | "steps": 1000, 30 | "lr_scheduler": { 31 | "type": "get_cosine_schedule_with_warmup", 32 | "preproc_args": { 33 | }, 34 | "args": { 35 | "num_warmup_steps": 100, 36 | "num_training_steps": 1000, 37 | "num_cycles": 0.5, 38 | "last_epoch": -1 39 | } 40 | }, 41 | "weight_stashing": false, 42 | "work_scheduler": "gpipe", 43 | "cudnn_benchmark": true, 44 | "seed_from_cmd": false, 45 | "seed": 42 46 | 
} 47 | -------------------------------------------------------------------------------- /pipe/configs/vit/tst_gpipe_cifar100.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": [ 3 | "cv.json", 4 | "cifar100_384.json", 5 | "vit_base_patch16_384_in21k_imagenet_384c384_8p_bw12_gpipe_acyclic.json" 6 | ], 7 | "base_config_path_is_relative": true, 8 | "logdir": "logs/vit/cifar100/no_grad_norm/", 9 | "out_dir": "results/vit/cifar100/", 10 | "auto_file_name": true, 11 | "out_filename": "no_grad_norm_no_nesterov_meanstd05", 12 | "distributed_backend": "mpi", 13 | "bs_train": 32, 14 | "bs_test": 32, 15 | "num_data_workers": 10, 16 | "dont_drop_last": false, 17 | "step_every": 16, 18 | "log_frequency": 100000, 19 | "optimizer": { 20 | "type": "sgd2", 21 | "args": { 22 | "lr": 0.03, 23 | "weight_decay": 0, 24 | "momentum": 0.9, 25 | "nesterov": false 26 | } 27 | }, 28 | "steps": 1000, 29 | "lr_scheduler": { 30 | "type": "get_cosine_schedule_with_warmup", 31 | "preproc_args": { 32 | }, 33 | "args": { 34 | "num_warmup_steps": 100, 35 | "num_training_steps": 1000, 36 | "num_cycles": 0.5, 37 | "last_epoch": -1 38 | } 39 | }, 40 | "weight_stashing": false, 41 | "work_scheduler": "gpipe", 42 | "cudnn_benchmark": true, 43 | "seed_from_cmd": false, 44 | "seed": 42 45 | } 46 | -------------------------------------------------------------------------------- /pipe/configs/vit/tst_gpipe_dcgn_global.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": [ 3 | "cv_dcgn_global.json", 4 | "imagenet_384.json", 5 | "vit_large_patch32_384_in21k_imagenet_384c384_8p_bw12_gpipe_acyclic.json" 6 | ], 7 | "base_config_path_is_relative": true, 8 | "logdir": "logs/vit/imagenet/global/", 9 | "out_dir": "results/vit/imagenet/", 10 | "auto_file_name": true, 11 | "out_filename": "tst_vit_dcgn_global_no_nesterov_meanstd05", 12 | "distributed_backend": "mpi", 13 | "bs_train": 32, 14 | "bs_test": 128, 15 | "num_data_workers": 10, 16 | "dont_drop_last": false, 17 | "step_every": 16, 18 | "log_frequency": 200000, 19 | "optimizer": { 20 | "type": "sgd2", 21 | "args": { 22 | "lr": 0.03, 23 | "weight_decay": 0, 24 | "momentum": 0.9, 25 | "nesterov": false 26 | } 27 | }, 28 | "lr_scheduler": { 29 | "type": "get_cosine_schedule_with_warmup", 30 | "preproc_args": { 31 | }, 32 | "args": { 33 | "num_warmup_steps": 500, 34 | "num_training_steps": 20000, 35 | "num_cycles": 0.5, 36 | "last_epoch": -1 37 | } 38 | }, 39 | "weight_stashing": false, 40 | "work_scheduler": "gpipe", 41 | "cudnn_benchmark": true, 42 | "seed_from_cmd": false, 43 | "seed": 42 44 | } 45 | -------------------------------------------------------------------------------- /pipe/configs/vit/tst_gpipe_dcgn_global_cifar100.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": [ 3 | "cv_dcgn_global.json", 4 | "cifar100_384.json", 5 | "vit_base_patch16_384_in21k_imagenet_384c384_8p_bw12_gpipe_acyclic.json" 6 | ], 7 | "base_config_path_is_relative": true, 8 | "logdir": "logs/vit/cifar100/global/", 9 | "out_dir": "results/vit/cifar100/", 10 | "auto_file_name": true, 11 | "out_filename": "fast_dcgn_global_no_nesterov_meanstd05", 12 | "distributed_backend": "mpi", 13 | "bs_train": 32, 14 | "bs_test": 32, 15 | "num_data_workers": 10, 16 | "dont_drop_last": false, 17 | "step_every": 16, 18 | "log_frequency": 100000, 19 | "optimizer": { 20 | "type": "sgd2", 21 | "args": { 22 | "lr": 0.03, 23 | 
"weight_decay": 0, 24 | "momentum": 0.9, 25 | "nesterov": false 26 | } 27 | }, 28 | "steps": 1000, 29 | "lr_scheduler": { 30 | "type": "get_cosine_schedule_with_warmup", 31 | "preproc_args": { 32 | }, 33 | "args": { 34 | "num_warmup_steps": 100, 35 | "num_training_steps": 1000, 36 | "num_cycles": 0.5, 37 | "last_epoch": -1 38 | } 39 | }, 40 | "weight_stashing": false, 41 | "work_scheduler": "gpipe", 42 | "cudnn_benchmark": true, 43 | "seed_from_cmd": false, 44 | "seed": 42 45 | } 46 | -------------------------------------------------------------------------------- /pipe/configs/vit/tst_gpipe_dcgn_local.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": [ 3 | "cv_dcgn_local.json", 4 | "imagenet_384.json", 5 | "vit_large_patch32_384_in21k_imagenet_384c384_8p_bw12_gpipe_acyclic.json" 6 | ], 7 | "base_config_path_is_relative": true, 8 | "logdir": "logs/vit/imagenet/", 9 | "out_dir": "results/vit/imagenet/", 10 | "auto_file_name": true, 11 | "out_filename": "tst_vit_dcgn_local", 12 | "distributed_backend": "mpi", 13 | "bs_train": 4, 14 | "bs_test": 128, 15 | "num_data_workers": 10, 16 | "dont_drop_last": true, 17 | "step_every": 128, 18 | "log_frequency": 200000, 19 | "optimizer": { 20 | "type": "sgd1", 21 | "args": { 22 | "lr": 0.03, 23 | "weight_decay": 0, 24 | "momentum": 0.9, 25 | "nesterov": true 26 | } 27 | }, 28 | "lr_scheduler": { 29 | "type": "get_cosine_schedule_with_warmup", 30 | "preproc_args": { 31 | }, 32 | "args": { 33 | "num_warmup_steps": 500, 34 | "num_training_steps": 20000, 35 | "num_cycles": 0.5, 36 | "last_epoch": -1 37 | } 38 | }, 39 | "weight_stashing": false, 40 | "work_scheduler": "gpipe", 41 | "cudnn_benchmark": true, 42 | "seed_from_cmd": false, 43 | "seed": 42 44 | } 45 | -------------------------------------------------------------------------------- /pipe/configs/vit/tst_gpipe_dcgn_local_prop_cifar100.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": [ 3 | "cv_dcgn_local_prop.json", 4 | "cifar100_384.json", 5 | "vit_base_patch16_384_in21k_imagenet_384c384_8p_bw12_gpipe_acyclic.json" 6 | ], 7 | "base_config_path_is_relative": true, 8 | "logdir": "logs/vit/cifar100/local_prop/", 9 | "out_dir": "results/vit/cifar100/", 10 | "auto_file_name": true, 11 | "out_filename": "fast_dcgn_local_prop_no_nesterov_meanstd05", 12 | "distributed_backend": "mpi", 13 | "bs_train": 32, 14 | "bs_test": 32, 15 | "num_data_workers": 10, 16 | "dont_drop_last": false, 17 | "step_every": 16, 18 | "log_frequency": 100000, 19 | "optimizer": { 20 | "type": "sgd2", 21 | "args": { 22 | "lr": 0.03, 23 | "weight_decay": 0, 24 | "momentum": 0.9, 25 | "nesterov": false 26 | } 27 | }, 28 | "steps": 1000, 29 | "lr_scheduler": { 30 | "type": "get_cosine_schedule_with_warmup", 31 | "preproc_args": { 32 | }, 33 | "args": { 34 | "num_warmup_steps": 100, 35 | "num_training_steps": 1000, 36 | "num_cycles": 0.5, 37 | "last_epoch": -1 38 | } 39 | }, 40 | "weight_stashing": false, 41 | "work_scheduler": "gpipe", 42 | "cudnn_benchmark": true, 43 | "seed_from_cmd": false, 44 | "seed": 42 45 | } 46 | -------------------------------------------------------------------------------- /pipe/configs/vit/tst_stale.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_config_path": [ 3 | "cv.json", 4 | "imagenet_384.json", 5 | "vit_large_patch32_384_in21k_imagenet_384c384_8p_bw12_async_acyclic.json" 6 | ], 7 | "base_config_path_is_relative": 
true, 8 | "logdir": "logs/vit/imagenet/", 9 | "out_dir": "results/vit/imagenet/", 10 | "auto_file_name": true, 11 | "out_filename": "tst_vit", 12 | "distributed_backend": "mpi", 13 | "bs_train": 128, 14 | "bs_test": 128, 15 | "num_data_workers": 10, 16 | "dont_drop_last": true, 17 | "step_every": 4, 18 | "optimizer": { 19 | "type": "sgd1", 20 | "args": { 21 | "lr": 0.03, 22 | "weight_decay": 0, 23 | "momentum": 0.9, 24 | "nesterov": true 25 | } 26 | }, 27 | "lr_scheduler": { 28 | "type": "get_cosine_schedule_with_warmup", 29 | "preproc_args": { 30 | }, 31 | "args": { 32 | "num_warmup_steps": 500, 33 | "num_training_steps": 20000, 34 | "num_cycles": 0.5, 35 | "last_epoch": -1 36 | } 37 | }, 38 | "weight_stashing": false, 39 | "work_scheduler": "1F1B", 40 | "cudnn_benchmark": true, 41 | "seed_from_cmd": false, 42 | "seed": 42 43 | } -------------------------------------------------------------------------------- /pipe/configs/vit/vit_base_patch16_384_in21k_imagenet_384c384_8p_bw12_gpipe_acyclic.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "vit_base_patch16_384_in21k_imagenet_384c384_8p_bw12_gpipe_acyclic", 3 | "stage_to_device_map": [0,1,2,3,4,5,6,7], 4 | "nprocs": 8 5 | } -------------------------------------------------------------------------------- /pipe/configs/vit/vit_large_patch32_384_in21k_imagenet_384c384_8p_bw12_async_acyclic.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "vit_large_patch32_384_in21k_imagenet_384c384_8p_bw12_async_acyclic", 3 | "stage_to_device_map": [0,1,2,3,4,5,6,7], 4 | "nprocs": 8 5 | } -------------------------------------------------------------------------------- /pipe/configs/vit/vit_large_patch32_384_in21k_imagenet_384c384_8p_bw12_gpipe_acyclic.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "vit_large_patch32_384_in21k_imagenet_384c384_8p_bw12_gpipe_acyclic", 3 | "stage_to_device_map": [0,1,2,3,4,5,6,7], 4 | "nprocs": 8 5 | } -------------------------------------------------------------------------------- /pipe/data/__init__.py: -------------------------------------------------------------------------------- 1 | # TODO: be explicit 2 | from typing import Optional, Dict, Any 3 | 4 | from .datasets import * 5 | from . 
import cv, lm, cep, vit, squad 6 | from .from_args_and_kw import * 7 | # Now, import all so the available datasets will be loaded 8 | from .t5 import t5_tfds 9 | 10 | 11 | def is_explicit_non_seperated_dataset(args): 12 | return "_nonsep" in args.data_propagator 13 | 14 | 15 | def get_dataloaders(args, 16 | pipe_config: Optional[PipelineConfig] = None, 17 | dataset_keywords: Optional[Dict[str, Any]] = None): 18 | if dataset_keywords is None: 19 | dataset_keywords = dict() 20 | # TODO: replicated 21 | if not is_explicit_non_seperated_dataset(args): 22 | train_dl, test_dl, samplers, extra = get_separate_dls_from_args( 23 | args, 24 | pipe_config=pipe_config, 25 | verbose=False, 26 | dataset_keywords=dataset_keywords, 27 | shuffle_train=getattr(args, "shuffle_train", True) 28 | ) 29 | else: 30 | raise NotImplementedError("now deprecated") 31 | return train_dl, test_dl, samplers, extra 32 | -------------------------------------------------------------------------------- /pipe/data/cep.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import TensorDataset 2 | 3 | from models.normal.cep import Dataset 4 | from pipe.data import CommonDatasetHandler, register_dataset, register_hardcoded_just_xy_dataset 5 | 6 | 7 | def _get_separated_dataset(just, DATA_DIR, args, **dataset_keywords): 8 | if just is None: 9 | return TensorDataset(), TensorDataset() 10 | return Dataset(**args.cep_dataset_kwargs, just=just), Dataset(**args.cep_dataset_kwargs, just=just) 11 | 12 | 13 | class SEP_CEP_DatasetHandler(CommonDatasetHandler): 14 | def __init__(self, **kw): 15 | super().__init__() 16 | train_ds, test_ds = _get_separated_dataset(**kw) 17 | self.train_ds = train_ds 18 | self.test_ds = test_ds 19 | 20 | def get_train_ds(self, **kw): 21 | return self.train_ds 22 | 23 | def get_test_ds(self, **kw): 24 | return self.test_ds # TODO 25 | 26 | def get_validation_ds(self, **kw): 27 | raise NotImplementedError() 28 | 29 | def get_modify_trainer_fn(self): 30 | pass 31 | 32 | def modify_dataloader_keywords(self, dataloader_keywords): 33 | return dataloader_keywords 34 | 35 | 36 | register_dataset("cep", SEP_CEP_DatasetHandler) 37 | register_hardcoded_just_xy_dataset("cep") 38 | -------------------------------------------------------------------------------- /pipe/data/download/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saareliad/FTPipe/986014df3bb985595f22139ab355a48ca00dc12f/pipe/data/download/__init__.py -------------------------------------------------------------------------------- /pipe/data/hardcoded_dirs.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # Fallback to this dataset dir if no other dir is given as an argument to functions. 
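# (For illustration: os.path.expanduser('~/.pytorch-datasets') resolves to e.g. /home/<user>/.pytorch-datasets.)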
4 | DEFAULT_DATA_DIR = os.path.expanduser('~/.pytorch-datasets') 5 | IMAGENET_ROOT_DIR = "/home_local/saareliad/data/imagenet/" 6 | # WIKI2_DATA_DIR = DATA_DIR/wikitext-2-raw 7 | -------------------------------------------------------------------------------- /pipe/data/t5/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saareliad/FTPipe/986014df3bb985595f22139ab355a48ca00dc12f/pipe/data/t5/__init__.py -------------------------------------------------------------------------------- /pipe/env_utils/deprecated/Makefile: -------------------------------------------------------------------------------- 1 | SHELL := /bin/bash 2 | # 3 | # The SHELL enables us to source stuff. 4 | # 5 | ######################################## 6 | # Below is stuff for pytorch v1.3 # 7 | # (a compiled nightly version) # 8 | ######################################## 9 | .PHONY: env 10 | env: 11 | # Install initial "easy" requirements from file. 12 | conda env create -f environment.yml 13 | conda activate msnag 14 | # solves some problems of conda feature tracking. 15 | conda config --env --add channels saareliad 16 | conda config --env --add pinned_packages saareliad::pytorch 17 | # Install pytorch: 18 | # optional, remove previous installation 19 | # conda uninstall pytorch -y # In case it installed the normal pytorch somehow 20 | conda install -c saareliad pytorch -y 21 | python -c"import torch" 22 | 23 | # Install torchvision: 24 | # Note: we need to do it after we installed pytorch. 25 | # (1) Install the faster pillow-simd with AVX2 support. 26 | # we can check if we have AVX2 support with: grep avx2 /proc/cpuinfo 27 | pip uninstall pillow 28 | CC="cc -mavx2" pip install -U --force-reinstall pillow-simd 29 | # (2) Install torchvision from source. 30 | pip install git+https://github.com/pytorch/vision.git@v0.5.0 31 | 32 | # Note: torchvision has to be built with the same cuda as pytorch. (currently: 10.1) 33 | # if it does not work, just do 34 | # pip install torchvision==0.5 --no-dependencies, but we won't have AVX2. 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /pipe/env_utils/deprecated/old_environment_mpi1.yml: -------------------------------------------------------------------------------- 1 | name: msnag 2 | channels: 3 | - saareliad # should be explicitly set first, to gain priority over pytorch, however anaconda has channel priority problems (*) 4 | - pytorch 5 | - defaults 6 | - conda-forge 7 | dependencies: 8 | - python=3.7 9 | - pip 10 | # - pytorch # (*) therefore I removed this, and added a script in the makefile. 11 | # - torchvision # Remove it too, as it requires pytorch... 12 | - se-msnag1 # cuda aware openmpi 13 | - cudatoolkit 14 | - jupyterlab 15 | - numpy 16 | - scikit-learn 17 | - ipython 18 | - jupyter 19 | - pandas 20 | - cython 21 | - pytest 22 | - tqdm 23 | - sympy 24 | # - pybind11 # For binding c++ code to python, may be used to increase performance. 
25 | - graphviz # For partitioning visualization 26 | - python-graphviz # For partitioning visualization 27 | - networkx # For partitioning 28 | - seaborn 29 | - pip: 30 | - git+https://github.com/networkx/networkx-metis.git # For partitioning 31 | - matplotlib 32 | - nbmerge 33 | - pytest-xdist 34 | - flake8 35 | - autopep8 36 | - rope 37 | - click 38 | - transformers 39 | - ptvsd 40 | - t5 41 | - datasets 42 | -------------------------------------------------------------------------------- /pipe/env_utils/deprecated/old_environment_mpi2.yml: -------------------------------------------------------------------------------- 1 | name: msnag2 2 | channels: 3 | - saareliad # should be explicitly set first, to gain priority over pytorch, however anaconda has channel priority problems (*) 4 | - pytorch 5 | - defaults 6 | - conda-forge 7 | dependencies: 8 | - python=3.8 9 | - pip 10 | - se-msnag2 # cuda aware openmpi 11 | - magma-cuda102 12 | - numpy 13 | - ninja 14 | - pyyaml 15 | - mkl 16 | - mkl-include 17 | - setuptools 18 | - cmake 19 | - cffi 20 | - jupyterlab 21 | - scikit-learn 22 | - ipython 23 | - jupyter 24 | - pandas 25 | - cython 26 | - pytest 27 | - tqdm 28 | - sympy 29 | - pybind11 # For binding c++ code to python, may be used to increase performance. 30 | - graphviz # For partitioning visualization 31 | - python-graphviz # For partitioning visualization 32 | - networkx # For partitioning 33 | - seaborn 34 | - pip: 35 | - git+https://github.com/networkx/networkx-metis.git # For partitioning 36 | - matplotlib 37 | - nbmerge 38 | - pytest-xdist 39 | - flake8 40 | - autopep8 41 | - rope 42 | - click 43 | - transformers 44 | - ptvsd 45 | - t5 46 | - datasets 47 | -------------------------------------------------------------------------------- /pipe/env_utils/docker/.gitignore: -------------------------------------------------------------------------------- 1 | pytorch-conda-recipe 2 | pytorch-recipe -------------------------------------------------------------------------------- /pipe/env_utils/docker/Dockerfile_from_source: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu16.04 2 | ARG PYTHON_VERSION=3.8 3 | ARG WITH_TORCHVISION=1 4 | RUN apt-get update && apt-get install -y --no-install-recommends \ 5 | build-essential \ 6 | cmake \ 7 | git \ 8 | curl \ 9 | ca-certificates \ 10 | libjpeg-dev \ 11 | libpng-dev && \ 12 | rm -rf /var/lib/apt/lists/* 13 | 14 | 15 | RUN curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ 16 | chmod +x ~/miniconda.sh && \ 17 | ~/miniconda.sh -b -p /opt/conda && \ 18 | rm ~/miniconda.sh && \ 19 | /opt/conda/bin/conda install -y python=$PYTHON_VERSION pip numpy pyyaml scipy ipython mkl mkl-include ninja cython && \ 20 | /opt/conda/bin/conda install -y -c pytorch magma-cuda102 && \ 21 | /opt/conda/bin/conda install -y -c saareliad se-msnag2 && \ 22 | /opt/conda/bin/conda clean -ya 23 | ENV PATH /opt/conda/bin:$PATH 24 | # This must be done before pip so that requirements.txt is available 25 | WORKDIR /opt/pytorch 26 | COPY . . 27 | 28 | RUN git submodule sync && git submodule update --init --recursive 29 | RUN TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1 7.0+PTX" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \ 30 | CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \ 31 | pip install -v . 
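# For illustration: TORCH_CUDA_ARCH_LIST above builds native kernels for
# sm_35/sm_52/sm_60/sm_61/sm_70, and "7.0+PTX" also embeds PTX so that newer
# GPUs can JIT-compile; TORCH_NVCC_FLAGS' "-Xfatbin -compress-all" compresses
# the resulting fat binaries.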
32 | 33 | ENV CC "cc -mavx2" 34 | RUN pip uninstall -y pillow 35 | RUN pip install -U --force-reinstall pillow-simd 36 | 37 | RUN if [ "$WITH_TORCHVISION" = "1" ] ; then git clone https://github.com/pytorch/vision.git && cd vision && pip install -v . ; else echo "building without torchvision" ; fi 38 | 39 | WORKDIR /workspace 40 | RUN chmod -R a+w . -------------------------------------------------------------------------------- /pipe/env_utils/docker/ompi-recipe/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG baseImage=nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 2 | FROM $baseImage 3 | # pytorch args 4 | # ARG PYTHON_VERSION=3.7 5 | # ARG WITH_TORCHVISION=1 6 | 7 | # building ompi 8 | RUN apt-get update && \ 9 | apt-get install -y --no-install-recommends \ 10 | bzip2 \ 11 | ca-certificates \ 12 | curl \ 13 | wget && \ 14 | rm -rf /var/lib/apt/lists/* 15 | 16 | RUN wget "https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" -O /opt/miniconda.sh && \ 17 | chmod +x /opt/miniconda.sh && \ 18 | /opt/miniconda.sh -b -p /opt/conda && \ 19 | /opt/conda/bin/conda update -n base conda && \ 20 | rm /opt/miniconda.sh 21 | 22 | ENV PATH /opt/conda/bin:${PATH} 23 | 24 | RUN conda install -c anaconda \ 25 | anaconda-client \ 26 | conda-build \ 27 | conda-verify && \ 28 | conda clean -ya 29 | 30 | COPY . /opt/ompi-recipe 31 | 32 | WORKDIR /opt/ompi-recipe 33 | -------------------------------------------------------------------------------- /pipe/env_utils/docker/ompi-recipe/ompi-cuda/build.sh: -------------------------------------------------------------------------------- 1 | export CC=`basename $CC` 2 | export CXX=`basename $CXX` 3 | export LIBRARY_PATH=$PREFIX/lib 4 | 5 | pushd ompi && \ 6 | ./configure --prefix=$PREFIX \ 7 | --disable-dependency-tracking \ 8 | --disable-mpi-fortran \ 9 | --disable-wrapper-rpath \ 10 | --disable-wrapper-runpath \ 11 | --with-cuda \ 12 | --with-wrapper-cflags="-I$PREFIX/include" \ 13 | --with-wrapper-cxxflags="-I$PREFIX/include" \ 14 | --with-wrapper-ldflags="-L$PREFIX/lib -Wl,-rpath,$PREFIX/lib" && \ 15 | make -j${CPU_COUNT} all && \ 16 | make install && \ 17 | popd 18 | 19 | #--with-sge \ 20 | #--with-slurm \ 21 | -------------------------------------------------------------------------------- /pipe/env_utils/docker/ompi-recipe/ompi-cuda/meta.yaml: -------------------------------------------------------------------------------- 1 | {% set name = "se-msnag2" %} 2 | {% set ompiVersion = "4.0.3" %} 3 | {% set ompiVersionShort = "4.0" %} 4 | {% set cudaVersion = '.'.join(environ.get('CUDA_VERSION', '10.2').split('.')[:2]) %} 5 | 6 | package: 7 | name: {{ name }} 8 | version: {{ ompiVersion }} 9 | 10 | source: 11 | - url: https://download.open-mpi.org/release/open-mpi/v{{ ompiVersionShort }}/openmpi-{{ ompiVersion }}.tar.gz 12 | folder: ompi 13 | 14 | build: 15 | number: 0 16 | noarch: generic 17 | string: cuda{{ cudaVersion }} 18 | 19 | requirements: 20 | build: 21 | - ca-certificates 22 | - cmake 23 | - git 24 | - make 25 | - zlib 26 | - {{ compiler('c') }} 27 | - {{ compiler('cxx') }} 28 | run: 29 | - {{ compiler('c') }} 30 | - {{ compiler('cxx') }} 31 | 32 | test: 33 | commands: 34 | - mpiexec --version 35 | - orte-info 36 | 37 | about: 38 | summary: "No free meals until we publish :)" 39 | 40 | extra: 41 | recipe-maintainers: 42 | - saareliad 43 | -------------------------------------------------------------------------------- /pipe/env_utils/docker/ompi-recipe/ompi_conda.sh:
-------------------------------------------------------------------------------- 1 | # without docker: 2 | # wget https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.3.tar.gz 3 | # tar -xvzf openmpi-4.0.3.tar.gz ompi 4 | 5 | docker build -t conda:ompi-dev . && docker run --rm -it --runtime nvidia conda:ompi-dev /bin/bash 6 | # inside-container# anaconda login 7 | # inside-container# conda build ompi-cuda 8 | # inside-container# anaconda upload /path/to/ompi-cuda/package.tar.gz 9 | # anaconda upload /opt/conda/conda-bld/noarch/se-msnag2-4.0.3-cuda10.2.tar.bz2 10 | 11 | 12 | 13 | # # The path is determined by the PREFIX env var (?) 14 | # # export PREFIX='' 15 | # Or somewhere in conda build: /opt/conda/... see dockerfile. -------------------------------------------------------------------------------- /pipe/env_utils/env_add_to_build_from_source.yml: -------------------------------------------------------------------------------- 1 | name: pt2 2 | channels: 3 | - defaults 4 | dependencies: 5 | - scikit-learn # here we start 6 | - jupyter 7 | - jupyterlab 8 | - pandas 9 | - tqdm 10 | - sympy 11 | - networkx 12 | - matplotlib 13 | - pip 14 | - pip: 15 | - ptvsd 16 | - flake8 17 | - autopep8 18 | - transformers 19 | - yapf 20 | - t5==0.7.1 21 | - datasets 22 | - git+https://github.com/networkx/networkx-metis.git # For partitioning 23 | - timm 24 | - sortedcollections 25 | - graphviz 26 | - adjustText 27 | - seaborn 28 | 29 | 30 | 31 | #prefix: /home_local/saareliad/miniconda3/envs/py38 32 | 33 | -------------------------------------------------------------------------------- /pipe/env_utils/env_without_mpi.yml: -------------------------------------------------------------------------------- 1 | name: nompi 2 | channels: 3 | - pytorch 4 | - defaults 5 | - conda-forge 6 | dependencies: 7 | - python=3.8 8 | - pytorch=1.8.1 9 | - torchvision=0.9.1 10 | - scikit-learn # here we start 11 | - jupyter 12 | - jupyterlab 13 | - pandas 14 | - tqdm 15 | - sympy 16 | - networkx 17 | - matplotlib 18 | - scipy 19 | - ipython 20 | - numpy 21 | - pip 22 | - pip: 23 | - ptvsd 24 | - flake8 25 | - autopep8 26 | - transformers 27 | - yapf 28 | - t5==0.7.1 29 | - datasets 30 | - seaborn 31 | - timm 32 | - sortedcollections 33 | - graphviz 34 | - adjustText 35 | - git+https://github.com/networkx/networkx-metis.git # For partitioning -------------------------------------------------------------------------------- /pipe/env_utils/jupyter-lab.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### 4 | # jupyter-lab.sh 5 | # 6 | # This script is intended to help you run jupyter lab on servers.
7 | # 8 | # Example usage: 9 | # 10 | # To run on the gateway machine (limited resources, no GPU): 11 | # ./jupyter-lab.sh 12 | # 13 | # To run on a compute node: 14 | # srun -c 2 --gres=gpu:1 --pty jupyter-lab.sh 15 | # 16 | 17 | ### 18 | # Conda parameters 19 | # 20 | HH=$HOME 21 | test "$(hostname)" == 'ninja1' && HH=/home_local/$USER 22 | test "$(hostname)" == 'ninja2' && HH=/home_local/$USER 23 | test "$(hostname)" == 'ninja4' && HH=/home_local/$USER 24 | test "$(hostname)" == 'rambo1' && HH=/home_local/$USER 25 | test "$(hostname)" == 'rambo2' && HH=/home_local/$USER 26 | test "$(hostname)" == 'rambo3' && HH=/home_local/$USER 27 | test "$(hostname)" == 'rambo4' && HH=/home_local/$USER 28 | test "$(hostname)" == 'rambo5' && HH=/home_local/$USER 29 | 30 | CONDA_HOME=$HH/miniconda3 31 | 32 | CONDA_ENV=py38 33 | 34 | unset XDG_RUNTIME_DIR 35 | source $CONDA_HOME/etc/profile.d/conda.sh 36 | conda activate $CONDA_ENV 37 | 38 | jupyter lab --no-browser --ip=$(hostname -I | cut -d' ' -f1) --port-retries=100 39 | 40 | -------------------------------------------------------------------------------- /pipe/experiments/__init__.py: -------------------------------------------------------------------------------- 1 | from .experiments import load_experiment, load_experiment_for_update, save_experiment -------------------------------------------------------------------------------- /pipe/experiments/analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saareliad/FTPipe/986014df3bb985595f22139ab355a48ca00dc12f/pipe/experiments/analysis/__init__.py -------------------------------------------------------------------------------- /pipe/experiments/t5/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saareliad/FTPipe/986014df3bb985595f22139ab355a48ca00dc12f/pipe/experiments/t5/__init__.py -------------------------------------------------------------------------------- /pipe/misc/deepspeed.py: -------------------------------------------------------------------------------- 1 | # need to change 2 | # https://github.com/microsoft/DeepSpeed/blob/01726ce2b8ec1adbffae7974b5bfe600962c2043/deepspeed/runtime/engine.py#L545 3 | # to support other optimizers (adagrad) 4 | 5 | 6 | # do they support fp32? 7 | # https://github.com/microsoft/DeepSpeed/issues/109 8 | 9 | 10 | # { 11 | # "train_batch_size": 8, 12 | # "gradient_accumulation_steps": 1, 13 | # "steps_per_print": 1, 14 | # "zero_optimization": true, 15 | # "fp32_allreduce": true, 16 | # "optimizer": { 17 | # "type": "Adam", 18 | # "params": { 19 | # "lr": 0.0001 20 | # } 21 | # }, 22 | # 23 | # "fp16": { 24 | # "enabled": false 25 | # } 26 | # } 27 | # 28 | 29 | -------------------------------------------------------------------------------- /pipe/misc/mesh_failed_runs/git_log_head.txt: -------------------------------------------------------------------------------- 1 | commit 1c70f92b22159d55d2f57eb5e34dbd78f067799f Author: Adam Roberts Date: Mon Aug 31 11:52:25 2020 -0700 Allow additional gin configs to be passed to MtfModel for parsing after the operative config.
PiperOrigin-RevId: 329347298 commit 3effd60e0fb052cd6519c279c7f026ee9f1a0975 Author: Sharan Narang 2 | -------------------------------------------------------------------------------- /pipe/misc/mesh_failed_runs/run.sh: -------------------------------------------------------------------------------- 1 | t5_mesh_transformer \ 2 | --model_dir="t5_3b_recompute_fp32" \ 3 | --gin_file="dataset.gin" \ 4 | --gin_param="utils.run.mesh_shape = 'model:8,batch:1'" \ 5 | --gin_param="utils.run.mesh_devices = ['gpu:0', 'gpu:1', 'gpu:2', 'gpu:3', 'gpu:4', 'gpu:5', 'gpu:6', 'gpu:7']" \ 6 | --gin_param="MIXTURE_NAME = 'glue_rte_v002'" \ 7 | --gin_param="run.train_steps = 1004000" \ 8 | --gin_param="tokens_per_batch=12800" \ 9 | --gin_param="inputs_length = 320" \ 10 | --gin_param="targets_length = 8" \ 11 | --gin_param="pack_or_pad.pack = False" \ 12 | --gin_param="serialize_num_microbatches.tokens_per_microbatch_per_replica = 320" \ 13 | --gin_param="encoder/LayerStack.recompute_grads = True" \ 14 | --gin_param="decoder/LayerStack.recompute_grads = True" \ 15 | --gin_file="learning_rate_schedules/constant_0_001.gin" \ 16 | --gin_file="gs://t5-data/pretrained_models/3B/operative_config.gin" 17 | 18 | 19 | 20 | # --gin_param="run.sequence_length = {'inputs': 320, 'targets': 8}" 21 | # --gin_param="get_variable_dtype.activation_dtype = 'float32'" \ 22 | # --gin_param="get_variable_dtype.master_dtype = 'float32'" \ 23 | # --gin_param="get_variable_dtype.slice_dtype = 'float32'" \ 24 | -------------------------------------------------------------------------------- /pipe/misc/mesh_failed_runs/run_3b_failed.sh: -------------------------------------------------------------------------------- 1 | t5_mesh_transformer \ 2 | --model_dir="model" \ 3 | --gin_file="dataset.gin" \ 4 | --gin_param="utils.run.mesh_shape = 'model:8,batch:1'" \ 5 | --gin_param="utils.run.mesh_devices = ['gpu:0', 'gpu:1', 'gpu:2', 'gpu:3', 'gpu:4', 'gpu:5', 'gpu:6', 'gpu:7']" \ 6 | --gin_param="MIXTURE_NAME = 'glue_rte_v002'" \ 7 | --gin_param="run.train_steps = 1004000" \ 8 | --gin_param="tokens_per_batch=12800" \ 9 | --gin_param="inputs_length = 320" \ 10 | --gin_param="targets_length = 8" \ 11 | --gin_param="pack_or_pad.pack = False" \ 12 | --gin_param="serialize_num_microbatches.tokens_per_microbatch_per_replica = 1280" \ 13 | --gin_file="learning_rate_schedules/constant_0_001.gin" \ 14 | --gin_file="gs://t5-data/pretrained_models/3B/operative_config.gin" 15 | 16 | 17 | 18 | # --gin_param="run.sequence_length = {'inputs': 320, 'targets': 8}" 19 | -------------------------------------------------------------------------------- /pipe/misc/mesh_failed_runs/run_3b_failed_recom_omm.sh: -------------------------------------------------------------------------------- 1 | t5_mesh_transformer \ 2 | --model_dir="t5_3b_recompute_fp32" \ 3 | --gin_file="dataset.gin" \ 4 | --gin_param="utils.run.mesh_shape = 'model:8,batch:1'" \ 5 | --gin_param="utils.run.mesh_devices = ['gpu:0', 'gpu:1', 'gpu:2', 'gpu:3', 'gpu:4', 'gpu:5', 'gpu:6', 'gpu:7']" \ 6 | --gin_param="MIXTURE_NAME = 'glue_rte_v002'" \ 7 | --gin_param="run.train_steps = 1004000" \ 8 | --gin_param="tokens_per_batch=12800" \ 9 | --gin_param="inputs_length = 320" \ 10 | --gin_param="targets_length = 8" \ 11 | --gin_param="pack_or_pad.pack = False" \ 12 | --gin_param="serialize_num_microbatches.tokens_per_microbatch_per_replica = 1280" \ 13 | --gin_param="encoder/LayerStack.recompute_grads = True" \ 14 | --gin_param="decoder/LayerStack.recompute_grads = True" \ 15 |
--gin_file="learning_rate_schedules/constant_0_001.gin" \ 16 | --gin_file="gs://t5-data/pretrained_models/3B/operative_config.gin" 17 | 18 | 19 | 20 | # --gin_param="run.sequence_length = {'inputs': 320, 'targets': 8}" 21 | # --gin_param="get_variable_dtype.activation_dtype = 'float32'" \ 22 | # --gin_param="get_variable_dtype.master_dtype = 'float32'" \ 23 | # --gin_param="get_variable_dtype.slice_dtype = 'float33'" \ 24 | -------------------------------------------------------------------------------- /pipe/misc/p2p_bw_mat.sh: -------------------------------------------------------------------------------- 1 | DIR=/usr/local/cuda/samples/1_Utilities/p2pBandwidthLatencyTest/ 2 | 3 | cd ${DIR} 4 | sudo make 5 | ./p2pBandwidthLatencyTest 6 | cd - 7 | -------------------------------------------------------------------------------- /pipe/misc/print_partition_layers_scopes.py: -------------------------------------------------------------------------------- 1 | from pprint import pprint 2 | 3 | # from models.partitioned.t5_3b_tied_lmheads_320_8_8p_bw12_squad1_virtual_stages import * 4 | # from models.partitioned.t5_3b_tied_lmheads_64_4_8p_bw12_squad1_acyclic import * 5 | from models.partitioned.t5_3b_tied_lmheads_64_4_8p_bw12_squad1_pipedream import * 6 | 7 | if __name__ == '__main__': 8 | 9 | 10 | for i, v in list(locals().items()): 11 | i: str 12 | if not i.startswith("Partition"): 13 | continue 14 | print(i) 15 | pprint(v.LAYER_SCOPES) 16 | pprint(v.TENSORS) 17 | print() 18 | 19 | -------------------------------------------------------------------------------- /pipe/misc/test_mpi/README.md: -------------------------------------------------------------------------------- 1 | ## Test MPI run 2 | [adapted from here](https://medium.com/@esaliya/pytorch-distributed-with-mpi-acb84b3ae5fd) 3 | Test if pytorch has openmpi backend 4 | 5 | ``` 6 | mpirun -np 2 python pytorch_distributed.py 7 | ``` 8 | 9 | ## Test for multiple machines 10 | on each machine: 11 | ``` 12 | mpirun --hostfile nodes.txt --map-by node -np 2 python pytorch_distributed.py 13 | ``` 14 | testing for ninja4 and ninja2 (in this order) 15 | 16 | TODO: play with init to make it work (did not work OOTB) 17 | -------------------------------------------------------------------------------- /pipe/misc/test_mpi/nodes.txt: -------------------------------------------------------------------------------- 1 | 132.68.36.205 2 | 132.68.36.203 3 | -------------------------------------------------------------------------------- /pipe/misc/test_mpi/pytorch_distributed.py: -------------------------------------------------------------------------------- 1 | import os 2 | import socket 3 | import torch 4 | import torch.distributed as dist 5 | 6 | 7 | def run(rank, size, hostname): 8 | print(f"I am {rank} of {size} in {hostname}") 9 | tensor = torch.zeros(1) 10 | if rank == 0: 11 | tensor += 1 12 | # Send the tensor to process 1 13 | dist.send(tensor=tensor, dst=1) 14 | else: 15 | # Receive tensor from process 0 16 | dist.recv(tensor=tensor, src=0) 17 | print('Rank ', rank, ' has data ', tensor[0]) 18 | 19 | 20 | def init_processes(rank, size, hostname, fn, backend='tcp'): 21 | """ Initialize the distributed environment. 
""" 22 | dist.init_process_group(backend, rank=rank, world_size=size) 23 | fn(rank, size, hostname) 24 | 25 | 26 | if __name__ == "__main__": 27 | world_size = int(os.environ['OMPI_COMM_WORLD_SIZE']) 28 | world_rank = int(os.environ['OMPI_COMM_WORLD_RANK']) 29 | hostname = socket.gethostname() 30 | init_processes(world_rank, world_size, hostname, run, backend='mpi') 31 | -------------------------------------------------------------------------------- /pipe/misc/test_mpi/pytorch_distributed_cuda_aware.py: -------------------------------------------------------------------------------- 1 | import os 2 | import socket 3 | import torch 4 | import torch.distributed as dist 5 | 6 | 7 | def run(rank, size, hostname): 8 | print(f"I am {rank} of {size} in {hostname}") 9 | tensor = torch.zeros(1).cuda() 10 | if rank == 0: 11 | tensor += 1 12 | # Send the tensor to process 1 13 | dist.send(tensor=tensor, dst=1) 14 | else: 15 | # Receive tensor from process 0 16 | dist.recv(tensor=tensor, src=0) 17 | print('Rank ', rank, ' has data ', tensor[0]) 18 | 19 | 20 | def init_processes(rank, size, hostname, fn, backend='tcp'): 21 | """ Initialize the distributed environment. """ 22 | dist.init_process_group(backend, rank=rank, world_size=size) 23 | fn(rank, size, hostname) 24 | 25 | 26 | if __name__ == "__main__": 27 | world_size = int(os.environ['OMPI_COMM_WORLD_SIZE']) 28 | world_rank = int(os.environ['OMPI_COMM_WORLD_RANK']) 29 | hostname = socket.gethostname() 30 | init_processes(world_rank, world_size, hostname, run, backend='mpi') 31 | -------------------------------------------------------------------------------- /pipe/misc/transformers/TODO.md: -------------------------------------------------------------------------------- 1 | # TODOs 2 | * try with the exact same model we use 3 | * try without tied wieghts 4 | * try without grad clip 5 | * try with weight decay 6 | -------------------------------------------------------------------------------- /pipe/misc/transformers/analyze_res.py: -------------------------------------------------------------------------------- 1 | import os 2 | from torch import tensor # for parsing. 3 | import warnings 4 | from pprint import pprint 5 | 6 | relative_dir_names = [f"epoch_{i}" for i in range(3)] + [""] 7 | dirs = [os.path.join(os.getcwd(), d) for d in relative_dir_names] 8 | files = [os.path.join(d, "eval_results.txt") for d in dirs] 9 | 10 | epoch_to_ppl = {} 11 | for epoch, file in enumerate(files): 12 | with open(file, "r") as f: 13 | perplexity = None 14 | exec(f.read()) 15 | if perplexity is None: 16 | pass 17 | # s = "perplexity is None, epoch:{epoch}".format(epoch=epoch) 18 | # warnings.warn(s) 19 | else: 20 | epoch_to_ppl[epoch] = perplexity.item() 21 | 22 | pprint(epoch_to_ppl) 23 | -------------------------------------------------------------------------------- /pipe/misc/transformers/bert-large/run.sh: -------------------------------------------------------------------------------- 1 | export SQUAD_DIR=/home_local/saareliad/data/squad2/ 2 | export OMP_NUM_THREADS=10 3 | # MODEL="deepset/bert-large-uncased-whole-word-masking-squad2" # its not finetuned... 
4 | MODEL="bert-large-uncased-whole-word-masking" 5 | function eval(){ 6 | python -m torch.distributed.launch --nproc_per_node=8 run_squad.py \ 7 | --model_type bert \ 8 | --model_name_or_path ${MODEL} \ 9 | --do_eval \ 10 | --do_lower_case \ 11 | --train_file $SQUAD_DIR/train-v2.0.json \ 12 | --predict_file $SQUAD_DIR/dev-v2.0.json \ 13 | --learning_rate 3e-5 \ 14 | --num_train_epochs 2 \ 15 | --max_seq_length 384 \ 16 | --doc_stride 128 \ 17 | --output_dir ./wwm_uncased_finetuned_squad2/ \ 18 | --per_gpu_eval_batch_size=16 19 | } 20 | 21 | function train(){ 22 | python -m torch.distributed.launch --nproc_per_node=8 run_squad.py \ 23 | --model_type bert \ 24 | --model_name_or_path ${MODEL} \ 25 | --do_eval \ 26 | --do_train \ 27 | --do_lower_case \ 28 | --train_file $SQUAD_DIR/train-v2.0.json \ 29 | --predict_file $SQUAD_DIR/dev-v2.0.json \ 30 | --learning_rate 3e-5 \ 31 | --num_train_epochs 2 \ 32 | --max_seq_length 384 \ 33 | --doc_stride 128 \ 34 | --output_dir ./wwm_uncased_finetuned_squad2/ \ 35 | --per_gpu_train_batch_size=3 \ 36 | --per_gpu_eval_batch_size=3 37 | } 38 | 39 | train 40 | -------------------------------------------------------------------------------- /pipe/misc/tst_ibroadcast.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import torch 4 | import torch.distributed as dist 5 | 6 | BACKEND = 'gloo' 7 | CUDA = True 8 | 9 | if __name__ == "__main__": 10 | dist.init_process_group(BACKEND, init_method="env://", world_size=2) 11 | pg = dist.new_group(ranks=[0, 1], backend=BACKEND) 12 | pg2 = dist.new_group(ranks=[0, 1], backend=BACKEND) 13 | shape = (10, 10, 1000) 14 | 15 | res = torch.ones(*shape) 16 | buff = torch.zeros(*shape) 17 | if CUDA: 18 | res = res.cuda() 19 | buff = buff.cuda() 20 | 21 | start = time.time() 22 | if dist.get_rank() == 0: 23 | tensor = torch.ones(*shape).cuda() 24 | o = dist.broadcast(tensor, 0, async_op=True, group=pg) 25 | o2 = dist.broadcast(buff, 1, async_op=True, group=pg2) 26 | 27 | else: 28 | tensor = buff 29 | tensor2 = buff.clone() + 6 30 | o2 = dist.broadcast(tensor2, 1, async_op=True, group=pg2) 31 | o = dist.broadcast(tensor, 0, async_op=True, group=pg) 32 | 33 | if dist.get_rank() == 1: 34 | o.wait() 35 | end = time.time() 36 | print(end - start) 37 | 38 | print(torch.sum(tensor)) 39 | print(tensor.dtype) 40 | assert torch.all(tensor == res) 41 | print("Done") 42 | end = time.time() 43 | print(end - start) 44 | 45 | if dist.get_rank() == 0: 46 | o2.wait() 47 | print("Done o2", "got", buff.sum()) 48 | 49 | """ 50 | python -m torch.distributed.launch --nnodes 1 --nproc_per_node 2 --node_rank 0 tst_ibroadcast.py 51 | 52 | """ 53 | -------------------------------------------------------------------------------- /pipe/misc/tst_isend.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.distributed as dist 3 | 4 | """ Will test MPI with this later...
""" 5 | 6 | BACKAND = 'gloo' 7 | CUDA = False 8 | 9 | if __name__ == "__main__": 10 | dist.init_process_group(BACKAND, init_method="env://", world_size=2) 11 | shape = (10, 10, 10) 12 | if dist.get_rank() == 0: 13 | if CUDA: 14 | o = dist.isend(torch.ones(*shape).cuda(), 1, tag=4) 15 | o2 = dist.isend(torch.ones(*shape).mul_(2), 1, tag=6) 16 | else: 17 | if CUDA: 18 | tensor = torch.zeros(*shape) 19 | tensor2 = torch.zeros(*shape) 20 | 21 | if CUDA: 22 | o = dist.irecv(tensor, 0, tag=4) 23 | 24 | o2 = dist.irecv(tensor2, 0, tag=6) 25 | 26 | if dist.get_rank() == 1: 27 | if CUDA: 28 | o.wait() 29 | o2.wait() 30 | if CUDA: 31 | print("tensor", torch.sum(tensor), tensor.dtype) 32 | 33 | print("tensor2", torch.sum(tensor2), tensor2.dtype) 34 | 35 | if CUDA: 36 | assert torch.all(tensor == torch.ones(*shape).cuda()) 37 | 38 | assert torch.all(tensor2 == torch.ones(*shape).mul_(2)) 39 | print("Done") 40 | 41 | """ 42 | python -m torch.distributed.launch --nnodes 1 --nproc_per_node 2 --node_rank 0 misc/tst_isend.py 43 | 44 | """ 45 | -------------------------------------------------------------------------------- /pipe/models/__init__.py: -------------------------------------------------------------------------------- 1 | from . import transformers_utils 2 | from . import transformers_cfg 3 | from . import parse_config 4 | from .registery import AVAILABLE_MODELS, register_model -------------------------------------------------------------------------------- /pipe/models/registery/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os 3 | 4 | from . import model_handler 5 | from .model_handler import AVAILABLE_MODELS, register_model 6 | 7 | 8 | # from . import cv 9 | # from . import hf 10 | # from . import vit 11 | # from . import cep 12 | # from . 
import dummy 13 | 14 | def _import_handlers_from_dir(tasks_dir=os.path.dirname(__file__), 15 | module_name='.models.registery.', package="pipe"): 16 | """ Automatically import any Python files in the tasks directory 17 | in order to automatically register all available tasks 18 | Args: 19 | tasks_dir: task dir to import from 20 | """ 21 | 22 | for file in os.listdir(tasks_dir): 23 | path = os.path.join(tasks_dir, file) 24 | if ( 25 | not file.startswith('_') 26 | and not file.startswith('.') 27 | and (file.endswith('.py') or os.path.isdir(path)) 28 | ): 29 | task_name = file[:file.find('.py')] if file.endswith('.py') else file 30 | 31 | importlib.import_module(module_name + task_name, package=package) 32 | 33 | 34 | _import_handlers_from_dir() 35 | -------------------------------------------------------------------------------- /pipe/models/registery/cep.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | from models.normal.cep import Net 4 | from pipe.models.registery.model_handler import CommonModelHandler 5 | 6 | 7 | def get_cep_model(n=50, k=11, c=500, n_split=4): 8 | model = Net(n, c, n_split=n_split) 9 | return model 10 | 11 | 12 | class CEPModelHandler(CommonModelHandler): 13 | def __init__(self, normal_model_fn, *args, **kw): 14 | super().__init__(*args, **kw) 15 | self.normal_model_fn = normal_model_fn 16 | 17 | def _get_normal_model_instance(self, *args, **kwargs): 18 | return self.normal_model_fn(*args, **kwargs) 19 | 20 | 21 | CEPModelHandler( 22 | normal_model_fn=functools.partial(get_cep_model, n=50, c=500, n_split=4)).register_autogenerated( 23 | generated_file_name_or_path="cep_netN50_C500_4p_bw12_metis") 24 | 25 | CEPModelHandler( 26 | normal_model_fn=functools.partial(get_cep_model, n=50, c=20000, n_split=4)).register_autogenerated( 27 | generated_file_name_or_path="cep_netN50_C20000_4p_bw12_metis") 28 | -------------------------------------------------------------------------------- /pipe/models/registery/dummy.py: -------------------------------------------------------------------------------- 1 | from pipe.models.registery.model_handler import CommonModelHandler 2 | from autopipe.tasks.dummy_t5 import DumT5Partitioner, T5Tokenizer 3 | from types import SimpleNamespace 4 | 5 | class DummyModelHandler(CommonModelHandler): 6 | def __init__(self, *args, **kw): 7 | super().__init__(*args, **kw) 8 | 9 | def _get_normal_model_instance(self, *args, **kwargs): 10 | if self.normal_model_instance is None: 11 | 12 | args = SimpleNamespace() 13 | p = DumT5Partitioner(args) 14 | args.lmhead = True 15 | args.stateless_tied = True 16 | args.precompute_masks = False 17 | self.normal_model_instance = p.get_model(args) 18 | self.tokenizer = p.tokenizer 19 | self.config = p.config 20 | 21 | return self.normal_model_instance 22 | 23 | def get_extra(self, *args, **kw): 24 | return dict(config=self.config, tokenizer=self.tokenizer) 25 | 26 | 27 | DummyModelHandler().register_autogenerated("DUMMY_LAYERSt5_base_tied_lmheads_512_4_2p_bw12_squad1_pipedream") 28 | DummyModelHandler().register_autogenerated("DUMMY_t5_base_tied_lmheads_512_4_2p_bw12_squad1_pipedream") 29 | DummyModelHandler().register_autogenerated("DUMMY_LAYERSt5_base_tied_lmheads_512_4_2p_bw12_squad1_mpipe") 30 | DummyModelHandler().register_autogenerated("DUMMY_t5_base_tied_lmheads_512_4_2p_bw12_squad1_mpipe") 31 | -------------------------------------------------------------------------------- /pipe/optimizers/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .adafactor import Adafactor 2 | from .adam import Adam 3 | from .adam_record import Adam as AdamGA 4 | from .adamw import AdamW 5 | from .adamw_record import AdamW as AdamWGA 6 | # the .data parameter update is the only change (pytorch 1.5) 7 | from .sgd import SGD as PytorchSGD 8 | from .sutskever_modified_sgd import SGD as SutskeverSGD 9 | 10 | # from .utils import linear_lr_scaling 11 | 12 | AVAILBALE_OPTIMIZERS = { 13 | 'sgd1': PytorchSGD, 14 | 'sgd2': SutskeverSGD, 15 | 'adam': Adam, 16 | 'adamw': AdamW, 17 | 'adam_record_step': AdamGA, 18 | 'adamw_record_step': AdamWGA, 19 | 'adafactor': Adafactor 20 | } 21 | -------------------------------------------------------------------------------- /pipe/optimizers/required.py: -------------------------------------------------------------------------------- 1 | """ hack - copied from Pytorch because the API hides it - so modified models will need fewer parameters""" 2 | 3 | class _RequiredParameter(object): 4 | """Singleton class representing a required parameter for an Optimizer.""" 5 | def __repr__(self): 6 | return "<required parameter>" 7 | 8 | required = _RequiredParameter() -------------------------------------------------------------------------------- /pipe/optimizers/utils.py: -------------------------------------------------------------------------------- 1 | 2 | def linear_lr_scaling(bs_train, BASE_LR, BASE_BS_TRAIN, downscale=False): 3 | 4 | if bs_train < BASE_BS_TRAIN: 5 | if not downscale: 6 | return BASE_LR 7 | else: 8 | lr = BASE_LR / (BASE_BS_TRAIN / bs_train) 9 | else: 10 | lr = BASE_LR * (bs_train / BASE_BS_TRAIN) 11 | 12 | assert(lr > 0) 13 | return lr 14 | -------------------------------------------------------------------------------- /pipe/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | from .communication import CommunicationHandlerBase, get_auto_comm_handler_cls 2 | from .partition_manager import SinglePartitionManager 3 | from .true_weights_storage import TrueWeightsStorage 4 | -------------------------------------------------------------------------------- /pipe/pipeline/communication/__init__.py: -------------------------------------------------------------------------------- 1 | from .interface import CommunicationHandlerBase 2 | from .p2p import P2PCommunicationHandler 3 | 4 | # from .common_simple_comm import SimpleCommBase 5 | # from .bcast import BCASTCommunicationHandler 6 | # from .replicated import P2PRankIO as ReplicatedCommunicationHandler 7 | # from .replicated import create_replicated_comm_handler_args 8 | 9 | # TODO: We want to support hybrid comm 10 | # TODO: Add replicated comm. 11 | # TODO: gloo will raise NotImplementedError, it can be used for testing though. 12 | 13 | 14 | __all__ = [ 15 | "get_auto_comm_handler_cls", "CommunicationHandlerBase", 16 | "P2PCommunicationHandler", 17 | ] 18 | 19 | # "ReplicatedCommunicationHandler" 20 | 21 | from enum import Enum, auto 22 | 23 | 24 | class CommPolicy(Enum): 25 | P2P = auto() 26 | BCAST = auto() 27 | 28 | 29 | def to_policy(backend, cpu): 30 | assert backend in {'nccl', 'gloo', 'mpi'} 31 | 32 | if backend == 'mpi' or cpu: 33 | return CommPolicy.P2P 34 | 35 | raise NotImplementedError() 36 | # return CommPolicy.BCAST 37 | 38 | 39 | # TODO: add replicated somehow.
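# Illustrative usage sketch (an assumed call pattern, for clarity): with an MPI backend on GPU,
# to_policy('mpi', cpu=False) yields CommPolicy.P2P, so get_auto_comm_handler_cls('mpi', False)
# returns P2PCommunicationHandler from the mapping below; the constructor arguments of the
# returned class depend on the concrete handler implementation.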
40 | POLICY_TO_COMM = { 41 | CommPolicy.P2P: P2PCommunicationHandler, 42 | # CommPolicy.BCAST: BCASTCommunicationHandler, 43 | } 44 | 45 | 46 | def get_auto_comm_handler_cls(backend, cpu): 47 | return POLICY_TO_COMM[to_policy(backend, cpu)] 48 | -------------------------------------------------------------------------------- /pipe/pipeline/communication/grouper.py: -------------------------------------------------------------------------------- 1 | from itertools import zip_longest 2 | 3 | __all__ = ["grouper"] 4 | 5 | 6 | # Creating iteration tool for "Double Buffers" 7 | 8 | 9 | def zip_discard_compr(*iterables, sentinel=object()): 10 | # https://stackoverflow.com/questions/38054593/zip-longest-without-fillvalue 11 | return [[entry for entry in iterable if entry is not sentinel] 12 | for iterable in zip_longest(*iterables, fillvalue=sentinel)] 13 | 14 | 15 | def grouper(iterable, n): 16 | """Collect data into *non fixed-length* chunks or blocks 17 | (changed the one in the itertools recipes) 18 | """ 19 | # grouper('ABCDEFG', 3) --> ABC DEF G 20 | args = [iter(iterable)] * n 21 | return zip_discard_compr(*args) 22 | 23 | # Fixed recved: 24 | # [torch.cat(group) for group in grouper(x, num_chunks)] 25 | 26 | # [torch.cat(group) for group in grouper(x, self.comm_handler.num_chunks)] -------------------------------------------------------------------------------- /pipe/pipeline/data_propagation/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Type 2 | 3 | from .automatic_prop import AutomaticPipelinePropagator 4 | from .automatic_prop_non_contig import AutomaticPipelinePropagatorNonContig 5 | from .interface import PipelineDataPropagator 6 | 7 | AVAILABLE_PROPAGATORS = { 8 | 'auto': AutomaticPipelinePropagator, # HACK: has a call for contiguous. 9 | 'auto_non_contig': AutomaticPipelinePropagatorNonContig 10 | } 11 | 12 | 13 | def get_propagator_cls(args) -> Type[PipelineDataPropagator]: 14 | propagator_cls = AVAILABLE_PROPAGATORS.get(args.data_propagator) 15 | if propagator_cls is None: 16 | raise NotImplementedError( 17 | f"args.data_propagator={args.data_propagator}, AVAILABLE_PROPAGATORS={AVAILABLE_PROPAGATORS.keys()}") 18 | 19 | return propagator_cls 20 | -------------------------------------------------------------------------------- /pipe/pipeline/data_propagation/automatic_prop_non_contig.py: -------------------------------------------------------------------------------- 1 | from pipe.pipeline.data_propagation.automatic_prop import PipelineDataPropagator 2 | 3 | 4 | class AutomaticPipelinePropagatorNonContig(PipelineDataPropagator): 5 | 6 | def __init__(self, *args, **kw): 7 | super().__init__() 8 | 9 | def pack_send_context(self, model_out, *ctx): 10 | # ctx here is just the label y, in case we send it in the pipeline. 11 | # otherwise, it just returns model_out.
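# An illustrative example of this packing (the values are assumptions): with
# model_out = (activations,) and ctx = (y,), the call returns (activations, y),
# i.e. the label is forwarded through the pipeline alongside the stage outputs,
# without detaching tensors or making them contiguous (hence "NonContig").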
12 | # return tuple(x.detach().contiguous() if isinstance(x, torch.Tensor) else x for x in chain(model_out, ctx)) 13 | return *model_out, *ctx 14 | -------------------------------------------------------------------------------- /pipe/pipeline/data_propagation/cv_target_prop.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .interface import PipelineDataPropagator 4 | 5 | 6 | class CVTargetInPipePropagator(PipelineDataPropagator): 7 | def __init__(self, device, is_last_partition, is_first_partition): 8 | super().__init__() 9 | self.device = device 10 | 11 | # Determine unpack_cls 12 | if is_last_partition: 13 | self.unpack_cls = self.unpack_data_for_last_partition 14 | elif is_first_partition: 15 | self.unpack_cls = self.unpack_data_for_first_partition 16 | else: 17 | self.unpack_cls = self.unpack_data_for_mid_partition 18 | 19 | def unpack_data_for_partition(self, data): 20 | # assert len(data) == 2 21 | return self.unpack_cls(data) 22 | 23 | def unpack_data_for_last_partition(self, data): 24 | *x, y = data 25 | # x = x.to(self.device, non_blocking=True) 26 | with torch.no_grad(): 27 | y = y.to(self.device, non_blocking=True) 28 | return x, y 29 | 30 | def unpack_data_for_first_partition(self, data): 31 | x, y = data 32 | with torch.no_grad(): 33 | x = x.to(self.device, non_blocking=True) 34 | # Note: we don't send the y to GPU if we don't use it in this partition. 35 | return x, y 36 | 37 | def unpack_data_for_mid_partition(self, data): 38 | # x will already be on our device :) 39 | # we don't need the y. 40 | # try: 41 | *x, y = data 42 | # FIXME 43 | 44 | return x, y 45 | # x, y = data 46 | # x = x.to(self.device, non_blocking=True) 47 | # Note: we don't send the y to GPU if we don't use it in this partition. 48 | # return x, y 49 | 50 | def pack_send_context(self, model_out, *ctx): 51 | # ctx here is just the label y 52 | return (*model_out, *ctx) 53 | -------------------------------------------------------------------------------- /pipe/pipeline/data_propagation/interface.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Tuple, Any 3 | 4 | 5 | class PipelineDataPropagator(abc.ABC): 6 | """ 7 | Class describing how to handle data loaded or passed through the pipeline. 8 | 9 | Usage: 10 | 11 | # Get data: 12 | (1) 13 | from_prev_stage = (...) # get it from somewhere 14 | to_stage, to_somewhere_else = propagator.preload_from_dataloader(dlitr) 15 | x = (*to_stage, *from_prev_stage) 16 | 17 | # Run the model: 18 | (2) 19 | x, *ctx = propagator.unpack_data_for_partition(data) 20 | model_out = model(x, ...) 21 | 22 | # Unify outside context 23 | (3) 24 | ctx = (*ctx, *to_somewhere_else) 25 | 26 | # Send Data: 27 | (4) 28 | t = propagator.pack_send_context(model_out, *ctx) 29 | send(t) ... 30 | """ 31 | 32 | def __init__(self, *args, **kw): 33 | pass 34 | 35 | # @staticmethod 36 | @abc.abstractmethod 37 | def unpack_data_for_partition(self, data) -> Tuple[Tuple[Any], Tuple[Any]]: 38 | """ In case we send labels in pipeline: extract them from the output. 39 | For last partition: extract what is loaded for outside loss and statistics (e.g: batch size, ...)
40 | """ 41 | pass 42 | 43 | # @staticmethod 44 | @abc.abstractmethod 45 | def pack_send_context(self, model_out, *ctx) -> Tuple[Any]: 46 | pass 47 | 48 | def preload_from_dataloader(self, dlitr) -> Tuple[Tuple[Any], Tuple[Any]]: 49 | if dlitr is None: 50 | return (), () 51 | else: 52 | raise NotImplementedError() 53 | -------------------------------------------------------------------------------- /pipe/pipeline/dp_sim/__init__.py: -------------------------------------------------------------------------------- 1 | from .convert import convert_to_num_gpus 2 | from .simulated_dp_batchnorm import BatchNorm1d, BatchNorm2d, BatchNorm3d 3 | -------------------------------------------------------------------------------- /pipe/pipeline/gap_aware/__init__.py: -------------------------------------------------------------------------------- 1 | # Here will come implementation for GAP aware. 2 | # https://arxiv.org/abs/1909.10802 3 | # We can apply it if one of the following happends: 4 | # 1. we stash the parameters theta we did forwad on (so we could calculate the gap) 5 | # 2. the gap is easy (e.g the gradient) 6 | 7 | from .adam_gap_aware import AdamGapAware, get_adam_gap_aware_cls 8 | from .adamw_gap_aware import AdamWGapAware, get_adamw_gap_aware_cls 9 | from .interface import GapAwareBase 10 | from .sgd_gap_aware import GapAware, get_sgd_gap_aware_cls 11 | 12 | SUPPORTED_GAP_AWARE_POLICIES = { 13 | 'almost_last_partition', 'all_except_last', 14 | 'all_except_last_two' 15 | } 16 | 17 | # TODO: adamw 18 | -------------------------------------------------------------------------------- /pipe/pipeline/monkey_patch/__init__.py: -------------------------------------------------------------------------------- 1 | # from .patch import dummy_forward_monkeypatch 2 | from .dummy_forward_monkey_patcher import DummyForwardMonkeyPatcher 3 | -------------------------------------------------------------------------------- /pipe/pipeline/monkey_patch/find_modules.py: -------------------------------------------------------------------------------- 1 | def find_modules(module, module_name, module_instance, found): 2 | """ 3 | Recursively find all instances of a specific module inside a module. 4 | 5 | Arguments: 6 | module {nn.Module} -- Module to search on 7 | module_name {str} -- Name of the model to search on in the currect context (used to output access string) 8 | module_instance {nn.Module} -- Class of the module to search 9 | found {list} -- List to append results to. 10 | 11 | Result will be [(access_string, model),...] inside 'found'. 12 | 13 | # Adapted from facebook XLM repo 14 | 15 | Examples: 16 | 17 | 1. Example of finding inside a class comprehended of MODEL_NAMES: 18 | ``` 19 | for name in self.MODEL_NAMES: 20 | find_modules(getattr(self, name), 21 | f'self.{name}', HashingMemory, self.memory_list) 22 | ``` 23 | 24 | 2. 
Example finding PKMLayer inside txl: 25 | ``` 26 | from find_modules import find_modules 27 | found = [] 28 | find_modules(model, 'model', PKMLayer, found) 29 | print([t[0] for t in found]) 30 | ``` 31 | """ 32 | 33 | if isinstance(module, module_instance): 34 | found.append((module_name, module)) 35 | else: 36 | for name, child in module.named_children(): 37 | name = ('%s[%s]' if name.isdigit() 38 | else '%s.%s') % (module_name, name) 39 | find_modules(child, name, module_instance, found) 40 | -------------------------------------------------------------------------------- /pipe/pipeline/trainers/grad_norm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saareliad/FTPipe/986014df3bb985595f22139ab355a48ca00dc12f/pipe/pipeline/trainers/grad_norm/__init__.py -------------------------------------------------------------------------------- /pipe/pipeline/trainers/grad_norm/local_grad_norm.py: -------------------------------------------------------------------------------- 1 | from typing import Type 2 | 3 | import torch 4 | from torch.nn.utils import clip_grad_norm_ 5 | 6 | from pipe.pipeline.trainers.interface import ScheduledOptimizationStepMultiPartitionTrainer 7 | from pipe.pipeline.trainers.utils import calc_local_total_norm 8 | 9 | 10 | def local_grad_norm_mixin_trainer_factory(trainer_cls: Type[ScheduledOptimizationStepMultiPartitionTrainer]): 11 | class GradNormMixedTrainer(trainer_cls): 12 | def __init__(self, *args, max_grad_norm=None, always_calc_grad_norm=False, **kw): 13 | super().__init__(*args, **kw) 14 | self.always_calc_grad_norm = always_calc_grad_norm 15 | self.max_grad_norm = max_grad_norm 16 | 17 | def step_on_computed_grads(self, old_lrs=None): 18 | self._grad_norm() 19 | return super().step_on_computed_grads(old_lrs=old_lrs) 20 | 21 | def _grad_norm(self): 22 | total_norm = None 23 | if self.max_grad_norm: 24 | with torch.no_grad(): 25 | total_norm = clip_grad_norm_(self.model.parameters(), 26 | self.max_grad_norm, 27 | norm_type=2) 28 | elif self.always_calc_grad_norm: 29 | with torch.no_grad(): 30 | total_norm = calc_local_total_norm(self.model.parameters(), norm_type=2) 31 | 32 | if total_norm and self.statistics.has_statistic("local_grad_norm"): 33 | self.statistics.update_on_batch("local_grad_norm", total_norm.item(), 1) 34 | 35 | return GradNormMixedTrainer 36 | -------------------------------------------------------------------------------- /pipe/pipeline/trainers/statistics/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Type 2 | 3 | from .cv import CVStats, NormCVstats, CVDistanceNorm, CVDistance 4 | from .glue import GlueStats, NormGluestats, GlueDistanceNorm, GlueDistance 5 | from .interface import Stats 6 | from .lm import LMStats, NormLMstats, LMDistanceNorm, LMDistance 7 | from .squad import SquadStats, NormSquadstats, SquadDistanceNorm, SquadDistance 8 | 9 | # TODO: remove the "record_loss_per_batch", it is mostly unused. 10 | # TODO: option to record every X batches, when the epoch is giant.
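# Illustrative usage sketch (an assumed call, based on the registry defined below): a name
# carrying the "_loss_per_batch" suffix resolves to the same class but enables per-batch
# loss recording, e.g. get_statistics("squad_loss_per_batch", ...) builds SquadStats with
# record_loss_per_batch=True.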
11 | 12 | # TODO: change the way of getting statistics 13 | 14 | AVAILBALE_STATS = {} 15 | 16 | 17 | def register_statistics(name: str, stats_cls: Type[Stats]): 18 | AVAILBALE_STATS[name] = stats_cls 19 | AVAILBALE_STATS[name + "_loss_per_batch"] = stats_cls 20 | 21 | 22 | def get_statistics(name: str, *args, **kw) -> Stats: 23 | record_loss_per_batch = "loss_per_batch" in name 24 | st_cls = AVAILBALE_STATS.get(name) 25 | return st_cls(*args, record_loss_per_batch=record_loss_per_batch, **kw) 26 | 27 | 28 | register_statistics("cv", CVStats) 29 | register_statistics("cv_grad_norm", NormCVstats) 30 | register_statistics("cv_theta_dist", CVDistance) 31 | register_statistics("cv_dist_grad_norm", CVDistanceNorm) 32 | 33 | register_statistics("lm", LMStats) 34 | register_statistics("lm_grad_norm", NormLMstats) 35 | register_statistics("lm_theta_dist", LMDistance) 36 | register_statistics("lm_dist_grad_norm", LMDistanceNorm) 37 | 38 | register_statistics("squad", SquadStats) 39 | register_statistics("squad_grad_norm", NormSquadstats) 40 | register_statistics("squad_theta_dist", SquadDistance) 41 | register_statistics("squad_dist_grad_norm", SquadDistanceNorm) 42 | 43 | register_statistics("glue", GlueStats) 44 | register_statistics("glue_grad_norm", NormGluestats) 45 | register_statistics("glue_theta_dist", GlueDistance) 46 | register_statistics("glue_dist_grad_norm", GlueDistanceNorm) 47 | -------------------------------------------------------------------------------- /pipe/pipeline/trainers/statistics/gap.py: -------------------------------------------------------------------------------- 1 | from itertools import chain 2 | 3 | import torch 4 | from torch.optim import Optimizer 5 | 6 | from pipe.pipeline.trainers.statistics import Stats 7 | 8 | 9 | def try_record_real_gap_from_current(statistics: Stats, 10 | optimizer: Optimizer, 11 | real_theta, 12 | pre_computed_gap=None, 13 | gap_name="gap"): 14 | """ calculates gap between model parameters and a given set of parameters, real_theta 15 | real_theta: Given set of parameters. 
TODO: rename 16 | """ 17 | if statistics.has_statistic(gap_name): 18 | if pre_computed_gap is None: 19 | if real_theta is None: 20 | gap = 0 21 | else: 22 | with torch.no_grad(): 23 | gap = sum([ 24 | torch.dist(a, b, p=2).item() for a, b in zip( 25 | chain.from_iterable([[p for p in pg['params']] 26 | for pg in 27 | optimizer.param_groups]), 28 | chain.from_iterable(real_theta)) 29 | ]) 30 | else: 31 | gap = pre_computed_gap 32 | 33 | statistics.update_on_batch(gap_name, gap, 1) 34 | return gap -------------------------------------------------------------------------------- /pipe/pipeline/trainers/statistics/utils.py: -------------------------------------------------------------------------------- 1 | class AverageMeter(object): 2 | """Computes and stores the average and current value""" 3 | 4 | def __init__(self): 5 | self.reset() 6 | 7 | def reset(self): 8 | self.avg = 0 9 | self.sum = 0 10 | self.count = 0 11 | # self.record = [] 12 | 13 | def update(self, val, n=1): 14 | self.sum += val * n 15 | self.count += n 16 | 17 | def get_avg(self): 18 | return self.sum / self.count 19 | 20 | 21 | class AccuracyMeter(AverageMeter): 22 | def __init__(self): 23 | super().__init__() 24 | 25 | def update(self, val, n=1): 26 | """ just to support adding num correct instead of accuracy """ 27 | self.sum += val 28 | self.count += n 29 | 30 | def get_avg(self): 31 | return (self.sum / self.count) * 100 32 | -------------------------------------------------------------------------------- /pipe/pipeline/trainers/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch._six import inf 3 | 4 | 5 | def calc_local_total_norm(parameters, norm_type=2): 6 | """ Exactly like clip_grad_norm_, but without the clip. 7 | # See https://github.com/pytorch/pytorch/blob/master/torch/nn/utils/clip_grad.py 8 | """ 9 | if isinstance(parameters, torch.Tensor): 10 | parameters = [parameters] 11 | parameters = list(filter(lambda p: p.grad is not None, parameters)) 12 | norm_type = float(norm_type) 13 | if norm_type == inf: 14 | total_norm = max(p.grad.detach().abs().max() for p in parameters) 15 | else: 16 | total_norm = torch.norm(torch.stack([torch.norm(p.grad.detach(), norm_type) for p in parameters]), norm_type) 17 | # clip_coef = max_norm / (total_norm + 1e-6) 18 | # if clip_coef < 1: 19 | # for p in parameters: 20 | # p.grad.detach().mul_(clip_coef) 21 | return total_norm 22 | 23 | 24 | def calc_local_total_norm_wo_sqrt(parameters, norm_type=2): 25 | """ Exactly like clip_grad_norm_, but without the clip.
26 | # See https://github.com/pytorch/pytorch/blob/master/torch/nn/utils/clip_grad.py 27 | """ 28 | if isinstance(parameters, torch.Tensor): 29 | parameters = [parameters] 30 | parameters = list(filter(lambda p: p.grad is not None, parameters)) 31 | norm_type = float(norm_type) 32 | if norm_type == inf: 33 | raise NotImplementedError() 34 | else: 35 | total_norm = torch.stack([torch.vdot(v, v) for v in [p.grad.detach().view(-1) for p in parameters]]).sum() 36 | # clip_coef = max_norm / (total_norm + 1e-6) 37 | # if clip_coef < 1: 38 | # for p in parameters: 39 | # p.grad.detach().mul_(clip_coef) 40 | return total_norm 41 | -------------------------------------------------------------------------------- /pipe/pipeline/weight_prediction/README.md: -------------------------------------------------------------------------------- 1 | # Counting experiments 2 | 3 | ## Counting `WeightPredictor`s 4 | 5 | For every combination of 6 | 7 | prediction_mem {clone, linear} 8 | optimization algorithm (e.g. {PYTORCH_SGD, TENSORFLOW_SGD, ADAM, WADAM, ...}) 9 | 10 | We have a WeightPredictor 11 | 12 | ## Counting `FixFunction`s 13 | 14 | for every combination of 15 | 16 | optimization algorithm (e.g. {PYTORCH_SGD, TENSORFLOW_SGD, ADAM, WADAM, ...}) 17 | 18 | we have several ways for predicting 19 | 20 | {ms_nag, just_multiply} 21 | 22 | ## Calculation with numbers 23 | 24 | opt = 3 (sgd1, sgd2, wadam) 25 | pred_mem = 2 26 | pred_alg = 2 27 | 28 | ### Total prediction runs (12) 29 | 30 | pred_runs: 2*3*2 = 12 31 | 32 | ### Total to compare (12) 33 | 34 | # Stale weights (3) 35 | # weight stashing (3) 36 | # Fully sync (3) 37 | # GPipe/DP (3) 38 | 39 | Total of 24 runs per net/dataset/.... 40 | 41 | ## Then we vary 42 | 43 | pipe length... 44 | flush rate... 45 | ... 46 | -------------------------------------------------------------------------------- /pipe/pipeline/weight_prediction/cow_dict.py: -------------------------------------------------------------------------------- 1 | # Taken from https://github.com/csernazs/cowdict/blob/master/cowdict/cowdict.py 2 | from collections.abc import MutableMapping 3 | 4 | 5 | class CowDict(MutableMapping): 6 | def __init__(self, base: dict): 7 | self.base = base 8 | self.dict = {} 9 | self.deleted_keys = set() 10 | 11 | def __getitem__(self, key): 12 | if key in self.deleted_keys: 13 | raise KeyError(key) 14 | 15 | try: 16 | return self.dict[key] 17 | except KeyError: 18 | return self.base[key] 19 | 20 | def __setitem__(self, key, value): 21 | try: 22 | self.deleted_keys.remove(key) 23 | except KeyError: 24 | pass 25 | 26 | self.dict[key] = value 27 | 28 | def __delitem__(self, key): 29 | if key in self.base: 30 | try: 31 | del self.dict[key] 32 | except KeyError: 33 | pass 34 | 35 | self.deleted_keys.add(key) 36 | 37 | elif key in self.dict: 38 | del self.dict[key] 39 | self.deleted_keys.add(key) 40 | else: 41 | raise KeyError(key) 42 | 43 | def __len__(self): 44 | return len(set(self.dict.keys()).union(set(self.base.keys())) - self.deleted_keys) 45 | 46 | def __iter__(self): 47 | 48 | for key in self.dict: 49 | if key not in self.deleted_keys: 50 | yield key 51 | 52 | for key in self.base: 53 | if key not in self.dict and key not in self.deleted_keys: 54 | yield key 55 | 56 | def __repr__(self): 57 | retval = ["{"] 58 | for key, value in self.items(): 59 | retval.append(repr(key)) 60 | retval.append(": ") 61 | retval.append(repr(value)) 62 | retval.append(", ") 63 | 64 | del retval[-1] 65 | retval.append("}") 66 | return "".join(retval) 67 | 
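# A minimal usage sketch for CowDict (illustrative; the values below are assumptions):
# the base dict is never mutated, while writes and deletes live in an overlay.
# base = {'lr': 0.1, 'momentum': 0.9}
# cow = CowDict(base)
# cow['lr'] = 0.01  # the write goes to the overlay; base is untouched
# del cow['momentum']  # the deletion is recorded in deleted_keys; base is untouched
# assert base == {'lr': 0.1, 'momentum': 0.9}
# assert dict(cow) == {'lr': 0.01}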
-------------------------------------------------------------------------------- /pipe/pipeline/weight_prediction/interface.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | 4 | class WeightPredictor(abc.ABC): 5 | def __init__(self, optimizer, 6 | fix_fn=None, scheduler=None, nag_with_predictor=False, true_weights_storage=None): 7 | self.optimizer = optimizer 8 | self.fix_fn = fix_fn 9 | self.scheduler = scheduler 10 | self.nag_with_predictor = nag_with_predictor 11 | if nag_with_predictor: 12 | print("-I- Doing NAG with predictor") 13 | self.true_weights_storage = true_weights_storage 14 | 15 | def setup(self, n_steps): 16 | if n_steps == 0 and self.nag_with_predictor: 17 | n_steps = 1 18 | self.n_steps = n_steps 19 | 20 | @abc.abstractmethod 21 | def forward(self): 22 | raise NotImplementedError() 23 | 24 | @abc.abstractmethod 25 | def revert(self): 26 | raise NotImplementedError() 27 | 28 | 29 | class FixFunction(abc.ABC): 30 | @abc.abstractmethod 31 | def __call__(self, p: WeightPredictor, pg): 32 | # WeightPredictor is used mainly to get sched from.... 33 | raise NotImplementedError() 34 | -------------------------------------------------------------------------------- /pipe/pipeline/weight_prediction/sym_pred_optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | from .auto_lambdify import auto_lambdify 2 | from .sympy_optimizer import WDSympySGD, WDSympySGDMsnag, NormalSympyAdam, run_and_display_sim, run_sim 3 | -------------------------------------------------------------------------------- /pipe/pipeline/weight_stashing/__init__.py: -------------------------------------------------------------------------------- 1 | from .weight_stashing import WeightStasher, WeightStashingCachePolicy 2 | -------------------------------------------------------------------------------- /pipe/run/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saareliad/FTPipe/986014df3bb985595f22139ab355a48ca00dc12f/pipe/run/__init__.py -------------------------------------------------------------------------------- /t5_used_scripts_example/to_partition_mpipe_layergraph_t5_3b_boolq_multirc.sh: -------------------------------------------------------------------------------- 1 | rm new_trace_cache_t53b_512_4_lg new_prof_cache_t53b_512_4_lg_ftpipe 2 | python -m autopipe.partition new_t5 \ 3 | --model_name_or_path \ 4 | t5-3b \ 5 | --t5_task \ 6 | squad1 \ 7 | --lmhead \ 8 | --n_iter \ 9 | 10 \ 10 | --analysis_batch_size \ 11 | 8 \ 12 | --partitioning_batch_size \ 13 | 8 \ 14 | --ct \ 15 | new_trace_cache_t53b_512_4_lg \ 16 | --cp \ 17 | new_prof_cache_t53b_512_4_lg_ftpipe \ 18 | --stateless_tied \ 19 | --lmhead \ 20 | --n_partitions \ 21 | 8 \ 22 | --L \ 23 | 16 \ 24 | --max_seq_length \ 25 | 512 \ 26 | --answer_max_seq_length \ 27 | 4 \ 28 | --partitioning_method \ 29 | mpipe \ 30 | --preset \ 31 | ftpipe \ 32 | --dont_use_async_meta_alg \ 33 | --save_memory_mode \ 34 | --disable_op_profiling \ 35 | --special_blocks \ 36 | T5Block \ 37 | --basic_blocks \ 38 | T5Block \ 39 | --output_file \ 40 | layer_graph_ 41 | # > partitioning_output_mpipe_t53b_512_4_lg_ftpipe.txt 42 | 43 | -------------------------------------------------------------------------------- /t5_used_scripts_example/to_partition_mpipe_layergraph_t5_3b_rte.sh: -------------------------------------------------------------------------------- 1 | python -m 
autopipe.partition new_t5 \ 2 | --model_name_or_path \ 3 | t5-3b \ 4 | --t5_task \ 5 | squad1 \ 6 | --lmhead \ 7 | --n_iter \ 8 | 10 \ 9 | --analysis_batch_size \ 10 | 8 \ 11 | --partitioning_batch_size \ 12 | 8 \ 13 | --ct \ 14 | new_trace_cache_t53b_320_8_lg \ 15 | --cp \ 16 | new_prof_cache_t53b_320_8_lg_ftpipe \ 17 | --stateless_tied \ 18 | --lmhead \ 19 | --n_partitions \ 20 | 8 \ 21 | --L \ 22 | 16 \ 23 | --max_seq_length \ 24 | 320 \ 25 | --answer_max_seq_length \ 26 | 8 \ 27 | --partitioning_method \ 28 | mpipe \ 29 | --preset \ 30 | ftpipe \ 31 | --dont_use_async_meta_alg \ 32 | --save_memory_mode \ 33 | --disable_op_profiling \ 34 | --special_blocks \ 35 | T5Block \ 36 | --basic_blocks \ 37 | T5Block \ 38 | --output_file \ 39 | layer_graph_ 40 | #> partitioning_output_mpipe_t53b_320_8_lg_ftpipe.txt 41 | 42 | -------------------------------------------------------------------------------- /t5_used_scripts_example/to_partition_mpipe_layergraph_t5_3b_wic.sh: -------------------------------------------------------------------------------- 1 | # NOTE: reducing size to fit in memory (?) it used to work before. 2 | rm new_trace_cache_t53b_64_4_lg new_prof_cache_t53b_64_4_lg_ftpipe 3 | python -m autopipe.partition new_t5 \ 4 | --model_name_or_path \ 5 | t5-3b \ 6 | --t5_task \ 7 | squad1 \ 8 | --lmhead \ 9 | --n_iter \ 10 | 10 \ 11 | --analysis_batch_size \ 12 | 64 \ 13 | --partitioning_batch_size \ 14 | 64 \ 15 | --ct \ 16 | new_trace_cache_t53b_64_4_lg \ 17 | --cp \ 18 | new_prof_cache_t53b_64_4_lg_ftpipe \ 19 | --stateless_tied \ 20 | --lmhead \ 21 | --n_partitions \ 22 | 8 \ 23 | --L \ 24 | 16 \ 25 | --max_seq_length \ 26 | 64 \ 27 | --answer_max_seq_length \ 28 | 4 \ 29 | --partitioning_method \ 30 | mpipe \ 31 | --preset \ 32 | ftpipe \ 33 | --dont_use_async_meta_alg \ 34 | --save_memory_mode \ 35 | --disable_op_profiling \ 36 | --special_blocks \ 37 | T5Block \ 38 | --basic_blocks \ 39 | T5Block \ 40 | --output_file \ 41 | layer_graph_ 42 | # > partitioning_output_mpipe_t53b_64_4_lg_ftpipe.txt 43 | 44 | -------------------------------------------------------------------------------- /t5_used_scripts_example/to_partition_mpipe_t5_3b_opgraph_boolq_multirc.sh: -------------------------------------------------------------------------------- 1 | # NOTE: reduced size to fit in memory. 
2 | rm new_trace_cache_t53b_512_4_op new_prof_cache_t53b_512_4_op_ftpipe 3 | python -m autopipe.partition new_t5 \ 4 | --model_name_or_path \ 5 | t5-3b \ 6 | --t5_task \ 7 | squad1 \ 8 | --lmhead \ 9 | --n_iter \ 10 | 5 \ 11 | --analysis_batch_size \ 12 | 4 \ 13 | --partitioning_batch_size \ 14 | 2 \ 15 | --ct \ 16 | new_trace_cache_t53b_512_4_op \ 17 | --cp \ 18 | new_prof_cache_t53b_512_4_op_ftpipe \ 19 | --stateless_tied \ 20 | --lmhead \ 21 | --n_partitions \ 22 | 8 \ 23 | --L \ 24 | 15 \ 25 | 16 \ 26 | 17 \ 27 | --max_seq_length \ 28 | 512 \ 29 | --answer_max_seq_length \ 30 | 4 \ 31 | --partitioning_method \ 32 | mpipe \ 33 | --preset \ 34 | ftpipe \ 35 | --dont_use_async_meta_alg \ 36 | --save_memory_mode \ 37 | --special_blocks \ 38 | T5Block \ 39 | --output_file \ 40 | op_ 41 | # --basic_blocks \ 42 | # T5Block 43 | 44 | -------------------------------------------------------------------------------- /t5_used_scripts_example/to_partition_mpipe_t5_3b_opgraph_rte.sh: -------------------------------------------------------------------------------- 1 | python -m autopipe.partition new_t5 \ 2 | --model_name_or_path \ 3 | t5-3b \ 4 | --t5_task \ 5 | squad1 \ 6 | --lmhead \ 7 | --n_iter \ 8 | 10 \ 9 | --analysis_batch_size \ 10 | 8 \ 11 | --partitioning_batch_size \ 12 | 8 \ 13 | --ct \ 14 | new_trace_cache_t53b_320_8_op \ 15 | --cp \ 16 | new_prof_cache_t53b_320_8_op_ftpipe \ 17 | --stateless_tied \ 18 | --lmhead \ 19 | --n_partitions \ 20 | 8 \ 21 | --L \ 22 | 16 \ 23 | --max_seq_length \ 24 | 320 \ 25 | --answer_max_seq_length \ 26 | 8 \ 27 | --partitioning_method \ 28 | mpipe \ 29 | --preset \ 30 | ftpipe \ 31 | --dont_use_async_meta_alg \ 32 | --save_memory_mode \ 33 | --special_blocks \ 34 | T5Block \ 35 | --output_file \ 36 | op_ 37 | # --output_file \ 38 | # lg \ 39 | # --basic_blocks \ 40 | # T5Block 41 | 42 | -------------------------------------------------------------------------------- /t5_used_scripts_example/to_partition_mpipe_t5_3b_opgraph_wic.sh: -------------------------------------------------------------------------------- 1 | rm new_prof_cache_t53b_64_4_op_ftpipe new_trace_cache_t53b_64_4_op 2 | python -m autopipe.partition new_t5 \ 3 | --model_name_or_path \ 4 | t5-3b \ 5 | --t5_task \ 6 | squad1 \ 7 | --lmhead \ 8 | --n_iter \ 9 | 10 \ 10 | --analysis_batch_size \ 11 | 32 \ 12 | --partitioning_batch_size \ 13 | 32 \ 14 | --ct \ 15 | new_trace_cache_t53b_64_4_op \ 16 | --cp \ 17 | new_prof_cache_t53b_64_4_op_ftpipe \ 18 | --stateless_tied \ 19 | --lmhead \ 20 | --n_partitions \ 21 | 8 \ 22 | --L \ 23 | 16 \ 24 | --max_seq_length \ 25 | 64 \ 26 | --answer_max_seq_length \ 27 | 4 \ 28 | --partitioning_method \ 29 | mpipe \ 30 | --preset \ 31 | ftpipe \ 32 | --dont_use_async_meta_alg \ 33 | --save_memory_mode \ 34 | --special_blocks \ 35 | T5Block \ 36 | --output_file \ 37 | op_ 38 | 39 | # --output_file \ 40 | # lg \ 41 | # --basic_blocks \ 42 | # T5Block 43 | 44 | -------------------------------------------------------------------------------- /t5_used_scripts_example/to_partition_mpipe_t5_base.sh: -------------------------------------------------------------------------------- 1 | python -m autopipe.partition new_t5 \ 2 | --model_name_or_path \ 3 | t5-base \ 4 | --t5_task \ 5 | squad1 \ 6 | --lmhead \ 7 | --n_iter \ 8 | 1 \ 9 | --analysis_batch_size \ 10 | 2 \ 11 | --partitioning_batch_size \ 12 | 2 \ 13 | --stateless_tied \ 14 | --lmhead \ 15 | --n_partitions \ 16 | 4 \ 17 | --L \ 18 | 4 \ 19 | 8 \ 20 | 12 \ 21 | 16 \ 22 | --max_seq_length \ 23 | 512 \ 24 | 
--answer_max_seq_length \ 25 | 4 \ 26 | --partitioning_method \ 27 | mpipe \ 28 | --save_memory_mode \ 29 | --output_file \ 30 | lg \ 31 | --special_blocks \ 32 | T5Block \ 33 | --basic_blocks \ 34 | T5Block 35 | 36 | -------------------------------------------------------------------------------- /t5_used_scripts_example/to_partition_spipe_OP_t5_3b_boolq_multirc.sh: -------------------------------------------------------------------------------- 1 | rm new_prof_cache_t53b_512_4_op new_trace_cache_t53b_512_4_op 2 | python -m autopipe.partition new_t5 \ 3 | --model_name_or_path \ 4 | t5-3b \ 5 | --t5_task \ 6 | squad1 \ 7 | --lmhead \ 8 | --n_iter \ 9 | 5 \ 10 | --analysis_batch_size \ 11 | 32 \ 12 | --partitioning_batch_size \ 13 | 32 \ 14 | --ct \ 15 | new_trace_cache_t53b_512_4_op \ 16 | --cp \ 17 | new_prof_cache_t53b_512_4_op \ 18 | --stateless_tied \ 19 | --lmhead \ 20 | --n_partitions \ 21 | 8 \ 22 | --max_seq_length \ 23 | 512 \ 24 | --answer_max_seq_length \ 25 | 4 \ 26 | --partitioning_method \ 27 | pipedream \ 28 | --preset \ 29 | pipedream \ 30 | --dont_use_async_meta_alg \ 31 | --save_memory_mode \ 32 | --special_blocks \ 33 | T5Block \ 34 | --output_file \ 35 | op_graph_ 36 | 37 | # --basic_blocks \ 38 | # T5Block 39 | 40 | -------------------------------------------------------------------------------- /t5_used_scripts_example/to_partition_spipe_OP_t5_3b_rte.sh: -------------------------------------------------------------------------------- 1 | rm new_prof_cache_t53b_320_8_op new_trace_cache_t53b_320_8_op 2 | python -m autopipe.partition new_t5 \ 3 | --model_name_or_path \ 4 | t5-3b \ 5 | --t5_task \ 6 | squad1 \ 7 | --lmhead \ 8 | --n_iter \ 9 | 5 \ 10 | --analysis_batch_size \ 11 | 32 \ 12 | --partitioning_batch_size \ 13 | 32 \ 14 | --ct \ 15 | new_trace_cache_t53b_320_8_op \ 16 | --cp \ 17 | new_prof_cache_t53b_320_8_op \ 18 | --stateless_tied \ 19 | --lmhead \ 20 | --n_partitions \ 21 | 8 \ 22 | --max_seq_length \ 23 | 320 \ 24 | --answer_max_seq_length \ 25 | 8 \ 26 | --partitioning_method \ 27 | pipedream \ 28 | --preset \ 29 | pipedream \ 30 | --dont_use_async_meta_alg \ 31 | --save_memory_mode \ 32 | --special_blocks \ 33 | T5Block \ 34 | --output_file \ 35 | op_graph_ 36 | 37 | # --basic_blocks \ 38 | # T5Block 39 | 40 | -------------------------------------------------------------------------------- /t5_used_scripts_example/to_partition_spipe_OP_t5_3b_wic.sh: -------------------------------------------------------------------------------- 1 | rm new_prof_cache_t53b_64_4_op new_trace_cache_t53b_64_4_op 2 | python -m autopipe.partition new_t5 \ 3 | --model_name_or_path \ 4 | t5-3b \ 5 | --t5_task \ 6 | squad1 \ 7 | --lmhead \ 8 | --n_iter \ 9 | 5 \ 10 | --analysis_batch_size \ 11 | 8 \ 12 | --partitioning_batch_size \ 13 | 8 \ 14 | --ct \ 15 | new_trace_cache_t53b_64_4_op \ 16 | --cp \ 17 | new_prof_cache_t53b_64_4_op \ 18 | --stateless_tied \ 19 | --lmhead \ 20 | --n_partitions \ 21 | 8 \ 22 | --max_seq_length \ 23 | 64 \ 24 | --answer_max_seq_length \ 25 | 4 \ 26 | --partitioning_method \ 27 | pipedream \ 28 | --preset \ 29 | pipedream \ 30 | --dont_use_async_meta_alg \ 31 | --save_memory_mode \ 32 | --special_blocks \ 33 | T5Block \ 34 | --output_file \ 35 | op_graph_ 36 | 37 | # --basic_blocks \ 38 | # T5Block 39 | 40 | -------------------------------------------------------------------------------- /t5_used_scripts_example/to_partition_spipe_t5_3b_boolq_multirc.sh: -------------------------------------------------------------------------------- 1 | python -m
autopipe.partition new_t5 \ 2 | --model_name_or_path \ 3 | t5-3b \ 4 | --t5_task \ 5 | squad1 \ 6 | --lmhead \ 7 | --n_iter \ 8 | 10 \ 9 | --analysis_batch_size \ 10 | 4 \ 11 | --partitioning_batch_size \ 12 | 4 \ 13 | --ct \ 14 | new_trace_cache_t53b_512_4_lg \ 15 | --cp \ 16 | new_prof_cache_t53b_512_4_lg \ 17 | --stateless_tied \ 18 | --lmhead \ 19 | --n_partitions \ 20 | 8 \ 21 | --max_seq_length \ 22 | 512 \ 23 | --answer_max_seq_length \ 24 | 4 \ 25 | --partitioning_method \ 26 | pipedream \ 27 | --preset \ 28 | pipedream \ 29 | --disable_op_profiling \ 30 | --dont_use_async_meta_alg \ 31 | --save_memory_mode \ 32 | --special_blocks \ 33 | T5Block \ 34 | --basic_blocks \ 35 | T5Block \ 36 | --output_file \ 37 | layer_graph_ 38 | 39 | # --basic_blocks \ 40 | # T5Block 41 | 42 | -------------------------------------------------------------------------------- /t5_used_scripts_example/to_partition_spipe_t5_3b_rte.sh: -------------------------------------------------------------------------------- 1 | rm new_trace_cache_t53b_320_8_lg new_prof_cache_t53b_320_8_lg 2 | python -m autopipe.partition new_t5 \ 3 | --model_name_or_path \ 4 | t5-3b \ 5 | --t5_task \ 6 | squad1 \ 7 | --lmhead \ 8 | --n_iter \ 9 | 10 \ 10 | --analysis_batch_size \ 11 | 8 \ 12 | --partitioning_batch_size \ 13 | 8 \ 14 | --ct \ 15 | new_trace_cache_t53b_320_8_lg \ 16 | --cp \ 17 | new_prof_cache_t53b_320_8_lg \ 18 | --stateless_tied \ 19 | --lmhead \ 20 | --n_partitions \ 21 | 8 \ 22 | --max_seq_length \ 23 | 320 \ 24 | --answer_max_seq_length \ 25 | 8 \ 26 | --partitioning_method \ 27 | pipedream \ 28 | --preset \ 29 | pipedream \ 30 | --disable_op_profiling \ 31 | --dont_use_async_meta_alg \ 32 | --save_memory_mode \ 33 | --special_blocks \ 34 | T5Block \ 35 | --basic_blocks \ 36 | T5Block \ 37 | --output_file \ 38 | layer_graph_ 39 | 40 | # --basic_blocks \ 41 | # T5Block 42 | 43 | -------------------------------------------------------------------------------- /t5_used_scripts_example/to_partition_spipe_t5_3b_wic.sh: -------------------------------------------------------------------------------- 1 | rm new_trace_cache_t53b_64_4_lg new_prof_cache_t53b_64_4_lg 2 | python -m autopipe.partition new_t5 \ 3 | --model_name_or_path \ 4 | t5-3b \ 5 | --t5_task \ 6 | squad1 \ 7 | --lmhead \ 8 | --n_iter \ 9 | 10 \ 10 | --analysis_batch_size \ 11 | 64 \ 12 | --partitioning_batch_size \ 13 | 64 \ 14 | --ct \ 15 | new_trace_cache_t53b_64_4_lg \ 16 | --cp \ 17 | new_prof_cache_t53b_64_4_lg \ 18 | --stateless_tied \ 19 | --lmhead \ 20 | --n_partitions \ 21 | 8 \ 22 | --max_seq_length \ 23 | 64 \ 24 | --answer_max_seq_length \ 25 | 4 \ 26 | --partitioning_method \ 27 | pipedream \ 28 | --preset \ 29 | pipedream \ 30 | --disable_op_profiling \ 31 | --dont_use_async_meta_alg \ 32 | --save_memory_mode \ 33 | --special_blocks \ 34 | T5Block \ 35 | --basic_blocks \ 36 | T5Block \ 37 | --output_file \ 38 | layer_graph_ 39 | 40 | # --basic_blocks \ 41 | # T5Block 42 | 43 | -------------------------------------------------------------------------------- /t5_used_scripts_example/to_partition_spipe_t5_base.sh: -------------------------------------------------------------------------------- 1 | python -m autopipe.partition new_t5 \ 2 | --model_name_or_path \ 3 | t5-base \ 4 | --t5_task \ 5 | squad1 \ 6 | --lmhead \ 7 | --n_iter \ 8 | 10 \ 9 | --analysis_batch_size \ 10 | 4 \ 11 | --partitioning_batch_size \ 12 | 4 \ 13 | --ct \ 14 | new_trace_cache_t5base_512_4_lg \ 15 | --cp \ 16 | new_prof_cache_t5base_512_4_lg \ 17 | --stateless_tied \ 
18 | --lmhead \ 19 | --n_partitions \ 20 | 8 \ 21 | --max_seq_length \ 22 | 512 \ 23 | --answer_max_seq_length \ 24 | 4 \ 25 | --partitioning_method \ 26 | pipedream \ 27 | --preset \ 28 | pipedream \ 29 | --disable_op_profiling \ 30 | --dont_use_async_meta_alg \ 31 | --save_memory_mode \ 32 | --special_blocks \ 33 | T5Block \ 34 | --basic_blocks \ 35 | T5Block 36 | # --output_file \ 37 | # lg \ 38 | # --basic_blocks \ 39 | # T5Block 40 | 41 | -------------------------------------------------------------------------------- /t5_used_scripts_example/to_run.sh: -------------------------------------------------------------------------------- 1 | 2 | rm new_trace_cache_t53b_* 3 | rm new_prof_cache_t53b_* 4 | 5 | ### mpipe (layers graph) 6 | bash t5_used_scripts_example/to_partition_mpipe_layergraph_t5_3b_boolq_multirc.sh 7 | bash t5_used_scripts_example/to_partition_mpipe_layergraph_t5_3b_rte.sh 8 | bash t5_used_scripts_example/to_partition_mpipe_layergraph_t5_3b_wic.sh 9 | 10 | ### mpipe (op graph) 11 | bash t5_used_scripts_example/to_partition_mpipe_t5_3b_opgraph_boolq_multirc.sh 12 | bash t5_used_scripts_example/to_partition_mpipe_t5_3b_opgraph_rte.sh 13 | bash t5_used_scripts_example/to_partition_mpipe_t5_3b_opgraph_wic.sh 14 | 15 | 16 | ### spipe (layers graph) 17 | bash t5_used_scripts_example/to_partition_spipe_t5_3b_boolq_multirc.sh 18 | bash t5_used_scripts_example/to_partition_spipe_t5_3b_rte.sh 19 | bash t5_used_scripts_example/to_partition_spipe_t5_3b_wic.sh 20 | 21 | 22 | # TODO: gpipe: partition with smaller micro batch. 23 | 24 | #to_partition_mpipe_t5_base 25 | #to_partition_spipe_t5 26 | #to_partition_spipe_t5_base -------------------------------------------------------------------------------- /t5_used_scripts_example/to_run_again_wic.sh: -------------------------------------------------------------------------------- 1 | 2 | rm new_trace_cache_t53b_* 3 | rm new_prof_cache_t53b_* 4 | rm prof_cache_t53b_64_4_lg_ftpipe 5 | rm new_prof_cache_t53b_64_4_lg_ftpipe 6 | rm new_trace_cache_t53b_64_4_lg 7 | 8 | ### mpipe (layers graph) 9 | # bash t5_used_scripts_example/to_partition_mpipe_layergraph_t5_3b_boolq_multirc.sh 10 | # bash t5_used_scripts_example/to_partition_mpipe_layergraph_t5_3b_rte.sh 11 | bash t5_used_scripts_example/to_partition_mpipe_layergraph_t5_3b_wic.sh # FAILED: size? 12 | 13 | ### mpipe (op graph) 14 | #bash t5_used_scripts_example/to_partition_mpipe_t5_3b_opgraph_boolq_multirc.sh # FAILED: MEM 15 | #bash t5_used_scripts_example/to_partition_mpipe_t5_3b_opgraph_rte.sh 16 | bash t5_used_scripts_example/to_partition_mpipe_t5_3b_opgraph_wic.sh 17 | 18 | 19 | ### spipe (layers graph) 20 | #bash t5_used_scripts_example/to_partition_spipe_t5_3b_boolq_multirc.sh 21 | #bash t5_used_scripts_example/to_partition_spipe_t5_3b_rte.sh 22 | bash t5_used_scripts_example/to_partition_spipe_t5_3b_wic.sh 23 | 24 | 25 | ### spipe (op graph) 26 | bash t5_used_scripts_example/to_partition_spipe_OP_t5_3b_wic.sh 27 | 28 | # TODO: gpipe: partition with smaller micro batch. 29 | 30 | #to_partition_mpipe_t5_base 31 | #to_partition_spipe_t5 32 | #to_partition_spipe_t5_base -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saareliad/FTPipe/986014df3bb985595f22139ab355a48ca00dc12f/tests/__init__.py --------------------------------------------------------------------------------
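The `WeightPredictor` / `FixFunction` pair in pipe/pipeline/weight_prediction/interface.py (listed above) is abstract, so a concrete sketch may help clarify how the pieces fit together. The sketch below is illustrative only: `SGDMomentumPredictor`, `LinearFix`, and the local `_backup` list are names invented for this example, and the repository's real predictors restore weights through `true_weights_storage` and compute momentum-decay-aware coefficients in their fix functions. It is nonetheless a working predictor for plain `torch.optim.SGD` with momentum:

from pipe.pipeline.weight_prediction.interface import WeightPredictor, FixFunction


class LinearFix(FixFunction):
    # Hypothetical coefficient: treat each of the n predicted steps as one full momentum step.
    def __call__(self, p: WeightPredictor, pg):
        return float(p.n_steps)


class SGDMomentumPredictor(WeightPredictor):
    # Sketch: predict w_hat = w - lr * coeff * momentum_buffer, then restore on revert().
    def forward(self):
        if not self.n_steps:
            return
        self._backup = []  # simplified stand-in for true_weights_storage
        for pg in self.optimizer.param_groups:
            coeff = self.fix_fn(self, pg) if self.fix_fn is not None else float(self.n_steps)
            for p in pg['params']:
                buf = self.optimizer.state[p].get('momentum_buffer')
                if buf is None:
                    continue  # no momentum accumulated yet (before the first optimizer step)
                self._backup.append((p, p.detach().clone()))
                p.data.add_(buf, alpha=-pg['lr'] * coeff)

    def revert(self):
        for p, saved in getattr(self, '_backup', []):
            p.data.copy_(saved)
        self._backup = []

Intended use per micro-batch, as the interface suggests (assuming `n_steps` measures staleness in optimizer steps): call `setup(staleness)` once the expected staleness is known, `forward()` to install the predicted weights before the forward/backward pass, and `revert()` to restore the true weights before the real optimizer step.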