├── LICENSE ├── README.md ├── code ├── datastore_v2.py ├── main.py ├── ps_startup.sh ├── resource_acquisition.py ├── resource_acquisition.pyc ├── shutdown.py ├── start-evaluator.sh ├── start_one_time_training.sh ├── start_one_time_traning_server.sh ├── start_sub.sh ├── startup.py ├── stop_one_time_training.sh └── tensor2tensor │ ├── .gitignore │ ├── .travis.yml │ ├── AUTHORS │ ├── CONTRIBUTING.md │ ├── ISSUE_TEMPLATE.md │ ├── LICENSE │ ├── README.md │ ├── docs │ ├── cloud_mlengine.md │ ├── cloud_tpu.md │ ├── distributed_training.md │ ├── index.md │ ├── new_model.md │ ├── new_problem.md │ ├── overview.md │ ├── tutorials │ │ └── asr_with_transformer.md │ └── walkthrough.md │ ├── floyd.yml │ ├── floyd_requirements.txt │ ├── oss_scripts │ ├── oss_integration_test.sh │ ├── oss_pip_install.sh │ ├── oss_release.sh │ └── oss_tests.sh │ ├── pylintrc │ ├── setup.py │ └── tensor2tensor │ ├── __init__.py │ ├── bin │ ├── __init__.py │ ├── build_vocab.py │ ├── make_tf_configs.py │ ├── t2t-avg-all │ ├── t2t-bleu │ ├── t2t-datagen │ ├── t2t-decoder │ ├── t2t-exporter │ ├── t2t-insights-server │ ├── t2t-make-tf-configs │ ├── t2t-query-server │ ├── t2t-trainer │ ├── t2t-translate-all │ ├── t2t_attack.py │ ├── t2t_avg_all.py │ ├── t2t_bleu.py │ ├── t2t_datagen.py │ ├── t2t_decoder.py │ ├── t2t_distill.py │ ├── t2t_prune.py │ ├── t2t_trainer.py │ ├── t2t_trainer_test.py │ └── t2t_translate_all.py │ ├── data_generators │ ├── README.md │ ├── __init__.py │ ├── algorithmic.py │ ├── algorithmic_math.py │ ├── algorithmic_math_test.py │ ├── algorithmic_test.py │ ├── all_problems.py │ ├── allen_brain.py │ ├── allen_brain_test.py │ ├── audio.py │ ├── audio_encoder.py │ ├── audio_test.py │ ├── babi_qa.py │ ├── bair_robot_pushing.py │ ├── celeba.py │ ├── celeba_test.py │ ├── celebahq.py │ ├── cifar.py │ ├── cipher.py │ ├── cnn_dailymail.py │ ├── cola.py │ ├── common_voice.py │ ├── common_voice_test.py │ ├── desc2code.py │ ├── desc2code_test.py │ ├── dna_encoder.py │ ├── dna_encoder_test.py │ ├── 
fsns.py │ ├── function_docstring.py │ ├── gene_expression.py │ ├── gene_expression_test.py │ ├── generator_utils.py │ ├── generator_utils_test.py │ ├── google_robot_pushing.py │ ├── gym_env.py │ ├── gym_env_test.py │ ├── gym_problems.py │ ├── gym_problems_specs.py │ ├── gym_problems_test.py │ ├── gym_utils.py │ ├── ice_parsing.py │ ├── image_lsun.py │ ├── image_utils.py │ ├── image_utils_test.py │ ├── imagenet.py │ ├── imagenet_test.py │ ├── imdb.py │ ├── inspect_tfrecord.py │ ├── lambada.py │ ├── librispeech.py │ ├── lm1b.py │ ├── lm1b_imdb.py │ ├── lm1b_mnli.py │ ├── mnist.py │ ├── mrpc.py │ ├── mscoco.py │ ├── mscoco_test.py │ ├── multi_problem.py │ ├── multi_problem_test.py │ ├── multinli.py │ ├── ocr.py │ ├── paraphrase_ms_coco.py │ ├── paraphrase_ms_coco_test.py │ ├── pointer_generator_word.py │ ├── problem.py │ ├── problem_hparams.py │ ├── problem_test.py │ ├── program_search.py │ ├── program_search_test.py │ ├── ptb.py │ ├── qnli.py │ ├── quora_qpairs.py │ ├── rte.py │ ├── scitail.py │ ├── snli.py │ ├── speech_recognition.py │ ├── squad.py │ ├── sst_binary.py │ ├── stanford_nli.py │ ├── style_transfer.py │ ├── style_transfer_test.py │ ├── subject_verb_agreement.py │ ├── test_data │ │ ├── 1.csv │ │ ├── corpus-1.txt │ │ ├── corpus-2.txt │ │ ├── vocab-1.txt │ │ └── vocab-2.txt │ ├── text_encoder.py │ ├── text_encoder_build_subword.py │ ├── text_encoder_test.py │ ├── text_problems.py │ ├── text_problems_test.py │ ├── timeseries.py │ ├── timeseries_data_generator.py │ ├── timeseries_data_generator_test.py │ ├── timeseries_test.py │ ├── tokenizer.py │ ├── tokenizer_test.py │ ├── translate.py │ ├── translate_encs.py │ ├── translate_ende.py │ ├── translate_enet.py │ ├── translate_enfr.py │ ├── translate_enid.py │ ├── translate_enmk.py │ ├── translate_envi.py │ ├── translate_enzh.py │ ├── translate_test.py │ ├── twentybn.py │ ├── video_generated.py │ ├── video_utils.py │ ├── video_utils_test.py │ ├── vqa.py │ ├── vqa_utils.py │ ├── wiki.py │ ├── wikisum │ │ ├── 
README.md │ │ ├── __init__.py │ │ ├── delete_instances.sh │ │ ├── generate_vocab.py │ │ ├── get_references_commoncrawl.py │ │ ├── get_references_web.py │ │ ├── get_references_web_single_group.py │ │ ├── html.py │ │ ├── parallel_launch.py │ │ ├── produce_examples.py │ │ ├── test_data │ │ │ ├── para_bad1.txt │ │ │ └── para_good1.txt │ │ ├── utils.py │ │ ├── utils_test.py │ │ ├── validate_data.py │ │ └── wikisum.py │ ├── wikitext103.py │ ├── wnli.py │ └── wsj_parsing.py │ ├── insights │ ├── README.md │ ├── __init__.py │ ├── graph.py │ ├── insight_configuration.proto │ ├── polymer │ │ ├── .bowerrc │ │ ├── attention_visualization │ │ │ ├── attention-visualization.html │ │ │ └── attention-visualization.js │ │ ├── bower.json │ │ ├── common-types.js │ │ ├── explore_view │ │ │ ├── explore-view.html │ │ │ └── explore-view.js │ │ ├── graph_visualization │ │ │ ├── graph-visualization.html │ │ │ └── graph-visualization.js │ │ ├── index.html │ │ ├── insights_app │ │ │ ├── insights-app.html │ │ │ └── insights-app.js │ │ ├── language_selector │ │ │ ├── language-selector-content.html │ │ │ ├── language-selector-content.js │ │ │ ├── language-selector.html │ │ │ └── language-selector.js │ │ ├── processing_visualization │ │ │ ├── processing-visualization.html │ │ │ └── processing-visualization.js │ │ ├── query_card │ │ │ ├── query-card.html │ │ │ └── query-card.js │ │ ├── tensor2tensor.html │ │ └── translation_result │ │ │ ├── translation-result.html │ │ │ └── translation-result.js │ ├── query_processor.py │ ├── server.py │ └── transformer_model.py │ ├── layers │ ├── __init__.py │ ├── common_attention.py │ ├── common_attention_test.py │ ├── common_audio.py │ ├── common_hparams.py │ ├── common_image_attention.py │ ├── common_image_attention_test.py │ ├── common_layers.py │ ├── common_layers_test.py │ ├── common_message_passing_attention.py │ ├── common_video.py │ ├── common_video_test.py │ ├── discretization.py │ ├── discretization_test.py │ ├── latent_layers.py │ ├── 
latent_layers_test.py │ ├── modalities.py │ ├── modalities_test.py │ ├── vq_discrete.py │ └── vqa_layers.py │ ├── models │ ├── README.md │ ├── __init__.py │ ├── basic.py │ ├── basic_test.py │ ├── bytenet.py │ ├── bytenet_test.py │ ├── distillation.py │ ├── image_transformer.py │ ├── image_transformer_2d.py │ ├── image_transformer_2d_test.py │ ├── image_transformer_test.py │ ├── lstm.py │ ├── lstm_test.py │ ├── mtf_image_transformer.py │ ├── mtf_image_transformer_test.py │ ├── mtf_resnet.py │ ├── mtf_transformer.py │ ├── mtf_transformer_test.py │ ├── neural_gpu.py │ ├── neural_gpu_test.py │ ├── research │ │ ├── __init__.py │ │ ├── adafactor_experiments.py │ │ ├── aligned.py │ │ ├── attention_lm.py │ │ ├── attention_lm_moe.py │ │ ├── autoencoders.py │ │ ├── autoencoders_test.py │ │ ├── cycle_gan.py │ │ ├── gene_expression.py │ │ ├── gene_expression_test.py │ │ ├── glow.py │ │ ├── glow_ops.py │ │ ├── glow_ops_test.py │ │ ├── glow_test.py │ │ ├── lm_experiments.py │ │ ├── moe.py │ │ ├── moe_experiments.py │ │ ├── rl.py │ │ ├── similarity_transformer.py │ │ ├── super_lm.py │ │ ├── transformer_aux.py │ │ ├── transformer_aux_test.py │ │ ├── transformer_moe.py │ │ ├── transformer_nat.py │ │ ├── transformer_revnet.py │ │ ├── transformer_revnet_test.py │ │ ├── transformer_sketch.py │ │ ├── transformer_symshard.py │ │ ├── transformer_vae.py │ │ ├── transformer_vae_test.py │ │ ├── universal_transformer.py │ │ ├── universal_transformer_test.py │ │ ├── universal_transformer_util.py │ │ ├── vqa_attention.py │ │ ├── vqa_attention_test.py │ │ ├── vqa_recurrent_self_attention.py │ │ └── vqa_self_attention.py │ ├── resnet.py │ ├── resnet_test.py │ ├── revnet.py │ ├── revnet_test.py │ ├── shake_shake.py │ ├── slicenet.py │ ├── slicenet_test.py │ ├── transformer.py │ ├── transformer_test.py │ ├── vanilla_gan.py │ ├── video │ │ ├── __init__.py │ │ ├── base.py │ │ ├── base_vae.py │ │ ├── basic_deterministic.py │ │ ├── basic_deterministic_params.py │ │ ├── basic_deterministic_test.py │ │ 
├── basic_recurrent.py │ │ ├── basic_recurrent_test.py │ │ ├── basic_stochastic.py │ │ ├── basic_stochastic_test.py │ │ ├── emily.py │ │ ├── emily_test.py │ │ ├── epva.py │ │ ├── epva_params.py │ │ ├── savp.py │ │ ├── savp_params.py │ │ ├── savp_test.py │ │ ├── sv2p.py │ │ ├── sv2p_params.py │ │ ├── sv2p_test.py │ │ └── tests_utils.py │ ├── xception.py │ └── xception_test.py │ ├── notebooks │ ├── asr_transformer.ipynb │ ├── hello_t2t-rl.ipynb │ ├── hello_t2t.ipynb │ └── t2t_problem.ipynb │ ├── problems.py │ ├── problems_test.py │ ├── rl │ ├── README.md │ ├── __init__.py │ ├── collect.py │ ├── datagen_with_agent.py │ ├── envs │ │ ├── __init__.py │ │ ├── batch_env_factory.py │ │ ├── in_graph_batch_env.py │ │ ├── py_func_batch_env.py │ │ ├── simulated_batch_env.py │ │ ├── tf_atari_wrappers.py │ │ └── utils.py │ ├── model_rl_experiment_player.py │ ├── ppo.py │ ├── rl_trainer_lib.py │ ├── rl_trainer_lib_test.py │ ├── trainer_model_based.py │ ├── trainer_model_based_ae_test.py │ ├── trainer_model_based_agent_only.py │ ├── trainer_model_based_new.py │ ├── trainer_model_based_new_test.py │ ├── trainer_model_based_params.py │ ├── trainer_model_based_recurrent_test.py │ ├── trainer_model_based_stochastic_test.py │ ├── trainer_model_based_sv2p_test.py │ ├── trainer_model_based_test.py │ └── trainer_model_free.py │ ├── serving │ ├── README.md │ ├── __init__.py │ ├── export.py │ ├── query.py │ └── serving_utils.py │ ├── test_data │ ├── example_usr_dir │ │ ├── __init__.py │ │ ├── my_submodule.py │ │ └── requirements.txt │ ├── transformer_test_ckpt │ │ ├── checkpoint │ │ ├── flags.txt │ │ ├── hparams.json │ │ ├── model.ckpt-1.data-00000-of-00002 │ │ ├── model.ckpt-1.data-00001-of-00002 │ │ ├── model.ckpt-1.index │ │ └── model.ckpt-1.meta │ ├── vocab.translate_ende_wmt32k.32768.subwords │ └── vocab.translate_ende_wmt8k.8192.subwords │ ├── utils │ ├── __init__.py │ ├── adafactor.py │ ├── adv_attack_utils.py │ ├── avg_checkpoints.py │ ├── beam_search.py │ ├── beam_search_test.py │ 
├── bleu_hook.py │ ├── bleu_hook_test.py │ ├── checkpoint_compatibility_test.py │ ├── cloud_mlengine.py │ ├── compute_video_metrics.py │ ├── data_reader.py │ ├── data_reader_test.py │ ├── decoding.py │ ├── devices.py │ ├── diet.py │ ├── diet_test.py │ ├── expert_utils.py │ ├── expert_utils_test.py │ ├── flags.py │ ├── get_cnndm_rouge.sh │ ├── get_ende_bleu.sh │ ├── get_rouge.py │ ├── learning_rate.py │ ├── metrics.py │ ├── metrics_hook.py │ ├── metrics_hook_test.py │ ├── metrics_test.py │ ├── modality.py │ ├── mtf_model.py │ ├── multistep_optimizer.py │ ├── multistep_optimizer_test.py │ ├── optimize.py │ ├── pruning_utils.py │ ├── quantization.py │ ├── registry.py │ ├── registry_test.py │ ├── restore_hook.py │ ├── rouge.py │ ├── rouge_test.py │ ├── t2t_model.py │ ├── t2t_model_test.py │ ├── trainer_lib.py │ ├── trainer_lib_test.py │ ├── update_ops_hook.py │ ├── usr_dir.py │ ├── video2gif.py │ ├── video_metrics.py │ ├── video_metrics_test.py │ ├── yellowfin.py │ └── yellowfin_test.py │ └── visualization │ ├── TransformerVisualization.ipynb │ ├── __init__.py │ ├── attention.js │ ├── attention.py │ ├── visualization.py │ └── visualization_test.py └── data ├── adaptive_lr ├── README.md ├── lr_baseline.csv ├── lr_case1.csv └── lr_case2.csv ├── img ├── cluster_4_spots_heatmap.png ├── cluster_8_spots_heatmap.png └── hetero_cost.png ├── lifetime ├── README.md ├── lifetime.csv ├── lifetime.py └── plot.py └── training ├── README.md ├── code ├── data_processor.py ├── datastore.py ├── heatmap.py ├── resource_acquisition.py ├── start_evaluator.sh ├── start_training.sh ├── start_training_server.sh ├── stop_training.sh ├── training_data_fetcher.py └── training_job.py ├── experiment_1ps_vs_2ps ├── 1ps2v │ ├── cost.csv │ ├── train_data.csv │ └── train_data_processed.csv ├── 1ps3v │ ├── cost.csv │ ├── train_data.csv │ └── train_data_processed.csv ├── 1ps4v │ ├── cost.csv │ ├── train_data.csv │ └── train_data_processed.csv ├── 1ps5v │ ├── cost.csv │ ├── train_data.csv │ └── 
train_data_processed.csv ├── 1ps6v │ ├── cost.csv │ ├── train_data.csv │ └── train_data_processed.csv ├── 1ps7v │ ├── cost.csv │ ├── train_data.csv │ └── train_data_processed.csv ├── 1ps8v │ ├── cost.csv │ ├── train_data.csv │ └── train_data_processed.csv ├── 1ps_k80 │ ├── 2_data.csv │ ├── 3_data.csv │ ├── 4_data.csv │ ├── 5_data.csv │ ├── 6_data.csv │ ├── 7_data.csv │ └── 8_data.csv ├── 2ps2k │ ├── cost.csv │ ├── train_data.csv │ └── train_data_processed.csv ├── 2ps2v │ ├── cost.csv │ ├── train_data.csv │ └── train_data_processed.csv ├── 2ps3k │ ├── cost.csv │ ├── train_data.csv │ └── train_data_processed.csv ├── 2ps3v │ ├── cost.csv │ ├── train_data.csv │ └── train_data_processed.csv ├── 2ps4k │ ├── cost.csv │ ├── train_data.csv │ └── train_data_processed.csv ├── 2ps4v │ ├── cost.csv │ ├── train_data.csv │ └── train_data_processed.csv ├── 2ps5k │ ├── cost.csv │ ├── train_data.csv │ └── train_data_processed.csv ├── 2ps5v │ ├── cost.csv │ ├── train_data.csv │ └── train_data_processed.csv ├── 2ps6k │ ├── cost.csv │ ├── train_data.csv │ └── train_data_processed.csv ├── 2ps6v │ ├── cost.csv │ ├── train_data.csv │ └── train_data_processed.csv ├── 2ps7k │ ├── cost.csv │ ├── train_data.csv │ └── train_data_processed.csv ├── 2ps7v │ ├── cost.csv │ ├── train_data.csv │ └── train_data_processed.csv ├── 2ps8k │ ├── cost.csv │ ├── train_data.csv │ └── train_data_processed.csv └── 2ps8v │ ├── cost.csv │ ├── train_data.csv │ └── train_data_processed.csv ├── experiment_cross_region ├── 2e1c1w-p100 │ ├── train_data.csv │ └── train_data_processed.csv ├── 2e1c1w │ ├── train_data.csv │ └── train_data_processed.csv ├── 2e2c-p100 │ ├── train_data.csv │ └── train_data_processed.csv ├── 2e2c │ ├── train_data.csv │ └── train_data_processed.csv ├── 3e1c-p100 │ ├── train_data.csv │ └── train_data_processed.csv └── 3e1c │ ├── train_data.csv │ └── train_data_processed.csv ├── experiment_mixed_gpu ├── 1v1p2k │ ├── 1v100-1p100-2k80-_train_data.csv │ ├── agg_data.csv │ ├── cost.csv │ └── 
train_data_processed.csv ├── 1v2p1k │ ├── 1v100-2p100-1k80-_train_data.csv │ ├── agg_data.csv │ ├── cost.csv │ └── train_data_processed.csv └── 2v1p1k │ ├── 2v100-1p100-1k80-_train_data.csv │ ├── agg_data.csv │ ├── cost.csv │ └── train_data_processed.csv ├── k80_on_demand ├── 2-demand │ ├── 2_ondemand_cluster_status.db │ ├── agg_data.csv │ ├── cost.csv │ ├── train_data.csv │ ├── train_data_processed.csv │ └── vm_data.csv ├── 4-demand │ ├── 4_ondemand_cluster_status.db │ ├── agg_data.csv │ ├── cost.csv │ ├── train_data.csv │ └── train_data_processed.csv ├── 8-demand │ ├── 8_ondemand_cluster_status.db │ ├── agg_data.csv │ ├── cost.csv │ ├── train_data.csv │ └── train_data_processed.csv └── k80-demand │ ├── cost.csv │ ├── k80_ondemand_cluster_status.db │ ├── train_data.csv │ └── train_data_processed.csv ├── k80_spot ├── 2-spot │ ├── 2_spot_cluster_status.db │ ├── agg_data.csv │ ├── cost.csv │ ├── heat-compact.png │ ├── heat-png.png │ ├── heat.graffle │ │ ├── data.plist │ │ ├── image5.png │ │ └── image6.png │ ├── heat.png │ ├── train_data.csv │ ├── train_data_processed.csv │ └── vm_data.csv ├── 4-spot │ ├── 4-spot-train_data.csv │ ├── 4-spot_reminder.md │ ├── 4_spot_cluster_status.db │ ├── agg_data.csv │ ├── cost.csv │ ├── heat-32.png │ ├── heat-4-spot.graffle │ │ ├── data.plist │ │ ├── image2.png │ │ └── image3.png │ ├── heat-4-spot.png │ ├── heat-compact.png │ ├── heat-old.graffle │ │ ├── data.plist │ │ └── image3.png │ ├── heat.png │ ├── train_data_processed.csv │ └── vm_data.csv └── 8-spot │ ├── 8_spot_cluster_status.db │ ├── agg_data.csv │ ├── cost.csv │ ├── heat-8-spot.graffle │ ├── data.plist │ ├── image1.png │ └── image3.png │ ├── heat-compact.png │ ├── heat.png │ ├── train_data.csv │ ├── train_data_processed.csv │ └── vm_data.csv └── p100_v100_spot ├── 4p100 ├── 4p100_train_data.csv ├── agg_data.csv ├── cost.csv └── train_data_processed.csv ├── 4v100 ├── 4v100_train_data.csv ├── agg_data.csv ├── cost.csv └── train_data_processed.csv ├── p100 ├── agg_data.csv 
#!/usr/bin/python
"""Tell the job's parameter server that this VM is going down.

The script looks up the Compute Engine instance named ``<job>-ps-0`` (where
``<job>`` is the first dash-separated segment of this host's name), reads the
``networkIP`` of its first network interface, and calls ``serverDown(role,
index, hostname)`` on the XML-RPC server listening there.

A start and an end timestamp are written to ``LOG_PATH`` so that the duration
of the notification can be inspected afterwards.
"""
import xmlrpclib
import json
import os
import syslog
from datetime import datetime
import socket
import googleapiclient
import googleapiclient.discovery

PROJECT_NAME = "shijian-18"
PS_ZONE = 'us-west1-b'
RPC_PORT = 8000
LOG_PATH = "/home/ozymandias/serverDown.log"


def _role_and_index(hostname):
    """Return the (role, index) pair encoded in a '<job>-<role>-<index>' name.

    A 'master' host is reported as role 'chief' with index '0'; every other
    host reports its own second and third name segments.

    Raises:
        IndexError: if the hostname has too few dash-separated segments.
    """
    parts = hostname.split('-')
    role = parts[1]
    if role == 'master':
        return 'chief', '0'
    return role, parts[2]


def main(project=PROJECT_NAME, zone=PS_ZONE, port=RPC_PORT):
    """Report this host as down to the parameter server's XML-RPC endpoint.

    Args:
        project: Compute Engine project that owns the parameter server VM.
        zone: zone in which the ``<job>-ps-0`` instance is looked up.
        port: TCP port of the XML-RPC server on the parameter server.

    Errors raised while parsing the hostname or performing the RPC are
    reported on stdout and in syslog instead of being propagated; errors from
    the Compute Engine lookup itself still propagate to the caller.
    """
    name = socket.gethostname()
    compute = googleapiclient.discovery.build('compute', 'v1')
    rpc_server_name = name.split('-')[0] + '-' + 'ps-0'
    request = compute.instances().get(
        project=project, zone=zone, instance=rpc_server_name)
    response = request.execute()
    # str() instead of .encode("utf-8"): yields a native string on both
    # Python 2 and Python 3, so the URL concatenation below cannot mix
    # text and bytes.
    ps_ip = str(response['networkInterfaces'][0]['networkIP'])

    s = xmlrpclib.ServerProxy('http://' + ps_ip + ':' + str(port))
    try:
        role, index = _role_and_index(name)
        s.serverDown(role, index, name)
    except Exception as e:
        # Best effort: the notification must not crash the shutdown hook, but
        # the cause has to be visible, so the exception goes into the message.
        message = 'Encountered error: %r' % (e,)
        print(message)
        syslog.syslog(syslog.LOG_ERR, message)


if __name__ == '__main__':
    with open(LOG_PATH, "w") as out_file:
        out_file.write(str(datetime.utcnow()))
        out_file.write('\n')
        # Flush now: if the process is killed while main() is still running,
        # the start timestamp is already on disk.
        out_file.flush()
        main()
        # Only reached when main() returned; a missing end timestamp in the
        # log therefore means the notification did not complete.
        out_file.write(str(datetime.utcnow()))
#!/usr/bin/python
"""Tell the job's parameter server that this VM has started.

The script looks up the Compute Engine instance named ``<job>-ps-0`` (where
``<job>`` is the first dash-separated segment of this host's name), reads the
``networkIP`` of its first network interface, and calls ``serverStarted(role,
index, hostname)`` on the XML-RPC server listening there.

A start and an end timestamp are written to ``LOG_PATH`` so that the duration
of the notification can be inspected afterwards.
"""
import xmlrpclib
import json
import os
import syslog
import time
from datetime import datetime
import socket
import googleapiclient
import googleapiclient.discovery

## Maybe invoke this startup inside of start_one_time_traning_server.sh

PROJECT_NAME = "shijian-18"
PS_ZONE = 'us-west1-b'
RPC_PORT = 8000
LOG_PATH = "/home/ozymandias/serverStart.log"


def _role_and_index(hostname):
    """Return the (role, index) pair encoded in a '<job>-<role>-<index>' name.

    A 'master' host is reported as role 'chief' with index '0'; every other
    host reports its own second and third name segments.

    Raises:
        IndexError: if the hostname has too few dash-separated segments.
    """
    parts = hostname.split('-')
    role = parts[1]
    if role == 'master':
        return 'chief', '0'
    return role, parts[2]


def main(project=PROJECT_NAME, zone=PS_ZONE, port=RPC_PORT):
    """Report this host as started to the parameter server's XML-RPC endpoint.

    Args:
        project: Compute Engine project that owns the parameter server VM.
        zone: zone in which the ``<job>-ps-0`` instance is looked up.
        port: TCP port of the XML-RPC server on the parameter server.

    Errors raised while parsing the hostname or performing the RPC are
    reported on stdout and in syslog instead of being propagated; errors from
    the Compute Engine lookup itself still propagate to the caller.
    """
    name = socket.gethostname()
    compute = googleapiclient.discovery.build('compute', 'v1')
    rpc_server_name = name.split('-')[0] + '-' + 'ps-0'
    request = compute.instances().get(
        project=project, zone=zone, instance=rpc_server_name)
    response = request.execute()
    # str() instead of .encode("utf-8"): yields a native string on both
    # Python 2 and Python 3, so the URL concatenation below cannot mix
    # text and bytes.
    ps_ip = str(response['networkInterfaces'][0]['networkIP'])

    s = xmlrpclib.ServerProxy('http://' + ps_ip + ':' + str(port))
    try:
        role, index = _role_and_index(name)
        s.serverStarted(role, index, name)
    except Exception as e:
        # Best effort: the notification must not crash the startup hook, but
        # the cause has to be visible, so the exception goes into the message.
        message = 'Encountered error: %r' % (e,)
        print(message)
        syslog.syslog(syslog.LOG_ERR, message)


if __name__ == '__main__':
    with open(LOG_PATH, "w") as out_file:
        out_file.write(str(datetime.utcnow()))
        out_file.write('\n')
        # Flush now: if the process is killed while main() is still running,
        # the start timestamp is already on disk.
        out_file.flush()
        main()
        # Only reached when main() returned; a missing end timestamp in the
        # log therefore means the notification did not complete.
        out_file.write(str(datetime.utcnow()))
-------------------------------------------------------------------------------- 1 | # Compiled python modules. 2 | *.pyc 3 | 4 | # Byte-compiled 5 | _pycache__/ 6 | .cache/ 7 | 8 | # Python egg metadata, regenerated from source files by setuptools. 9 | /*.egg-info 10 | .eggs/ 11 | 12 | # PyPI distribution artifacts. 13 | build/ 14 | dist/ 15 | 16 | # Sublime project files 17 | *.sublime-project 18 | *.sublime-workspace 19 | 20 | # Tests 21 | .pytest_cache/ 22 | 23 | # Other 24 | *.DS_Store 25 | -------------------------------------------------------------------------------- /code/tensor2tensor/.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | language: python 3 | git: 4 | depth: 10 5 | quiet: true 6 | services: 7 | - docker 8 | python: 9 | - "2.7" 10 | - "3.6" 11 | env: 12 | global: 13 | - T2T_PROBLEM=algorithmic_reverse_binary40_test 14 | - T2T_DATA_DIR=/tmp/t2t-data 15 | - T2T_TRAIN_DIR=/tmp/t2t-train 16 | - TF_LATEST="1.11.*" 17 | # This is necessary to have gsutil work with Python 2.7 18 | - BOTO_CONFIG=/dev/null 19 | matrix: 20 | # We test against recent versions of TensorFlow and tf-nightly. 21 | # If updating, also update TF_LATEST above 22 | - TF_VERSION="1.10.*" 23 | - TF_VERSION="1.11.*" 24 | - TF_VERSION="tf-nightly" 25 | matrix: 26 | exclude: 27 | # We test against all versions in Python 2 but only the latest in Python 3 28 | - python: "3.6" 29 | env: TF_VERSION="1.10.*" 30 | - python: "3.6" 31 | env: TF_VERSION="tf-nightly" 32 | before_install: 33 | - sudo apt-get update -qq 34 | - sudo apt-get install -qq libhdf5-dev 35 | install: 36 | - ./oss_scripts/oss_pip_install.sh 37 | script: 38 | - ./oss_scripts/oss_tests.sh 39 | - ./oss_scripts/oss_integration_test.sh 40 | 41 | # Conditional commands should each be in a separate block to get proper 42 | # errors on Travis. 
43 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]] && [[ "$TF_VERSION" == "tf-nightly" ]]; then 44 | pylint -j 2 tensor2tensor; 45 | fi 46 | -------------------------------------------------------------------------------- /code/tensor2tensor/AUTHORS: -------------------------------------------------------------------------------- 1 | # This is the list of T2T authors for copyright purposes. 2 | # 3 | # This does not necessarily list everyone who has contributed code, since in 4 | # some cases, their employer may be the copyright holder. To see the full list 5 | # of contributors, see the revision history in source control. 6 | 7 | Google Inc. 8 | -------------------------------------------------------------------------------- /code/tensor2tensor/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | # Issues 4 | 5 | * Please tag your issue with `bug`, `feature request`, or `question` to help us 6 | effectively respond. 7 | * Please include the versions of TensorFlow and Tensor2Tensor you are running 8 | (run `pip list | grep tensor`) 9 | * Please provide the command line you ran as well as the log output. 10 | 11 | # Pull Requests 12 | 13 | We'd love to accept your patches and contributions to this project. There are 14 | just a few small guidelines you need to follow. 15 | 16 | ## Contributor License Agreement 17 | 18 | Contributions to this project must be accompanied by a Contributor License 19 | Agreement. You (or your employer) retain the copyright to your contribution; 20 | this simply gives us permission to use and redistribute your contributions as 21 | part of the project. Head over to <https://cla.developers.google.com/> to see 22 | your current agreements on file or to sign a new one. 23 | 24 | You generally only need to submit a CLA once, so if you've already submitted one 25 | (even if it was for a different project), you probably don't need to do it 26 | again. 
27 | 28 | ## Code reviews 29 | 30 | All submissions, including submissions by project members, require review. We 31 | use GitHub pull requests for this purpose. Consult 32 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 33 | information on using pull requests. 34 | -------------------------------------------------------------------------------- /code/tensor2tensor/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### Description 2 | 3 | ... 4 | 5 | ### Environment information 6 | 7 | ``` 8 | OS: 9 | 10 | $ pip freeze | grep tensor 11 | # your output here 12 | 13 | $ python -V 14 | # your output here 15 | ``` 16 | 17 | ### For bugs: reproduction and error logs 18 | 19 | ``` 20 | # Steps to reproduce: 21 | ... 22 | ``` 23 | 24 | ``` 25 | # Error logs: 26 | ... 27 | ``` 28 | -------------------------------------------------------------------------------- /code/tensor2tensor/docs/cloud_tpu.md: -------------------------------------------------------------------------------- 1 | # Running on Cloud TPUs 2 | 3 | Tensor2Tensor supports running on Google Cloud Platform's TPUs, chips 4 | specialized for ML training. See the official tutorials for [running the 5 | T2T Transformer for text on Cloud TPUs](https://cloud.google.com/tpu/docs/tutorials/transformer) and 6 | [Transformer for Speech Recognition](https://cloud.google.com/tpu/docs/tutorials/automated-speech-recognition). 7 | 8 | ## Other models on TPU 9 | 10 | Many of Tensor2Tensor's models work on TPU. 11 | 12 | You can provision a VM and TPU with `ctpu up`. Use the `t2t-trainer` command 13 | on the VM as usual with the additional flags `--use_tpu` and 14 | `--cloud_tpu_name=$TPU_NAME`. 15 | 16 | Note that because the `TPUEstimator` does not catch the `OutOfRangeError` 17 | during evaluation, you should ensure that `--eval_steps` is small enough to 18 | not exhaust the evaluation data. 
19 | 20 | A non-exhaustive list of T2T models that work on TPU: 21 | 22 | * Image generation: `imagetransformer` with `imagetransformer_base_tpu` (or 23 | `imagetransformer_tiny_tpu`) 24 | * Super-resolution: `img2img_transformer` with `img2img_transformer_base_tpu` 25 | (or `img2img_transformer_tiny_tpu`) 26 | * `resnet` with `resnet_50` (or `resnet_18` or `resnet_34`) 27 | * `revnet` with `revnet_104` (or `revnet_38_cifar`) 28 | * `shake_shake` with `shakeshake_tpu` (or `shakeshake_small`) 29 | 30 | ## Example invocation 31 | 32 | Use `ctpu up` to bring up the VM and TPU machines; once the machines are ready 33 | it will SSH you into the VM and you can run the following: 34 | 35 | ``` 36 | # DATA_DIR and OUT_DIR should be GCS buckets 37 | # TPU_NAME should have been set automatically by the ctpu tool 38 | 39 | t2t-trainer \ 40 | --model=shake_shake \ 41 | --hparams_set=shakeshake_tpu \ 42 | --problem=image_cifar10 \ 43 | --train_steps=180000 \ 44 | --eval_steps=9 \ 45 | --local_eval_frequency=100 \ 46 | --data_dir=$DATA_DIR \ 47 | --output_dir=$OUT_DIR \ 48 | --use_tpu \ 49 | --cloud_tpu_name=$TPU_NAME 50 | ``` 51 | -------------------------------------------------------------------------------- /code/tensor2tensor/docs/new_model.md: -------------------------------------------------------------------------------- 1 | # T2T: Create Your Own Model 2 | 3 | [![PyPI 4 | version](https://badge.fury.io/py/tensor2tensor.svg)](https://badge.fury.io/py/tensor2tensor) 5 | [![GitHub 6 | Issues](https://img.shields.io/github/issues/tensorflow/tensor2tensor.svg)](https://github.com/tensorflow/tensor2tensor/issues) 7 | [![Contributions 8 | welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CONTRIBUTING.md) 9 | [![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg)](https://gitter.im/tensor2tensor/Lobby) 10 | [![License](https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg)](https://opensource.org/licenses/Apache-2.0) 11 | 12 | Here 
we show how to create your own model in T2T. 13 | 14 | ## The T2TModel class 15 | 16 | TODO: complete. 17 | -------------------------------------------------------------------------------- /code/tensor2tensor/docs/tutorials/asr_with_transformer.md: -------------------------------------------------------------------------------- 1 | # Automated Speech Recognition with the Transformer model 2 | 3 | See the 4 | [official tutorial](https://cloud.google.com/tpu/docs/tutorials/automated-speech-recognition). 5 | -------------------------------------------------------------------------------- /code/tensor2tensor/floyd.yml: -------------------------------------------------------------------------------- 1 | env: tensorflow-1.9 2 | machine: gpu 3 | -------------------------------------------------------------------------------- /code/tensor2tensor/floyd_requirements.txt: -------------------------------------------------------------------------------- 1 | tensor2tensor 2 | -------------------------------------------------------------------------------- /code/tensor2tensor/oss_scripts/oss_integration_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Note that this test script requires docker to be installed and running. 
4 | 5 | set -v # print commands as they're executed 6 | set -e # fail and exit on any command erroring 7 | 8 | : "${TF_VERSION:?}" 9 | : "${TF_LATEST:?}" 10 | : "${T2T_DATA_DIR:?}" 11 | : "${T2T_TRAIN_DIR:?}" 12 | : "${T2T_PROBLEM:?}" 13 | 14 | # Test --t2t_usr_dir 15 | t2t-trainer --registry_help --t2t_usr_dir=./tensor2tensor/test_data/example_usr_dir 2>&1 | grep my_very_own_hparams && echo passed 16 | 17 | # Run data generation, training, and decoding on a dummy problem 18 | t2t-datagen --problem=$T2T_PROBLEM --data_dir=$T2T_DATA_DIR 19 | t2t-trainer --problem=$T2T_PROBLEM --data_dir=$T2T_DATA_DIR --model=transformer --hparams_set=transformer_tiny --train_steps=5 --eval_steps=5 --output_dir=$T2T_TRAIN_DIR 20 | t2t-decoder --problem=$T2T_PROBLEM --data_dir=$T2T_DATA_DIR --model=transformer --hparams_set=transformer_tiny --output_dir=$T2T_TRAIN_DIR --decode_hparams='num_samples=10' 21 | 22 | # Test serving 23 | if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]] && [[ "$TF_VERSION" == "$TF_LATEST" ]] 24 | then 25 | # Export for serving 26 | pip install tensorflow_hub 27 | t2t-exporter \ 28 | --problem=$T2T_PROBLEM \ 29 | --data_dir=$T2T_DATA_DIR \ 30 | --model=transformer \ 31 | --hparams_set=transformer_tiny \ 32 | --output_dir=$T2T_TRAIN_DIR 33 | 34 | # Run model server 35 | server_port=8500 36 | model_name=my_model 37 | docker run -d -p $server_port:$server_port \ 38 | --mount type=bind,source=$T2T_TRAIN_DIR/export,target=/models/$model_name \ 39 | -e MODEL_NAME=$model_name -t tensorflow/serving 40 | sleep 10 41 | 42 | # Query 43 | pip install tensorflow-serving-api 44 | t2t-query-server \ 45 | --server=localhost:$server_port \ 46 | --servable_name=$model_name \ 47 | --problem=$T2T_PROBLEM \ 48 | --data_dir=$T2T_DATA_DIR \ 49 | --inputs_once='1 0 1 0 1 0' 50 | fi 51 | -------------------------------------------------------------------------------- /code/tensor2tensor/oss_scripts/oss_pip_install.sh: 
#!/bin/bash

set -v  # print commands as they're executed
set -e  # fail and exit on any command erroring

# Required configuration: abort if TF_VERSION is unset or empty.
: "${TF_VERSION:?}"

if [[ "$TF_VERSION" == "tf-nightly" ]]
then
  pip install tf-nightly
else
  pip install -q "tensorflow==$TF_VERSION"
fi

# First ensure that the base dependencies are sufficient for a full import
pip install -q -e .
# stdout is discarded; stderr stays visible so import errors show up in the log.
t2t-trainer --registry_help 2>&1 >/dev/null
# grep is its own statement: inside `grep ... && echo passed`, `set -e` would
# ignore a failing grep, so a missing problem would never fail the script.
t2t-datagen 2>&1 | grep translate_ende >/dev/null
echo passed

# Then install the test dependencies
pip install -q -e .[tests,allen]
# Make sure to install the atari extras for gym
pip install "gym[atari]"

# Make sure we have the latest version of numpy - avoid problems we were
# seeing with Python 3
pip install -q -U numpy
-------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/bin/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/bin/t2t-avg-all: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """t2t-avg-all.""" 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | from tensor2tensor.bin import t2t_avg_all 8 | 9 | import tensorflow as tf 10 | 11 | def main(argv): 12 | t2t_avg_all.main(argv) 13 | 14 | 15 | if __name__ == "__main__": 16 | tf.logging.set_verbosity(tf.logging.INFO) 17 | tf.app.run() 18 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/bin/t2t-bleu: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """t2t-bleu.""" 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | from tensor2tensor.bin import t2t_bleu 8 | 9 | import tensorflow as tf 10 | 11 | def main(argv): 12 | t2t_bleu.main(argv) 13 | 14 | 15 | 16 | if __name__ == "__main__": 17 | tf.logging.set_verbosity(tf.logging.INFO) 18 | tf.app.run() 19 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/bin/t2t-datagen: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Data generation for Tensor2Tensor. 3 | 4 | This script is used to generate data to train your models 5 | for a number of problems for which open-source data is available. 
6 | 7 | For example, to generate data for MNIST run this: 8 | 9 | t2t-datagen \ 10 | --problem=image_mnist \ 11 | --data_dir=~/t2t_data \ 12 | --tmp_dir=~/t2t_data/tmp 13 | """ 14 | from __future__ import absolute_import 15 | from __future__ import division 16 | from __future__ import print_function 17 | 18 | from tensor2tensor.bin import t2t_datagen 19 | 20 | import tensorflow as tf 21 | 22 | def main(argv): 23 | t2t_datagen.main(argv) 24 | 25 | 26 | if __name__ == "__main__": 27 | tf.logging.set_verbosity(tf.logging.INFO) 28 | tf.app.run() 29 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/bin/t2t-decoder: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """t2t-decoder.""" 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | from tensor2tensor.bin import t2t_decoder 8 | 9 | import tensorflow as tf 10 | 11 | def main(argv): 12 | t2t_decoder.main(argv) 13 | 14 | 15 | if __name__ == "__main__": 16 | tf.logging.set_verbosity(tf.logging.INFO) 17 | tf.app.run() 18 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/bin/t2t-exporter: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """t2t-exporter.""" 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | from tensor2tensor.serving import export 8 | 9 | import tensorflow as tf 10 | 11 | def main(argv): 12 | export.main(argv) 13 | 14 | 15 | if __name__ == "__main__": 16 | tf.logging.set_verbosity(tf.logging.INFO) 17 | tf.app.run() 18 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/bin/t2t-insights-server: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """t2t-insights-server.""" 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | from tensor2tensor.insights import server 8 | 9 | import tensorflow as tf 10 | 11 | def main(argv): 12 | server.main(argv) 13 | 14 | 15 | if __name__ == "__main__": 16 | tf.logging.set_verbosity(tf.logging.INFO) 17 | tf.app.run() 18 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/bin/t2t-make-tf-configs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """t2t-make-tf-configs.""" 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | from tensor2tensor.bin import make_tf_configs 8 | 9 | import tensorflow as tf 10 | 11 | def main(argv): 12 | make_tf_configs.main(argv) 13 | 14 | 15 | if __name__ == "__main__": 16 | tf.logging.set_verbosity(tf.logging.INFO) 17 | tf.app.run() 18 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/bin/t2t-query-server: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """t2t-query-server.""" 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | from tensor2tensor.serving import query 8 | 9 | import tensorflow as tf 10 | 11 | def main(argv): 12 | query.main(argv) 13 | 14 | 15 | if __name__ == "__main__": 16 | tf.logging.set_verbosity(tf.logging.INFO) 17 | tf.app.run() 18 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/bin/t2t-trainer: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Trainer for Tensor2Tensor. 3 | 4 | This script is used to train your models in Tensor2Tensor. 5 | 6 | For example, to train a shake-shake model on MNIST run this: 7 | 8 | t2t-trainer \ 9 | --generate_data \ 10 | --problem=image_mnist \ 11 | --data_dir=~/t2t_data \ 12 | --tmp_dir=~/t2t_data/tmp \ 13 | --model=shake_shake \ 14 | --hparams_set=shake_shake_quick \ 15 | --output_dir=~/t2t_train/mnist1 \ 16 | --train_steps=1000 \ 17 | --eval_steps=100 18 | """ 19 | from __future__ import absolute_import 20 | from __future__ import division 21 | from __future__ import print_function 22 | 23 | from tensor2tensor.bin import t2t_trainer 24 | 25 | import tensorflow as tf 26 | 27 | def main(argv): 28 | t2t_trainer.main(argv) 29 | 30 | 31 | if __name__ == "__main__": 32 | tf.logging.set_verbosity(tf.logging.INFO) 33 | tf.app.run() 34 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/bin/t2t-translate-all: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """t2t-translate-all.""" 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | from tensor2tensor.bin import t2t_translate_all 8 | 9 | import tensorflow as tf 10 | 11 | def main(argv): 12 | t2t_translate_all.main(argv) 13 | 14 | 15 | 16 | if __name__ == "__main__": 17 | tf.logging.set_verbosity(tf.logging.INFO) 18 | tf.app.run() 19 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/bin/t2t_trainer_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 
class TrainerTest(tf.test.TestCase):
  """Smoke test that drives t2t_trainer.main for one train and one eval step."""

  @classmethod
  def setUpClass(cls):
    # Delegate class-level setup to the trainer_lib test fixture.
    trainer_lib_test.TrainerLibTest.setUpClass()

  def testTrain(self):
    """Configures the trainer through FLAGS, then runs it end to end."""
    flag_values = [
        ("problem", "tiny_algo"),
        ("model", "transformer"),
        ("hparams_set", "transformer_tiny"),
        ("train_steps", 1),
        ("eval_steps", 1),
        ("output_dir", tf.test.get_temp_dir()),
        ("data_dir", tf.test.get_temp_dir()),
    ]
    for flag_name, flag_value in flag_values:
      setattr(FLAGS, flag_name, flag_value)
    t2t_trainer.main(None)
class AudioTest(tf.test.TestCase):
  """Tests for the file-collection helper in data_generators.audio."""

  def testDataCollection(self):
    """Checks that `_collect_data` keys match the extension-less file paths."""
    # Generate a trivial source and target file.
    tmp_dir = self.get_temp_dir()
    test_files = [
        "dir1/file1",
        "dir1/file2",
        "dir1/dir2/file3",
        "dir1/dir2/dir3/file4",
    ]
    for filename in test_files:
      input_filename = os.path.join(tmp_dir, filename + ".WAV")
      target_filename = os.path.join(tmp_dir, filename + ".WRD")
      directories = os.path.dirname(input_filename)
      if not os.path.exists(directories):
        os.makedirs(directories)
      # Create empty files. The context manager closes each handle right away;
      # leaving them open leaks descriptors and makes the os.remove() calls
      # below fail on platforms that refuse to delete open files.
      for path in (input_filename, target_filename):
        with io.open(path, "wb"):
          pass

    data_dict = audio._collect_data(tmp_dir, ".WAV", ".WRD")
    expected = [os.path.join(tmp_dir, filename) for filename in test_files]
    self.assertEqual(sorted(data_dict), sorted(expected))

    # Clean up.
    for filename in test_files:
      os.remove(os.path.join(tmp_dir, "%s.WAV" % filename))
      os.remove(os.path.join(tmp_dir, "%s.WRD" % filename))
class CelebaTest(parameterized.TestCase, tf.test.TestCase):
  """Shape checks for CelebA multi-resolution preprocessing."""

  @parameterized.named_parameters(
      ("Default", None),
      ("Area", "AREA"),
      ("Dilated", "DILATED"))
  def testCelebaMultiResolutionPreprocessExample(self, resize_method):
    """Preprocesses one random image and verifies the output feature shapes."""
    hparams = tf.contrib.training.HParams(resolutions=[8, 16, 32])
    # Only override the resize method when the test case supplies one.
    if resize_method is not None:
      hparams.resize_method = resize_method
    features = {"inputs": tf.random_uniform([218, 178, 3], minval=-1.)}

    result = celeba.ImageCelebaMultiResolution().preprocess_example(
        features, tf.estimator.ModeKeys.TRAIN, hparams)

    self.assertLen(result, 2)
    self.assertEqual(result["inputs"].shape, (138, 138, 3))
    self.assertEqual(result["targets"].shape, (42, 32, 3))
class CommonVoiceTest(tf.test.TestCase):
  """Tests for the metadata collection helper in common_voice."""

  def testCollectData(self):
    """`_collect_data` returns exactly one row for the test_data directory."""
    output = common_voice._collect_data(_TESTDATA)
    self.assertEqual(1, len(output))

    # NOTE: No header.
    # assertEqual (rather than assertTrue(a == b)) reports both values when
    # the comparison fails.
    self.assertEqual("my_media", output[0][0])
    self.assertEqual("my_label", output[0][2])
class Desc2codeTest(tf.test.TestCase):

  def testCppPreprocess(self):
    """Check that preprocess_target correctly preprocesses C++ source code."""
    cpp_pb = desc2code.ProgrammingDesc2codeCpp()

    self.assertEqual(  # Add space between two lines
        cpp_pb.preprocess_target("firstline//comm1\nsecondline//comm2\n"),
        "firstline secondline")
    # Checking for both comments and spaces
    self.assertEqual(cpp_pb.preprocess_target(CODE_CPP_IN), CODE_CPP_OUT)
    # A `//` comment with no terminating newline is expected to be kept; only
    # the surrounding whitespace is stripped.
    self.assertEqual(
        cpp_pb.preprocess_target(" not removed //abcd "),
        "not removed //abcd")
class DnaEncoderTest(tf.test.TestCase):
  """Round-trip tests for the encoders in dna_encoder."""

  def _assert_round_trip(self, encoder_cls, sequence):
    """Asserts that decode(encode(sequence)) returns `sequence` unchanged."""
    # Encoding should be reversible for any reasonable chunk size.
    for size in (1, 2, 4, 6, 8):
      codec = encoder_cls(chunk_size=size)
      token_ids = codec.encode(sequence)
      self.assertEqual(sequence, codec.decode(token_ids))

  def test_encode_decode(self):
    self._assert_round_trip(
        dna_encoder.DNAEncoder,
        'TTCGCGGNNNAACCCAACGCCATCTATGTANNTTGAGTTGTTGAGTTAAA')

  def test_delimited_dna_encoder(self):
    self._assert_round_trip(
        dna_encoder.DelimitedDNAEncoder,
        'TTCGCGGNNN,AACCCAACGC,CATCTATGTA,NNTTGAGTTG,TTGAGTTAAA')
class GymProblemsTest(tf.test.TestCase):
  """Tests for the Gym problem specifications."""

  @classmethod
  def setUpClass(cls):
    # Recreate the temp directory so the tests start from an empty one.
    scratch_dir = tf.test.get_temp_dir()
    cls.tmp_dir = scratch_dir
    shutil.rmtree(scratch_dir)
    os.mkdir(scratch_dir)

  def testGymAtariGameModes(self):
    """The wrapped full-Pong problem reports a frame height of 210."""
    pong = gym_problems_specs.GymDiscreteProblemWithAgentOnWrappedFullPong()
    self.assertEqual(210, pong.frame_height)
class ImagenetTest(parameterized.TestCase, tf.test.TestCase):
  """Shape checks for ImageNet multi-resolution preprocessing."""

  @parameterized.named_parameters(
      ("Default", None),
      ("Area", "AREA"),
      ("Dilated", "DILATED"))
  def testImagenetMultiResolutionPreprocessExample(self, resize_method):
    """Preprocesses one random image and verifies the output feature shape."""
    hparams = tf.contrib.training.HParams(resolutions=[8, 16, 32])
    # Only override the resize method when the test case supplies one.
    if resize_method is not None:
      hparams.resize_method = resize_method
    features = {"inputs": tf.random_uniform([64, 64, 3], minval=-1.)}

    result = imagenet.ImageImagenetMultiResolutionGen().preprocess_example(
        features, tf.estimator.ModeKeys.TRAIN, hparams)

    self.assertLen(result, 1)
    self.assertEqual(result["inputs"].shape, (42, 32, 3))
@registry.register_problem
class LanguagemodelLm1bSentimentIMDB(multi_problem.MultiProblem):
  """Multitask problem combining LM1B and IMDb with a character vocabulary."""

  def __init__(self, was_reversed=False, was_copy=False):
    super(LanguagemodelLm1bSentimentIMDB, self).__init__(was_reversed, was_copy)
    # Task order matters: the LM1B task is registered first, then IMDb.
    sub_problems = (lm1b.LanguagemodelLm1bCharacters,
                    imdb.SentimentIMDBCharacters)
    for make_task in sub_problems:
      self.task_list.append(make_task())

  @property
  def vocab_type(self):
    """Vocabulary type shared by the sub-tasks (character level)."""
    return text_problems.VocabType.CHARACTER
@registry.register_problem
class LanguagemodelLm1bMultiNLISubwords(multi_problem.MultiProblem):
  """Multitask problem combining LM1B and MNLI with a subword vocabulary."""

  def __init__(self, was_reversed=False, was_copy=False):
    super(LanguagemodelLm1bMultiNLISubwords, self).__init__(
        was_reversed, was_copy)
    # Task order matters: the LM1B task is registered first, then MNLI.
    sub_problems = (lm1b.LanguagemodelLm1b32k, multinli.MultiNLISharedVocab)
    for make_task in sub_problems:
      self.task_list.append(make_task())

  @property
  def vocab_type(self):
    """Vocabulary type shared by the sub-tasks (subword level)."""
    return text_problems.VocabType.SUBWORD


@registry.register_problem
class LanguagemodelLm1bMultiNLI(multi_problem.MultiProblem):
  """Multitask problem combining LM1B and MNLI with a character vocabulary."""

  def __init__(self, was_reversed=False, was_copy=False):
    super(LanguagemodelLm1bMultiNLI, self).__init__(was_reversed, was_copy)
    # Task order matters: the LM1B task is registered first, then MNLI.
    sub_problems = (lm1b.LanguagemodelLm1bCharacters,
                    multinli.MultiNLICharacters)
    for make_task in sub_problems:
      self.task_list.append(make_task())

  @property
  def vocab_type(self):
    """Vocabulary type shared by the sub-tasks (character level)."""
    return text_problems.VocabType.CHARACTER
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for MS COCO.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from absl.testing import parameterized 23 | from tensor2tensor.data_generators import mscoco 24 | 25 | import tensorflow as tf 26 | 27 | 28 | class MscocoTest(parameterized.TestCase, tf.test.TestCase): 29 | 30 | @parameterized.named_parameters( 31 | ("Default", None), 32 | ("Area", "AREA"), 33 | ("Dilated", "DILATED")) 34 | def testMsCocoMultiResolutionPreprocessExample(self, resize_method): 35 | example = {"inputs": tf.random_uniform([400, 400, 3], minval=-1.)} 36 | mode = tf.estimator.ModeKeys.TRAIN 37 | hparams = tf.contrib.training.HParams(resolutions=[8, 16, 32]) 38 | if resize_method is not None: 39 | hparams.resize_method = resize_method 40 | 41 | problem = mscoco.ImageTextMsCocoMultiResolution() 42 | preprocessed_example = problem.preprocess_example(example, mode, hparams) 43 | self.assertLen(preprocessed_example, 1) 44 | self.assertEqual(preprocessed_example["inputs"].shape, (42, 32, 3)) 45 | 46 | 47 | if __name__ == "__main__": 48 | tf.test.main() 49 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/data_generators/test_data/1.csv: -------------------------------------------------------------------------------- 1 | media_name,label 2 | 
my_media,my_label 3 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/data_generators/test_data/corpus-1.txt: -------------------------------------------------------------------------------- 1 | One morning I shot an elephant in my pajamas. How he got in my pajamas, I don't 2 | know. 3 | 4 | Groucho Marx 5 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/data_generators/test_data/corpus-2.txt: -------------------------------------------------------------------------------- 1 | I haven't slept for 10 days... because that would be too long. 2 | 3 | Mitch Hedberg 4 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/data_generators/test_data/vocab-1.txt: -------------------------------------------------------------------------------- 1 | lollipop,8 2 | reverberated,12 3 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/data_generators/test_data/vocab-2.txt: -------------------------------------------------------------------------------- 1 | kattywampus,11 2 | kaput 3 | balderdash,10 4 | jiggery-pokery,14 5 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/data_generators/translate_envi.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Data generators for En-Vi translation.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from tensor2tensor.data_generators import problem 22 | from tensor2tensor.data_generators import text_encoder 23 | from tensor2tensor.data_generators import translate 24 | from tensor2tensor.utils import registry 25 | 26 | # End-of-sentence marker. 27 | EOS = text_encoder.EOS_ID 28 | 29 | # For English-Vietnamese the IWSLT'15 corpus 30 | # from https://nlp.stanford.edu/projects/nmt/ is used. 31 | # The original dataset has 133K parallel sentences. 32 | _ENVI_TRAIN_DATASETS = [[ 33 | "https://github.com/stefan-it/nmt-en-vi/raw/master/data/train-en-vi.tgz", # pylint: disable=line-too-long 34 | ("train.en", "train.vi") 35 | ]] 36 | 37 | # For development 1,553 parallel sentences are used. 38 | _ENVI_TEST_DATASETS = [[ 39 | "https://github.com/stefan-it/nmt-en-vi/raw/master/data/dev-2012-en-vi.tgz", # pylint: disable=line-too-long 40 | ("tst2012.en", "tst2012.vi") 41 | ]] 42 | 43 | 44 | # See this PR on github for some results with Transformer on this Problem. 
45 | # https://github.com/tensorflow/tensor2tensor/pull/611 46 | 47 | 48 | @registry.register_problem 49 | class TranslateEnviIwslt32k(translate.TranslateProblem): 50 | """Problem spec for IWSLT'15 En-Vi translation.""" 51 | 52 | @property 53 | def approx_vocab_size(self): 54 | return 2**15 # 32768 55 | 56 | def source_data_files(self, dataset_split): 57 | train = dataset_split == problem.DatasetSplit.TRAIN 58 | return _ENVI_TRAIN_DATASETS if train else _ENVI_TEST_DATASETS 59 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/data_generators/wikisum/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/data_generators/wikisum/delete_instances.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Delete Google Compute Engine instances with naming structure $NAME-$INDEX 4 | # (e.g. machines created with parallel_launch.py). 
5 | # Example usage: 6 | # delete_instances.sh fetch-ref-urls 1000 7 | 8 | NAME=$1 9 | MAX=$2 10 | MIN=${3:-0} 11 | 12 | LOG_F=/tmp/delete-$NAME-logs.txt 13 | : > "$LOG_F" # Truncate once; the jobs below append so none clobbers another's output. 14 | echo "Deleting $MAX instances starting with $NAME-$MIN" 15 | 16 | for i in $(seq $MIN $MAX) 17 | do 18 | gcloud compute instances delete --quiet "$NAME-$i" >> "$LOG_F" 2>&1 & 19 | if [[ $(( i % 100 )) == 0 ]] 20 | then 21 | # Give it some room to breathe every 100 22 | sleep 30 23 | fi 24 | done 25 | 26 | echo "Delete commands launched. Logs redirected to $LOG_F" 27 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/data_generators/wikisum/generate_vocab.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Generate vocab from references and wikis.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | from tensor2tensor.data_generators.wikisum import wikisum 22 | 23 | import tensorflow as tf 24 | 25 | flags = tf.flags 26 | FLAGS = flags.FLAGS 27 | 28 | flags.DEFINE_string("out_dir", None, "Directory to write vocab to.") 29 | flags.DEFINE_string("wikis_dir", 30 | "gs://tensor2tensor-data/wikisum/wiki_content/", 31 | "Directory with wiki_content.tfrecords shards.") 32 | flags.DEFINE_string("refs_dir", None, 33 | "Directory with process_X folders with reference shards.") 34 | flags.DEFINE_bool("for_commoncrawl", False, 35 | "Whether to use WikisumCommoncrawl or WikisumWeb.") 36 | 37 | 38 | def main(_): 39 | if FLAGS.for_commoncrawl: 40 | problem = wikisum.WikisumCommoncrawl() 41 | else: 42 | problem = wikisum.WikisumWeb() 43 | problem.generate_vocab(FLAGS.out_dir, FLAGS.wikis_dir, FLAGS.refs_dir) 44 | 45 | 46 | if __name__ == "__main__": 47 | tf.logging.set_verbosity(tf.logging.INFO) 48 | tf.app.run() 49 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/data_generators/wikisum/test_data/para_bad1.txt: -------------------------------------------------------------------------------- 1 | kolkata ward no 97 37 2 | you are here : india » west bengal » kolkata » kolkata 3 | this paragraph too short 4 | a | b | c | d | e | f | g | h | i | j | k | l | m | n | o | p | q | r | s | t | u | v | w | x | y | z 5 | 123 123 123 123 985 9880 1230 0980 . 12398 . 6 | - 5 . 7 % - 5 . 2 % - 15 . 1 % 4 . 7 % - 13 . 3 % 7 | http : / / www . bbc . co . uk / sport / football / 24351521 8 | no . - 26 beadon street . 9 | { { / playpopup } } { { ^ playpopup } } { { # playinvideopage } } { { / playinvideopage } } { { ^ playinvideopage } } { { / playinvideopage } } { { / playpopup } }

{ { # playpopup } } { { / playpopup } } { { ^ playpopup } } { { # playinvideopage } } { { / playinvideopage } } { { ^ playinvideopage } } { { / playinvideopage } } { { / playpopup } } { { genre } } 10 | denham , samuel coulter , sally 133 oct 28 1819 11 | browse by 12 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/data_generators/wikisum/test_data/para_good1.txt: -------------------------------------------------------------------------------- 1 | this is a very good paragraph . it even has two sentences . 2 | the castle that was soon to figure so largely in lee’s life lay fourteen miles 3 | to the southwest of where he sat perched atop his tank . topped with storybook 4 | crenelations and accompanied by a rich history , schloss itter , as it’s called 5 | in german , was first mentioned in land records as early as 1240 . since then , 6 | itter has passed through a number of hands . after germany’s march 1938 7 | annexation of austria , the castle’s robust construction and relatively remote 8 | location attracted the attention of the notoriously secretive nazis . within 9 | months of absorbing austria into the greater reich , the german government 10 | requisitioned castle itter for unspecified “official use”—which included housing 11 | for several months in 1942 an organization called the “german association for 12 | combating the dangers of tobacco . ” on february 7 , 1943 , it fell into new 13 | hands yet again , for on that day , the structure and all its outbuildings were 14 | requisitioned by the wehrmacht on behalf of the ss . 15 | the url for the site is http : / / www . bbc . co . uk / sport / football / 24351521 . 
16 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/data_generators/wikisum/utils_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for tensor2tensor.data_generators.wikisum.utils.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import os 23 | from tensor2tensor.data_generators.wikisum import utils 24 | 25 | import tensorflow as tf 26 | 27 | pkg_dir, _ = os.path.split(__file__) 28 | _TESTDATA = os.path.join(pkg_dir, "test_data") 29 | 30 | 31 | def _get_testdata(filename): 32 | with tf.gfile.Open(os.path.join(_TESTDATA, filename)) as f: 33 | return f.read() 34 | 35 | 36 | class UtilsTest(tf.test.TestCase): 37 | 38 | def test_filter_paragraph(self): 39 | for bad in tf.gfile.Glob(os.path.join(_TESTDATA, "para_bad*.txt")): 40 | for p in _get_testdata(bad).split("\n"): 41 | self.assertTrue(utils.filter_paragraph(p), 42 | msg="Didn't filter %s" % p) 43 | for good in tf.gfile.Glob(os.path.join(_TESTDATA, "para_good*.txt")): 44 | for p in _get_testdata(good).split("\n"): 45 | p = _get_testdata(good) 46 | self.assertFalse(utils.filter_paragraph(p), msg="Filtered %s" % p) 47 | 48 | 49 | if __name__ == 
"__main__": 50 | tf.test.main() 51 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/insights/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/insights/insight_configuration.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package tensor2tensor; 4 | 5 | // Configures the Neural Machine Translation Insight Frontend with a set of 6 | // supported query processors and languages. 7 | message InsightConfiguration { 8 | // Specifies zero or more models to inspect. 9 | repeated QueryProcessorConfiguration configuration = 1; 10 | 11 | // Specifies language codes and display names. 12 | repeated Language language = 2; 13 | } 14 | 15 | // A displayable language name. 16 | message Language { 17 | // The BCP-47 Language code. 18 | string code = 1; 19 | // The language's display name. 20 | string name = 2; 21 | } 22 | 23 | // Configures a QueryProcessor and registers it with the Insight Frontend when 24 | // responding to analysis queries. 
25 | message QueryProcessorConfiguration { 26 | // The model's BCP-47 source language code. 27 | string source_language = 1; 28 | // The model's BCP-47 target language code. 29 | string target_language = 2; 30 | // A short label for the model. 31 | string label = 3; 32 | // The QueryProcessor to use. By default we just use the TransformerModel. 33 | string query_processor = 4; 34 | 35 | // Configuration for the TransformerModel. 36 | TransformerConfiguration transformer = 5; 37 | } 38 | 39 | // Specifies the parameters for a trained Transformer model to inspect. These 40 | // parameters match those in t2t-trainer and t2t-decoder. 41 | message TransformerConfiguration { 42 | // The model type. 43 | string model = 1; 44 | // The trained model directory. 45 | string model_dir = 2; 46 | // The data directory for the model. 47 | string data_dir = 3; 48 | 49 | // The hyperparameter set for running the model. 50 | string hparams_set = 4; 51 | // Overriding hyperparameters. 52 | string hparams = 5; 53 | // The problem sets over which this model was trained and configured. 54 | string problems = 6; 55 | } 56 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/insights/polymer/.bowerrc: -------------------------------------------------------------------------------- 1 | { 2 | "directory": "." 3 | } 4 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/insights/polymer/insights_app/insights-app.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Copyright 2018 The Tensor2Tensor Authors. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * `<insights-app>` manages the views of the NMT Insights App. 20 | * 21 | * ### Usage 22 | * 23 | * <insights-app> 24 | * </insights-app> 25 | */ 26 | class InsightsApp extends Polymer.Element { 27 | /** 28 | * @return {string} The component name. 29 | */ 30 | static get is() { 31 | return 'insights-app'; 32 | } 33 | 34 | /** 35 | * @return {!Object} The component properties. 36 | */ 37 | static get properties() { 38 | return { 39 | /** 40 | * @type {string} 41 | */ 42 | page: { 43 | type: String, 44 | reflectToAttribute: true, 45 | }, 46 | }; 47 | } 48 | 49 | /** 50 | * @return {!Array<string>} The component observers. 51 | */ 52 | static get observers() { 53 | return [ 54 | 'routePageChanged_(routeData.page)', 55 | ]; 56 | } 57 | 58 | /** 59 | * Updates the page field if page exists or uses a default value. 60 | * @param {?string} page The current page name being viewed. 61 | * @private 62 | */ 63 | routePageChanged_(page) { 64 | if (page == this.page) { 65 | return; 66 | } 67 | this.page = page || 'explore'; 68 | this.set('routeData.page', this.page); 69 | 70 | // Refresh the now selected page in case it needs new data on a new view. 
71 | let currentPage = this.get('currentPage'); 72 | if (currentPage) { 73 | currentPage.refresh(); 74 | } 75 | } 76 | } 77 | 78 | customElements.define(InsightsApp.is, InsightsApp); 79 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/insights/polymer/language_selector/language-selector.html: -------------------------------------------------------------------------------- 1 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/insights/polymer/processing_visualization/processing-visualization.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license 3 | * Copyright 2018 The Tensor2Tensor Authors. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * `<processing-visualization>` summarises pre/post processing steps. 20 | * 21 | * This element presents the pre-processing segmentation steps and 22 | * post-processing de-segmentation and rewrite steps that are applied to a 23 | * translation query. 24 | * 25 | * ### Usage 26 | * 27 | * <processing-visualization data="[[data]]"></processing-visualization> 28 | */ 29 | class ProcessingVisualization extends Polymer.Element { 30 | /** 31 | * @return {string} The component name. 
32 | */ 33 | static get is() { 34 | return 'processing-visualization'; 35 | } 36 | 37 | /** 38 | * @return {!Object} The component properties. 39 | */ 40 | static get properties() { 41 | return { 42 | /** 43 | * @type {!QueryProcessingVisualization} 44 | */ 45 | data: { 46 | type: Object, 47 | }, 48 | }; 49 | } 50 | } 51 | 52 | customElements.define(ProcessingVisualization.is, ProcessingVisualization); 53 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/insights/query_processor.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """A base class for all query processing classes.""" 17 | 18 | 19 | class QueryProcessor(object): 20 | """Base class for any class that wants to process sequence queries. 21 | 22 | QueryProcessor classes are expected to convert a string query to a series of 23 | visualization structures. 24 | 25 | TODO(kstevens): Define how the visualization structures should look once the 26 | protos are in better shape. 27 | """ 28 | 29 | def process(self, query): 30 | """Returns the generated visualizations for query. 31 | 32 | Args: 33 | query: The string input 34 | 35 | Returns: 36 | A dictionary with one key: 'result' that maps to a list of visualization 37 | objects. 
38 | """ 39 | del query 40 | return {"result": []} 41 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/models/README.md: -------------------------------------------------------------------------------- 1 | # Constructing T2T Models. 2 | 3 | This directory contains T2T models, their hyperparameters, and a number 4 | of common layers and hyperparameter settings to help construct new models. 5 | Common building blocks are in `common_layers.py` and `common_attention.py`. 6 | Common hyperparameters are in `common_hparams.py`. Models are imported in 7 | `__init__.py`. 8 | 9 | ## Adding a new model. 10 | 11 | To add a model to the built-in set, create a new file (see, e.g., 12 | `neural_gpu.py`) and write your model class inheriting from `T2TModel` there and 13 | decorate it with `registry.register_model`. Import it in `__init__.py`. 14 | 15 | It is now available to use with the trainer binary (`t2t-trainer`) using the 16 | `--model=model_name` flag. 
17 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/models/basic.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Basic models for testing simple tasks.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from tensor2tensor.layers import common_hparams 23 | from tensor2tensor.layers import common_layers 24 | from tensor2tensor.utils import registry 25 | from tensor2tensor.utils import t2t_model 26 | 27 | import tensorflow as tf 28 | 29 | 30 | @registry.register_model 31 | class BasicFcRelu(t2t_model.T2TModel): 32 | """Basic fully-connected + ReLU model.""" 33 | 34 | def body(self, features): 35 | hparams = self.hparams 36 | x = features["inputs"] 37 | shape = common_layers.shape_list(x) 38 | x = tf.reshape(x, [-1, shape[1] * shape[2] * shape[3]]) 39 | for i in range(hparams.num_hidden_layers): 40 | x = tf.layers.dense(x, hparams.hidden_size, name="layer_%d" % i) 41 | x = tf.nn.dropout(x, keep_prob=1.0 - hparams.dropout) 42 | x = tf.nn.relu(x) 43 | return tf.expand_dims(tf.expand_dims(x, axis=1), axis=1) # 4D For T2T. 
44 | 45 | 46 | @registry.register_hparams 47 | def basic_fc_small(): 48 | """Small fully connected model.""" 49 | hparams = common_hparams.basic_params1() 50 | hparams.learning_rate = 0.1 51 | hparams.batch_size = 128 52 | hparams.hidden_size = 256 53 | hparams.num_hidden_layers = 2 54 | hparams.initializer = "uniform_unit_scaling" 55 | hparams.initializer_gain = 1.0 56 | hparams.weight_decay = 0.0 57 | hparams.dropout = 0.0 58 | return hparams 59 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/models/basic_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Basic nets tests.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | import numpy as np 22 | 23 | from tensor2tensor.data_generators import mnist # pylint: disable=unused-import 24 | from tensor2tensor.models import basic 25 | from tensor2tensor.utils import trainer_lib 26 | 27 | import tensorflow as tf 28 | 29 | 30 | class BasicTest(tf.test.TestCase): 31 | 32 | def testBasicFcRelu(self): 33 | x = np.random.random_integers(0, high=255, size=(1, 28, 28, 1)) 34 | y = np.random.random_integers(0, high=9, size=(1, 1)) 35 | hparams = trainer_lib.create_hparams( 36 | "basic_fc_small", problem_name="image_mnist", data_dir=".") 37 | with self.test_session() as session: 38 | features = { 39 | "inputs": tf.constant(x, dtype=tf.int32), 40 | "targets": tf.constant(y, dtype=tf.int32), 41 | } 42 | model = basic.BasicFcRelu(hparams, tf.estimator.ModeKeys.TRAIN) 43 | logits, _ = model(features) 44 | session.run(tf.global_variables_initializer()) 45 | res = session.run(logits) 46 | self.assertEqual(res.shape, (1, 1, 1, 1, 10)) 47 | 48 | 49 | if __name__ == "__main__": 50 | tf.test.main() 51 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/models/bytenet_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """ByteNet tests.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | import numpy as np 22 | 23 | from tensor2tensor.data_generators import problem_hparams 24 | from tensor2tensor.models import bytenet 25 | 26 | import tensorflow as tf 27 | 28 | 29 | class ByteNetTest(tf.test.TestCase): 30 | 31 | def testByteNet(self): 32 | vocab_size = 9 33 | x = np.random.random_integers(1, high=vocab_size - 1, size=(3, 5, 1, 1)) 34 | y = np.random.random_integers(1, high=vocab_size - 1, size=(3, 6, 1, 1)) 35 | hparams = bytenet.bytenet_base() 36 | p_hparams = problem_hparams.test_problem_hparams(vocab_size, 37 | vocab_size, 38 | hparams) 39 | with self.test_session() as session: 40 | features = { 41 | "inputs": tf.constant(x, dtype=tf.int32), 42 | "targets": tf.constant(y, dtype=tf.int32), 43 | } 44 | model = bytenet.ByteNet( 45 | hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) 46 | logits, _ = model(features) 47 | session.run(tf.global_variables_initializer()) 48 | res = session.run(logits) 49 | self.assertEqual(res.shape, (3, 50, 1, 1, vocab_size)) 50 | 51 | 52 | if __name__ == "__main__": 53 | tf.test.main() 54 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/models/research/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/models/slicenet_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Tests for SliceNet.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | import numpy as np 22 | 23 | from tensor2tensor.data_generators import cifar # pylint: disable=unused-import 24 | from tensor2tensor.layers import modalities # pylint: disable=unused-import 25 | from tensor2tensor.models import slicenet 26 | from tensor2tensor.utils import registry 27 | 28 | import tensorflow as tf 29 | 30 | 31 | class SliceNetTest(tf.test.TestCase): 32 | 33 | def testSliceNet(self): 34 | x = np.random.random_integers(0, high=255, size=(3, 5, 5, 3)) 35 | y = np.random.random_integers(0, high=9, size=(3, 5, 1, 1)) 36 | hparams = slicenet.slicenet_params1_tiny() 37 | hparams.add_hparam("data_dir", "") 38 | problem = registry.problem("image_cifar10") 39 | p_hparams = problem.get_hparams(hparams) 40 | hparams.problem_hparams = p_hparams 41 | with self.test_session() as session: 42 | features = { 43 | "inputs": tf.constant(x, dtype=tf.int32), 44 | "targets": tf.constant(y, dtype=tf.int32), 45 | "target_space_id": tf.constant(1, dtype=tf.int32), 46 | } 47 | model = slicenet.SliceNet(hparams, tf.estimator.ModeKeys.TRAIN, 48 | p_hparams) 49 | logits, _ = model(features) 50 | session.run(tf.global_variables_initializer()) 51 | res = session.run(logits) 52 | self.assertEqual(res.shape, (3, 1, 1, 1, 10)) 53 | 54 | 55 | if __name__ == "__main__": 56 | tf.test.main() 57 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/models/video/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/models/video/basic_deterministic_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
"""Basic tests for basic deterministic model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensor2tensor.models.video import basic_deterministic
from tensor2tensor.models.video import basic_deterministic_params
from tensor2tensor.models.video import tests_utils

import tensorflow as tf


class NextFrameTest(tests_utils.BaseNextFrameTest):
  """Runs NextFrameBasicDeterministic through the shared base-class checks."""

  def testBasicDeterministic(self):
    """Delegates to `TestOnVariousInputOutputSizes` with the default hparams."""
    hparams = basic_deterministic_params.next_frame_basic_deterministic()
    model_cls = basic_deterministic.NextFrameBasicDeterministic
    # 256 and False are forwarded positionally; their meaning is defined by
    # tests_utils.BaseNextFrameTest -- TODO confirm against that helper.
    self.TestOnVariousInputOutputSizes(hparams, model_cls, 256, False)


if __name__ == "__main__":
  tf.test.main()

# --------------------------------------------------------------------------------
# /code/tensor2tensor/tensor2tensor/models/video/basic_recurrent.py:
# --------------------------------------------------------------------------------
# coding=utf-8
# Copyright 2018 The Tensor2Tensor Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Basic recurrent models for testing simple tasks."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensor2tensor.layers import common_video
from tensor2tensor.models.video import basic_stochastic
from tensor2tensor.utils import registry

import tensorflow as tf


tfl = tf.layers
tfcl = tf.contrib.layers


@registry.register_model
class NextFrameBasicRecurrent(
    basic_stochastic.NextFrameBasicStochasticDiscrete):
  """Next-frame model whose middle network is a stack of conv-LSTM layers."""

  @property
  def is_recurrent_model(self):
    """Always True for this model."""
    return True

  def middle_network(self, layer, internal_states):
    """Feeds `layer` through `hparams.num_lstm_layers` conv-LSTM layers.

    Args:
      layer: input to the first LSTM layer.
      internal_states: list holding one state per LSTM layer, or None, in
        which case every layer starts from a None state. A list that is
        passed in is updated in place and is the list that gets returned.

    Returns:
      A pair (output of the last LSTM layer, list of per-layer states).
    """
    hp = self.hparams
    conv_lstm = common_video.conv_lstm_2d

    states = internal_states
    if states is None:
      states = [None] * hp.num_lstm_layers

    # Chain the layers: each one consumes the previous output and its own
    # state, and stores its new state back into the same slot.
    output = layer
    for idx in range(hp.num_lstm_layers):
      output, states[idx] = conv_lstm(
          output, states[idx], hp.num_lstm_filters)
    return output, states


@registry.register_hparams
def next_frame_basic_recurrent():
  """Hparams for the basic recurrent model: 4 input and 4 target frames.

  Starts from `next_frame_basic_stochastic_discrete` and adds the LSTM
  settings read by `NextFrameBasicRecurrent.middle_network`.

  Returns:
    The configured hparams object.
  """
  hparams = basic_stochastic.next_frame_basic_stochastic_discrete()
  hparams.video_num_input_frames = 4
  hparams.video_num_target_frames = 4
  lstm_settings = (
      ("num_lstm_layers", 2),
      ("num_lstm_filters", 256),
  )
  for name, value in lstm_settings:
    hparams.add_hparam(name, value)
  return hparams

# --------------------------------------------------------------------------------
# /code/tensor2tensor/tensor2tensor/models/video/basic_recurrent_test.py:
# --------------------------------------------------------------------------------
# coding=utf-8
# Copyright 2018 The Tensor2Tensor Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Basic tests for basic recurrent model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensor2tensor.models.video import basic_recurrent
from tensor2tensor.models.video import tests_utils

import tensorflow as tf


class NextFrameTest(tests_utils.BaseNextFrameTest):
  """Runs NextFrameBasicRecurrent through the shared base-class checks."""

  def testBasicDeterministic(self):
    """Delegates to `TestOnVariousInputOutputSizes` for the recurrent model.

    The method name is kept as-is so existing test ids do not change.
    """
    hparams = basic_recurrent.next_frame_basic_recurrent()
    model_cls = basic_recurrent.NextFrameBasicRecurrent
    # 256 and False are forwarded positionally; their meaning is defined by
    # tests_utils.BaseNextFrameTest -- TODO confirm against that helper.
    self.TestOnVariousInputOutputSizes(hparams, model_cls, 256, False)


if __name__ == "__main__":
  tf.test.main()

# --------------------------------------------------------------------------------
# /code/tensor2tensor/tensor2tensor/models/video/basic_stochastic_test.py:
# --------------------------------------------------------------------------------
# coding=utf-8
# Copyright 2018 The Tensor2Tensor Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Basic tests for basic stochastic model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensor2tensor.models.video import basic_stochastic
from tensor2tensor.models.video import tests_utils

import tensorflow as tf


class NextFrameTest(tests_utils.BaseNextFrameTest):
  """Runs NextFrameBasicStochastic through the shared base-class checks."""

  def testBasicStochastic(self):
    """Delegates to `TestOnVariousInputOutputSizes` with the default hparams."""
    hparams = basic_stochastic.next_frame_basic_stochastic()
    model_cls = basic_stochastic.NextFrameBasicStochastic
    # 256 and False are forwarded positionally; their meaning is defined by
    # tests_utils.BaseNextFrameTest -- TODO confirm against that helper.
    self.TestOnVariousInputOutputSizes(hparams, model_cls, 256, False)


if __name__ == "__main__":
  tf.test.main()

# --------------------------------------------------------------------------------
# /code/tensor2tensor/tensor2tensor/models/video/emily_test.py:
# --------------------------------------------------------------------------------
# coding=utf-8
# Copyright 2018 The Tensor2Tensor Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Basic tests for emily's model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensor2tensor.models.video import emily
from tensor2tensor.models.video import tests_utils


import tensorflow as tf


class NextFrameTest(tests_utils.BaseNextFrameTest):
  """Runs NextFrameEmily through the shared base-class checks."""

  def testEmily(self):
    """Delegates to `TestOnVariousInputOutputSizes` with the default hparams."""
    hparams = emily.next_frame_emily()
    model_cls = emily.NextFrameEmily
    # The trailing 1 is forwarded positionally; its meaning is defined by
    # tests_utils.BaseNextFrameTest -- TODO confirm against that helper.
    self.TestOnVariousInputOutputSizes(hparams, model_cls, 1)


if __name__ == "__main__":
  tf.test.main()

# --------------------------------------------------------------------------------
# /code/tensor2tensor/tensor2tensor/models/video/epva_params.py:
# --------------------------------------------------------------------------------
# coding=utf-8
# Copyright 2018 The Tensor2Tensor Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Param sets for EPVA model."""

from __future__ import division
from __future__ import print_function

from tensor2tensor.models.video import basic_deterministic_params
from tensor2tensor.utils import registry


@registry.register_hparams
def next_frame_epva():
  """EPVA hparams.

  Starts from `next_frame_basic_deterministic`, overrides the frame counts,
  modalities and optimisation settings, then registers the EPVA-specific
  hyperparameters.

  Returns:
    The configured hparams object.
  """
  hparams = basic_deterministic_params.next_frame_basic_deterministic()
  hparams.video_num_input_frames = 4
  hparams.video_num_target_frames = 4
  hparams.target_modality = "video:l2raw"
  hparams.input_modalities = "inputs:video:l2raw"
  hparams.learning_rate_constant = 1e-05
  hparams.batch_size = 2
  hparams.clip_grad_norm = 0.01
  # TODO(msaffar): disentangle EPVA from SV2P
  hparams.add_hparam("reward_prediction", False)
  hparams.add_hparam("context_frames", 5)
  hparams.add_hparam("enc_learning_rate", 1e-5)
  hparams.add_hparam("enc_pred_loss_scale", 0.1)
  hparams.add_hparam("enc_pred_loss_scale_delay", 6e5)
  hparams.add_hparam("enc_size", 64)
  hparams.add_hparam("enc_keep_prob", .65)
  hparams.add_hparam("enc_pred_use_l1_loss", False)
  hparams.add_hparam("enc_pred_use_l2norm", False)
  hparams.add_hparam("van_learning_rate", 3e-5)
  hparams.add_hparam("van_keep_prob", .9)
  # The name used to be "sequence_length " (trailing space), which made the
  # value unreachable as `hparams.sequence_length`.
  hparams.add_hparam("sequence_length", 64)
  hparams.add_hparam("skip_num", 2)
  # NOTE(review): the two noise settings below are registered as int 0;
  # confirm that an int (rather than 0.0) is what consumers expect.
  hparams.add_hparam("pred_noise_std", 0)
  hparams.add_hparam("lstm_state_noise_stddev", 0)
  return hparams

# --------------------------------------------------------------------------------
# /code/tensor2tensor/tensor2tensor/models/video/savp_params.py:
# --------------------------------------------------------------------------------
# coding=utf-8
# Copyright 2018 The Tensor2Tensor Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Param sets for SAVP model."""

from __future__ import division
from __future__ import print_function

from tensor2tensor.models.video import sv2p_params
from tensor2tensor.utils import registry


@registry.register_hparams
def next_frame_savp():
  """SAVP model hparams.

  Starts from `next_frame_sv2p`, registers the SAVP-specific hyperparameters
  and then overrides a few of the inherited settings.

  Returns:
    The configured hparams object.
  """
  hparams = sv2p_params.next_frame_sv2p()

  # New hyperparameters, registered in this order.
  new_hparams = (
      ("z_dim", 8),
      ("num_discriminator_filters", 32),
      ("use_vae", True),
      ("use_gan", False),
      ("use_spectral_norm", True),
      ("gan_loss", "cross_entropy"),
      ("gan_loss_multiplier", 0.01),
      ("gan_vae_loss_multiplier", 0.01),
      ("gan_optimization", "joint"),
  )
  for name, value in new_hparams:
    hparams.add_hparam(name, value)

  # Overrides of values that already exist on the SV2P hparams.
  hparams.target_modality = "video:l1raw"
  hparams.input_modalities = "inputs:video:l1raw"
  hparams.latent_loss_multiplier_schedule = "linear"
  hparams.upsample_method = "bilinear_upsample_conv"
  hparams.internal_loss = False
  return hparams

# --------------------------------------------------------------------------------
# /code/tensor2tensor/tensor2tensor/models/video/savp_test.py:
# --------------------------------------------------------------------------------
# coding=utf-8
# Copyright 2018 The Tensor2Tensor Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Basic tests for SAVP model.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from tensor2tensor.models.video import savp 23 | from tensor2tensor.models.video import savp_params 24 | from tensor2tensor.models.video import tests_utils 25 | 26 | 27 | import tensorflow as tf 28 | 29 | 30 | class NextFrameTest(tests_utils.BaseNextFrameTest): 31 | 32 | def testSavpVAE(self): 33 | savp_hparams = savp_params.next_frame_savp() 34 | savp_hparams.use_vae = True 35 | savp_hparams.use_gan = False 36 | self.TestOnVariousInputOutputSizes( 37 | savp_hparams, savp.NextFrameSAVP, 1) 38 | self.TestOnVariousUpSampleLayers( 39 | savp_hparams, savp.NextFrameSAVP, 1) 40 | 41 | def testSavpGAN(self): 42 | hparams = savp_params.next_frame_savp() 43 | hparams.use_gan = True 44 | hparams.use_vae = False 45 | self.TestVideoModel(7, 5, hparams, savp.NextFrameSAVP, 1) 46 | 47 | hparams.gan_optimization = "sequential" 48 | self.TestVideoModel(7, 5, hparams, savp.NextFrameSAVP, 1) 49 | 50 | def testSavpGANVAE(self): 51 | hparams = savp_params.next_frame_savp() 52 | hparams.use_vae = True 53 | hparams.use_gan = True 54 | self.TestVideoModel(7, 5, hparams, savp.NextFrameSAVP, 1) 55 | 56 | def testInvalidVAEGANCombinations(self): 57 | hparams = savp_params.next_frame_savp() 58 | hparams.use_gan = False 59 | hparams.use_vae = False 60 | self.assertRaises(ValueError, self.TestVideoModel, 61 | 7, 5, hparams, savp.NextFrameSAVP, 1) 62 | 63 | if __name__ == 
"__main__": 64 | tf.test.main() 65 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/models/video/sv2p_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Basic tests for SV2P model.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from tensor2tensor.models.video import sv2p 23 | from tensor2tensor.models.video import sv2p_params 24 | from tensor2tensor.models.video import tests_utils 25 | 26 | import tensorflow as tf 27 | 28 | 29 | class NextFrameTest(tests_utils.BaseNextFrameTest): 30 | 31 | def testSv2p(self): 32 | self.TestOnVariousInputOutputSizes( 33 | sv2p_params.next_frame_sv2p(), 34 | sv2p.NextFrameSv2p, 35 | 1, 36 | False) 37 | 38 | def testSv2pWithActions(self): 39 | self.TestWithActions( 40 | sv2p_params.next_frame_sv2p(), 41 | sv2p.NextFrameSv2p, 42 | 1, 43 | False) 44 | 45 | def testSv2pWithActionsAndRewards(self): 46 | hp = sv2p_params.next_frame_sv2p() 47 | hp.internal_loss = True 48 | self.TestWithActionAndRewards( 49 | hp, 50 | sv2p.NextFrameSv2p, 51 | 1, 52 | False) 53 | 54 | def testSv2pWithActionsAndRewardsExternalLoss(self): 55 | hp = sv2p_params.next_frame_sv2p() 56 | hp.internal_loss = 
False 57 | self.TestWithActionAndRewards( 58 | hp, 59 | sv2p.NextFrameSv2p, 60 | 1, 61 | False) 62 | 63 | def testSv2pTwoFrames(self): 64 | self.TestOnVariousInputOutputSizes( 65 | sv2p_params.next_frame_sv2p(), 66 | sv2p.NextFrameSv2pTwoFrames, 67 | 1, 68 | False) 69 | 70 | 71 | if __name__ == "__main__": 72 | tf.test.main() 73 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/problems.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Access T2T Problems.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | from tensor2tensor.data_generators import all_problems 22 | from tensor2tensor.utils import registry 23 | 24 | 25 | def problem(name): 26 | return registry.problem(name) 27 | 28 | 29 | def available(): 30 | return sorted(registry.list_problems()) 31 | 32 | 33 | all_problems.import_modules(all_problems.ALL_MODULES) 34 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/problems_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""tensor2tensor.problems test."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensor2tensor import problems

import tensorflow as tf


class ProblemsTest(tf.test.TestCase):
  """Import smoke test for the `tensor2tensor.problems` module."""

  def testImport(self):
    """Passes as long as the imported module object is not None."""
    self.assertIsNot(problems, None)


if __name__ == "__main__":
  tf.test.main()

# --------------------------------------------------------------------------------
# /code/tensor2tensor/tensor2tensor/rl/__init__.py:
# --------------------------------------------------------------------------------
# coding=utf-8
# Copyright 2018 The Tensor2Tensor Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
15 | 16 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/rl/envs/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/rl/rl_trainer_lib_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Tests of basic flow of collecting trajectories and training PPO.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | from tensor2tensor.rl import rl_trainer_lib 22 | from tensor2tensor.utils import registry 23 | 24 | import tensorflow as tf 25 | 26 | 27 | class TrainTest(tf.test.TestCase): 28 | 29 | def test_train_pong(self): 30 | hparams = registry.hparams("pong_model_free") 31 | hparams.epochs_num = 2 32 | hparams.num_agents = 2 33 | hparams.epoch_length = 3 34 | rl_trainer_lib.train(hparams) 35 | 36 | 37 | if __name__ == "__main__": 38 | tf.test.main() 39 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/rl/trainer_model_based_ae_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tiny run of trainer_model_based. 
Smoke test.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | from tensor2tensor.rl import trainer_model_based 22 | 23 | import tensorflow as tf 24 | 25 | FLAGS = tf.flags.FLAGS 26 | 27 | 28 | class ModelRLExperimentTestAe(tf.test.TestCase): 29 | 30 | def test_ae(self): 31 | FLAGS.output_dir = tf.test.get_temp_dir() 32 | FLAGS.loop_hparams_set = "rlmb_ae_tiny" 33 | FLAGS.schedule = "train" # skip evaluation for world model training 34 | trainer_model_based.main(None) 35 | 36 | 37 | if __name__ == "__main__": 38 | tf.test.main() 39 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/rl/trainer_model_based_new_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tiny run of trainer_model_based_new. 
Smoke test.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | from tensor2tensor.rl import trainer_model_based_new 22 | 23 | import tensorflow as tf 24 | 25 | FLAGS = tf.flags.FLAGS 26 | 27 | 28 | class ModelRLExperimentNewTest(tf.test.TestCase): 29 | 30 | def test_basic(self): 31 | FLAGS.output_dir = tf.test.get_temp_dir() 32 | FLAGS.loop_hparams_set = "rlmb_tiny" 33 | FLAGS.schedule = "train" # skip evaluation for world model training 34 | trainer_model_based_new.main(None) 35 | 36 | 37 | if __name__ == "__main__": 38 | tf.test.main() 39 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/rl/trainer_model_based_recurrent_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tiny run of trainer_model_based. 
Smoke test.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | from tensor2tensor.rl import trainer_model_based 22 | 23 | import tensorflow as tf 24 | 25 | FLAGS = tf.flags.FLAGS 26 | 27 | 28 | class ModelRLExperimentRecurrentTest(tf.test.TestCase): 29 | 30 | def test_basic_recurrent(self): 31 | FLAGS.output_dir = tf.test.get_temp_dir() 32 | FLAGS.loop_hparams_set = "rlmb_tiny_recurrent" 33 | FLAGS.schedule = "train" # skip evaluation for world model training 34 | trainer_model_based.main(None) 35 | 36 | 37 | if __name__ == "__main__": 38 | tf.test.main() 39 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/rl/trainer_model_based_stochastic_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tiny run of trainer_model_based with stochastic model. 
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensor2tensor.rl import trainer_model_based

import tensorflow as tf

FLAGS = tf.flags.FLAGS


class ModelRLExperimentStochasticTest(tf.test.TestCase):
  """Smoke test: runs trainer_model_based.main with rlmb_tiny_stochastic."""

  def test_basic_stochastic(self):
    # Train-only schedule: skip evaluation for world model training.
    FLAGS.schedule = "train"
    FLAGS.loop_hparams_set = "rlmb_tiny_stochastic"
    # Send all run output to the test's temporary directory.
    FLAGS.output_dir = tf.test.get_temp_dir()
    trainer_model_based.main(None)


if __name__ == "__main__":
  tf.test.main()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensor2tensor.rl import trainer_model_based

import tensorflow as tf

FLAGS = tf.flags.FLAGS


class ModelRLExperimentSv2pTest(tf.test.TestCase):
  """Smoke test: runs trainer_model_based.main with rlmb_tiny_sv2p."""

  def test_sv2p(self):
    # Unlike the sibling smoke tests, this one does not set FLAGS.schedule.
    FLAGS.loop_hparams_set = "rlmb_tiny_sv2p"
    # Send all run output to the test's temporary directory.
    FLAGS.output_dir = tf.test.get_temp_dir()
    trainer_model_based.main(None)


if __name__ == "__main__":
  tf.test.main()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensor2tensor.rl import trainer_model_based

import tensorflow as tf

FLAGS = tf.flags.FLAGS


class ModelRLExperimentTest(tf.test.TestCase):
  """Smoke test: runs trainer_model_based.main with the rlmb_tiny set."""

  def test_basic(self):
    # Train-only schedule: skip evaluation for world model training.
    FLAGS.schedule = "train"
    FLAGS.loop_hparams_set = "rlmb_tiny"
    # Send all run output to the test's temporary directory.
    FLAGS.output_dir = tf.test.get_temp_dir()
    trainer_model_based.main(None)


if __name__ == "__main__":
  tf.test.main()
# Example invocation:
#
#   python -m tensor2tensor.rl.trainer_model_free \
#       --output_dir=$HOME/t2t/rl_v1 \
#       --hparams_set=pong_model_free \
#       --loop_hparams='num_agents=15'

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensor2tensor.rl import rl_trainer_lib
from tensor2tensor.utils import flags as t2t_flags  # pylint: disable=unused-import
from tensor2tensor.utils import trainer_lib

import tensorflow as tf

flags = tf.flags
FLAGS = flags.FLAGS

# To maintain compatibility with some internal libs, we guard against these flag
# definitions possibly erring. Apologies for the ugliness.
# Only ordinary errors are ignored here; KeyboardInterrupt and SystemExit are
# not subclasses of Exception and therefore still propagate.
try:
  flags.DEFINE_string("output_dir", "", "Base output directory for run.")
except Exception:  # pylint: disable=broad-except
  pass


def main(_):
  """Create hparams from the command-line flags and start RL training.

  Reads FLAGS.hparams_set, FLAGS.hparams and FLAGS.output_dir.

  Args:
    _: Unused positional argument.
  """
  hparams = trainer_lib.create_hparams(FLAGS.hparams_set, FLAGS.hparams)
  # FLAGS.output_dir is deliberately passed for both directory arguments.
  rl_trainer_lib.train(hparams, FLAGS.output_dir, FLAGS.output_dir)


if __name__ == "__main__":
  tf.app.run()
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/test_data/example_usr_dir/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Example T2T user directory.""" 17 | from . 
import my_submodule 18 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/test_data/example_usr_dir/requirements.txt: -------------------------------------------------------------------------------- 1 | gutenberg 2 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/test_data/transformer_test_ckpt/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "model.ckpt-1" 2 | all_model_checkpoint_paths: "model.ckpt-1" 3 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/test_data/transformer_test_ckpt/flags.txt: -------------------------------------------------------------------------------- 1 | --eval_steps=1 2 | --hparams_range= 3 | --t2t_usr_dir= 4 | --enable_graph_rewriter=False 5 | --sync=False 6 | --eval_run_autoregressive=False 7 | --eval_use_test_set=False 8 | --worker_id=0 9 | --eval_early_stopping_metric_minimize=True 10 | --worker_replicas=1 11 | --random_seed=1234 12 | --worker_gpu_memory_fraction=0.95 13 | --train_steps=1 14 | --iterations_per_loop=1000 15 | --registry_help=False 16 | --worker_gpu=1 17 | --keep_checkpoint_max=20 18 | --save_checkpoints_secs=0 19 | --gpu_order= 20 | --master= 21 | --generate_data=False 22 | --local_eval_frequency=2000 23 | --export_saved_model=False 24 | --eval_early_stopping_steps=None 25 | --output_dir=/tmp/oss_train 26 | --profile=False 27 | --ps_job=/job:ps 28 | --tmp_dir=/tmp/t2t_datagen 29 | --schedule=continuous_train_and_eval 30 | --problem=translate_ende_wmt8k 31 | --hparams= 32 | --use_tpu=False 33 | --eval_early_stopping_metric_delta=0.1 34 | --ps_gpu=0 35 | --keep_checkpoint_every_n_hours=10000 36 | --decode_hparams= 37 | --tfdbg=False 38 | --data_dir=~/t2t/data 39 | --ps_replicas=0 40 | --eval_early_stopping_metric=loss 41 | --log_device_placement=False 42 
| --hparams_set=transformer_test 43 | --dbgprofile=False 44 | --timit_paths= 45 | --tpu_num_shards=8 46 | --locally_shard_to_cpu=False 47 | --worker_job=/job:localhost 48 | --model=transformer 49 | --parsing_path= 50 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/test_data/transformer_test_ckpt/hparams.json: -------------------------------------------------------------------------------- 1 | {"daisy_chain_variables": true, "optimizer_adam_beta1": 0.9, "scheduled_sampling_prob": 0.0, "num_hidden_layers": 2, "moe_loss_coef": 0.01, "max_target_seq_length": 0, "clip_grad_norm": 0.0, "pos": "timing", "scheduled_sampling_gold_mixin_prob": 0.5, "initializer": "uniform_unit_scaling", "grad_noise_scale": 0.0, "optimizer_momentum_momentum": 0.9, "nbr_decoder_problems": 1, "attention_key_channels": 0, "eval_drop_long_sequences": false, "learning_rate_cosine_cycle_steps": 250000, "prepend_mode": "none", "weight_decay": 0.0, "symbol_modality_skip_top": false, "weight_noise": 0.0, "target_modality": "default", "attention_dropout": 0.1, "parameter_attention_value_channels": 0, "factored_logits": false, "relu_dropout": 0.1, "no_data_parallelism": false, "layer_preprocess_sequence": "n", "sampling_method": "argmax", "learning_rate": 0.2, "num_heads": 2, "max_length": 256, "summarize_grads": false, "attention_value_channels": 0, "num_encoder_layers": 0, "label_smoothing": 0.1, "use_fixed_batch_size": false, "optimizer": "Adam", "moe_k": 2, "self_attention_type": "dot_product", "learning_rate_decay_scheme": "noam", "sampling_temp": 1.0, "kernel_height": 3, "use_pad_remover": true, "batch_size": 4096, "max_relative_position": 0, "force_full_predict": false, "min_length_bucket": 8, "layer_prepostprocess_dropout": 0.1, "eval_run_autoregressive": false, "shared_embedding_and_softmax_weights": true, "symbol_modality_num_shards": 16, "dropout": 0.2, "compress_steps": 0, "parameter_attention_key_channels": 0, 
"length_bucket_step": 1.1, "kernel_width": 1, "hidden_size": 16, "num_decoder_layers": 0, "input_modalities": "default", "filter_size": 8, "optimizer_adam_beta2": 0.98, "scheduled_sampling_warmup_steps": 50000, "norm_type": "layer", "min_length": 0, "moe_num_experts": 64, "multiply_embedding_mode": "sqrt_depth", "max_input_seq_length": 0, "learning_rate_warmup_steps": 8000, "proximity_bias": false, "ffn_layer": "dense_relu_dense", "initializer_gain": 1.0, "layer_postprocess_sequence": "da", "moe_hidden_sizes": "2048", "optimizer_adam_epsilon": 1e-09, "norm_epsilon": 1e-06} 2 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/test_data/transformer_test_ckpt/model.ckpt-1.data-00000-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/code/tensor2tensor/tensor2tensor/test_data/transformer_test_ckpt/model.ckpt-1.data-00000-of-00002 -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/test_data/transformer_test_ckpt/model.ckpt-1.data-00001-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/code/tensor2tensor/tensor2tensor/test_data/transformer_test_ckpt/model.ckpt-1.data-00001-of-00002 -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/test_data/transformer_test_ckpt/model.ckpt-1.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/code/tensor2tensor/tensor2tensor/test_data/transformer_test_ckpt/model.ckpt-1.index 
-------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/test_data/transformer_test_ckpt/model.ckpt-1.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/code/tensor2tensor/tensor2tensor/test_data/transformer_test_ckpt/model.ckpt-1.meta -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/utils/compute_video_metrics.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
"""Computes and saves the metrics for video prediction and generation."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

from tensor2tensor.bin import t2t_decoder
from tensor2tensor.utils import video_metrics
import tensorflow as tf


FLAGS = tf.flags.FLAGS


def main(_):
  """Compute and save video metrics for every decode directory.

  One "decode_NNNNN" directory under FLAGS.output_dir is used per decode, for
  decode ids 0 .. num_decodes - 1.

  Args:
    _: Unused positional argument.
  """
  hparams = t2t_decoder.create_hparams()
  video_problem = hparams.problem
  # Frame shape is ordered [height, width, channels].
  frame_shape = [
      video_problem.frame_height,
      video_problem.frame_width,
      video_problem.num_channels,
  ]
  decode_hparams = t2t_decoder.create_decode_hparams()

  output_dirs = []
  for decode_id in range(decode_hparams.num_decodes):
    subdir = "decode_%05d" % decode_id
    output_dirs.append(os.path.join(FLAGS.output_dir, subdir))

  video_metrics.compute_and_save_video_metrics(
      output_dirs, FLAGS.problem, hparams.video_num_target_frames, frame_shape)


if __name__ == "__main__":
  tf.app.run(main)
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for common layers.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from tensor2tensor.utils import diet 22 | 23 | import tensorflow as tf 24 | 25 | 26 | class DietVarTest(tf.test.TestCase): 27 | 28 | def testDiet(self): 29 | 30 | params = diet.diet_adam_optimizer_params() 31 | 32 | @diet.fn_with_diet_vars(params) 33 | def model_fn(x): 34 | y = tf.layers.dense(x, 10, use_bias=False) 35 | return y 36 | 37 | @diet.fn_with_diet_vars(params) 38 | def model_fn2(x): 39 | y = tf.layers.dense(x, 10, use_bias=False) 40 | return y 41 | 42 | x = tf.random_uniform((10, 10)) 43 | y = model_fn(x) + 10. 44 | y = model_fn2(y) + 10. 
45 | grads = tf.gradients(y, [x]) 46 | with tf.control_dependencies(grads): 47 | incr_step = tf.assign_add(tf.train.get_or_create_global_step(), 1) 48 | 49 | train_op = tf.group(incr_step, *grads) 50 | with self.test_session() as sess: 51 | sess.run(tf.global_variables_initializer()) 52 | orig_vals = sess.run(tf.global_variables()) 53 | for _ in range(10): 54 | sess.run(train_op) 55 | new_vals = sess.run(tf.global_variables()) 56 | 57 | different = [] 58 | for old, new in zip(orig_vals, new_vals): 59 | try: 60 | self.assertAllClose(old, new) 61 | except AssertionError: 62 | different.append(True) 63 | self.assertEqual(len(different), len(tf.global_variables())) 64 | 65 | 66 | if __name__ == "__main__": 67 | tf.test.main() 68 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/utils/get_cnndm_rouge.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Path to moses dir 4 | mosesdecoder=$1 5 | 6 | # Path to file containing gold summaries, one per line 7 | targets_file=$2 8 | # Path to file containing model generated summaries, one per line 9 | decodes_file=$3 10 | 11 | # Tokenize. 12 | perl $mosesdecoder/scripts/tokenizer/tokenizer.perl -l en < $targets_file > $targets_file.tok 13 | perl $mosesdecoder/scripts/tokenizer/tokenizer.perl -l en < $decodes_file > $decodes_file.tok 14 | 15 | # Get rouge scores 16 | python get_rouge.py --decodes_filename $decodes_file.tok --targets_filename $targets_file.tok 17 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/utils/get_ende_bleu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mosesdecoder=~/mosesdecoder 4 | tok_gold_targets=newstest2013.tok.de 5 | 6 | decodes_file=$1 7 | 8 | # Replace unicode. 
9 | perl $mosesdecoder/scripts/tokenizer/replace-unicode-punctuation.perl -l de < $decodes_file > $decodes_file.n 10 | 11 | # Tokenize. 12 | perl $mosesdecoder/scripts/tokenizer/tokenizer.perl -l de < $decodes_file.n > $decodes_file.tok 13 | 14 | # Put compounds in ATAT format (comparable to papers like GNMT, ConvS2S). 15 | # See https://nlp.stanford.edu/projects/nmt/ : 16 | # 'Also, for historical reasons, we split compound words, e.g., 17 | # "rich-text format" --> rich ##AT##-##AT## text format."' 18 | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' < $tok_gold_targets > $tok_gold_targets.atat 19 | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' < $decodes_file.tok > $decodes_file.tok.atat 20 | 21 | # Get BLEU. 22 | perl $mosesdecoder/scripts/generic/multi-bleu.perl $tok_gold_targets.atat < $decodes_file.tok.atat 23 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/utils/t2t_model_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
"""Tests for T2TModel."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensor2tensor.utils import t2t_model

import tensorflow as tf


class T2TModelTest(tf.test.TestCase):
  """Tests for helper methods of t2t_model.T2TModel."""

  @tf.contrib.eager.run_test_in_graph_and_eager_modes()
  def testSummarizeLosses(self):
    # _summarize_losses should return nothing and register one summary in the
    # "losses" scope per entry of the losses dict.
    with tf.Graph().as_default():
      model = t2t_model.T2TModel(tf.contrib.training.HParams())
      losses = {"training": tf.random_normal([]),
                "extra": tf.random_normal([])}
      outputs = model._summarize_losses(losses)
      # assertIsNone takes (obj, msg=None); the former second argument `None`
      # was only a no-op failure message.
      self.assertIsNone(outputs)
      # assertEqual replaces the deprecated alias assertEquals.
      self.assertEqual(
          len(tf.get_collection(tf.GraphKeys.SUMMARIES, scope="losses")),
          len(losses))


if __name__ == "__main__":
  tf.test.main()
# --- tensor2tensor/utils/update_ops_hook.py ---
# Hook to run tf.GraphKeys.UPDATE_OPS.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf


class UpdateOpsHook(tf.train.SessionRunHook):
  """Hook to run assign_ops."""

  def before_run(self, run_context):
    """Return SessionRunArgs built from the UPDATE_OPS collection.

    Args:
      run_context: Unused.

    Returns:
      A tf.train.SessionRunArgs wrapping the ops currently registered in the
      tf.GraphKeys.UPDATE_OPS collection.
    """
    del run_context
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    return tf.train.SessionRunArgs(update_ops)


# --- tensor2tensor/utils/usr_dir.py ---
# Utility to load code from an external user-supplied directory.
import importlib
import os
import sys


INTERNAL_USR_DIR_PACKAGE = "t2t_usr_dir_internal"


def import_usr_dir(usr_dir):
  """Import module at usr_dir, if provided.

  Args:
    usr_dir: Path to a directory that is importable as a module, the special
      name INTERNAL_USR_DIR_PACKAGE, or a falsy value to do nothing.

  Raises:
    ImportError: If the module cannot be imported. sys.path is restored
      before the error propagates.
  """
  if not usr_dir:
    return
  if usr_dir == INTERNAL_USR_DIR_PACKAGE:
    # The package has been installed with pip under this name for Cloud ML
    # Engine so just import it.
    importlib.import_module(INTERNAL_USR_DIR_PACKAGE)
    return

  dir_path = os.path.abspath(os.path.expanduser(usr_dir).rstrip("/"))
  containing_dir, module_name = os.path.split(dir_path)
  tf.logging.info("Importing user module %s from path %s", module_name,
                  containing_dir)
  # Temporarily put the parent directory first on sys.path so that
  # module_name resolves to the user's directory.
  sys.path.insert(0, containing_dir)
  try:
    importlib.import_module(module_name)
  finally:
    # Always undo the insertion, even when the import raises. Remove by value
    # rather than pop(0): the imported module may itself have prepended
    # entries, in which case index 0 is no longer ours.
    try:
      sys.path.remove(containing_dir)
    except ValueError:
      # The imported module already removed the entry; nothing left to undo.
      pass
15 | 16 | """video metrics test.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import numpy as np 23 | from tensor2tensor.utils import video_metrics 24 | import tensorflow as tf 25 | 26 | 27 | class VideoMetricsTest(tf.test.TestCase): 28 | 29 | def test_reduce_to_best_decode(self): 30 | # num_decodes=2, num_samples=3, num_frames=4 31 | decode1 = [ 32 | [30.0, 32.0, 33.0, 34.0], 33 | [22.0, 19.0, 12.0, 13.0], 34 | [30.0, 10.0, 30.0, 10.0]] 35 | decode2 = [ 36 | [22.0, 19.0, 12.0, 13.0], 37 | [30.0, 32.0, 33.0, 34.0], 38 | [25.0, 25.0, 25.0, 25.0]] 39 | all_decodes = [decode1, decode2] 40 | all_decodes = np.array(all_decodes) 41 | best_decode = video_metrics.reduce_to_best_decode(all_decodes, np.argmax) 42 | worst_decode = video_metrics.reduce_to_best_decode(all_decodes, np.argmin) 43 | exp_best_decode = [ 44 | [30.0, 32.0, 33.0, 34.0], 45 | [30.0, 32.0, 33.0, 34.0], 46 | [25.0, 25.0, 25.0, 25.0]] 47 | exp_worst_decode = [ 48 | [22.0, 19.0, 12.0, 13.0], 49 | [22.0, 19.0, 12.0, 13.0], 50 | [30.0, 10.0, 30.0, 10.0]] 51 | self.assertTrue(np.allclose(best_decode, exp_best_decode)) 52 | self.assertTrue(np.allclose(worst_decode, exp_worst_decode)) 53 | 54 | 55 | if __name__ == '__main__': 56 | tf.test.main() 57 | -------------------------------------------------------------------------------- /code/tensor2tensor/tensor2tensor/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Tensor2Tensor Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | -------------------------------------------------------------------------------- /data/adaptive_lr/README.md: -------------------------------------------------------------------------------- 1 | This folder contains the data and plotting script for the adaptive learning rate experiment (Sec. III.F and Fig. 5). 2 | 3 | * The data are CSV files fetched directly through the TensorBoard API. 4 | * `lr_baseline` corresponds to `1 K80 + 1 PS` 5 | * `lr_case1` corresponds to `Naive learning rate` 6 | * `lr_case2` corresponds to `Adaptive learning rate` -------------------------------------------------------------------------------- /data/adaptive_lr/lr_baseline.csv: -------------------------------------------------------------------------------- 1 | Wall time,Step,Value 2 | 1549329304.293007,0,0.10000000149011612 3 | 1549330156.335282,4000,0.781000018119812 4 | 1549331069.050087,8000,0.8450999855995178 5 | 1549331921.074308,12000,0.8295999765396118 6 | 1549332772.921892,16000,0.8765000104904175 7 | 1549333624.731184,20000,0.8705000281333923 8 | 1549334476.222614,24000,0.8533999919891357 9 | 1549335388.087721,28000,0.8572999835014343 10 | 1549336241.964327,32000,0.8787999749183655 11 | 1549337093.492757,36000,0.9259999990463257 12 | 1549337945.30655,40000,0.9283999800682068 13 | 1549338857.530423,44000,0.9282000064849854 14 | 1549339709.572015,48000,0.9286999702453613 15 | 1549340561.347803,52000,0.929099977016449 16 | 1549341475.399528,56000,0.9302999973297119 17 | 1549342327.302275,60000,0.9298999905586243 18 | 
1549343179.171644,64000,0.9301000237464905 19 | -------------------------------------------------------------------------------- /data/adaptive_lr/lr_case1.csv: -------------------------------------------------------------------------------- 1 | Wall time,Step,Value 2 | 1549378867.166298,0,0.09929999709129333 3 | 1549379781.835995,4000,0.7627999782562256 4 | 1549380636.127301,8000,0.8413000106811523 5 | 1549381490.441055,12000,0.8288999795913696 6 | 1549382344.714217,16000,0.8755000233650208 7 | 1549383018.713985,20002,0.8834999799728394 8 | 1549383454.246745,24002,0.8862000107765198 9 | 1549383886.689496,28003,0.8849999904632568 10 | 1549384319.187733,32002,0.8847000002861023 11 | 1549384812.04268,32002,0.8847000002861023 12 | 1549385184.337756,36006,0.9154000282287598 13 | 1549385436.581039,40010,0.9162999987602234 14 | 1549385748.468387,44012,0.9172000288963318 15 | 1549386060.581205,48002,0.9175999760627747 16 | 1549386855.22196,52009,0.9171000123023987 17 | 1549387050.712848,56014,0.91839998960495 18 | 1549387302.512244,60018,0.91839998960495 19 | 1549387493.990558,64005,0.9175000190734863 20 | -------------------------------------------------------------------------------- /data/adaptive_lr/lr_case2.csv: -------------------------------------------------------------------------------- 1 | Wall time,Step,Value 2 | 1549385283.737253,0,0.11010000109672546 3 | 1549386138.4054,4000,0.7477999925613403 4 | 1549387052.996262,8000,0.8305000066757202 5 | 1549387909.580481,12000,0.8537999987602234 6 | 1549388763.859379,16000,0.8500000238418579 7 | 1549389437.565579,20002,0.8619999885559082 8 | 1549389870.062511,24003,0.8805000185966492 9 | 1549390362.942887,28003,0.8766000270843506 10 | 1549390795.631517,32002,0.8844000101089478 11 | 1549391590.147807,36007,0.9261000156402588 12 | 1549391902.057982,40009,0.9279000163078308 13 | 1549392214.104426,44011,0.9265999794006348 14 | 1549392465.880317,48003,0.9282000064849854 15 | 1549393019.305186,48003,0.9282000064849854 16 | 
1549393271.30246,52008,0.9279999732971191 17 | 1549393463.533628,56011,0.9272000193595886 18 | 1549393715.981079,60013,0.9283000230789185 19 | 1549393907.870486,64005,0.9284999966621399 20 | -------------------------------------------------------------------------------- /data/img/cluster_4_spots_heatmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/img/cluster_4_spots_heatmap.png -------------------------------------------------------------------------------- /data/img/cluster_8_spots_heatmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/img/cluster_8_spots_heatmap.png -------------------------------------------------------------------------------- /data/img/hetero_cost.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/img/hetero_cost.png -------------------------------------------------------------------------------- /data/lifetime/README.md: -------------------------------------------------------------------------------- 1 | This folder contains the experiment data and scripts for VM lifetime, with the runs from May to June 2018 and in February 2019. 2 | 3 | #### Data 4 | 5 | * The older runs are in us-east1-b with n1-standard-4 VM type. GPU instances are n1-standard-4 with 1 K80 equipped. Suffix '-less' means there's no stress workload imposed on this VM. Other VMs ran with `stress-ng` during the whole uptime with 90% CPU and 4 GB memory usage. 6 | 7 | * The newer runs have their zones specified by VM names. The meaning of '-less' suffix and lack thereof is the same as above. 
K80 VMs have 4 vCPUs and 61 GB memory, P100 and V100 have 8 vCPUs and 61 GB memory. 8 | 9 | * I used Google Sheets to process the duration: first convert the timestamps into epoch time, then do subtraction to get the length of duration in datetime format. 10 | 11 | #### Scripts 12 | 13 | * `lifetime.py` runs the experiments, and `plot.py` will give the result of `fig.3` in the paper. -------------------------------------------------------------------------------- /data/training/code/start_evaluator.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export LD_LIBRARY_PATH=/usr/local/cuda-9.0/extras/CUPTI/lib64:$LD_LIBRARY_PATH 4 | cd tensor2tensor 5 | git pull 6 | cd ~ 7 | 8 | cd $(dirname $0) 9 | OUTDIR=$1 10 | DATADIR=$2 11 | MODEL=$3 12 | HPARAM=$4 13 | PROBLEM_DATA=$5 14 | TRAIN_STEPS=$6 15 | CKPT=$7 16 | JOBNAME=$8 17 | 18 | t2t-trainer \ 19 | --worker_job='/job:localhost' \ 20 | --data_dir=gs://shijian-18-ml/cifar_data \ 21 | --output_dir=gs://shijian-18-ml/30-cluster/${OUTDIR} \ 22 | --schedule=continuous_eval \ 23 | --model=resnet \ 24 | --problem=image_cifar10 \ 25 | --hparams_set=resnet_cifar_32_vanilla -------------------------------------------------------------------------------- /data/training/code/stop_training.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | JOBNAME=$1 4 | echo "Job name:" 5 | echo $JOBNAME 6 | NUM_PS=$2 7 | NUM_WORKER=$3 8 | ROOT=ozymandias 9 | 10 | for i in $(seq 0 $NUM_PS); do 11 | echo "Terminating ps-${i}..." 12 | ZONE=`gcloud compute instances list ${JOBNAME}-ps-${i} --format 'csv[no-heading](zone)'` 13 | gcloud compute ssh ${ROOT}@${JOBNAME}-ps-${i} --zone ${ZONE} -- pkill -f t2t-trainer 14 | done 15 | 16 | # Stop workers 17 | echo "Terminating worker..." 18 | if [[ $NUM_WORKER -ge 0 ]]; then 19 | for i in $(seq 0 $NUM_WORKER); do 20 | echo "Terminating worker-${i}..." 
21 | ZONE=`gcloud compute instances list ${JOBNAME}-worker-${i} --format 'csv[no-heading](zone)'` 22 | gcloud compute ssh ${ROOT}@${JOBNAME}-worker-${i} --zone ${ZONE} -- pkill -f t2t-trainer 23 | done 24 | fi 25 | 26 | # Stop a master 27 | echo "Terminating master-0..." 28 | ZONE=`gcloud compute instances list ${JOBNAME}-master --format 'csv[no-heading](zone)'` 29 | gcloud compute ssh ${ROOT}@${JOBNAME}-master --zone ${ZONE} -- pkill -f t2t-trainer 30 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/1ps2v/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.2297483652777441 3 | 2,1.3548544392801682 4 | 3,1.2294914310990828 5 | 4,1.1995738163680674 6 | 5,1.2610488647039189 7 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/1ps2v/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,2373.878895,0.924700022,1.229748365 3 | 2,2615.380878,0.925999999,1.354854439 4 | 3,2373.382915,0.930499971,1.229491431 5 | 4,2315.630617,0.930000007,1.199573816 6 | 5,2434.300683,0.925999999,1.261048865 -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/1ps3v/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.3836639300946656 3 | 2,1.383348979577796 4 | 3,1.3848049540769978 5 | 4,1.3842130446129186 6 | 5,1.4290085208799113 7 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/1ps3v/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,1827.342798,0.902999997,1.38366393 3 | 2,1826.926857,0.904799998,1.38334898 4 | 
3,1828.849697,0.927100003,1.384804954 5 | 4,1828.067989,0.918799996,1.384213045 6 | 5,1887.227362,0.905300021,1.429008521 -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/1ps4v/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.5849969778828914 3 | 2,1.6412425712618 4 | 3,1.5812209068026961 5 | 4,1.6375050625245833 6 | 5,1.638274196831386 7 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/1ps4v/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,1590.776843,0.920000017,1.584996978 3 | 2,1647.227542,0.897800028,1.641242571 4 | 3,1586.987002,0.917999983,1.581220907 5 | 4,1643.476404,0.902400017,1.637505063 6 | 5,1644.248343,0.912,1.638274197 -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/1ps5v/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.8834582133061948 3 | 2,1.811860792080822 4 | 3,1.810385567670145 5 | 4,1.819742833651951 6 | 5,1.9617389925489628 7 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/1ps5v/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,1524.409106,0.900399983,1.883458213 3 | 2,1466.460509,0.918900013,1.811860792 4 | 3,1465.266511,0.894500017,1.810385568 5 | 4,1472.839974,0.911899984,1.819742834 6 | 5,1587.766993,0.909099996,1.961738993 -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/1ps6v/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 
1,2.1608646411576884 3 | 2,2.2541150424084284 4 | 3,2.252214157625317 5 | 4,2.160488549956022 6 | 5,2.33907521915986 7 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/1ps6v/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,1465.291042,0.902899981,2.160864641 3 | 2,1528.524516,0.909200013,2.254115042 4 | 3,1527.235518,0.905600011,2.252214158 5 | 4,1465.036013,0.914099991,2.16048855 6 | 5,1586.136355,0.901199996,2.339075219 -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/1ps7v/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,2.308802720121167 3 | 2,2.408581055963974 4 | 3,2.4142370743832196 5 | 4,2.2134946727484053 6 | 5,2.410316202165037 7 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/1ps7v/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,1347.130922,0.90200001,2.30880272 3 | 2,1405.349184,0.915899992,2.408581056 4 | 3,1408.649335,0.90170002,2.414237074 5 | 4,1291.520966,0.887399971,2.213494673 6 | 5,1406.3616,0.897499979,2.410316202 -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/1ps8v/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,2.7454025600192518 3 | 2,2.8714944688625317 4 | 3,2.911120590210621 5 | 4,2.74848591157081 6 | 5,2.62469728159741 7 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/1ps8v/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | 
Run,Duration,Accuracy,Cost 2 | 1,1405.712124,0.886799991,2.74540256 3 | 2,1470.274213,0.905499995,2.871494469 4 | 3,1490.56374,0.895900011,2.91112059 5 | 4,1407.290874,0.872099996,2.748485912 6 | 5,1343.908119,0.889599979,2.624697282 -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/1ps_k80/2_data.csv: -------------------------------------------------------------------------------- 1 | Wall Time,Step,Accuracy 2 | 1549599572.030833,0,0.09269999712705612 3 | 1549600007.055048,4002,0.5432000160217285 4 | 1549600441.903693,8003,0.6639999747276306 5 | 1549600876.813806,12004,0.779699981212616 6 | 1549601311.382017,16005,0.8172000050544739 7 | 1549601746.614094,20006,0.8148000240325928 8 | 1549602181.448467,24006,0.8518000245094299 9 | 1549602616.239951,28007,0.8551999926567078 10 | 1549603051.363633,32008,0.8722000122070312 11 | 1549603486.351304,36010,0.9161999821662903 12 | 1549603921.64288,40013,0.9175999760627747 13 | 1549604356.74891,44014,0.9178000092506409 14 | 1549604792.031111,48014,0.9171000123023987 15 | 1549605227.973615,52014,0.9196000099182129 16 | 1549605662.97643,56014,0.9189000129699707 17 | 1549606098.338723,60015,0.917900025844574 18 | 1549606533.298298,64002,0.9182000160217285 19 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/1ps_k80/3_data.csv: -------------------------------------------------------------------------------- 1 | Wall Time,Step,Accuracy 2 | 1549601781.068465,0,0.10000000149011612 3 | 1549602035.45046,4008,0.4359999895095825 4 | 1549602350.307008,8009,0.6524999737739563 5 | 1549602664.866693,12013,0.7196000218391418 6 | 1549602918.740083,16014,0.7742999792098999 7 | 1549603233.081835,20016,0.7954999804496765 8 | 1549603547.892137,24019,0.8166000247001648 9 | 1549603861.769614,28021,0.8230000138282776 10 | 1549604115.882648,32023,0.8690999746322632 11 | 1549604429.838126,36025,0.8952999711036682 12 
| 1549604744.152389,40029,0.8962000012397766 13 | 1549604998.847245,44032,0.8946999907493591 14 | 1549605313.767925,48033,0.8967999815940857 15 | 1549605628.200158,52036,0.8966000080108643 16 | 1549605942.717604,56037,0.8967999815940857 17 | 1549606196.752053,60041,0.8973000049591064 18 | 1549606511.506652,64003,0.8973000049591064 19 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/1ps_k80/4_data.csv: -------------------------------------------------------------------------------- 1 | Wall Time,Step,Accuracy 2 | 1549606697.411022,0,0.10000000149011612 3 | 1549606891.211089,4006,0.46380001306533813 4 | 1549607085.500389,8010,0.699400007724762 5 | 1549607339.52033,12012,0.7660999894142151 6 | 1549607532.906216,16016,0.7567999958992004 7 | 1549607726.445224,20017,0.7906000018119812 8 | 1549607980.237117,24020,0.8611999750137329 9 | 1549608174.034091,28023,0.8435999751091003 10 | 1549608428.70045,32027,0.8870000243186951 11 | 1549608622.796052,36029,0.9108999967575073 12 | 1549608876.689461,40029,0.9104999899864197 13 | 1549609070.167884,44031,0.9099000096321106 14 | 1549609263.841002,48035,0.906499981880188 15 | 1549609517.849781,52040,0.9125999808311462 16 | 1549609711.553759,56045,0.9118000268936157 17 | 1549609965.469422,60047,0.9125999808311462 18 | 1549610160.916253,64004,0.9121999740600586 19 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/1ps_k80/5_data.csv: -------------------------------------------------------------------------------- 1 | Wall Time,Step,Accuracy 2 | 1549607138.348746,0,0.09709999710321426 3 | 1549607272.130756,4010,0.47609999775886536 4 | 1549607465.978671,8015,0.690500020980835 5 | 1549607599.506984,12019,0.7710000276565552 6 | 1549607792.993544,16027,0.7904000282287598 7 | 1549607986.176674,20030,0.8215000033378601 8 | 1549608119.485213,24035,0.84170001745224 9 | 
1549608313.091693,28041,0.8215000033378601 10 | 1549608506.822189,32046,0.8845999836921692 11 | 1549608700.573472,36046,0.9021000266075134 12 | 1549608833.910747,40052,0.9052000045776367 13 | 1549609027.544839,44057,0.9010000228881836 14 | 1549609221.760957,48060,0.9054999947547913 15 | 1549609356.020351,52062,0.9067000150680542 16 | 1549609549.740601,56066,0.9078999757766724 17 | 1549609743.177118,60068,0.9067000150680542 18 | 1549609877.024625,64007,0.9067999720573425 19 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/1ps_k80/6_data.csv: -------------------------------------------------------------------------------- 1 | Wall Time,Step,Accuracy 2 | 1549602189.461185,0,0.10000000149011612 3 | 1549602323.139171,4010,0.36980000138282776 4 | 1549602457.730623,8018,0.4916999936103821 5 | 1549602590.939678,12020,0.6273999810218811 6 | 1549602784.666045,16025,0.767799973487854 7 | 1549602917.919068,20035,0.7998999953269958 8 | 1549603051.40205,24049,0.8331000208854675 9 | 1549603184.533786,28049,0.8062000274658203 10 | 1549603318.243604,32052,0.885699987411499 11 | 1549603512.053086,36057,0.9009000062942505 12 | 1549603645.228308,40059,0.9010000228881836 13 | 1549603779.200262,44062,0.9018999934196472 14 | 1549603913.352941,48071,0.9004999995231628 15 | 1549604107.303848,52071,0.9021999835968018 16 | 1549604240.985819,56078,0.9014999866485596 17 | 1549604374.108725,60076,0.9018999934196472 18 | 1549604507.57641,64008,0.9021000266075134 19 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/1ps_k80/7_data.csv: -------------------------------------------------------------------------------- 1 | Wall Time,Step,Accuracy 2 | 1549602772.294721,0,0.10000000149011612 3 | 1549602845.566539,4018,0.3190000057220459 4 | 1549602978.898191,8029,0.41679999232292175 5 | 1549603113.602919,12037,0.5533000230789185 6 | 
1549603247.043285,16039,0.656000018119812 7 | 1549603380.722276,20043,0.6610999703407288 8 | 1549603514.00208,24055,0.8130000233650208 9 | 1549603647.572363,28058,0.8223999738693237 10 | 1549603720.611546,32061,0.8766999840736389 11 | 1549603853.980545,36073,0.8939999938011169 12 | 1549603987.255375,40080,0.8949000239372253 13 | 1549604120.715808,44091,0.8978999853134155 14 | 1549604254.287628,48102,0.8956000208854675 15 | 1549604387.674346,52105,0.8964999914169312 16 | 1549604520.919029,56116,0.8974999785423279 17 | 1549604654.01548,60117,0.8974999785423279 18 | 1549604727.319932,64010,0.8974999785423279 19 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/1ps_k80/8_data.csv: -------------------------------------------------------------------------------- 1 | Wall Time,Step,Accuracy 2 | 1549600213.265215,0,0.11729999631643295 3 | 1549600347.059492,4024,0.2847999930381775 4 | 1549600419.766613,8040,0.44179999828338623 5 | 1549600553.139782,12067,0.5835999846458435 6 | 1549600686.513074,16074,0.6365000009536743 7 | 1549600759.743659,20081,0.7605999708175659 8 | 1549600893.179923,24089,0.8309999704360962 9 | 1549601026.470852,28097,0.7972999811172485 10 | 1549601099.887128,32108,0.8855000138282776 11 | 1549601233.172608,36111,0.89410001039505 12 | 1549601306.28111,40119,0.8966000080108643 13 | 1549601439.551114,44128,0.8966000080108643 14 | 1549601573.050869,48144,0.8932999968528748 15 | 1549601646.477675,52140,0.89410001039505 16 | 1549601780.258827,56152,0.8952999711036682 17 | 1549601913.593542,60160,0.8950999975204468 18 | 1549601986.955545,64011,0.8978999853134155 19 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps2k/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.4509558838428975 3 | 2,1.4508648530206205 4 | 3,1.4778063200955391 5 | 4,1.4721098275750637 6 
| 5,1.472892943115616 7 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps2k/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,6553.549611,0.924799979,1.450955884 3 | 2,6553.138451,0.925100029,1.450864853 4 | 3,6674.825294,0.905099988,1.47780632 5 | 4,6649.095879,0.923799992,1.472109828 6 | 5,6652.632986,0.923399985,1.472892943 -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps2v/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.1188251269893497 3 | 2,1.08426848879347 4 | 3,1.0899047421016514 5 | 4,1.0870252714230815 6 | 5,1.0863309464271553 7 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps2v/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,2006.021672,0.926599979,1.118825127 3 | 2,1944.062601,0.931299984,1.084268489 4 | 3,1954.168243,0.925800025,1.089904742 5 | 4,1949.005434,0.931100011,1.087025271 6 | 5,1947.760529,0.924700022,1.086330946 -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps3k/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.281854118762398 3 | 2,1.2817193775856022 4 | 3,1.2807027745214465 5 | 4,1.3023641196681979 6 | 5,1.298011395708752 7 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps3k/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,4383.906015,0.922900021,1.281854119 3 | 
2,4383.445204,0.914200008,1.281719378 4 | 3,4379.968449,0.921899974,1.280702775 5 | 4,4454.049657,0.916700006,1.30236412 6 | 5,4439.16346,0.919200003,1.298011396 -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps3v/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.0529525112048437 3 | 2,1.099299177113179 4 | 3,1.1052680934379564 5 | 4,1.0543974029372032 6 | 5,1.1007865551276077 7 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps3v/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,1321.310751,0.927600026,1.052952511 3 | 2,1379.46945,0.922800004,1.099299177 4 | 3,1386.95962,0.925100029,1.105268093 5 | 4,1323.123892,0.918600023,1.054397403 6 | 5,1381.335905,0.922500014,1.100786555 -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps4k/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.196675795471716 3 | 2,1.2188653312871458 4 | 3,1.2190310438857557 5 | 4,1.219334816122103 6 | 5,1.2190377595404627 7 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps4k/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,3292.998887,0.919499993,1.196675795 3 | 2,3354.0598,0.921199977,1.218865331 4 | 3,3354.515806,0.894699991,1.219031044 5 | 4,3355.351723,0.917500019,1.219334816 6 | 5,3354.534286,0.921700001,1.21903776 -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps4v/cost.csv: 
-------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.0938730571294533 3 | 2,1.096546729736676 4 | 3,1.157805361192253 5 | 4,1.0984223044936774 6 | 5,1.0965829921580024 7 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps4v/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,1055.794111,0.901000023,1.093873057 3 | 2,1058.37471,0.910600007,1.09654673 4 | 3,1117.50086,0.904100001,1.157805361 5 | 4,1060.184994,0.897400022,1.098422304 6 | 5,1058.40971,0.913399994,1.096582992 -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps5k/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.1689704 3 | 2,1.1681016000000002 4 | 3,1.1689704 5 | 4,1.1385624 6 | 5,1.1681016000000002 7 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps5k/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,2691,0.911300004,1.1689704 3 | 2,2689,0.902499974,1.1681016 4 | 3,2691,0.897099972,1.1689704 5 | 4,2621,0.906700015,1.1385624 6 | 5,2689,0.881799996,1.1681016 -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps5v/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.1973982013294808 3 | 2,1.193889590412175 4 | 3,1.1608727469492228 5 | 4,1.4309229744033665 6 | 5,1.1837679964711731 7 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps5v/train_data_processed.csv: 
-------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,938.9640322,0.891499996,1.197398201 3 | 2,936.2126839,0.89139998,1.19388959 4 | 3,910.3218579,0.909500003,1.160872747 5 | 4,1122.087209,0.899900019,1.430922974 6 | 5,928.2756312,0.898400009,1.183767996 -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps6k/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.146582862976265 3 | 2,1.1167112383814812 4 | 3,1.1167311293824673 5 | 4,1.1161400251051428 6 | 5,1.1182377248275281 7 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps6k/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,2268.664153,0.906799972,1.146582863 3 | 2,2209.559237,0.890100002,1.116711238 4 | 3,2209.598594,0.902499974,1.116731129 5 | 4,2208.429017,0.912,1.116140025 6 | 5,2212.57959,0.904299974,1.118237725 -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps6v/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.3121348672387345 3 | 2,1.3101440536739932 4 | 3,1.2840513062838836 5 | 4,1.2899341092306125 6 | 5,1.400649500645074 7 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps6v/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,866.4387889,0.895200014,1.312134867 3 | 2,865.1241999,0.904100001,1.310144054 4 | 3,847.8944402,0.895799994,1.284051306 5 | 4,851.779017,0.903999984,1.289934109 6 | 5,924.887439,0.880100012,1.400649501 
-------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps7k/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.0985838373337746 3 | 2,1.1002146366041183 4 | 3,1.0993836930933953 5 | 4,1.099274677422905 6 | 5,1.0992600403427124 7 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps7k/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,1905.940037,0.902100027,1.098583837 3 | 2,1908.769321,0.883000016,1.100214637 4 | 3,1907.327712,0.909399986,1.099383693 5 | 4,1907.13858,0.8829,1.099274677 6 | 5,1907.113186,0.893400013,1.09926004 -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps7v/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.4397607969052744 3 | 2,1.4722797810948192 4 | 3,1.4899550066987843 5 | 4,1.4944008212526545 6 | 5,1.4687659723257631 7 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps7v/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,821.0471051,0.900300026,1.439760797 3 | 2,839.5915868,0.908299983,1.472279781 4 | 3,849.6711729,0.903500021,1.489955007 5 | 4,852.2064712,0.906199992,1.494400821 6 | 5,837.58778,0.911199987,1.468765972 -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps8k/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.1146402110856533 3 | 2,1.114120636306715 4 | 3,1.118919088201046 5 | 4,1.1156042744781018 6 | 5,1.0929750860597134 
7 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps8k/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,1721.717966,0.907500029,1.114640211 3 | 2,1720.91541,0.891700029,1.114120636 4 | 3,1728.327291,0.893000007,1.118919088 5 | 4,1723.207097,0.880400002,1.115604274 6 | 5,1688.253145,0.836600006,1.092975086 -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps8v/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.6177303972356083 3 | 2,1.6993622517025402 4 | 3,1.5729247455543842 5 | 4,1.647087251970518 6 | 5,1.554804790136562 7 | -------------------------------------------------------------------------------- /data/training/experiment_1ps_vs_2ps/2ps8v/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,811.8148141,0.900699973,1.617730397 3 | 2,852.7795811,0.8829,1.699362252 4 | 3,789.3302939,0.903199971,1.572924746 5 | 4,826.546768,0.899699986,1.647087252 6 | 5,780.237278,0.871299982,1.55480479 -------------------------------------------------------------------------------- /data/training/experiment_cross_region/2e1c1w-p100/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy 2 | 1,2490.2080738544464,0.9071999788284302 3 | 2,2554.417284011841,0.9193999767303467 4 | 3,2491.789762020111,0.9150999784469604 5 | 4,2431.1588549613953,0.9172999858856201 6 | 5,2613.7531900405884,0.9221000075340271 7 | -------------------------------------------------------------------------------- /data/training/experiment_cross_region/2e1c1w/train_data_processed.csv: 
-------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy 2 | 1,4723.227193117142,0.9226999878883362 3 | 2,4601.186982870102,0.9200999736785889 4 | 3,4483.54295706749,0.9212999939918518 5 | 4,4602.768432855606,0.920799970626831 6 | 5,4599.846872806549,0.9175000190734863 7 | -------------------------------------------------------------------------------- /data/training/experiment_cross_region/2e2c-p100/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy 2 | 1,2310.6871571540833,0.8998000025749207 3 | 2,2433.8026399612427,0.9211000204086304 4 | 3,2670.5892820358276,0.9258999824523926 5 | 4,2614.8551659584045,0.913100004196167 6 | 5,2493.83362698555,0.9106000065803528 7 | -------------------------------------------------------------------------------- /data/training/experiment_cross_region/2e2c/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy 2 | 1,4421.993453025818,0.8956999778747559 3 | 2,4240.804141044617,0.8995000123977661 4 | 3,4239.966008901596,0.9244999885559082 5 | 4,4600.585135936737,0.9168000221252441 6 | 5,4536.9851059913635,0.9061999917030334 7 | -------------------------------------------------------------------------------- /data/training/experiment_cross_region/3e1c-p100/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy 2 | 1,1888.529273033142,0.8984000086784363 3 | 2,2008.2953639030457,0.8925999999046326 4 | 3,2008.371838092804,0.902899980545044 5 | 4,2011.5693171024323,0.9049000144004822 6 | 5,1951.1879200935364,0.9085999727249146 7 | -------------------------------------------------------------------------------- /data/training/experiment_cross_region/3e1c/train_data_processed.csv: 
-------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy 2 | 1,3879.5696721076965,0.9189000129699707 3 | 2,3819.0815150737762,0.9218000173568726 4 | 3,3878.152351140976,0.909600019454956 5 | 4,3818.915142059326,0.9225000143051147 6 | 5,3939.3316757678986,0.911899983882904 7 | -------------------------------------------------------------------------------- /data/training/experiment_mixed_gpu/1v1p2k/agg_data.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,1967.411442,0.908100009,1.129141123 3 | 2,2019.320987,0.921299994,1.158933164 4 | 3,2021.621882,0.91839999,1.160253698 5 | 4,1959.693868,0.918500006,1.124711836 6 | 5,1896.298373,0.906899989,1.088327753 7 | 6,2015.813857,0.897199988,1.156920344 8 | 7,2019.270651,0.912100017,1.158904275 9 | 8,2015.161808,0.91140002,1.156546118 10 | 9,2077.321334,0.908299983,1.192220851 11 | 10,2016.782338,0.893100023,1.157476177 -------------------------------------------------------------------------------- /data/training/experiment_mixed_gpu/1v1p2k/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.1291411227956856 3 | 2,1.1589331635504785 4 | 3,1.1602536982798886 5 | 4,1.1247118356011445 6 | 5,1.088327753042343 7 | 6,1.156920343803147 8 | 7,1.1589042746686369 9 | 8,1.1565461183629426 10 | 9,1.192220850823538 11 | 10,1.1574761765956507 12 | -------------------------------------------------------------------------------- /data/training/experiment_mixed_gpu/1v1p2k/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy 2 | 1,1967.411442041397,0.9081000089645386 3 | 2,2019.3209869861603,0.9212999939918518 4 | 3,2021.6218819618225,0.91839998960495 5 | 4,1959.6938679218292,0.9185000061988831 6 | 5,1896.2983729839325,0.9068999886512756 7 | 6,2015.8138570785522,0.8971999883651733 
8 | 7,2019.270651102066,0.9121000170707703 9 | 8,2015.161808013916,0.9114000201225281 10 | 9,2077.321333885193,0.90829998254776 11 | 10,2016.782338142395,0.8931000232696533 12 | -------------------------------------------------------------------------------- /data/training/experiment_mixed_gpu/1v2p1k/agg_data.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,1839,0.919399977,1.206343101 3 | 2,1841,0.897300005,1.207655056 4 | 3,1839,0.921400011,1.206343101 5 | 4,1779,0.893000007,1.166984435 6 | 5,1791,0.912299991,1.174856168 7 | 6,1779,0.907199979,1.166984435 8 | 7,1784,0.914499998,1.170264324 9 | 8,1724,0.922599971,1.130905658 10 | 9,1784,0.910000026,1.170264324 11 | 10,1725,0.912400007,1.131561636 -------------------------------------------------------------------------------- /data/training/experiment_mixed_gpu/1v2p1k/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.20634310064 3 | 2,1.2076550561600001 4 | 3,1.20634310064 5 | 4,1.16698443504 6 | 5,1.17485616816 7 | 6,1.16698443504 8 | 7,1.1702643238400001 9 | 8,1.1309056582400001 10 | 9,1.1702643238400001 11 | 10,1.131561636 12 | -------------------------------------------------------------------------------- /data/training/experiment_mixed_gpu/1v2p1k/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy 2 | 1,1839.0,0.919399977 3 | 2,1841.0,0.897300005 4 | 3,1839.0,0.921400011 5 | 4,1779.0,0.893000007 6 | 5,1791.0,0.912299991 7 | 6,1779.0,0.907199979 8 | 7,1784.0,0.914499998 9 | 8,1724.0,0.922599971 10 | 9,1784.0,0.910000026 11 | 10,1725.0,0.912400007 12 | -------------------------------------------------------------------------------- /data/training/experiment_mixed_gpu/2v1p1k/agg_data.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 
2 | 1,1714.506269,0.911000013,1.364701432 3 | 2,1776.155372,0.915099978,1.36618561 4 | 3,1722.053407,0.905200005,1.364701432 5 | 4,1778.565727,0.91900003,1.3201761 6 | 5,1836.229716,0.907800019,1.329081166 7 | 6,1716.078924,0.912400007,1.3201761 8 | 7,1777.563329,0.903199971,1.323886544 9 | 8,1724.806188,0.909099996,1.279361212 10 | 9,1714.335581,0.921199977,1.323886544 11 | 10,1842.359798,0.914099991,1.280103301 -------------------------------------------------------------------------------- /data/training/experiment_mixed_gpu/2v1p1k/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.36470143193 3 | 2,1.36618560967 4 | 3,1.36470143193 5 | 4,1.32017609973 6 | 5,1.32908116617 7 | 6,1.32017609973 8 | 7,1.32388654408 9 | 8,1.27936121188 10 | 9,1.32388654408 11 | 10,1.28010330075 12 | -------------------------------------------------------------------------------- /data/training/experiment_mixed_gpu/2v1p1k/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy 2 | 1,1714.506268978119,0.9110000133514404 3 | 2,1776.1553719043732,0.9150999784469604 4 | 3,1722.0534069538116,0.9052000045776367 5 | 4,1778.5657269954681,0.9190000295639038 6 | 5,1836.2297160625458,0.907800018787384 7 | 6,1716.0789239406586,0.9124000072479248 8 | 7,1777.5633292198181,0.9031999707221985 9 | 8,1724.8061878681183,0.9090999960899353 10 | 9,1714.3355808258057,0.9211999773979187 11 | 10,1842.359798192978,0.9140999913215637 12 | -------------------------------------------------------------------------------- /data/training/k80_on_demand/2-demand/2_ondemand_cluster_status.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_on_demand/2-demand/2_ondemand_cluster_status.db 
-------------------------------------------------------------------------------- /data/training/k80_on_demand/2-demand/agg_data.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Duration,Accuracy,Cost 2 | us-east1-c,1,7283.886768,0.90990001,3.214816264 3 | us-east1-c,2,6982.140929,0.924199998,3.08163772 4 | us-east1-c,3,7406.4029,0.930199981,3.268889984 5 | us-east1-c,4,7166.934799,0.920700014,3.163198343 6 | us-east1-c,5,6923.670836,0.917100012,3.05583136 7 | us-east1-c,6,7163.595199,0.918600023,3.161724377 8 | us-east1-c,7,6926.126685,0.920899987,3.056915274 9 | us-east1-c,8,6924.409527,0.905499995,3.056157389 10 | us-east1-c,9,7410.11651,0.916899979,3.270529023 11 | us-east1-c,10,7536.45765,0.925899982,3.326290948 -------------------------------------------------------------------------------- /data/training/k80_on_demand/2-demand/cost.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Cost 2 | demand2-us-east1-c,1,3.2148162638645554 3 | demand2-us-east1-c,2,3.081637720416241 4 | demand2-us-east1-c,3,3.2688899839354137 5 | demand2-us-east1-c,4,3.163198342867184 6 | demand2-us-east1-c,5,3.055831360164528 7 | demand2-us-east1-c,6,3.161724376973133 8 | demand2-us-east1-c,7,3.056915273754502 9 | demand2-us-east1-c,8,3.056157388864689 10 | demand2-us-east1-c,9,3.2705290228158193 11 | demand2-us-east1-c,10,3.3262909483802607 12 | -------------------------------------------------------------------------------- /data/training/k80_on_demand/2-demand/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Duration,Accuracy 2 | us-east1-c,1,7283.886767864227,0.9099000096321106 3 | us-east1-c,2,6982.140928983688,0.9241999983787537 4 | us-east1-c,3,7406.402899980545,0.9301999807357788 5 | us-east1-c,4,7166.934798955917,0.9207000136375427 6 | us-east1-c,5,6923.670835971832,0.9171000123023987 7 | 
us-east1-c,6,7163.595198869705,0.9186000227928162 8 | us-east1-c,7,6926.126685142517,0.9208999872207642 9 | us-east1-c,8,6924.40952706337,0.9054999947547913 10 | us-east1-c,9,7410.116509914398,0.9168999791145325 11 | us-east1-c,10,7536.457649946213,0.9258999824523926 12 | -------------------------------------------------------------------------------- /data/training/k80_on_demand/2-demand/vm_data.csv: -------------------------------------------------------------------------------- 1 | VM_name,Uptime 2 | demand2-us-east1-c-run1-master,7516.300136 3 | demand2-us-east1-c-run1-worker-0,7514.283644 4 | demand2-us-east1-c-run2-master,7226.880177 5 | demand2-us-east1-c-run2-worker-0,7224.513332 6 | demand2-us-east1-c-run3-master,7634.830869 7 | demand2-us-east1-c-run3-worker-0,7633.237503 8 | demand2-us-east1-c-run4-master,7470.031676 9 | demand2-us-east1-c-run4-worker-0,7468.259497 10 | demand2-us-east1-c-run5-master,7147.197748 11 | demand2-us-east1-c-run5-worker-0,7145.132591 12 | demand2-us-east1-c-run6-master,7473.871535 13 | demand2-us-east1-c-run6-worker-0,7471.636055 14 | demand2-us-east1-c-run7-master,7133.256691 15 | demand2-us-east1-c-run7-worker-0,7131.085527 16 | demand2-us-east1-c-run8-master,7197.700908 17 | demand2-us-east1-c-run8-worker-0,7194.895476 18 | demand2-us-east1-c-run9-master,7697.015784 19 | demand2-us-east1-c-run9-worker-0,7694.726216 20 | demand2-us-east1-c-run10-master,7817.751411 21 | demand2-us-east1-c-run10-worker-0,7815.705407 22 | -------------------------------------------------------------------------------- /data/training/k80_on_demand/4-demand/4_ondemand_cluster_status.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_on_demand/4-demand/4_ondemand_cluster_status.db -------------------------------------------------------------------------------- 
/data/training/k80_on_demand/4-demand/agg_data.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Duration,Accuracy,Cost 2 | us-east1-c,1,3654.454548,0.917900026,3.080778273 3 | us-east1-c,2,3534.730384,0.907800019,2.979848408 4 | us-east1-c,3,3474.777911,0.913699985,2.929307274 5 | us-east1-c,4,3594.947718,0.899699986,3.030612825 6 | us-east1-c,5,3650.108237,0.921199977,3.077114246 7 | us-east1-c,6,3532.886483,0.907999992,2.978293963 8 | us-east1-c,7,3651.781341,0.912899971,3.078524706 9 | us-east1-c,8,3592.46353,0.919099987,3.028518605 10 | us-east1-c,9,3594.189482,0.926500022,3.029973617 11 | us-east1-c,10,3531.988252,0.893400013,2.977536736 -------------------------------------------------------------------------------- /data/training/k80_on_demand/4-demand/cost.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Cost 2 | us-east1-c,1,3.0807782729555515 3 | us-east1-c,2,2.9798484084135963 4 | us-east1-c,3,2.9293072744872144 5 | us-east1-c,4,3.030612825148311 6 | us-east1-c,5,3.077114245979448 7 | us-east1-c,6,2.9782939630608944 8 | us-east1-c,7,3.0785247061538032 9 | us-east1-c,8,3.0285186049132493 10 | us-east1-c,9,3.0299736170934253 11 | us-east1-c,10,2.977536736338396 12 | -------------------------------------------------------------------------------- /data/training/k80_on_demand/4-demand/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Duration,Accuracy 2 | us-east1-c,1,3654.45454788208,0.917900025844574 3 | us-east1-c,2,3534.7303841114044,0.907800018787384 4 | us-east1-c,3,3474.7779109477997,0.9136999845504761 5 | us-east1-c,4,3594.9477179050446,0.8996999859809875 6 | us-east1-c,5,3650.108237028122,0.9211999773979187 7 | us-east1-c,6,3532.886483192444,0.9079999923706055 8 | us-east1-c,7,3651.781341075897,0.9128999710083008 9 | us-east1-c,8,3592.4635298252106,0.9190999865531921 10 | 
us-east1-c,9,3594.189481973648,0.9265000224113464 11 | us-east1-c,10,3531.9882521629333,0.8934000134468079 12 | -------------------------------------------------------------------------------- /data/training/k80_on_demand/8-demand/8_ondemand_cluster_status.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_on_demand/8-demand/8_ondemand_cluster_status.db -------------------------------------------------------------------------------- /data/training/k80_on_demand/8-demand/agg_data.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Duration,Accuracy,Cost 2 | us-central1-c,1,1775.308646,0.825200021,2.922761636 3 | us-central1-c,2,1833.544573,0.886200011,3.018637772 4 | us-west1-b,1,1836.217654,0.8847,3.023038572 5 | us-west1-b,2,1833.243472,0.877799988,3.018142057 6 | us-west1-b,3,1834.687302,0.895600021,3.020519093 7 | us-west1-b,4,1833.505311,0.902199984,3.018573134 8 | europe-west1-b,1,1838.648894,0.8926,3.02704122 9 | europe-west1-b,2,1837.576873,0.888199985,3.025276309 10 | europe-west1-b,3,1838.83081,0.88319999,3.027340716 11 | europe-west1-b,4,1837.490226,0.904699981,3.025133658 -------------------------------------------------------------------------------- /data/training/k80_on_demand/8-demand/cost.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Cost 2 | us-central1-c,1,2.9227616361958266 3 | us-central1-c,2,3.01863777242579 4 | us-west1-b,1,3.023038572469554 5 | us-west1-b,2,3.0181420574634505 6 | us-west1-b,3,3.020519092567506 7 | us-west1-b,4,3.0185731337319375 8 | europe-west1-b,1,3.0270412202658035 9 | europe-west1-b,2,3.0252763092002537 10 | europe-west1-b,3,3.027340715850706 11 | europe-west1-b,4,3.025133658330288 12 | -------------------------------------------------------------------------------- 
/data/training/k80_on_demand/8-demand/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Duration,Accuracy 2 | us-central1-c,1,1775.3086459636688,0.8252000212669373 3 | us-central1-c,2,1833.5445730686188,0.8862000107765198 4 | us-west1-b,1,1836.2176539897919,0.8847000002861023 5 | us-west1-b,2,1833.2434718608856,0.8777999877929688 6 | us-west1-b,3,1834.6873018741608,0.8956000208854675 7 | us-west1-b,4,1833.505311012268,0.9021999835968018 8 | europe-west1-b,1,1838.648894071579,0.8925999999046326 9 | europe-west1-b,2,1837.5768730640411,0.8881999850273132 10 | europe-west1-b,3,1838.8308100700378,0.8831999897956848 11 | europe-west1-b,4,1837.4902257919312,0.904699981212616 12 | -------------------------------------------------------------------------------- /data/training/k80_on_demand/k80-demand/cost.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Cost 2 | us-east1-c,1,2.8257915932374646 3 | us-east1-c,2,2.8262850336043073 4 | us-east1-c,3,2.8253207669687797 5 | us-east1-c,4,2.8738798664060523 6 | us-east1-c,5,2.849035221621027 7 | us-east1-c,6,2.801573392900317 8 | us-east1-c,7,2.825137746195934 9 | us-east1-c,8,2.8257162739912367 10 | us-east1-c,9,2.825015185092566 11 | us-east1-c,10,2.801126140468645 12 | -------------------------------------------------------------------------------- /data/training/k80_on_demand/k80-demand/k80_ondemand_cluster_status.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_on_demand/k80-demand/k80_ondemand_cluster_status.db -------------------------------------------------------------------------------- /data/training/k80_on_demand/k80-demand/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | 
Zone,Run,Duration,Accuracy 2 | us-east1-c,1,14070.56512093544,0.9309999942779541 3 | us-east1-c,2,14073.022126197815,0.9308000206947327 4 | us-east1-c,3,14068.220718860626,0.927299976348877 5 | us-east1-c,4,14310.012778997421,0.933899998664856 6 | us-east1-c,5,14186.30295085907,0.9352999925613403 7 | us-east1-c,6,13949.974570035934,0.9297999739646912 8 | us-east1-c,7,14067.309396982193,0.9304999709129333 9 | us-east1-c,8,14070.190081119537,0.9273999929428101 10 | us-east1-c,9,14066.699124097824,0.9304999709129333 11 | us-east1-c,10,13947.747550010681,0.9301000237464905 12 | -------------------------------------------------------------------------------- /data/training/k80_spot/2-spot/2_spot_cluster_status.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/2-spot/2_spot_cluster_status.db -------------------------------------------------------------------------------- /data/training/k80_spot/2-spot/agg_data.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Duration,Accuracy,Cost 2 | us-east1-c,2,6879.188981,0.909799993,1.249948638 3 | us-east1-c,3,9651.353379,0.92110002,1.753650909 4 | us-east1-c,4,9231.450971,0.923799992,1.677354641 5 | us-east1-c,5,12978.63934,0.92750001,2.358218768 6 | us-east1-c,6,6922.379609,0.91170001,1.257796375 7 | us-east1-c,7,6990.41072,0.92869997,1.270157628 8 | us-east1-c,8,6923.625568,0.92019999,1.258022766 9 | us-central1-c,1,6992.56139,0.918600023,1.270548405 10 | us-central1-c,2,6985.918288,0.917999983,1.269341353 11 | us-central1-c,3,6989.464339,0.917100012,1.26998567 12 | us-central1-c,4,6985.671379,0.930899978,1.26929649 13 | us-central1-c,5,6982.023029,0.926199973,1.268633584 14 | us-central1-c,6,7046.324273,0.919700027,1.28031712 15 | us-central1-c,7,7046.988037,0.912500024,1.280437726 16 | 
us-central1-c,8,7045.714015,0.9278,1.280206237 17 | us-west1-b,1,7102.458603,0.919799984,1.290516728 18 | us-west1-b,2,7048.772484,0.922699988,1.28076196 19 | us-west1-b,3,7042.758465,0.922699988,1.279669213 20 | us-west1-b,4,13887.34532,0.909099996,2.523330645 21 | us-west1-b,5,6990.501665,0.917299986,1.270174153 22 | us-west1-b,6,7045.86101,0.919099987,1.280232946 23 | us-west1-b,7,11344.13564,0.922399998,2.061229446 24 | us-west1-b,8,7108.508678,0.902700007,1.291616027 25 | europe-west1-b,1,7049.455164,0.902400017,1.280886003 26 | europe-west1-b,2,7952.290686,0.923600018,1.444931218 27 | europe-west1-b,3,7044.702678,0.918299973,1.280022477 28 | europe-west1-b,4,7049.520827,0.924799979,1.280897934 29 | europe-west1-b,5,7047.77082,0.919799984,1.280579958 30 | europe-west1-b,6,7114.449398,0.914200008,1.292695456 31 | europe-west1-b,7,7108.505724,0.921500027,1.29161549 32 | europe-west1-b,8,7110.926767,0.924600005,1.292055394 -------------------------------------------------------------------------------- /data/training/k80_spot/2-spot/cost.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Cost 2 | spot2-us-east1-c,1,0.21494646665000003 3 | spot2-us-east1-c,2,1.2499486378477 4 | spot2-us-east1-c,3,1.7536509089643002 5 | spot2-us-east1-c,4,1.6773546414307001 6 | spot2-us-east1-c,5,2.3582187680780002 7 | spot2-us-east1-c,6,1.2577963749553 8 | spot2-us-east1-c,7,1.270157627824 9 | spot2-us-east1-c,8,1.2580227657056002 10 | spot2-us-central1-c,1,1.2705484045630002 11 | spot2-us-central1-c,2,1.2693413529296 12 | spot2-us-central1-c,3,1.2699856703963 13 | spot2-us-central1-c,4,1.2692964895643 14 | spot2-us-central1-c,5,1.2686335843693 15 | spot2-us-central1-c,6,1.2803171204041002 16 | spot2-us-central1-c,7,1.2804377263229 17 | spot2-us-central1-c,8,1.2802062365255 18 | spot2-us-west1-b,1,1.2905167281651 19 | spot2-us-west1-b,2,1.2807619603428002 20 | spot2-us-west1-b,3,1.2796692130905 21 | spot2-us-west1-b,4,2.523330644644 
22 | spot2-us-west1-b,5,1.2701741525305001 23 | spot2-us-west1-b,6,1.280232945517 24 | spot2-us-west1-b,7,2.0612294457880003 25 | spot2-us-west1-b,8,1.2916160267926 26 | spot2-europe-west1-b,1,1.2808860032988 27 | spot2-europe-west1-b,2,1.4449312176462001 28 | spot2-europe-west1-b,3,1.2800224765925998 29 | spot2-europe-west1-b,4,1.2808979342659002 30 | spot2-europe-west1-b,5,1.280579957994 31 | spot2-europe-west1-b,6,1.2926954556166 32 | spot2-europe-west1-b,7,1.2916154900508 33 | spot2-europe-west1-b,8,1.2920553935639 34 | -------------------------------------------------------------------------------- /data/training/k80_spot/2-spot/heat-compact.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/2-spot/heat-compact.png -------------------------------------------------------------------------------- /data/training/k80_spot/2-spot/heat-png.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/2-spot/heat-png.png -------------------------------------------------------------------------------- /data/training/k80_spot/2-spot/heat.graffle/data.plist: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/2-spot/heat.graffle/data.plist -------------------------------------------------------------------------------- /data/training/k80_spot/2-spot/heat.graffle/image5.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/2-spot/heat.graffle/image5.png -------------------------------------------------------------------------------- /data/training/k80_spot/2-spot/heat.graffle/image6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/2-spot/heat.graffle/image6.png -------------------------------------------------------------------------------- /data/training/k80_spot/2-spot/heat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/2-spot/heat.png -------------------------------------------------------------------------------- /data/training/k80_spot/2-spot/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Duration,Accuracy 2 | us-east1-c,1,1182.9745,0.0993 3 | us-east1-c,2,6879.188981,0.909799993 4 | us-east1-c,3,9651.353379,0.92110002 5 | us-east1-c,4,9231.450971,0.923799992 6 | us-east1-c,5,12978.63934,0.92750001 7 | us-east1-c,6,6922.379609,0.91170001 8 | us-east1-c,7,6990.41072,0.92869997 9 | us-east1-c,8,6923.625568,0.92019999 10 | us-central1-c,1,6992.56139,0.918600023 11 | us-central1-c,2,6985.918288,0.917999983 12 | us-central1-c,3,6989.464339,0.917100012 13 | us-central1-c,4,6985.671379,0.930899978 14 | us-central1-c,5,6982.023029,0.926199973 15 | us-central1-c,6,7046.324273,0.919700027 16 | us-central1-c,7,7046.988037,0.912500024 17 | us-central1-c,8,7045.714015,0.9278 18 | us-west1-b,1,7102.458603,0.919799984 19 | us-west1-b,2,7048.772484,0.922699988 20 | us-west1-b,3,7042.758465,0.922699988 21 | us-west1-b,4,13887.34532,0.909099996 22 | 
us-west1-b,5,6990.501665,0.917299986 23 | us-west1-b,6,7045.86101,0.919099987 24 | us-west1-b,7,11344.13564,0.922399998 25 | us-west1-b,8,7108.508678,0.902700007 26 | europe-west1-b,1,7049.455164,0.902400017 27 | europe-west1-b,2,7952.290686,0.923600018 28 | europe-west1-b,3,7044.702678,0.918299973 29 | europe-west1-b,4,7049.520827,0.924799979 30 | europe-west1-b,5,7047.77082,0.919799984 31 | europe-west1-b,6,7114.449398,0.914200008 32 | europe-west1-b,7,7108.505724,0.921500027 33 | europe-west1-b,8,7110.926767,0.924600005 -------------------------------------------------------------------------------- /data/training/k80_spot/4-spot/4-spot_reminder.md: -------------------------------------------------------------------------------- 1 | gsutil cp gs://shijian-18-ml/resnet32-4/model.ckpt-0.data-00000-of-00001 gs://shijian-18-ml/resnet32-4-copy/ 2 | gsutil cp gs://shijian-18-ml/resnet32-4/model.ckpt-0.index gs://shijian-18-ml/resnet32-4-copy/ 3 | gsutil cp gs://shijian-18-ml/resnet32-4/model.ckpt-0.meta gs://shijian-18-ml/resnet32-4-copy/ 4 | 5 | gsutil cp --recurse gs://shijian-18-ml/cifar gs://shijian-18-ml/30-cluster/ 6 | 7 | ## Finished 8 | 9 | us-east1-c run1, run2, run3, run4, run5, run6, run7, run8 10 | us-west1-b run1, run2, run3, run5, run6, run7, run8 11 | europe-west1-b run1, run2, run3, run4, run5, run6, run7, run8 12 | us-central1-c run1, run2, run3, run4, run5, run6, run7, run8 13 | 14 | ## Problematic runs 15 | 16 | us-central1-c run5(Not in DB) 17 | 18 | ## Retrying 19 | 20 | us-west1-b run2 21 | europe-west1-b 22 | us-central1-c 23 | 24 | ## Revoc 25 | 26 | us-west1-b-run2-worker-2 2019-02-12 16:32:41.164458 27 | us-west1-b-run2-worker-0 2019-02-12 16:33:47.731902 28 | us-east1-c-run3-worker-0 2019-02-12 12:20:33.351 29 | europe-west1-b-run3-worker-0 2019-02-12 13:00:57.800313 30 | us-west1-b-run7-worker-0 2019-02-12 13:37:40.958104 31 | us-central1-c-run3-worker-0 2019-02-12 14:20:48.230392 32 | us-central1-c-run8-worker-1 2019-02-12 14:48:20.667359 
33 | us-west1-b-run8-worker-2 2019-02-12 14:53:52.577811 34 | us-central1-c-run4-worker-0 2019-02-12 15:29:53.777 35 | us-central1-c-run4-worker-1 2019-02-12 15:29:21.426649 36 | us-central1-c-run5-worker-0 2019-02-12 16:51:09.432099 37 | us-west1-b-run4-worker-0 2019-02-12 17:05:09.77864 38 | 39 | ## Down time 40 | 41 | us-west2-b-run2-master0 2019-02-12 18:14:23.453 42 | us-west2-b-run2-worker1 2019-02-12 18:14:40.076 -------------------------------------------------------------------------------- /data/training/k80_spot/4-spot/4_spot_cluster_status.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/4-spot/4_spot_cluster_status.db -------------------------------------------------------------------------------- /data/training/k80_spot/4-spot/agg_data.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Duration,Accuracy,Cost 2 | us-east1-c,1,3474.32292,0.913399994,1.124638329 3 | us-east1-c,2,3476.354922,0.917100012,1.125296088 4 | us-east1-c,3,4323.07351,0.921599984,1.195709186 5 | us-east1-c,4,3538.91941,0.925199986,1.145548213 6 | us-east1-c,5,3479.186108,0.913900018,1.126212543 7 | us-east1-c,6,3480.904836,0.921400011,1.126768895 8 | us-east1-c,8,3477.297369,0.924799979,1.125601158 9 | us-central1-c,1,3537.690414,0.907400012,1.145150387 10 | us-central1-c,2,3536.906202,0.908999979,1.144896538 11 | us-central1-c,3,3657.145875,0.923799992,1.176821033 12 | us-central1-c,4,3962.819795,0.908999979,1.204933527 13 | us-central1-c,5,3597.281042,0.916000009,1.164304343 14 | us-central1-c,6,3482.536591,0.917699993,1.127297095 15 | us-central1-c,7,3477.636107,0.921400011,1.125710808 16 | us-central1-c,8,4565.080019,0.897800028,1.198654701 17 | us-west1-b,1,3533.335732,0.882200003,1.143740776 18 | us-west1-b,2,6504.615242,0.904699981,1.298524719 19 | 
us-west1-b,3,3480.394205,0.921700001,1.126603604 20 | us-west1-b,4,3660.545987,0.9278,1.163482276 21 | us-west1-b,5,3479.596072,0.91930002,1.126345249 22 | us-west1-b,6,3535.272348,0.909600019,1.144367659 23 | us-west1-b,7,4263.623173,0.921299994,1.192056908 24 | us-west1-b,8,4564.913513,0.918900013,1.201573734 25 | europe-west1-b,1,3484.360736,0.889299989,1.12788757 26 | europe-west1-b,2,3540.06859,0.874300003,1.145920203 27 | europe-west1-b,3,3841.344155,0.919499993,1.182370012 28 | europe-west1-b,4,3597.070784,0.919499993,1.164371813 29 | europe-west1-b,5,3537.119268,0.909600019,1.144965507 30 | europe-west1-b,6,3542.601776,0.919099987,1.146740195 31 | europe-west1-b,7,3541.249127,0.914900005,1.146302342 32 | europe-west1-b,8,3538.086964,0.89139998,1.14527875 -------------------------------------------------------------------------------- /data/training/k80_spot/4-spot/cost.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Cost 2 | us-east1-c,1,1.124638329204 3 | us-east1-c,2,1.1252960882514 4 | us-east1-c,3,1.1957091860240001 5 | us-east1-c,4,1.145548213017 6 | us-east1-c,5,1.1262125431596 7 | us-east1-c,6,1.1267688954132 8 | us-east1-c,7,1.06002259116 9 | us-east1-c,8,1.1256011583453 10 | us-central1-c,1,1.1451503870118 11 | us-central1-c,2,1.1448965375874 12 | us-central1-c,3,1.1768210328175002 13 | us-central1-c,4,1.2049335269945 14 | us-central1-c,5,1.1643043426734003 15 | us-central1-c,6,1.1272970945067 16 | us-central1-c,7,1.1257108078359 17 | us-central1-c,8,1.1986547012893003 18 | us-west1-b,1,1.1437407764484 19 | us-west1-b,2,1.2985247188064 20 | us-west1-b,3,1.1266036041585001 21 | us-west1-b,4,1.1634822756959 22 | us-west1-b,5,1.1263452485064 23 | us-west1-b,6,1.1443676590476 24 | us-west1-b,7,1.1920569077551002 25 | us-west1-b,8,1.2015737335441001 26 | europe-west1-b,1,1.1278875702432 27 | europe-west1-b,2,1.145920202583 28 | europe-west1-b,3,1.1823700124904999 29 | europe-west1-b,4,1.1643718127808003 
30 | europe-west1-b,5,1.1449655070516 31 | europe-west1-b,6,1.1467401948912002 32 | europe-west1-b,7,1.1463023424099001 33 | europe-west1-b,8,1.1452787502468 34 | -------------------------------------------------------------------------------- /data/training/k80_spot/4-spot/heat-32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/4-spot/heat-32.png -------------------------------------------------------------------------------- /data/training/k80_spot/4-spot/heat-4-spot.graffle/data.plist: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/4-spot/heat-4-spot.graffle/data.plist -------------------------------------------------------------------------------- /data/training/k80_spot/4-spot/heat-4-spot.graffle/image2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/4-spot/heat-4-spot.graffle/image2.png -------------------------------------------------------------------------------- /data/training/k80_spot/4-spot/heat-4-spot.graffle/image3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/4-spot/heat-4-spot.graffle/image3.png -------------------------------------------------------------------------------- /data/training/k80_spot/4-spot/heat-4-spot.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/4-spot/heat-4-spot.png -------------------------------------------------------------------------------- /data/training/k80_spot/4-spot/heat-compact.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/4-spot/heat-compact.png -------------------------------------------------------------------------------- /data/training/k80_spot/4-spot/heat-old.graffle/data.plist: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/4-spot/heat-old.graffle/data.plist -------------------------------------------------------------------------------- /data/training/k80_spot/4-spot/heat-old.graffle/image3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/4-spot/heat-old.graffle/image3.png -------------------------------------------------------------------------------- /data/training/k80_spot/4-spot/heat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/4-spot/heat.png -------------------------------------------------------------------------------- /data/training/k80_spot/4-spot/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Duration,Accuracy 2 | us-east1-c,1,3474.32292,0.913399994 3 | us-east1-c,2,3476.354922,0.917100012 4 | 
us-east1-c,3,4323.07351,0.921599984 5 | us-east1-c,4,3538.91941,0.925199986 6 | us-east1-c,5,3479.186108,0.913900018 7 | us-east1-c,6,3480.904836,0.921400011 8 | us-east1-c,7,3274.7068,0.9192 9 | us-east1-c,8,3477.297369,0.924799979 10 | us-central1-c,1,3537.690414,0.907400012 11 | us-central1-c,2,3536.906202,0.908999979 12 | us-central1-c,3,3657.145875,0.923799992 13 | us-central1-c,4,3962.819795,0.908999979 14 | us-central1-c,5,3597.281042,0.916000009 15 | us-central1-c,6,3482.536591,0.917699993 16 | us-central1-c,7,3477.636107,0.921400011 17 | us-central1-c,8,4565.080019,0.897800028 18 | us-west1-b,1,3533.335732,0.882200003 19 | us-west1-b,2,6504.615242,0.904699981 20 | us-west1-b,3,3480.394205,0.921700001 21 | us-west1-b,4,3660.545987,0.9278 22 | us-west1-b,5,3479.596072,0.91930002 23 | us-west1-b,6,3535.272348,0.909600019 24 | us-west1-b,7,4263.623173,0.921299994 25 | us-west1-b,8,4564.913513,0.918900013 26 | europe-west1-b,1,3484.360736,0.889299989 27 | europe-west1-b,2,3540.06859,0.874300003 28 | europe-west1-b,3,3841.344155,0.919499993 29 | europe-west1-b,4,3597.070784,0.919499993 30 | europe-west1-b,5,3537.119268,0.909600019 31 | europe-west1-b,6,3542.601776,0.919099987 32 | europe-west1-b,7,3541.249127,0.914900005 33 | europe-west1-b,8,3538.086964,0.89139998 -------------------------------------------------------------------------------- /data/training/k80_spot/8-spot/8_spot_cluster_status.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/8-spot/8_spot_cluster_status.db -------------------------------------------------------------------------------- /data/training/k80_spot/8-spot/agg_data.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Duration,Accuracy,Cost 2 | us-east1-c,1,1901,0.884500027,1.141368441 3 | 
us-east1-c,2,1832,0.911199987,1.1133064 4 | us-east1-c,3,1837,0.899100006,1.1163449 5 | us-east1-c,4,1778,0.887399971,1.0804906 6 | us-east1-c,5,1958,0.901000023,1.168183703 7 | us-east1-c,6,1844,0.882300019,1.1205988 8 | us-east1-c,7,1838,0.843400002,1.1169526 9 | us-east1-c,8,2016,0.864099979,1.117555802 10 | us-central1-c,1,1839,0.899900019,1.1175603 11 | us-central1-c,2,1840,0.894400001,1.145034169 12 | us-central1-c,3,1834,0.904600024,1.1145218 13 | us-central1-c,4,1840,0.889900029,1.118168 14 | us-central1-c,5,1837,0.883000016,1.1163449 15 | us-central1-c,6,1777,0.896899998,1.0798829 16 | us-central1-c,7,1778,0.882700026,1.0804906 17 | us-central1-c,8,1779,0.875100017,1.0810983 18 | us-central1-c,9,1835,0.887700021,1.1151295 19 | us-west1-b,1,1901,0.90109998,1.144413652 20 | us-west1-b,2,1773,0.889500022,1.0774521 21 | us-west1-b,3,1779,0.891099989,1.0810983 22 | us-west1-b,4,1840,0.89349997,1.118168 23 | us-west1-b,5,1838,0.905600011,1.1169526 24 | us-west1-b,6,1841,0.882300019,1.1187757 25 | us-west1-b,7,1841,0.868799984,1.1187757 26 | us-west1-b,8,1784,0.870199978,1.0841368 27 | us-west1-b,9,1900,0.889400005,1.15463 28 | europe-west1-b,1,1838,0.874300003,1.1169526 29 | europe-west1-b,2,1836,0.864199996,1.1157372 30 | europe-west1-b,3,1839,0.897800028,1.1175603 31 | europe-west1-b,4,1840,0.909200013,1.118168 32 | europe-west1-b,5,1842,0.884000003,1.1193834 33 | europe-west1-b,6,1841,0.903299987,1.1187757 -------------------------------------------------------------------------------- /data/training/k80_spot/8-spot/cost.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Cost 2 | spot8-us-east1-c,1,1.1413684405779998 3 | spot8-us-east1-c,2,1.1133064 4 | spot8-us-east1-c,3,1.1163449000000003 5 | spot8-us-east1-c,4,1.0804906000000003 6 | spot8-us-east1-c,5,1.168183702898 7 | spot8-us-east1-c,6,1.1205988 8 | spot8-us-east1-c,7,1.1169526 9 | spot8-us-east1-c,8,1.11755580233 10 | spot8-us-central1-c,1,1.1175603 11 | 
spot8-us-central1-c,2,1.145034169179 12 | spot8-us-central1-c,3,1.1145218000000001 13 | spot8-us-central1-c,4,1.118168 14 | spot8-us-central1-c,5,1.1163449000000003 15 | spot8-us-central1-c,6,1.0798829 16 | spot8-us-central1-c,7,1.0804906000000003 17 | spot8-us-central1-c,8,1.0810983 18 | spot8-us-central1-c,9,1.1151295 19 | spot8-us-west1-b,1,1.144413651523 20 | spot8-us-west1-b,2,1.0774521 21 | spot8-us-west1-b,3,1.0810983 22 | spot8-us-west1-b,4,1.118168 23 | spot8-us-west1-b,5,1.1169526 24 | spot8-us-west1-b,6,1.1187757000000003 25 | spot8-us-west1-b,7,1.1187757000000003 26 | spot8-us-west1-b,8,1.0841368 27 | spot8-us-west1-b,9,1.1546300000000003 28 | spot8-europe-west1-b,1,1.1169526 29 | spot8-europe-west1-b,2,1.1157372 30 | spot8-europe-west1-b,3,1.1175603 31 | spot8-europe-west1-b,4,1.118168 32 | spot8-europe-west1-b,5,1.1193833999999998 33 | spot8-europe-west1-b,6,1.1187757000000003 34 | -------------------------------------------------------------------------------- /data/training/k80_spot/8-spot/heat-8-spot.graffle/data.plist: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/8-spot/heat-8-spot.graffle/data.plist -------------------------------------------------------------------------------- /data/training/k80_spot/8-spot/heat-8-spot.graffle/image1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/8-spot/heat-8-spot.graffle/image1.png -------------------------------------------------------------------------------- /data/training/k80_spot/8-spot/heat-8-spot.graffle/image3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/8-spot/heat-8-spot.graffle/image3.png -------------------------------------------------------------------------------- /data/training/k80_spot/8-spot/heat-compact.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/8-spot/heat-compact.png -------------------------------------------------------------------------------- /data/training/k80_spot/8-spot/heat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/k80_spot/8-spot/heat.png -------------------------------------------------------------------------------- /data/training/k80_spot/8-spot/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Duration,Accuracy 2 | us-east1-c,1,1901.0,0.884500027 3 | us-east1-c,2,1832.0,0.911199987 4 | us-east1-c,3,1837.0,0.899100006 5 | us-east1-c,4,1778.0,0.887399971 6 | us-east1-c,5,1958.0,0.901000023 7 | us-east1-c,6,1844.0,0.882300019 8 | us-east1-c,7,1838.0,0.843400002 9 | us-east1-c,8,2016.0,0.864099979 10 | us-central1-c,1,1839.0,0.899900019 11 | us-central1-c,2,1840.0,0.894400001 12 | us-central1-c,3,1834.0,0.904600024 13 | us-central1-c,4,1840.0,0.889900029 14 | us-central1-c,5,1837.0,0.883000016 15 | us-central1-c,6,1777.0,0.896899998 16 | us-central1-c,7,1778.0,0.882700026 17 | us-central1-c,8,1779.0,0.875100017 18 | us-central1-c,9,1835.0,0.887700021 19 | us-west1-b,1,1901.0,0.90109998 20 | us-west1-b,2,1773.0,0.889500022 21 | us-west1-b,3,1779.0,0.891099989 22 | us-west1-b,4,1840.0,0.89349997 23 | us-west1-b,5,1838.0,0.905600011 24 | 
us-west1-b,6,1841.0,0.882300019 25 | us-west1-b,7,1841.0,0.868799984 26 | us-west1-b,8,1784.0,0.870199978 27 | us-west1-b,9,1900.0,0.889400005 28 | europe-west1-b,1,1838.0,0.874300003 29 | europe-west1-b,2,1836.0,0.864199996 30 | europe-west1-b,3,1839.0,0.897800028 31 | europe-west1-b,4,1840.0,0.909200013 32 | europe-west1-b,5,1842.0,0.884000003 33 | europe-west1-b,6,1841.0,0.903299987 34 | -------------------------------------------------------------------------------- /data/training/p100_v100_spot/4p100/agg_data.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,1722.64083,0.908100009,1.1230278 3 | 2,1717.299487,0.911899984,1.11954566 4 | 3,1657.454554,0.919399977,1.080531419 5 | 4,1597.93413,0.923699975,1.041728733 6 | 5,1658.486772,0.915000021,1.081204345 7 | 6,1962.926918,0.907299995,1.279675635 8 | 7,1654.803491,0.912899971,1.078803132 9 | 8,1656.975719,0.906599998,1.080219256 10 | 9,1655.816262,0.908100009,1.07946338 11 | 10,1653.001679,0.921800017,1.077628491 -------------------------------------------------------------------------------- /data/training/p100_v100_spot/4p100/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.1230277997294886 3 | 2,1.1195456596981996 4 | 3,1.0805314191410502 5 | 4,1.0417287334542964 6 | 5,1.0812043449570805 7 | 6,1.2796756348411973 8 | 7,1.0788031323403127 9 | 8,1.080219256216238 10 | 9,1.0794633803232232 11 | 10,1.0776284911406278 12 | -------------------------------------------------------------------------------- /data/training/p100_v100_spot/4p100/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy 2 | 1,1722.640830039978,0.9081000089645386 3 | 2,1717.2994871139526,0.911899983882904 4 | 3,1657.4545538425446,0.9193999767303467 5 | 4,1597.9341299533844,0.9236999750137329 6 | 5,1658.4867718219757,0.9150000214576721 
7 | 6,1962.9269180297852,0.9072999954223633 8 | 7,1654.8034908771515,0.9128999710083008 9 | 8,1656.9757192134857,0.9065999984741211 10 | 9,1655.8162620067596,0.9081000089645386 11 | 10,1653.001678943634,0.9218000173568726 12 | -------------------------------------------------------------------------------- /data/training/p100_v100_spot/4v100/agg_data.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy,Cost 2 | 1,1539.009739,0.905499995,1.533417963 3 | 2,1654.769116,0.899100006,1.648756744 4 | 3,1478.066655,0.91049999,1.472696307 5 | 4,1534.562068,0.906000018,1.528986452 6 | 5,1594.446566,0.921700001,1.588653367 7 | 6,1593.283927,0.918600023,1.587494953 8 | 7,1541.423377,0.913200021,1.535822831 9 | 8,1535.98961,0.920700014,1.530408807 10 | 9,1594.29287,0.894800007,1.58850023 11 | 10,1532.756763,0.913900018,1.527187706 -------------------------------------------------------------------------------- /data/training/p100_v100_spot/4v100/cost.csv: -------------------------------------------------------------------------------- 1 | Run,Cost 2 | 1,1.5334179627346616 3 | 2,1.648756744247351 4 | 3,1.4726963068968082 5 | 4,1.528986451550199 6 | 5,1.5886533674919139 7 | 6,1.5874949528749331 8 | 7,1.5358228309958573 9 | 8,1.530408806747509 10 | 9,1.5885002301024027 11 | 10,1.5271877058690688 12 | -------------------------------------------------------------------------------- /data/training/p100_v100_spot/4v100/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Run,Duration,Accuracy 2 | 1,1539.0097391605377,0.9054999947547913 3 | 2,1654.7691161632538,0.8991000056266785 4 | 3,1478.0666551589966,0.9104999899864197 5 | 4,1534.5620679855347,0.906000018119812 6 | 5,1594.4465658664703,0.9217000007629395 7 | 6,1593.2839269638062,0.9186000227928162 8 | 7,1541.4233770370483,0.9132000207901001 9 | 8,1535.9896099567413,0.9207000136375427 10 | 
9,1594.2928700447083,0.8948000073432922 11 | 10,1532.7567629814148,0.9139000177383423 12 | -------------------------------------------------------------------------------- /data/training/p100_v100_spot/p100/agg_data.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Duration,Accuracy,Cost 2 | us-central1-c,1,5348.122676,0.930899978,0.818559858 3 | us-central1-c,2,5346.979346,0.930800021,0.818384865 4 | us-central1-c,3,5531.979966,0.931599975,0.846700236 5 | us-central1-c,4,5587.225668,0.932500005,0.855155898 6 | us-central1-c,5,5345.576559,0.934199989,0.81817016 7 | us-central1-c,6,5355.348747,0.932799995,0.819665848 8 | us-central1-c,7,5348.840239,0.932699978,0.818669685 9 | us-central1-c,8,5351.24607,0.927900016,0.81903791 10 | us-central1-c,9,5289.144793,0.932200015,0.809532965 11 | us-central1-c,10,5289.825024,0.9278,0.809637078 12 | us-central1-c,11,5285.847296,0.929400027,0.809028265 13 | us-central1-c,12,5287.530916,0.925800025,0.809285952 14 | us-central1-c,13,5352.438563,0.932799995,0.819220428 15 | us-central1-c,14,5289.518009,0.929099977,0.809590088 16 | us-central1-c,15,5591.301632,0.932699978,0.855779746 17 | us-central1-c,16,5527.804317,0.933600008,0.84606113 18 | us-east1-c,1,5896.536504,0.932699978,0.902497638 19 | us-east1-c,2,5713.230647,0.931500018,0.874441659 20 | us-east1-c,3,5659.677171,0.933600008,0.866245002 21 | us-east1-c,4,5408.814053,0.931100011,0.82784901 22 | us-east1-c,6,5709.407584,0.925100029,0.873856518 23 | us-east1-c,7,5346.039407,0.932900012,0.818241002 24 | us-east1-c,8,5344.109885,0.930800021,0.817945678 25 | us-east1-c,9,5345.278229,0.930800021,0.818124499 26 | us-east1-c,10,5347.89714,0.930199981,0.818525338 27 | us-east1-c,11,5409.515,0.927399993,0.827956294 28 | us-east1-c,12,5346.490499,0.928799987,0.818310044 29 | us-east1-c,13,5348.056499,0.92900002,0.818549729 30 | us-east1-c,14,5414.845691,0.933000028,0.828772185 31 | us-east1-c,16,5353.759215,0.931400001,0.819422561 32 
| us-east1-c,17,5354.110921,0.933300018,0.819476392 33 | us-east1-c,18,5352.160353,0.933000028,0.819177847 34 | us-east1-c,19,5287.205358,0.934400022,0.809236124 -------------------------------------------------------------------------------- /data/training/p100_v100_spot/p100/cost.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Cost 2 | us-central1-c,1,0.8185598576266867 3 | us-central1-c,2,0.8183848646463198 4 | us-central1-c,3,0.8467002362736653 5 | us-central1-c,4,0.855155897582739 6 | us-central1-c,5,0.8181701603150724 7 | us-central1-c,6,0.8196658479161916 8 | us-central1-c,7,0.8186696846131325 9 | us-central1-c,8,0.8190379104151292 10 | us-central1-c,9,0.8095329652909026 11 | us-central1-c,10,0.809637078435177 12 | us-central1-c,11,0.8090282651052204 13 | us-central1-c,12,0.8092859524866448 14 | us-central1-c,13,0.8192204280812814 15 | us-central1-c,14,0.8095900880943451 16 | us-central1-c,15,0.8557797464905595 17 | us-central1-c,16,0.846061130030431 18 | us-east1-c,1,0.9024976377194237 19 | us-east1-c,2,0.8744416589302803 20 | us-east1-c,3,0.8662450022286089 21 | us-east1-c,4,0.827849009738617 22 | us-east1-c,6,0.8738565179359369 23 | us-east1-c,7,0.8182410017623313 24 | us-east1-c,8,0.8179456777056436 25 | us-east1-c,9,0.8181244992423431 26 | us-east1-c,10,0.8185253381101206 27 | us-east1-c,11,0.8279562935378149 28 | us-east1-c,12,0.8183100438972228 29 | us-east1-c,13,0.8185497288861874 30 | us-east1-c,14,0.8287721853958779 31 | us-east1-c,16,0.8194225612007331 32 | us-east1-c,17,0.8194763917602845 33 | us-east1-c,18,0.8191778465447326 34 | us-east1-c,19,0.8092361240359855 35 | -------------------------------------------------------------------------------- /data/training/p100_v100_spot/p100/p100.db: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/p100_v100_spot/p100/p100.db -------------------------------------------------------------------------------- /data/training/p100_v100_spot/p100/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Duration,Accuracy 2 | us-central1-c,1,5348.122675895691,0.930899977684021 3 | us-central1-c,2,5346.979346036911,0.9308000206947327 4 | us-central1-c,3,5531.979965925217,0.9315999746322632 5 | us-central1-c,4,5587.225667953491,0.9325000047683716 6 | us-central1-c,5,5345.5765590667725,0.9341999888420105 7 | us-central1-c,6,5355.348747014999,0.9327999949455261 8 | us-central1-c,7,5348.840238809586,0.932699978351593 9 | us-central1-c,8,5351.246069908142,0.9279000163078308 10 | us-central1-c,9,5289.144792795181,0.932200014591217 11 | us-central1-c,10,5289.825023889542,0.9277999997138977 12 | us-central1-c,11,5285.847295999527,0.9294000267982483 13 | us-central1-c,12,5287.530915975571,0.9258000254631042 14 | us-central1-c,13,5352.438562870026,0.9327999949455261 15 | us-central1-c,14,5289.518008947372,0.929099977016449 16 | us-central1-c,15,5591.30163192749,0.932699978351593 17 | us-central1-c,16,5527.804316997528,0.9336000084877014 18 | us-east1-c,1,5896.536504030228,0.932699978351593 19 | us-east1-c,2,5713.230646848679,0.9315000176429749 20 | us-east1-c,3,5659.677170991898,0.9336000084877014 21 | us-east1-c,4,5408.814053058624,0.9311000108718872 22 | us-east1-c,6,5709.40758395195,0.9251000285148621 23 | us-east1-c,7,5346.039407014847,0.9329000115394592 24 | us-east1-c,8,5344.109884977341,0.9308000206947327 25 | us-east1-c,9,5345.278228998184,0.9308000206947327 26 | us-east1-c,10,5347.8971400260925,0.9301999807357788 27 | us-east1-c,11,5409.514999866486,0.9273999929428101 28 | us-east1-c,12,5346.490499019623,0.9287999868392944 29 | us-east1-c,13,5348.056499004364,0.9290000200271606 30 | 
us-east1-c,14,5414.8456909656525,0.9330000281333923 31 | us-east1-c,16,5353.759214878082,0.9314000010490417 32 | us-east1-c,17,5354.110920906067,0.9333000183105469 33 | us-east1-c,18,5352.160353183746,0.9330000281333923 34 | us-east1-c,19,5287.205358028412,0.9344000220298767 35 | -------------------------------------------------------------------------------- /data/training/p100_v100_spot/p100/vm_data.csv: -------------------------------------------------------------------------------- 1 | VM_name,Uptime,Revoked 2 | p100-spot-us-east1-c-run1-master,6173.536555,Job completed 3 | p100-spot-us-east1-c-run2-master,5928.437794,Job completed 4 | p100-spot-us-east1-c-run3-master,5922.384884,Job completed 5 | p100-spot-us-east1-c-run4-master,5742.988335,Job completed 6 | p100-spot-us-east1-c-run5-master,344.39957,Manually stopped due to revoked master 7 | p100-spot-us-east1-c-run6-master,6203.212396,Job completed 8 | p100-spot-us-east1-c-run7-master,5602.157917,Job completed 9 | p100-spot-us-east1-c-run8-master,5560.082029,Job completed 10 | p100-spot-us-east1-c-run9-master,5568.646016,Job completed 11 | p100-spot-us-east1-c-run10-master,5568.606677,Job completed 12 | p100-spot-us-east1-c-run11-master,5635.075945,Job completed 13 | p100-spot-us-east1-c-run12-master,5594.541901,Job completed 14 | p100-spot-us-east1-c-run13-master,5553.835902,Job completed 15 | p100-spot-us-east1-c-run14-master,5631.814656,Job completed 16 | p100-spot-us-east1-c-run15-master,438.412532,Manually stopped due to revoked master 17 | p100-spot-us-east1-c-run16-master,5572.49502,Job completed 18 | p100-spot-us-east1-c-run17-master,5581.436841,Job completed 19 | p100-spot-us-east1-c-run18-master,5591.23522,Job completed 20 | p100-spot-us-east1-c-run19-master,5537.220556,Job completed 21 | p100-spot-us-central1-c-run1-master,5712.070891,Job completed 22 | p100-spot-us-central1-c-run2-master,5581.578625,Job completed 23 | p100-spot-us-central1-c-run3-master,5787.524081,Job completed 24 | 
p100-spot-us-central1-c-run4-master,5804.951357,Job completed 25 | p100-spot-us-central1-c-run5-master,5638.036576,Job completed 26 | p100-spot-us-central1-c-run6-master,5603.806082,Job completed 27 | p100-spot-us-central1-c-run7-master,5655.42068,Job completed 28 | p100-spot-us-central1-c-run8-master,5637.284288,Job completed 29 | p100-spot-us-central1-c-run9-master,5532.381852,Job completed 30 | p100-spot-us-central1-c-run10-master,5520.08716,Job completed 31 | p100-spot-us-central1-c-run11-master,5550.800768,Job completed 32 | p100-spot-us-central1-c-run12-master,5539.096777,Job completed 33 | p100-spot-us-central1-c-run13-master,5592.953425,Job completed 34 | p100-spot-us-central1-c-run14-master,5946.060985,Job completed 35 | p100-spot-us-central1-c-run15-master,5835.474572,Job completed 36 | p100-spot-us-central1-c-run16-master,6251.074336,Job completed -------------------------------------------------------------------------------- /data/training/p100_v100_spot/v100/agg_data.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Duration,Accuracy,Cost 2 | us-central1-a,1,4378.274335,0.9296,1.047137249 3 | us-central1-a,4,4442.889963,0.923200011,1.062591153 4 | us-central1-a,7,4503.15611,0.933899999,1.077004806 5 | us-central1-a,8,4620.309343,0.932099998,1.105023954 6 | us-central1-a,9,4502.35519,0.924799979,1.076813253 7 | us-central1-a,10,4441.893455,0.924300015,1.062352822 8 | us-central1-a,12,4379.107417,0.930199981,1.047336495 9 | us-central1-a,13,4445.139259,0.922800004,1.06312911 10 | us-central1-a,15,4502.619657,0.936200023,1.076876505 11 | us-central1-a,16,4746.053093,0.931999981,1.135097666 12 | us-central1-a,17,4864.316209,0.933700025,1.163382261 13 | us-west1-b,3,4321.635752,0.928799987,1.033591189 14 | us-west1-b,4,4382.811859,0.934599996,1.048222474 15 | us-west1-b,5,4321.972189,0.926299989,1.033671653 16 | us-west1-b,7,4377.486932,0.930299997,1.046948929 17 | 
us-west1-b,9,4438.534057,0.931299984,1.061549366 18 | us-west1-b,13,4260.289748,0.932399988,1.01891927 19 | us-west1-b,15,4380.513059,0.923799992,1.047672677 20 | us-west1-b,17,4378.18076,0.930599988,1.047114869 21 | us-west1-b,18,4318.157651,0.930899978,1.032759343 22 | us-west1-b,20,4379.181527,0.929199994,1.047354219 23 | us-west1-b,24,4379.784009,0.935199976,1.047498313 24 | us-west1-b,25,4319.25409,0.92869997,1.033021574 25 | us-west1-b,26,4384.580057,0.930800021,1.048645368 -------------------------------------------------------------------------------- /data/training/p100_v100_spot/v100/cost.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Cost 2 | us-central1-a,1,1.0471372493005775 3 | us-central1-a,4,1.0625911532341146 4 | us-central1-a,7,1.0770048062988429 5 | us-central1-a,8,1.1050239536989022 6 | us-central1-a,9,1.0768132529352166 7 | us-central1-a,10,1.0623528217150346 8 | us-central1-a,12,1.0473364946735975 9 | us-central1-a,13,1.0631291098338123 10 | us-central1-a,15,1.076876504567495 11 | us-central1-a,16,1.135097666425086 12 | us-central1-a,17,1.1633822609090076 13 | us-west1-b,3,1.0335911885334983 14 | us-west1-b,4,1.0482224736996963 15 | us-west1-b,5,1.0336716530439611 16 | us-west1-b,7,1.046948928729477 17 | us-west1-b,9,1.061549365685485 18 | us-west1-b,13,1.018919269707282 19 | us-west1-b,15,1.0476726773836955 20 | us-west1-b,17,1.0471148692231638 21 | us-west1-b,18,1.032759342787598 22 | us-west1-b,20,1.0473542193792367 23 | us-west1-b,24,1.0474983129491082 24 | us-west1-b,25,1.0330215744135545 25 | us-west1-b,26,1.0486453677697791 26 | -------------------------------------------------------------------------------- /data/training/p100_v100_spot/v100/train_data_processed.csv: -------------------------------------------------------------------------------- 1 | Zone,Run,Duration,Accuracy 2 | us-central1-a,1,4378.27433514595,0.9296000003814697 3 | 
us-central1-a,4,4442.889963150024,0.9232000112533569 4 | us-central1-a,7,4503.156110048294,0.933899998664856 5 | us-central1-a,8,4620.3093428611755,0.9320999979972839 6 | us-central1-a,9,4502.355190038681,0.9247999787330627 7 | us-central1-a,10,4441.893455028534,0.9243000149726868 8 | us-central1-a,12,4379.10741686821,0.9301999807357788 9 | us-central1-a,13,4445.13925909996,0.9228000044822693 10 | us-central1-a,15,4502.619656801224,0.9362000226974487 11 | us-central1-a,16,4746.053092956543,0.9319999814033508 12 | us-central1-a,17,4864.316209077835,0.9337000250816345 13 | us-west1-b,3,4321.635751962662,0.9287999868392944 14 | us-west1-b,4,4382.811858892441,0.9345999956130981 15 | us-west1-b,5,4321.972188949585,0.9262999892234802 16 | us-west1-b,7,4377.486932039261,0.9302999973297119 17 | us-west1-b,9,4438.534056901932,0.9312999844551086 18 | us-west1-b,13,4260.2897481918335,0.9323999881744385 19 | us-west1-b,15,4380.513058900833,0.923799991607666 20 | us-west1-b,17,4378.180759906769,0.9305999875068665 21 | us-west1-b,18,4318.157651185989,0.930899977684021 22 | us-west1-b,20,4379.181527137756,0.9291999936103821 23 | us-west1-b,24,4379.784008979797,0.9351999759674072 24 | us-west1-b,25,4319.2540900707245,0.9286999702453613 25 | us-west1-b,26,4384.580057144165,0.9308000206947327 26 | -------------------------------------------------------------------------------- /data/training/p100_v100_spot/v100/v100.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cake-lab/transient-deep-learning/87c6717e4026801623cf0327e78ad57f51cb1461/data/training/p100_v100_spot/v100/v100.db --------------------------------------------------------------------------------