├── .gitignore ├── LICENSE ├── README.md ├── benchmarks ├── ResNet-50_v1.5_Performance_Comparison_TensorFlow_1.12_GCP.md └── ShapeMask_Performance_Comparison_TensorFlow_1.14_GCP.md ├── configs └── spinenet │ └── sn143-imat-v2.yaml ├── requiremets-spotty.txt ├── setup.py ├── spotty.yaml ├── tf_tpu_models ├── __init__.py ├── common │ ├── __init__.py │ ├── imagenet.py │ ├── inference_warmup.py │ └── tpu_profiler_hook.py ├── experimental │ ├── cifar_keras │ │ ├── README.md │ │ └── cifar_keras.py │ ├── dcgan │ │ ├── README.md │ │ ├── cifar_input.py │ │ ├── cifar_model.py │ │ ├── dcgan_main.py │ │ ├── mnist_input.py │ │ └── mnist_model.py │ ├── deeplab │ │ ├── README.md │ │ ├── data_pipeline.py │ │ ├── main.py │ │ └── model.py │ ├── densenet_keras │ │ ├── README.md │ │ ├── densenet_keras_imagenet.py │ │ ├── densenet_keras_model.py │ │ └── vgg_preprocessing.py │ ├── distribution_strategy │ │ ├── imagenet_input.py │ │ ├── resnet_estimator.py │ │ ├── resnet_model.py │ │ └── resnet_preprocessing.py │ ├── embedding │ │ ├── README.md │ │ ├── create_data.py │ │ └── model.py │ ├── inception │ │ ├── imagenet.py │ │ ├── inception_preprocessing.py │ │ ├── inception_v2.py │ │ ├── inception_v2_tpu_model.py │ │ ├── inception_v3.py │ │ ├── inception_v3_k8s.yaml │ │ ├── inception_v3_old.py │ │ ├── inception_v4.py │ │ ├── inception_v4_model.py │ │ └── vgg_preprocessing.py │ ├── inference │ │ ├── api_config.yaml │ │ ├── api_descriptor.pb │ │ ├── load_test_client.py │ │ ├── object_detection.ipynb │ │ ├── openapi.yaml │ │ └── setup-pool.sh │ ├── keras_application │ │ └── application_model.py │ ├── keras_colab │ │ ├── README.md │ │ └── shakespeare_lstm.py │ ├── mnist_jupyter │ │ ├── Cloud-TPU-Demo.ipynb │ │ └── launch.sh │ ├── mnist_keras │ │ ├── mnist.py │ │ └── mnist_tf2_with_summary.py │ ├── ncf │ │ ├── README.md │ │ └── ncf_main.py │ ├── qanet │ │ ├── README.md │ │ ├── data.py │ │ ├── model.py │ │ ├── preprocess.py │ │ ├── run.py │ │ ├── run_lib.py │ │ ├── testdata │ │ │ ├── 
train-v1.1.json │ │ │ ├── train_0000 │ │ │ └── vocab.vec │ │ └── utils.py │ ├── resnet50_keras │ │ ├── README.md │ │ ├── imagenet_input.py │ │ ├── model_saving_utils.py │ │ ├── resnet50.py │ │ ├── resnet50_ctl_tf1.py │ │ ├── resnet50_ctl_tf2.py │ │ ├── resnet50_test.py │ │ ├── resnet50_tf2.py │ │ ├── resnet_model.py │ │ └── resnet_preprocessing.py │ └── show_and_tell │ │ ├── README │ │ ├── configuration.py │ │ ├── image_embedding.py │ │ ├── image_processing.py │ │ ├── inputs.py │ │ ├── show_and_tell_model.py │ │ └── train.py ├── hyperparameters │ ├── __init__.py │ ├── common_hparams_flags.py │ ├── common_tpu_flags.py │ ├── flags_to_params.py │ └── params_dict.py ├── official │ ├── __init__.py │ ├── amoeba_net │ │ ├── README.md │ │ ├── amoeba_net.py │ │ ├── amoeba_net_k8s.yaml │ │ ├── amoeba_net_model.py │ │ ├── inception_preprocessing.py │ │ ├── model_builder.py │ │ ├── model_specs.py │ │ ├── network_utils.py │ │ ├── network_utils_test.py │ │ └── tf_hub.py │ ├── bert │ │ └── README.md │ ├── densenet │ │ ├── README.md │ │ ├── densenet_imagenet.py │ │ ├── densenet_model.py │ │ └── vgg_preprocessing.py │ ├── detection │ │ ├── GETTING_STARTED.md │ │ ├── MODEL_ZOO.md │ │ ├── README.md │ │ ├── configs │ │ │ ├── __init__.py │ │ │ ├── base_config.py │ │ │ ├── classification_config.py │ │ │ ├── detection_config.py │ │ │ ├── factory.py │ │ │ ├── maskrcnn_config.py │ │ │ ├── retinanet_config.py │ │ │ ├── shapemask_config.py │ │ │ ├── spinenet │ │ │ │ ├── spinenet-mbconv49_retinanet.yaml │ │ │ │ ├── spinenet143_classification.yaml │ │ │ │ ├── spinenet143_mrcnn.yaml │ │ │ │ ├── spinenet143_retinanet.yaml │ │ │ │ ├── spinenet190_classification.yaml │ │ │ │ ├── spinenet190_retinanet.yaml │ │ │ │ ├── spinenet49S_retinanet.yaml │ │ │ │ ├── spinenet49_classification.yaml │ │ │ │ ├── spinenet49_mrcnn.yaml │ │ │ │ ├── spinenet49_retinanet.yaml │ │ │ │ ├── spinenet96_classification.yaml │ │ │ │ ├── spinenet96_mrcnn.yaml │ │ │ │ └── spinenet96_retinanet.yaml │ │ │ └── yaml │ │ │ │ ├── 
retinanet_autoaugment.yaml │ │ │ │ └── retinanet_nasfpn.yaml │ │ ├── dataloader │ │ │ ├── __init__.py │ │ │ ├── anchor.py │ │ │ ├── classification_parser.py │ │ │ ├── factory.py │ │ │ ├── input_reader.py │ │ │ ├── maskrcnn_parser.py │ │ │ ├── mode_keys.py │ │ │ ├── retinanet_parser.py │ │ │ ├── shapemask_parser.py │ │ │ └── tf_example_decoder.py │ │ ├── datasets │ │ │ └── coco_label_map.csv │ │ ├── evaluate_model.py │ │ ├── evaluation │ │ │ ├── __init__.py │ │ │ ├── attributes.py │ │ │ ├── coco_evaluator.py │ │ │ ├── coco_utils.py │ │ │ ├── cocoeval.py │ │ │ ├── factory.py │ │ │ └── submission.py │ │ ├── executor │ │ │ ├── __init__.py │ │ │ └── tpu_executor.py │ │ ├── export_inference_graph.py │ │ ├── export_saved_model.py │ │ ├── export_tflite_model.py │ │ ├── inference.py │ │ ├── k8s │ │ │ └── retinanet_k8s.yaml │ │ ├── main.py │ │ ├── modeling │ │ │ ├── __init__.py │ │ │ ├── architecture │ │ │ │ ├── __init__.py │ │ │ │ ├── factory.py │ │ │ │ ├── fpn.py │ │ │ │ ├── heads.py │ │ │ │ ├── identity.py │ │ │ │ ├── nasfpn.py │ │ │ │ ├── nn_blocks.py │ │ │ │ ├── nn_ops.py │ │ │ │ ├── resnet.py │ │ │ │ ├── spinenet.py │ │ │ │ └── spinenet_mbconv.py │ │ │ ├── base_model.py │ │ │ ├── classification_model.py │ │ │ ├── factory.py │ │ │ ├── learning_rates.py │ │ │ ├── losses.py │ │ │ ├── maskrcnn_model.py │ │ │ ├── model_builder.py │ │ │ ├── retinanet_model.py │ │ │ └── shapemask_model.py │ │ ├── ops │ │ │ ├── __init__.py │ │ │ ├── nms.py │ │ │ ├── postprocess_ops.py │ │ │ ├── roi_ops.py │ │ │ ├── sampling_ops.py │ │ │ └── spatial_transform_ops.py │ │ ├── serving │ │ │ ├── __init__.py │ │ │ ├── detection.py │ │ │ └── inputs.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── autoaugment_utils.py │ │ │ ├── benchmark_utils.py │ │ │ ├── box_utils.py │ │ │ ├── class_utils.py │ │ │ ├── config_utils.py │ │ │ ├── dataloader_utils.py │ │ │ ├── imat2020 │ │ │ ├── __init__.py │ │ │ └── mask.py │ │ │ ├── input_utils.py │ │ │ ├── mask_utils.py │ │ │ ├── object_detection │ │ │ ├── 
__init__.py │ │ │ ├── argmax_matcher.py │ │ │ ├── balanced_positive_negative_sampler.py │ │ │ ├── box_coder.py │ │ │ ├── box_list.py │ │ │ ├── box_list_ops.py │ │ │ ├── faster_rcnn_box_coder.py │ │ │ ├── matcher.py │ │ │ ├── minibatch_sampler.py │ │ │ ├── ops.py │ │ │ ├── preprocessor.py │ │ │ ├── region_similarity_calculator.py │ │ │ ├── shape_utils.py │ │ │ ├── target_assigner.py │ │ │ └── visualization_utils.py │ │ │ ├── paths.py │ │ │ └── scripts │ │ │ ├── __init__.py │ │ │ ├── generate_coco_file_images.py │ │ │ └── test_augmentations.py │ ├── efficientnet │ │ ├── README.md │ │ ├── __init__.py │ │ ├── autoaugment.py │ │ ├── condconv │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── condconv_layers.py │ │ │ └── efficientnet_condconv_builder.py │ │ ├── edgetpu │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ └── efficientnet_edgetpu_builder.py │ │ ├── efficientnet_builder.py │ │ ├── efficientnet_model.py │ │ ├── eval_ckpt_example.ipynb │ │ ├── eval_ckpt_main.py │ │ ├── export_model.py │ │ ├── g3doc │ │ │ ├── condconv-layer.png │ │ │ ├── efficientnet-edgetpu.png │ │ │ ├── flops.png │ │ │ ├── lite-float-gpu.png │ │ │ ├── lite-quant-cpu.png │ │ │ ├── lite-quant-size.png │ │ │ └── params.png │ │ ├── imagenet_input.py │ │ ├── inspect_model_architecture.py │ │ ├── lite │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── efficientnet_lite_builder.py │ │ │ └── efficientnet_lite_builder_test.py │ │ ├── main.py │ │ ├── model_builder_factory.py │ │ ├── preprocessing.py │ │ ├── tpu │ │ │ ├── __init__.py │ │ │ └── efficientnet_tpu_builder.py │ │ └── utils.py │ ├── mask_rcnn │ │ ├── README.md │ │ ├── __init__.py │ │ ├── anchors.py │ │ ├── box_utils.py │ │ ├── coco_metric.py │ │ ├── coco_utils.py │ │ ├── configs │ │ │ ├── __init__.py │ │ │ ├── cloud │ │ │ │ ├── v2-128.yaml │ │ │ │ ├── v2-32.yaml │ │ │ │ ├── v2-8.yaml │ │ │ │ ├── v3-128.yaml │ │ │ │ ├── v3-32.yaml │ │ │ │ └── v3-8.yaml │ │ │ └── mask_rcnn_config.py │ │ ├── dataloader.py │ │ ├── distributed_executer.py │ │ ├── 
evaluation.py │ │ ├── export_saved_model.py │ │ ├── fpn.py │ │ ├── heads.py │ │ ├── learning_rates.py │ │ ├── losses.py │ │ ├── mask_rcnn_demo.ipynb │ │ ├── mask_rcnn_k8s.yaml │ │ ├── mask_rcnn_main.py │ │ ├── mask_rcnn_model.py │ │ ├── nn_ops.py │ │ ├── object_detection │ │ │ ├── __init__.py │ │ │ ├── argmax_matcher.py │ │ │ ├── balanced_positive_negative_sampler.py │ │ │ ├── box_coder.py │ │ │ ├── box_list.py │ │ │ ├── faster_rcnn_box_coder.py │ │ │ ├── matcher.py │ │ │ ├── minibatch_sampler.py │ │ │ ├── ops.py │ │ │ ├── preprocessor.py │ │ │ ├── region_similarity_calculator.py │ │ │ ├── shape_utils.py │ │ │ ├── target_assigner.py │ │ │ ├── tf_example_decoder.py │ │ │ └── visualization_utils.py │ │ ├── postprocess_ops.py │ │ ├── preprocess_ops.py │ │ ├── resnet.py │ │ ├── roi_ops.py │ │ ├── serving.py │ │ ├── spatial_transform_ops.py │ │ ├── submission.py │ │ ├── tpu_normalization.py │ │ └── training_ops.py │ ├── mnasnet │ │ ├── README.md │ │ ├── configs │ │ │ ├── __init__.py │ │ │ ├── cloud │ │ │ │ ├── gpu.yaml │ │ │ │ ├── v2-32.yaml │ │ │ │ ├── v2-8.yaml │ │ │ │ ├── v3-32.yaml │ │ │ │ └── v3-8.yaml │ │ │ └── mnasnet_config.py │ │ ├── eval_ckpt_main.py │ │ ├── g3doc │ │ │ ├── mnasnet_vs_mobilenetv2.png │ │ │ └── mnasnet_vs_mobilenetv2_2.png │ │ ├── imagenet_input.py │ │ ├── mixnet │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── custom_layers.py │ │ │ ├── g3doc │ │ │ │ └── mixnet-flops.png │ │ │ ├── mixnet_builder.py │ │ │ ├── mixnet_eval_example.ipynb │ │ │ └── mixnet_model.py │ │ ├── mnas_utils.py │ │ ├── mnasnet_example.ipynb │ │ ├── mnasnet_main.py │ │ ├── mnasnet_model.py │ │ ├── mnasnet_models.py │ │ ├── post_quantization.py │ │ └── preprocessing.py │ ├── mnist │ │ ├── README.md │ │ └── mnist_tpu.py │ ├── mobilenet │ │ ├── README.md │ │ ├── configs │ │ │ ├── __init__.py │ │ │ ├── cloud │ │ │ │ ├── v2-128.yaml │ │ │ │ ├── v2-256.yaml │ │ │ │ ├── v2-32.yaml │ │ │ │ ├── v2-512.yaml │ │ │ │ ├── v2-8.yaml │ │ │ │ ├── v3-1024.yaml │ │ │ │ ├── v3-128.yaml │ │ 
│ │ ├── v3-2048.yaml │ │ │ │ ├── v3-256.yaml │ │ │ │ ├── v3-32.yaml │ │ │ │ ├── v3-512.yaml │ │ │ │ ├── v3-64.yaml │ │ │ │ └── v3-8.yaml │ │ │ └── mobilenet_config.py │ │ ├── inception_preprocessing.py │ │ ├── mobilenet.py │ │ ├── mobilenet_model.py │ │ ├── supervised_images.py │ │ └── vgg_preprocessing.py │ ├── resnet │ │ ├── README.md │ │ ├── __init__.py │ │ ├── benchmark │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── read_training_time.py │ │ │ └── resnet_benchmark.py │ │ ├── configs │ │ │ ├── __init__.py │ │ │ ├── cloud │ │ │ │ ├── randaugment-32.yaml │ │ │ │ ├── randaugment-8.yaml │ │ │ │ ├── v2-128.yaml │ │ │ │ ├── v2-256.yaml │ │ │ │ ├── v2-32.yaml │ │ │ │ ├── v2-512.yaml │ │ │ │ ├── v2-8.yaml │ │ │ │ ├── v3-1024.yaml │ │ │ │ ├── v3-128.yaml │ │ │ │ ├── v3-2048.yaml │ │ │ │ ├── v3-256.yaml │ │ │ │ ├── v3-32.yaml │ │ │ │ ├── v3-512.yaml │ │ │ │ ├── v3-64.yaml │ │ │ │ └── v3-8.yaml │ │ │ └── resnet_config.py │ │ ├── imagenet_input.py │ │ ├── lars_util.py │ │ ├── resnet_k8s.yaml │ │ ├── resnet_main.py │ │ ├── resnet_model.py │ │ ├── resnet_model_test.py │ │ └── resnet_preprocessing.py │ ├── retinanet │ │ ├── README.md │ │ ├── anchors.py │ │ ├── coco_metric.py │ │ ├── dataloader.py │ │ ├── evaluation.py │ │ ├── object_detection │ │ │ ├── __init__.py │ │ │ ├── argmax_matcher.py │ │ │ ├── box_coder.py │ │ │ ├── box_list.py │ │ │ ├── faster_rcnn_box_coder.py │ │ │ ├── matcher.py │ │ │ ├── preprocessor.py │ │ │ ├── region_similarity_calculator.py │ │ │ ├── shape_utils.py │ │ │ ├── target_assigner.py │ │ │ └── tf_example_decoder.py │ │ ├── postprocess.py │ │ ├── retinanet_architecture.py │ │ ├── retinanet_k8s.yaml │ │ ├── retinanet_main.py │ │ ├── retinanet_model.py │ │ ├── retinanet_segmentation_main.py │ │ ├── retinanet_segmentation_model.py │ │ └── retinanet_tensorrt.py │ ├── squeezenet │ │ ├── configs │ │ │ ├── __init__.py │ │ │ └── squeezenet_config.py │ │ ├── data_pipeline.py │ │ ├── squeezenet_main.py │ │ └── squeezenet_model.py │ ├── transformer │ │ └── 
README.md │ └── unet3d │ │ ├── README.md │ │ ├── configs │ │ └── cloud │ │ │ ├── v3-128_256x256x256_ce.yaml │ │ │ ├── v3-128_256x256x256_dice.yaml │ │ │ ├── v3-32_256x256x256_ce.yaml │ │ │ ├── v3-32_256x256x256_dice.yaml │ │ │ ├── v3-8_128x128x128_ce.yaml │ │ │ └── v3-8_128x128x128_dice.yaml │ │ ├── data_preprocess │ │ ├── convert_lits.py │ │ ├── convert_lits_nii_to_npy.py │ │ └── generate_fake_lits.py │ │ ├── export_saved_model.py │ │ ├── input_reader.py │ │ ├── metrics.py │ │ ├── requirements.txt │ │ ├── saved_model_inference.py │ │ ├── tpu_executor.py │ │ ├── unet_config.py │ │ ├── unet_main.py │ │ └── unet_model.py └── samples │ └── core │ └── get_started │ ├── custom_tpuestimator.py │ └── iris_data_tpu.py └── tools ├── colab ├── BUILD ├── autoencoder_embeddings.ipynb ├── bert_finetuning_with_cloud_tpus.ipynb ├── classification_iris_data_with_keras.ipynb ├── classification_iris_data_with_tpuestimator.ipynb ├── custom_training.ipynb ├── fashion_mnist.ipynb ├── image_classification_converter.ipynb ├── keras_mnist_tpu.ipynb ├── mnist_estimator.ipynb ├── mnist_tpuestimator.ipynb ├── regression_sine_data_with_keras.ipynb ├── shakespeare_with_tpu_and_keras.ipynb ├── shakespeare_with_tpuestimator.ipynb ├── shapemask_instance_segmentation.ipynb └── tfgan_on_tpus.ipynb ├── ctpu ├── .gitignore ├── README.md ├── commands │ ├── auth.go │ ├── auth_test.go │ ├── common.go │ ├── common_test.go │ ├── config_cmd.go │ ├── delete.go │ ├── delete_test.go │ ├── list.go │ ├── list_test.go │ ├── pause.go │ ├── pause_test.go │ ├── quota.go │ ├── restart.go │ ├── restart_test.go │ ├── status.go │ ├── status_test.go │ ├── tf_versions.go │ ├── tpu_locations.go │ ├── tpu_locations_test.go │ ├── tpu_size.go │ ├── tpu_size_test.go │ ├── up.go │ ├── up_test.go │ └── version.go ├── config │ ├── config.go │ ├── config_gce.go │ ├── config_gcloud.go │ ├── config_gcloud_test.go │ ├── config_test.go │ ├── devshell.go │ ├── devshell_test.go │ └── testdata │ │ └── gcloud │ │ ├── clean │ │ ├── 
README.md │ │ ├── active_config │ │ ├── application_default_credentials.json │ │ └── configurations │ │ │ └── config_ctpu9 │ │ ├── corrupted │ │ ├── README.md │ │ ├── active_config │ │ ├── application_default_credentials.json │ │ └── configurations │ │ │ └── config_default │ │ ├── corrupted2 │ │ ├── README.md │ │ ├── active_config │ │ └── application_default_credentials.json │ │ ├── incomplete │ │ ├── README.md │ │ ├── active_config │ │ ├── application_default_credentials.json │ │ └── configurations │ │ │ └── config_ctpu9 │ │ ├── no_app_creds │ │ ├── README.md │ │ ├── active_config │ │ └── configurations │ │ │ └── config_ctpu9 │ │ └── no_config │ │ └── README.md ├── ctrl │ ├── ctrl.go │ ├── ctrl_test.go │ ├── gce.go │ ├── gce_test.go │ ├── gcloud_cli.go │ ├── gcloud_cli_test.go │ ├── resourcemgmt.go │ ├── resourcemgmt_test.go │ ├── servicemgmt.go │ ├── tpu.go │ └── tpu_test.go ├── main.go └── tutorial.md ├── data_converter ├── README.md ├── __init__.py ├── image_classification │ ├── __init__.py │ ├── image_classification_data.py │ └── simple_example.py ├── image_utils.py └── object_detection │ ├── __init__.py │ ├── bbox_utils.py │ ├── object_detection_data.py │ └── simple_example.py ├── dataset_profiler ├── __init__.py └── profile_imagenet.py ├── datasets ├── README.md ├── create_coco_tf_record.py ├── download_and_preprocess_coco.sh ├── download_and_preprocess_coco_k8s.yaml ├── imagenet_to_gcs.py ├── imagenet_to_gcs_k8s.yaml └── jpeg_to_tf_record.py ├── diagnostics └── diagnostics.py ├── docker ├── Dockerfile └── Dockerfile.util └── kubernetes ├── tensorboard_k8s.yaml └── tpu_profiler_k8s.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .ipynb_checkpoints 3 | node_modules 4 | /.bazelrc 5 | /.tf_configure.bazelrc 6 | /bazel-* 7 | /bazel_pip 8 | /third_party/eigen3/mkl_include 9 | /third_party/mkl/* 10 | /tools/python_bin_path.sh 11 | /tools/git/gen 12 | /pip_test 13 | /_python_build 14 | *.pyc 15 | 
__pycache__ 16 | *.swp 17 | .vscode/ 18 | .idea/ 19 | *.egg-info/ 20 | configs/ 21 | data/ 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # iMaterialist 2020 - 1st Place Solution 2 | 3 | ![](https://www.googleapis.com/download/storage/v1/b/kaggle-user-content/o/inbox%2F853978%2F378129d2e9afb90bbd1f320858b9b73b%2Fimat2020_2.jpg?generation=1591441185889910&alt=media) 4 | 5 | __Model architecture:__ 6 | 7 | - Mask R-CNN model 8 | - SpineNet-143 + FPN backbone 9 | - An extra head to classify attributes 10 | 11 | __Training:__ 12 | 13 | - Pre-trained on the COCO dataset 14 | - Image resolution: 1280x1280 15 | - Focal loss for the attributes head 16 | - Augmentations: random scaling (0.5x - 2.0x), v3 policy from the AutoAugment (modified to support masks) 17 | 18 | All the changes were made on top of the [TPU Object Detection and Segmentation Framework](https://github.com/tensorflow/tpu/tree/master/models/official/detection). 19 | 20 | Read more about the solution in the [Kaggle post](https://www.kaggle.com/c/imaterialist-fashion-2020-fgvc7/discussion/154306). 21 | 22 | Download the model weights [here](https://drive.google.com/file/d/1bdC-LVj5_rJFfSiWWpeyQwKXYOLR-oNb/view?usp=sharing). 23 | 24 | --- 25 | 26 | # Cloud TPUs # 27 | 28 | This repository is a collection of reference models and tools used with 29 | [Cloud TPUs](https://cloud.google.com/tpu/). 30 | 31 | The fastest way to get started training a model on a Cloud TPU is by following 32 | the tutorial. Click the button below to launch the tutorial using Google Cloud 33 | Shell. 
34 | 35 | [![Open in Cloud Shell](http://gstatic.com/cloudssh/images/open-btn.svg)](https://console.cloud.google.com/cloudshell/open?git_repo=https%3A%2F%2Fgithub.com%2Ftensorflow%2Ftpu&page=shell&tutorial=tools%2Fctpu%2Ftutorial.md) 36 | 37 | _Note:_ This repository is a public mirror, pull requests will not be accepted. 38 | Please file an issue if you have a feature or bug request. 39 | 40 | ## Running Models 41 | 42 | To run models in the `models` subdirectory, you may need to add the top-level 43 | `/models` folder to the Python path with the command: 44 | 45 | ``` 46 | export PYTHONPATH="$PYTHONPATH:/path/to/models" 47 | ``` 48 | -------------------------------------------------------------------------------- /configs/spinenet/sn143-imat-v2.yaml: -------------------------------------------------------------------------------- 1 | architecture: 2 | backbone: 'spinenet' 3 | min_level: 3 4 | max_level: 7 5 | multilevel_features: 'identity' 6 | parser: 'maskrcnn_parser' 7 | num_classes: 47 8 | num_attributes: 294 9 | train: 10 | total_steps: 200000 11 | train_batch_size: 64 12 | learning_rate: 13 | warmup_steps: 2000 14 | init_learning_rate: 0.07 15 | learning_rate_levels: [0.007, 0.0007] 16 | learning_rate_steps: [60000, 75000] 17 | l2_weight_decay: 0.00004 18 | train_file_pattern: gs://kaggle-imaterialist2020-data-europe-west4/datasets/imaterialist2020/tfrecords/train-* 19 | checkpoint: 20 | path: gs://kaggle-imaterialist2020-data-europe-west4/models/spinenet-143-fixed/model.ckpt-164150 21 | skip_variables_regex: '^(global_step).*$' 22 | keep_checkpoint_max: 10 23 | save_checkpoints_secs: 1800 24 | eval: 25 | type: box_mask_and_attributes_no_rescale 26 | val_json_file: gs://kaggle-imaterialist2020-data-europe-west4/datasets/imaterialist2020/split/evaluation/valid_coco_1024.json 27 | eval_file_pattern: gs://kaggle-imaterialist2020-data-europe-west4/datasets/imaterialist2020/tfrecords/valid-* 28 | test_file_pattern: 
gs://kaggle-imaterialist2020-data-europe-west4/datasets/imaterialist2020/tfrecords/test-* 29 | per_category_metrics: true 30 | eval_batch_size: 16 31 | eval_samples: 5691 32 | # score_threshold: 0.5 33 | anchor: 34 | anchor_size: 3.0 35 | batch_norm_activation: 36 | batch_norm_epsilon: 0.001 37 | batch_norm_momentum: 0.99 38 | use_sync_bn: true 39 | spinenet: 40 | model_id: '143' 41 | maskrcnn_parser: 42 | output_size: [1280, 1280] 43 | aug_scale_min: 0.5 44 | aug_scale_max: 2.0 45 | use_autoaugment: true 46 | autoaugment_policy_name: 'v3' 47 | apply_autoaugment_after_resizing: true 48 | rpn_head: 49 | use_batch_norm: true 50 | frcnn_head: 51 | num_convs: 4 52 | num_fcs: 1 53 | use_batch_norm: true 54 | mrcnn_head: 55 | use_batch_norm: true 56 | attributes_loss: 57 | type: focal 58 | focal_loss_alpha: 0.25 59 | focal_loss_gamma: 2.0 60 | focal_loss_weight: 50.0 61 | enable_summary: true 62 | -------------------------------------------------------------------------------- /requiremets-spotty.txt: -------------------------------------------------------------------------------- 1 | Cython 2 | matplotlib 3 | opencv-python-headless 4 | pyyaml 5 | Pillow 6 | git+https://github.com/cocodataset/cocoapi#egg=pycocotools&subdirectory=PythonAPI 7 | scikit-learn 8 | pandas 9 | tqdm 10 | 11 | # TPU 12 | google-api-python-client 13 | oauth2client 14 | gcsfs 15 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Cloud TPU samples.""" 16 | 17 | from setuptools import find_packages 18 | from setuptools import setup 19 | 20 | setup( 21 | name='tf_tpu_models', 22 | packages=find_packages() 23 | ) 24 | -------------------------------------------------------------------------------- /tf_tpu_models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apls777/kaggle-imaterialist2020-model/6a653615fa48cbeaf34adda7c0545a49739b3189/tf_tpu_models/__init__.py -------------------------------------------------------------------------------- /tf_tpu_models/common/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tf_tpu_models/experimental/cifar_keras/README.md: -------------------------------------------------------------------------------- 1 | # Cifar Keras # 2 | 3 | This directory contains an example using the Keras layers API inside an 4 | Estimator/TPUEstimator. If you have a complete Keras model already built, 5 | consider the new experimental Cloud TPU-Keras integration available since TF 6 | 1.9. For examples, see [`models/experimental/keras`](https://github.com/tensorflow/tpu/tree/master/models/experimental/keras) 7 | -------------------------------------------------------------------------------- /tf_tpu_models/experimental/dcgan/README.md: -------------------------------------------------------------------------------- 1 | ## Overview 2 | 3 | This example uses a DCGAN architecture to learn to produce MNIST digits and 4 | CIFAR10 images. It trains on Google Cloud TPUs. It uses an open source library 5 | called TF-GAN to abstract away many of the GAN and TPU infrastructure details. 6 | 7 | To run this example, be sure to install TF-GAN with: 8 | 9 | pip install tensorflow-gan 10 | -------------------------------------------------------------------------------- /tf_tpu_models/experimental/deeplab/README.md: -------------------------------------------------------------------------------- 1 | # Deeplab on TPU 2 | 3 | ## Prerequisites 4 | 5 | ### Setup a Google Cloud project 6 | 7 | Follow the instructions at the [Quickstart Guide](https://cloud.google.com/tpu/docs/quickstart) 8 | to get a GCE VM with access to Cloud TPU. 9 | 10 | To run this model, you will need: 11 | 12 | * A GCE VM instance with an associated Cloud TPU resource 13 | * A GCS bucket to store your training checkpoints 14 | * A GCS bucket to store your training and evaluation data. 
15 | 16 | ### Setup Deeplab under tensorflow/models 17 | 18 | Deeplab on Cloud TPU depends on [Deeplab under tensorflow/models](https://github.com/tensorflow/models/tree/master/research/deeplab). Please follow the [instructions](https://github.com/tensorflow/models/blob/master/research/deeplab/g3doc/installation.md) to add the library to `PYTHONPATH` and test the installation. 19 | 20 | You can use their [script](https://github.com/tensorflow/models/blob/master/research/deeplab/datasets/download_and_convert_voc2012.sh) to download PASCAL VOC 2012 semantic segmentation dataset and convert it to TFRecord. 21 | 22 | You can download their [pretrained checkpoints](https://github.com/tensorflow/models/blob/master/research/deeplab/g3doc/model_zoo.md). In particular, we use a [modified resnet 101 pretrained on ImageNet](http://download.tensorflow.org/models/resnet_v1_101_2018_05_04.tar.gz) below. 23 | 24 | ## Train and Eval 25 | 26 | ```shell 27 | python main.py \ 28 | --mode='train' \ 29 | --num_shards=8 \ 30 | --train_split='train' \ 31 | --alsologtostderr=true \ 32 | --model_dir=${MODEL_DIR} \ 33 | --dataset_dir=${DATASET_DIR} \ 34 | --init_checkpoint=${INIT_CHECKPOINT} \ 35 | --model_variant=resnet_v1_101_beta \ 36 | --image_pyramid=1. \ 37 | --aspp_with_separable_conv=false \ 38 | --multi_grid=1,2,4 \ 39 | --decoder_use_separable_conv=false 40 | ``` 41 | You can use `mode=eval` for evaluation after training is completed. The model should train to close to 0.74 MIOU in around 9 hours. If you have train_aug split available and use `--train_split=train_aug`, then MIOU should be close to 0.77. 
42 | 43 | -------------------------------------------------------------------------------- /tf_tpu_models/experimental/densenet_keras/README.md: -------------------------------------------------------------------------------- 1 | # Cloud TPU Port of DenseNet using Keras API 2 | 3 | This folder contains an implementation of the [DenseNet](https://arxiv.org/pdf/1608.06993.pdf) 4 | image classification model using the Keras API. 5 | 6 | ## Running the model on ImageNet 7 | 8 | The process for running on ImageNet is similar; just specify the directory 9 | containing your converted tfrecord files: 10 | 11 | ``` 12 | python densenet_keras_imagenet.py\ 13 | --alsologtostderr\ 14 | --num_shards=8\ 15 | --batch_size=1024\ 16 | --master=grpc://$TPU_WORKER:8470\ 17 | --use_tpu=1\ 18 | --model_dir=gs://my-cloud-bucket/models/densenet-keras-imagenet/0\ 19 | --data_dir=gs://my-cloud-bucket/data/imagenet 20 | ``` 21 | -------------------------------------------------------------------------------- /tf_tpu_models/experimental/embedding/README.md: -------------------------------------------------------------------------------- 1 | # TPU Embedding example model 2 | 3 | ## Prerequisites 4 | 5 | ### Set up a Google Cloud project 6 | 7 | Follow the instructions at the [Quickstart Guide](https://cloud.google.com/tpu/docs/quickstart) 8 | to get a GCE VM with access to Cloud TPU. 9 | 10 | To run this model, you will need: 11 | 12 | * A GCE VM instance with an associated Cloud TPU resource. It might be helpful if the VM has a large number of CPUs and large memory as it is used for generating training and evaluation data. 13 | * A GCS bucket to store data. 
14 | 15 | ## Setup Model 16 | Clone the `tpu` repository and move to the example directory: 17 | 18 | ```shell 19 | git clone https://github.com/tensorflow/tpu 20 | cd tpu/models/experimental/embedding 21 | ``` 22 | 23 | Set up a Google Cloud Bucket for your training data and model storage: 24 | 25 | ```shell 26 | BUCKET_NAME=your_bucket_name 27 | ``` 28 | 29 | Create a new `embedding` subdirectory in your bucket. 30 | 31 | ## Run the training data generator 32 | 33 | ```shell 34 | python3 models/experimental/embedding/create_data.py \ 35 | --train_dataset_path gs://${BUCKET_NAME}/embedding/train.tfrecord \ 36 | --eval_dataset_path gs://${BUCKET_NAME}/embedding/eval.tfrecord 37 | ``` 38 | 39 | ## Train and Eval 40 | 41 | ```shell 42 | python3 models/experimental/embedding/model.py \ 43 | --train_dataset_path="gs://${BUCKET_NAME}/embedding/train.tfrecord*" \ 44 | --eval_dataset_path="gs://${BUCKET_NAME}/embedding/eval.tfrecord*" \ 45 | --model_dir="gs://${BUCKET_NAME}/embedding/model_dir" 46 | ``` 47 | -------------------------------------------------------------------------------- /tf_tpu_models/experimental/inception/inception_v3_k8s.yaml: -------------------------------------------------------------------------------- 1 | # Train Inception v3 with fake ImageNet dataset using Cloud TPU and Google 2 | # Kubernetes Engine. 3 | # 4 | # [Training Data] 5 | # In this example, we use the randomly generated fake ImageNet dataset at 6 | # gs://cloud-tpu-test-datasets/fake_imagenet as the training data. 7 | # 8 | # [Instructions] 9 | # 1. Follow the instructions on https://cloud.google.com/tpu/docs/kubernetes-engine-setup 10 | # to create a Kubernetes Engine cluster. 11 | # 2. Change the environment variable MODEL_BUCKET in the Job spec to the 12 | # Google Cloud Storage location where you want to store the output model. 13 | # 3. Run `kubectl create -f inception_v3_k8s.yaml`. 
14 | 15 | apiVersion: batch/v1 16 | kind: Job 17 | metadata: 18 | name: inception-v3-tpu 19 | spec: 20 | template: 21 | metadata: 22 | annotations: 23 | # The Cloud TPUs that will be created for this Job must support 24 | # TensorFlow 1.11. This version MUST match the TensorFlow version that 25 | # your model is built on. 26 | tf-version.cloud-tpus.google.com: "1.11" 27 | spec: 28 | restartPolicy: Never 29 | containers: 30 | - name: inception-v3-tpu 31 | # The official TensorFlow 1.11 TPU model image built from https://github.com/tensorflow/tpu/blob/r1.11/tools/docker/Dockerfile. 32 | image: gcr.io/tensorflow/tpu-models:r1.11 33 | command: 34 | - python 35 | - /tensorflow_tpu_models/models/experimental/inception/inception_v3.py 36 | - --learning_rate=0.165 37 | - --train_steps=250000 38 | - --iterations=500 39 | - --use_data=real 40 | - --mode=train_and_eval 41 | - --train_steps_per_eval=2000 42 | - --data_dir=$(DATA_BUCKET) 43 | - --model_dir=$(MODEL_BUCKET) 44 | env: 45 | # The Google Cloud Storage location where the fake ImageNet dataset is 46 | # stored. 47 | - name: DATA_BUCKET 48 | value: "gs://cloud-tpu-test-datasets/fake_imagenet" 49 | # [REQUIRED] Must specify the Google Cloud Storage location where your 50 | # output model will be stored. 51 | - name: MODEL_BUCKET 52 | value: "gs:///inception_v3" 53 | resources: 54 | limits: 55 | # Request a single v2-8 Cloud TPU device to train the model. 56 | # A single v2-8 Cloud TPU device consists of 4 chips, each of which 57 | # has 2 cores, so there are 8 cores in total. 58 | cloud-tpus.google.com/v2: 8 59 | -------------------------------------------------------------------------------- /tf_tpu_models/experimental/inference/api_config.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # 16 | # A TF Serving API configuration. 17 | # 18 | # Below, replace MY_PROJECT_ID with your Google Cloud Project ID. 19 | # 20 | 21 | # The configuration schema is defined by service.proto file 22 | # https://github.com/googleapis/googleapis/blob/master/google/api/service.proto 23 | type: google.api.Service 24 | config_version: 3 25 | 26 | # 27 | # Name of the service configuration. 28 | # 'tf-tpu-serving' is the endpoint name. Users are free to rename it. 29 | # 30 | name: tf-tpu-serving.endpoints..cloud.goog 31 | 32 | # 33 | # API title to appear in the user interface (Google Cloud Console). 34 | # 35 | title: tf-tpu-serving 36 | apis: 37 | - name: tensorflow.serving.PredictionService 38 | 39 | # 40 | # API usage restrictions. 
41 | # 42 | usage: 43 | rules: 44 | - selector: tensorflow.serving.PredictionService.Predict 45 | allow_unregistered_calls: true 46 | -------------------------------------------------------------------------------- /tf_tpu_models/experimental/inference/api_descriptor.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apls777/kaggle-imaterialist2020-model/6a653615fa48cbeaf34adda7c0545a49739b3189/tf_tpu_models/experimental/inference/api_descriptor.pb -------------------------------------------------------------------------------- /tf_tpu_models/experimental/keras_colab/README.md: -------------------------------------------------------------------------------- 1 | # Keras Colab # 2 | 3 | This directory contains an example of using the experimental Cloud TPU-Keras 4 | integration that was added in TF 1.9 in an interactive collaboratory 5 | environment. To learn more about this new integration, 6 | check out the documentation (coming soon!). 7 | -------------------------------------------------------------------------------- /tf_tpu_models/experimental/ncf/README.md: -------------------------------------------------------------------------------- 1 | # Neural Collaborative Filtering (NCF) on TPU 2 | 3 | ## Prerequisites 4 | 5 | ### Setup a Google Cloud project 6 | 7 | Follow the instructions at the [Quickstart Guide](https://cloud.google.com/tpu/docs/quickstart) 8 | to get a GCE VM with access to Cloud TPU. 9 | 10 | To run this model, you will need: 11 | 12 | * A GCE VM instance with an associated Cloud TPU resource. It might be helpful if the VM has a large number of CPUs and large memory as it is used for generating training and evaluation data. TF nightly is required. 13 | * A GCS bucket to store data. To avoid downloading MovieLens dataset, you can copy it from `gs://ncf/data_dir`. 
14 | 15 | ### Setup NCF from tensorflow/models 16 | 17 | Neural collaborative filtering on Cloud TPU depends on [the same model under tensorflow/models](https://github.com/tensorflow/models/tree/master/official/recommendation). In your working directory, run `git clone https://github.com/tensorflow/models.git`, and add `models/` to your python path by running `export PYTHONPATH=$PYTHONPATH:/your/working/directory/models/`. 18 | 19 | ## Setup NCF 20 | Copy `./ncf_main.py` to your working directory. 21 | 22 | ``` 23 | wget https://raw.githubusercontent.com/tensorflow/tpu/master/models/experimental/ncf/ncf_main.py 24 | ``` 25 | 26 | Set up a Google Cloud Bucket for your training data and model storage: 27 | 28 | ```shell 29 | BUCKET_NAME=your_bucket_name 30 | ``` 31 | 32 | ## Run the training data generator 33 | 34 | From the `models/` directory run the command: 35 | 36 | ```shell 37 | python official/recommendation/create_ncf_data.py \ 38 | --data_dir gs://${BUCKET_NAME}/ncf_data \ 39 | --meta_data_file_path gs://${BUCKET_NAME}/ncf_data/metadata.json \ 40 | --train_prebatch_size 12288 \ 41 | --eval_prebatch_size 20000 42 | ``` 43 | 44 | This will download and preprocess your data and take several minutes to process 45 | the data. 46 | 47 | NOTE: The pre-batch sizes must be the same as the `--batch_size` and 48 | `--eval_batch_size` passed to `ncf_main.py` divided by the value of 49 | `--num_tpu_shards` (the number of TPU cores being trained on). By default this 50 | model trains on a single host with 8 TPU cores, giving the pre-batch sizes 51 | above. 
52 | 53 | ## Train and Eval 54 | 55 | ```shell 56 | EXPERIMENT_NAME=your_experiment_name 57 | python ncf_main.py \ 58 | --train_dataset_path="gs://${BUCKET_NAME}/ncf_data/training_cycle_{}/*" \ 59 | --eval_dataset_path="gs://${BUCKET_NAME}/ncf_data/eval_data/*" \ 60 | --input_meta_data_path=gs://${BUCKET_NAME}/ncf_data/metadata.json \ 61 | --model_dir gs://${BUCKET_NAME}/model_dirs/${EXPERIMENT_NAME} |& tee ${EXPERIMENT_NAME}.log 62 | ``` 63 | 64 | Most of the time, the hit rate metric (HR) reaches 0.635 in around 10 epochs. 65 | 66 | -------------------------------------------------------------------------------- /tf_tpu_models/experimental/qanet/run.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | from absl import app 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================== 16 | """Main launcher for QANet train/eval/predict.""" 17 | import tensorflow.compat.v1 as tf 18 | 19 | import run_lib 20 | 21 | 22 | def main(_): 23 | run_lib.run() 24 | 25 | 26 | if __name__ == "__main__": 27 | app.run(main) 28 | -------------------------------------------------------------------------------- /tf_tpu_models/experimental/qanet/testdata/train-v1.1.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": [{ 3 | "paragraphs": [{ 4 | "context": "The quick brown fox jumped over the lazy dog.", 5 | "qas": [{ 6 | "answers": [{ 7 | "answer_start": 11, 8 | "text": "brown fox" 9 | }], 10 | "id": "56be4db0acb8001400a502ec", 11 | "question": "What jumped?" 12 | }], 13 | "qas": [{ 14 | "answers": [{ 15 | "answer_start": 0, 16 | "text": "The" 17 | }], 18 | "id": "56be4db0acb8001400a502ef", 19 | "question": "question?" 20 | }] 21 | }], 22 | "title": "test question" 23 | },{ 24 | "paragraphs": [{ 25 | "context": "The text for this question is too long and will be filtered out. 
a b c d e f g h i j k l m n o p q r s t u v w x y z.", 26 | "qas": [{ 27 | "answers": [{ 28 | "answer_start": 34, 29 | "text": "long" 30 | }], 31 | "id": "56be4db0acb8001400a502ed", 32 | "question": "8th word" 33 | }] 34 | }], 35 | "title": "Too long context" 36 | } 37 | ] 38 | } 39 | -------------------------------------------------------------------------------- /tf_tpu_models/experimental/qanet/testdata/train_0000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apls777/kaggle-imaterialist2020-model/6a653615fa48cbeaf34adda7c0545a49739b3189/tf_tpu_models/experimental/qanet/testdata/train_0000 -------------------------------------------------------------------------------- /tf_tpu_models/experimental/qanet/testdata/vocab.vec: -------------------------------------------------------------------------------- 1 | UNK 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2 | fox 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 3 | -------------------------------------------------------------------------------- /tf_tpu_models/experimental/resnet50_keras/README.md: -------------------------------------------------------------------------------- 1 | # ResNet-50 # 2 | 3 | This directory contains an example of using the experimental Cloud TPU-Keras 4 | integration that was added in TF 1.9. ResNet-50 is a commonly used convolutional 5 | neural network used for image classification. The ResNet-family of models were 6 | introduced for the ImageNet 2015 competition and performed very well there. 
7 | -------------------------------------------------------------------------------- /tf_tpu_models/experimental/resnet50_keras/model_saving_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | r"""Utilities to save models.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import os 23 | 24 | import tensorflow.compat.v1 as tf 25 | 26 | try: 27 | import h5py as _ # pylint: disable=g-import-not-at-top 28 | HAS_H5PY = True 29 | except ImportError: 30 | tf.logging.warning('`h5py` is not installed. Please consider installing it ' 31 | 'to save weights for long-running training.') 32 | HAS_H5PY = False 33 | 34 | 35 | def save_model(model, model_dir, weights_file): 36 | """Saves the model weights.""" 37 | weights_file_path = os.path.join(model_dir, weights_file) 38 | del model_dir, weights_file # avoid accident usages. 39 | 40 | if not HAS_H5PY: 41 | tf.logging.warning('`h5py` is not installed. 
Skip saving model weights.') 42 | return 43 | 44 | tf.logging.info('Saving weights and optimizer states into %s', 45 | weights_file_path) 46 | tf.logging.info('This might take a while...') 47 | model.save(weights_file_path, overwrite=True, include_optimizer=True) 48 | 49 | -------------------------------------------------------------------------------- /tf_tpu_models/experimental/show_and_tell/README: -------------------------------------------------------------------------------- 1 | This model demonstrates using TPUEstimator to train the ShowAndTell image captioning model. 2 | 3 | The original model source is from the Tensorflow models repository: 4 | 5 | https://github.com/tensorflow/models/blob/master/research/im2txt/ 6 | 7 | You will need TFRecords from the COCO dataset for training. These can be generated by following 8 | the directions here: 9 | 10 | https://cloud.google.com/tpu/docs/tutorials/retinanet#prepare_the_coco_dataset 11 | 12 | To run the trainer: 13 | 14 | python train.py\ 15 | --tpu=$TPU_WORKER \ 16 | --input_file_pattern=gs://bucket/mscoco-records/train-*\ 17 | --model_dir=$MODEL_DIR\ 18 | --train_steps=10000\ 19 | --train_batch_size=1024 20 | 21 | 22 | NOTE: 23 | 24 | This version uses the non-reversible tf.strings.to_hash_bucket to hash captions to indices: it is 25 | not usable for inference! -------------------------------------------------------------------------------- /tf_tpu_models/hyperparameters/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tf_tpu_models/hyperparameters/common_tpu_flags.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Defining common TPU flags used across all the models.""" 16 | 17 | from absl import flags 18 | 19 | 20 | def define_common_tpu_flags(): 21 | """Define the flags related to TPU's.""" 22 | flags.DEFINE_string( 23 | 'tpu', default=None, 24 | help='The Cloud TPU to use for training. 
This should be either the name ' 25 | 'used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 ' 26 | 'url.') 27 | 28 | flags.DEFINE_string( 29 | 'gcp_project', default=None, 30 | help='Project name for the Cloud TPU-enabled project. If not specified, we ' 31 | 'will attempt to automatically detect the GCE project from metadata.') 32 | 33 | flags.DEFINE_string( 34 | 'tpu_zone', default=None, 35 | help='GCE zone where the Cloud TPU is located in. If not specified, we ' 36 | 'will attempt to automatically detect the GCE project from metadata.') 37 | 38 | flags.DEFINE_string( 39 | 'eval_master', default=None, 40 | help='GRPC URL of the eval master. Set to an appropiate value when running ' 41 | 'on CPU/GPU.') 42 | -------------------------------------------------------------------------------- /tf_tpu_models/official/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apls777/kaggle-imaterialist2020-model/6a653615fa48cbeaf34adda7c0545a49739b3189/tf_tpu_models/official/__init__.py -------------------------------------------------------------------------------- /tf_tpu_models/official/amoeba_net/amoeba_net_k8s.yaml: -------------------------------------------------------------------------------- 1 | # Train AmoebaNet-D with fake ImageNet dataset using Cloud TPU and Google 2 | # Kubernetes Engine. 3 | # 4 | # [Training Data] 5 | # In this example, we use the randomly generated fake ImageNet dataset at 6 | # gs://cloud-tpu-test-datasets/fake_imagenet as the training data. 7 | # 8 | # [Instructions] 9 | # 1. Follow the instructions on https://cloud.google.com/tpu/docs/kubernetes-engine-setup 10 | # to create a Kubernetes Engine cluster. 11 | # Note: Use a base machine type with more memory than the default n1-standard-1. 12 | # 2. Change the environment variable MODEL_BUCKET in the Job spec to the 13 | # Google Cloud Storage location where you want to store the output model. 14 | # 3. 
Run `kubectl create -f amoeba_net_k8s.yaml`. 15 | 16 | apiVersion: batch/v1 17 | kind: Job 18 | metadata: 19 | name: amoeba-net-tpu 20 | spec: 21 | template: 22 | metadata: 23 | annotations: 24 | # The Cloud TPUs that will be created for this Job must support 25 | # TensorFlow 1.11. This version MUST match the TensorFlow version that 26 | # your model is built on. 27 | tf-version.cloud-tpus.google.com: "1.11" 28 | spec: 29 | restartPolicy: Never 30 | containers: 31 | - name: amoeba-net-tpu 32 | # The official TensorFlow 1.11 TPU model image built from https://github.com/tensorflow/tpu/blob/r1.11/tools/docker/Dockerfile. 33 | image: gcr.io/tensorflow/tpu-models:r1.11 34 | command: 35 | - python 36 | - /tensorflow_tpu_models/models/official/amoeba_net/amoeba_net.py 37 | - --data_dir=$(DATA_BUCKET) 38 | - --model_dir=$(MODEL_BUCKET) 39 | env: 40 | # The Google Cloud Storage location where the fake ImageNet dataset is 41 | # stored. 42 | - name: DATA_BUCKET 43 | value: "gs://cloud-tpu-test-datasets/fake_imagenet" 44 | # [REQUIRED] Must specify the Google Cloud Storage location where your 45 | # output model will be stored. 46 | - name: MODEL_BUCKET 47 | value: "gs:///amoeba_net" 48 | resources: 49 | limits: 50 | # Request a single v2-8 Cloud TPU device to train the model. 51 | # A single v2-8 Cloud TPU device consists of 4 chips, each of which 52 | # has 2 cores, so there are 8 cores in total. 53 | cloud-tpus.google.com/v2: 8 54 | -------------------------------------------------------------------------------- /tf_tpu_models/official/amoeba_net/network_utils_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for slim.nets.nasnet.nasnet_utils.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow.compat.v1 as tf 22 | 23 | import network_utils 24 | 25 | 26 | class NetworkUtilsTest(tf.test.TestCase): 27 | 28 | def testCalcReductionLayers(self): 29 | num_cells = 18 30 | num_reduction_layers = 2 31 | reduction_layers = network_utils.calc_reduction_layers( 32 | num_cells, num_reduction_layers) 33 | self.assertEqual(len(reduction_layers), 2) 34 | self.assertEqual(reduction_layers[0], 6) 35 | self.assertEqual(reduction_layers[1], 12) 36 | 37 | def testGetChannelIndex(self): 38 | data_formats = ['NHWC', 'NCHW'] 39 | for data_format in data_formats: 40 | index = network_utils.get_channel_index(data_format) 41 | correct_index = 3 if data_format == 'NHWC' else 1 42 | self.assertEqual(index, correct_index) 43 | 44 | def testGetChannelDim(self): 45 | data_formats = ['NHWC', 'NCHW'] 46 | shape = [10, 20, 30, 40] 47 | for data_format in data_formats: 48 | dim = network_utils.get_channel_dim(shape, data_format) 49 | correct_dim = shape[3] if data_format == 'NHWC' else shape[1] 50 | self.assertEqual(dim, correct_dim) 51 | 52 | def testGlobalAvgPool(self): 53 | data_formats = ['NHWC', 'NCHW'] 54 | inputs = tf.placeholder(tf.float32, (5, 10, 20, 10)) 55 | for data_format in data_formats: 56 | output = network_utils.global_avg_pool( 57 | inputs, 
data_format) 58 | self.assertEqual(output.shape, [5, 10]) 59 | 60 | 61 | if __name__ == '__main__': 62 | tf.test.main() 63 | -------------------------------------------------------------------------------- /tf_tpu_models/official/bert/README.md: -------------------------------------------------------------------------------- 1 | See https://github.com/google-research/bert/blob/master/README.md 2 | -------------------------------------------------------------------------------- /tf_tpu_models/official/densenet/README.md: -------------------------------------------------------------------------------- 1 | # Cloud TPU Port of DenseNet 2 | 3 | This folder contains an implementation of the [DenseNet](https://arxiv.org/pdf/1608.06993.pdf) 4 | image classification model. 5 | 6 | ## Running the model on ImageNet 7 | 8 | The process for running on ImageNet is similar, just specify the directory 9 | containing your converted tfrecord files: 10 | 11 | ``` 12 | python densenet_imagenet.py\ 13 | --alsologtostderr\ 14 | --num_shards=8\ 15 | --batch_size=1024\ 16 | --master=grpc://$TPU_WORKER:8470\ 17 | --use_tpu=1\ 18 | --model_dir=gs://my-cloud-bucket/models/densenet-imagenet/0\ 19 | --data_dir=gs://my-cloud-bucket/data/imagenet 20 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/README.md: -------------------------------------------------------------------------------- 1 | # TPU Object Detection and Segmentation Framework 2 | 3 | TPU Object Detection and Segmentation Framework provides implementations of 4 | common image classification, object detection and instance segmentation models in TensorFlow. 5 | Our models produce competitive results, 6 | can be trained on multiple platforms including GPU and [TPUs](https://cloud.google.com/tpu), 7 | and have been highly optimized for TPU performance. 
8 | It also features the latest research including 9 | [AutoAugment](https://arxiv.org/abs/1805.09501), 10 | [NAS-FPN](https://arxiv.org/abs/1904.07392), 11 | [ShapeMask](https://arxiv.org/abs/1904.03239), and 12 | [SpineNet](https://arxiv.org/abs/1912.05027). 13 | 14 | ![alt text](https://storage.googleapis.com/gweb-cloudblog-publish/images/Mask_R-CNN_instance_segmentation_results..max-2000x2000.png) 15 | ** Instance segmentation results of our Mask R-CNN model. 16 | 17 | 18 | ## Updates 19 | 20 | * April 10, 2020: Launch the new 21 | [README.md](https://github.com/tensorflow/tpu/blob/master/models/official/detection/README.md), 22 | [GETTING_STARTED.md](https://github.com/tensorflow/tpu/blob/master/models/official/detection/GETTING_STARTED.md), and 23 | [MODEL_ZOO.md](https://github.com/tensorflow/tpu/blob/master/models/official/detection/MODEL_ZOO.md). 24 | Release initial models. 25 | 26 | ## Major Features 27 | 28 | * Tasks: 29 | - Image classification 30 | - Object detection 31 | - Instance segmentation 32 | * Meta-architectures: 33 | - RetinaNet 34 | - Faster / Mask R-CNN 35 | - **[ShapeMask](https://arxiv.org/abs/1904.03239)** 36 | * Backbones: 37 | - ResNet 38 | - **[SpineNet](https://arxiv.org/abs/1912.05027)** 39 | * Feature pyramids: 40 | - FPN 41 | - **[NAS-FPN](https://arxiv.org/abs/1904.07392)** 42 | * Other model features: 43 | - **[AutoAugment](https://arxiv.org/abs/1805.09501)** 44 | * Training platforms: 45 | - Single machine GPUs 46 | - [Cloud TPU](https://cloud.google.com/tpu) 47 | - [Cloud TPU Pods](https://cloud.google.com/blog/products/ai-machine-learning/googles-scalable-supercomputers-for-machine-learning-cloud-tpu-pods-are-now-publicly-available-in-beta) 48 | 49 | 50 | ## Model Zoo 51 | 52 | [MODEL_ZOO.md](https://github.com/tensorflow/tpu/blob/master/models/official/detection/MODEL_ZOO.md) 53 | provides a large collection of baselines and checkpoints for object detection, instance segmentation, and image classification. 
54 | 55 | 56 | ## Get started 57 | 58 | Please follow the instructions in [GETTING_STARTED.md](https://github.com/tensorflow/tpu/blob/master/models/official/detection/GETTING_STARTED.md). 59 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/configs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/configs/detection_config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Detection config template.""" 16 | 17 | from configs import base_config 18 | from hyperparameters import params_dict 19 | 20 | # pylint: disable=line-too-long 21 | 22 | # For ResNet, this freezes the variables of the first conv1 and conv2_x 23 | # layers [1], which leads to higher training speed and slightly better testing 24 | # accuracy. The intuition is that the low-level architecture (e.g., ResNet-50) 25 | # is able to capture low-level features such as edges; therefore, it does not 26 | # need to be fine-tuned for the detection task. 27 | # Note that we need the trailing `/` to avoid the incorrect match. 28 | # [1]: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/config.py#L198 29 | RESNET_FROZEN_VAR_PREFIX = r'(resnet\d+)\/(conv2d(|_([1-9]|10))|batch_normalization(|_([1-9]|10)))\/' 30 | 31 | DETECTION_CFG = params_dict.ParamsDict(base_config.BASE_CFG) 32 | DETECTION_CFG.override({ 33 | 'architecture': { 34 | # Note that `num_classes` is the total number of classes including 35 | # one background class whose index is 0. 
36 | 'num_classes': 91 37 | }, 38 | 'eval': { 39 | 'type': 'box', 40 | 'eval_samples': 5000, 41 | 'use_json_file': True, 42 | 'val_json_file': '', 43 | 'per_category_metrics': False, 44 | 'score_threshold': 0.05, 45 | }, 46 | 'anchor': { 47 | 'num_scales': 3, 48 | 'aspect_ratios': [1.0, 2.0, 0.5], 49 | 'anchor_size': 4.0, 50 | }, 51 | 'fpn': { 52 | 'fpn_feat_dims': 256, 53 | 'use_separable_conv': False, 54 | 'use_batch_norm': True, 55 | }, 56 | 'nasfpn': { 57 | 'fpn_feat_dims': 256, 58 | 'num_repeats': 5, 59 | 'use_separable_conv': False, 60 | 'init_drop_connect_rate': None, 61 | 'block_fn': 'conv', 62 | }, 63 | 'postprocess': { 64 | 'apply_nms': True, 65 | 'use_batched_nms': False, 66 | 'max_total_size': 100, 67 | 'nms_iou_threshold': 0.5, 68 | 'score_threshold': 0.05, 69 | 'pre_nms_num_boxes': 5000, 70 | }, 71 | }, is_strict=False) 72 | # pylint: enable=line-too-long 73 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/configs/factory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Factory to provide model configs.""" 16 | 17 | from configs import classification_config 18 | from configs import maskrcnn_config 19 | from configs import retinanet_config 20 | from configs import shapemask_config 21 | from hyperparameters import params_dict 22 | 23 | 24 | def config_generator(model): 25 | """Model function generator.""" 26 | if model == 'classification': 27 | default_config = classification_config.CLASSIFICATION_CFG 28 | restrictions = classification_config.CLASSIFICATION_RESTRICTIONS 29 | elif model == 'retinanet': 30 | default_config = retinanet_config.RETINANET_CFG 31 | restrictions = retinanet_config.RETINANET_RESTRICTIONS 32 | elif model == 'mask_rcnn': 33 | default_config = maskrcnn_config.MASKRCNN_CFG 34 | restrictions = maskrcnn_config.MASKRCNN_RESTRICTIONS 35 | elif model == 'shapemask': 36 | default_config = shapemask_config.SHAPEMASK_CFG 37 | restrictions = shapemask_config.SHAPEMASK_RESTRICTIONS 38 | else: 39 | raise ValueError('Model %s is not supported.' % model) 40 | 41 | return params_dict.ParamsDict(default_config, restrictions) 42 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/configs/retinanet_config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# pylint: disable=line-too-long
# Default RetinaNet parameters: constructed from the shared detection config
# and then overridden with the RetinaNet-specific sections below.
RETINANET_CFG = params_dict.ParamsDict(detection_config.DETECTION_CFG)
RETINANET_CFG.override({
    'type': 'retinanet',
    # Names of the components to use; the names refer to sections of this
    # config (e.g. 'retinanet_parser' below).
    'architecture': {
        'parser': 'retinanet_parser',
        'backbone': 'resnet',
        'multilevel_features': 'fpn',
    },
    # Input parsing / augmentation settings.
    'retinanet_parser': {
        'output_size': [640, 640],
        'match_threshold': 0.5,
        'unmatched_threshold': 0.5,
        'aug_rand_hflip': True,
        # min == max == 1.0, i.e. no scale jitter by default; the YAML configs
        # override these two values to enable it.
        'aug_scale_min': 1.0,
        'aug_scale_max': 1.0,
        'use_autoaugment': False,
        'autoaugment_policy_name': 'v0',
        'skip_crowd_during_training': True,
        'max_num_instances': 100,
        'regenerate_source_id': False,
    },
    'retinanet_head': {
        # NOTE(review): 9 equals num_scales (3) x len(aspect_ratios) (3) of the
        # base 'anchor' section; presumably these must stay in sync - confirm.
        'anchors_per_location': 9,
        'num_convs': 4,
        'num_filters': 256,
        'use_separable_conv': False,
        'use_batch_norm': True,
    },
    'retinanet_loss': {
        'focal_loss_alpha': 0.25,
        'focal_loss_gamma': 1.5,
        'huber_loss_delta': 0.1,
        'box_loss_weight': 50,
    },
# NOTE(review): is_strict=False presumably allows the override to introduce
# keys that DETECTION_CFG does not define - confirm in ParamsDict.override.
}, is_strict=False)

# No restrictions are declared for RetinaNet parameters.
RETINANET_RESTRICTIONS = [
]
# pylint: enable=line-too-long
2 | architecture: 3 | backbone: 'spinenet_mbconv' 4 | multilevel_features: 'identity' 5 | train: 6 | total_steps: 277800 7 | train_batch_size: 256 8 | learning_rate: 9 | type: 'step' 10 | init_learning_rate: 0.28 11 | learning_rate_levels: [0.028, 0.0028] 12 | learning_rate_steps: [263910, 273170] 13 | l2_weight_decay: 0.00003 14 | batch_norm_activation: 15 | use_sync_bn: true 16 | activation: 'swish' 17 | retinanet_head: 18 | num_filters: 48 19 | use_separable_conv: true 20 | spinenet_mbconv: 21 | model_id: '49' 22 | anchor: 23 | anchor_size: 3.0 24 | retinanet_parser: 25 | output_size: [384, 384] 26 | aug_scale_min: 0.5 27 | aug_scale_max: 2.0 28 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/configs/spinenet/spinenet143_classification.yaml: -------------------------------------------------------------------------------- 1 | # SpineNet-143 ImageNet classification: 2 | # - 79.0 Top-1, 94.4 Top-5 accuracy with init_drop_connect_rate null, activation 'relu', label_smoothing 0.0 3 | # - 80.1 Top-1, 95.0 Top-5 accuracy with init_drop_connect_rate 0.2, activation 'swish', label_smoothing 0.1 4 | architecture: 5 | backbone: 'spinenet' 6 | multilevel_features: 'identity' 7 | parser: 'classification_parser' 8 | num_classes: 1001 9 | train: 10 | total_steps: 62557 11 | train_batch_size: 4096 12 | learning_rate: 13 | type: cosine 14 | warmup_steps: 1564 15 | init_learning_rate: 1.6 16 | l2_weight_decay: 0.0001 17 | label_smoothing: 0.1 18 | batch_norm_activation: 19 | batch_norm_epsilon: 1.0e-05 20 | batch_norm_momentum: 0.9 21 | use_sync_bn: false 22 | activation: 'swish' 23 | spinenet: 24 | model_id: '143' 25 | init_drop_connect_rate: 0.2 26 | classification_head: 27 | aggregation: 'all' 28 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/configs/spinenet/spinenet143_mrcnn.yaml: 
-------------------------------------------------------------------------------- 1 | # SpineNet-143 + Mask R-CNN with l2 weight regularization. 48.7 mAP. 2 | architecture: 3 | backbone: 'spinenet' 4 | min_level: 3 5 | max_level: 7 6 | multilevel_features: 'identity' 7 | parser: 'maskrcnn_parser' 8 | train: 9 | total_steps: 164150 10 | train_batch_size: 256 11 | learning_rate: 12 | warmup_steps: 2000 13 | init_learning_rate: 0.28 14 | learning_rate_levels: [0.028, 0.0028] 15 | learning_rate_steps: [150080, 159460] 16 | l2_weight_decay: 0.00004 17 | anchor: 18 | anchor_size: 3.0 19 | batch_norm_activation: 20 | batch_norm_epsilon: 0.001 21 | batch_norm_momentum: 0.99 22 | use_sync_bn: true 23 | spinenet: 24 | model_id: '143' 25 | maskrcnn_parser: 26 | output_size: [1280, 1280] 27 | aug_scale_min: 0.5 28 | aug_scale_max: 2.0 29 | rpn_head: 30 | use_batch_norm: true 31 | frcnn_head: 32 | num_convs: 4 33 | num_fcs: 1 34 | use_batch_norm: true 35 | mrcnn_head: 36 | use_batch_norm: true 37 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/configs/spinenet/spinenet143_retinanet.yaml: -------------------------------------------------------------------------------- 1 | # SpineNet-143 + RetinaNet: 2 | # - 48.0% mAP with init_drop_connect_rate null, activation 'relu', total_steps 162050, learning_rate_steps [148160, 157420], aug_scale_min 0.5, aug_scale_max 2.0 3 | # - 50.6% mAP with init_drop_connect_rate: 0.16, activation: 'swish', total_steps 231500, learning_rate_steps [217610, 226870], aug_scale_min 0.1, aug_scale_max 1.9 4 | architecture: 5 | backbone: 'spinenet' 6 | multilevel_features: 'identity' 7 | train: 8 | total_steps: 231500 9 | train_batch_size: 256 10 | learning_rate: 11 | warmup_steps: 2000 12 | init_learning_rate: 0.28 13 | learning_rate_levels: [0.028, 0.0028] 14 | learning_rate_steps: [217610, 226870] 15 | l2_weight_decay: 0.00004 16 | gradient_clip_norm: 10.0 17 | batch_norm_activation: 18 
| use_sync_bn: true 19 | activation: 'swish' 20 | spinenet: 21 | model_id: '143' 22 | init_drop_connect_rate: 0.16 23 | retinanet_parser: 24 | output_size: [1280, 1280] 25 | aug_scale_min: 0.1 26 | aug_scale_max: 1.9 27 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/configs/spinenet/spinenet190_classification.yaml: -------------------------------------------------------------------------------- 1 | # SpineNet-190 ImageNet classification: 2 | # - 80.8 Top-1, 95.3 Top-5 accuracy with init_drop_connect_rate 0.2, activation 'swish', label_smoothing 0.1 3 | architecture: 4 | backbone: 'spinenet' 5 | multilevel_features: 'identity' 6 | parser: 'classification_parser' 7 | num_classes: 1001 8 | train: 9 | total_steps: 62557 10 | train_batch_size: 4096 11 | learning_rate: 12 | type: cosine 13 | warmup_steps: 1564 14 | init_learning_rate: 1.6 15 | l2_weight_decay: 0.0001 16 | label_smoothing: 0.1 17 | batch_norm_activation: 18 | batch_norm_epsilon: 1.0e-05 19 | batch_norm_momentum: 0.9 20 | use_sync_bn: false 21 | activation: 'swish' 22 | spinenet: 23 | model_id: '190' 24 | init_drop_connect_rate: 0.2 25 | classification_head: 26 | aggregation: 'all' 27 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/configs/spinenet/spinenet190_retinanet.yaml: -------------------------------------------------------------------------------- 1 | # SpineNet-190 + RetinaNet: 2 | # - 52.0% mAP with init_drop_connect_rate: 0.2, activation: 'swish' 3 | architecture: 4 | backbone: 'spinenet' 5 | multilevel_features: 'identity' 6 | train: 7 | total_steps: 185200 8 | train_batch_size: 256 9 | learning_rate: 10 | warmup_steps: 2000 11 | init_learning_rate: 0.3 12 | learning_rate_levels: [0.03, 0.003] 13 | learning_rate_steps: [171310, 180570] 14 | l2_weight_decay: 0.00004 15 | gradient_clip_norm: 10.0 16 | batch_norm_activation: 17 | use_sync_bn: true 18 | 
activation: 'swish' 19 | spinenet: 20 | model_id: '190' 21 | init_drop_connect_rate: 0.2 22 | retinanet_head: 23 | num_filters: 512 24 | num_convs: 7 25 | retinanet_parser: 26 | output_size: [1280, 1280] 27 | aug_scale_min: 0.1 28 | aug_scale_max: 1.9 29 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/configs/spinenet/spinenet49S_retinanet.yaml: -------------------------------------------------------------------------------- 1 | # SpineNet-49S + RetinaNet: 2 | # - 39.7% mAP with init_drop_connect_rate null, activation 'relu', total_steps 162050, learning_rate_steps [148160, 157420] 3 | # - 41.5% mAP with init_drop_connect_rate: 0.2, activation: 'swish', total_steps 231500, learning_rate_steps [217610, 226870] 4 | architecture: 5 | backbone: 'spinenet' 6 | multilevel_features: 'identity' 7 | train: 8 | total_steps: 231500 9 | train_batch_size: 256 10 | learning_rate: 11 | warmup_steps: 2000 12 | init_learning_rate: 0.28 13 | learning_rate_levels: [0.028, 0.0028] 14 | learning_rate_steps: [217610, 226870] 15 | l2_weight_decay: 0.00004 16 | gradient_clip_norm: 10.0 17 | batch_norm_activation: 18 | use_sync_bn: true 19 | activation: 'swish' 20 | spinenet: 21 | model_id: '49S' 22 | init_drop_connect_rate: 0.2 23 | retinanet_head: 24 | num_filters: 128 25 | anchor: 26 | anchor_size: 3.0 27 | retinanet_parser: 28 | output_size: [640, 640] 29 | aug_scale_min: 0.5 30 | aug_scale_max: 2.0 31 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/configs/spinenet/spinenet49_classification.yaml: -------------------------------------------------------------------------------- 1 | # SpineNet-49 ImageNet classification: 2 | # - 77.0 Top-1, 93.3 Top-5 accuracy with init_drop_connect_rate null, activation 'relu', label_smoothing 0.0 3 | # - 78.1 Top-1, 94.0 Top-5 accuracy with init_drop_connect_rate 0.2, activation 'swish', 
label_smoothing 0.1 4 | architecture: 5 | backbone: 'spinenet' 6 | multilevel_features: 'identity' 7 | parser: 'classification_parser' 8 | num_classes: 1001 9 | train: 10 | total_steps: 62557 11 | train_batch_size: 4096 12 | learning_rate: 13 | type: cosine 14 | warmup_steps: 1564 15 | init_learning_rate: 1.6 16 | l2_weight_decay: 0.0001 17 | label_smoothing: 0.1 18 | batch_norm_activation: 19 | batch_norm_epsilon: 1.0e-05 20 | batch_norm_momentum: 0.9 21 | use_sync_bn: false 22 | activation: 'swish' 23 | spinenet: 24 | model_id: '49' 25 | init_drop_connect_rate: 0.2 26 | classification_head: 27 | aggregation: 'all' 28 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/configs/spinenet/spinenet49_mrcnn.yaml: -------------------------------------------------------------------------------- 1 | # SpineNet-49 + Mask R-CNN with l2 weight regularization. 42.8 mAP. 2 | architecture: 3 | backbone: 'spinenet' 4 | min_level: 3 5 | max_level: 7 6 | multilevel_features: 'identity' 7 | parser: 'maskrcnn_parser' 8 | train: 9 | total_steps: 164150 10 | train_batch_size: 256 11 | learning_rate: 12 | warmup_steps: 2000 13 | init_learning_rate: 0.28 14 | learning_rate_levels: [0.028, 0.0028] 15 | learning_rate_steps: [150080, 159460] 16 | l2_weight_decay: 0.00004 17 | anchor: 18 | anchor_size: 3.0 19 | batch_norm_activation: 20 | batch_norm_epsilon: 0.001 21 | batch_norm_momentum: 0.99 22 | use_sync_bn: true 23 | spinenet: 24 | model_id: '49' 25 | maskrcnn_parser: 26 | output_size: [640, 640] 27 | aug_scale_min: 0.5 28 | aug_scale_max: 2.0 29 | rpn_head: 30 | min_level: 3 31 | max_level: 7 32 | use_batch_norm: true 33 | frcnn_head: 34 | num_convs: 4 35 | num_fcs: 1 36 | use_batch_norm: true 37 | mrcnn_head: 38 | use_batch_norm: true 39 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/configs/spinenet/spinenet49_retinanet.yaml: 
-------------------------------------------------------------------------------- 1 | # SpineNet-49 + RetinaNet: 2 | # - 42.8% mAP with init_drop_connect_rate null, activation 'relu', total_steps 162050, learning_rate_steps [148160, 157420] 3 | # - 44.3% mAP with init_drop_connect_rate: 0.2, activation: 'swish', total_steps 231500, learning_rate_steps [217610, 226870] 4 | architecture: 5 | backbone: 'spinenet' 6 | multilevel_features: 'identity' 7 | train: 8 | total_steps: 231500 9 | train_batch_size: 256 10 | learning_rate: 11 | warmup_steps: 2000 12 | init_learning_rate: 0.28 13 | learning_rate_levels: [0.028, 0.0028] 14 | learning_rate_steps: [217610, 226870] 15 | l2_weight_decay: 0.00004 16 | gradient_clip_norm: 10.0 17 | batch_norm_activation: 18 | use_sync_bn: true 19 | activation: 'swish' 20 | spinenet: 21 | model_id: '49' 22 | init_drop_connect_rate: 0.2 23 | anchor: 24 | anchor_size: 3.0 25 | retinanet_parser: 26 | output_size: [640, 640] 27 | aug_scale_min: 0.5 28 | aug_scale_max: 2.0 29 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/configs/spinenet/spinenet96_classification.yaml: -------------------------------------------------------------------------------- 1 | # SpineNet-96 ImageNet classification: 2 | # - 78.2 Top-1, 94.0 Top-5 accuracy with init_drop_connect_rate null, activation 'relu', label_smoothing 0.0 3 | # - 79.4 Top-1, 94.6 Top-5 accuracy with init_drop_connect_rate 0.2, activation 'swish', label_smoothing 0.1 4 | architecture: 5 | backbone: 'spinenet' 6 | multilevel_features: 'identity' 7 | parser: 'classification_parser' 8 | num_classes: 1001 9 | train: 10 | total_steps: 62557 11 | train_batch_size: 4096 12 | learning_rate: 13 | type: cosine 14 | warmup_steps: 1564 15 | init_learning_rate: 1.6 16 | l2_weight_decay: 0.0001 17 | label_smoothing: 0.1 18 | batch_norm_activation: 19 | batch_norm_epsilon: 1.0e-05 20 | batch_norm_momentum: 0.9 21 | use_sync_bn: false 22 | 
activation: 'swish' 23 | spinenet: 24 | model_id: '96' 25 | init_drop_connect_rate: 0.2 26 | classification_head: 27 | aggregation: 'all' 28 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/configs/spinenet/spinenet96_mrcnn.yaml: -------------------------------------------------------------------------------- 1 | # SpineNet-96 + Mask R-CNN with l2 weight regularization. 46.8 mAP. 2 | architecture: 3 | backbone: 'spinenet' 4 | min_level: 3 5 | max_level: 7 6 | multilevel_features: 'identity' 7 | parser: 'maskrcnn_parser' 8 | train: 9 | total_steps: 164150 10 | train_batch_size: 256 11 | learning_rate: 12 | warmup_steps: 2000 13 | init_learning_rate: 0.28 14 | learning_rate_levels: [0.028, 0.0028] 15 | learning_rate_steps: [150080, 159460] 16 | l2_weight_decay: 0.00004 17 | anchor: 18 | anchor_size: 3.0 19 | batch_norm_activation: 20 | batch_norm_epsilon: 0.001 21 | batch_norm_momentum: 0.99 22 | use_sync_bn: true 23 | spinenet: 24 | model_id: '96' 25 | maskrcnn_parser: 26 | output_size: [1024, 1024] 27 | aug_scale_min: 0.5 28 | aug_scale_max: 2.0 29 | rpn_head: 30 | use_batch_norm: true 31 | frcnn_head: 32 | num_convs: 4 33 | num_fcs: 1 34 | use_batch_norm: true 35 | mrcnn_head: 36 | use_batch_norm: true 37 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/configs/spinenet/spinenet96_retinanet.yaml: -------------------------------------------------------------------------------- 1 | # SpineNet-96 + RetinaNet: 2 | # - 46.7% mAP with init_drop_connect_rate null, activation 'relu', total_steps 162050, learning_rate_steps [148160, 157420] 3 | # - 48.5% mAP with init_drop_connect_rate: 0.2, activation: 'swish', total_steps 231500, learning_rate_steps [217610, 226870] 4 | 5 | architecture: 6 | backbone: 'spinenet' 7 | multilevel_features: 'identity' 8 | train: 9 | total_steps: 231500 10 | train_batch_size: 256 11 | learning_rate: 12 | 
warmup_steps: 2000 13 | init_learning_rate: 0.28 14 | learning_rate_levels: [0.028, 0.0028] 15 | learning_rate_steps: [217610, 226870] 16 | l2_weight_decay: 0.00004 17 | gradient_clip_norm: 10.0 18 | batch_norm_activation: 19 | use_sync_bn: true 20 | activation: 'swish' 21 | spinenet: 22 | model_id: '96' 23 | init_drop_connect_rate: 0.2 24 | anchor: 25 | anchor_size: 3.0 26 | retinanet_parser: 27 | output_size: [1024, 1024] 28 | aug_scale_min: 0.5 29 | aug_scale_max: 2.0 30 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/configs/yaml/retinanet_autoaugment.yaml: -------------------------------------------------------------------------------- 1 | # ---------- TRAINING PARAMETERS ---------- 2 | # AutoAugment achieves best results when trained for long and when not using a pretrained 3 | # checkpoint for the backbone. 4 | # Right now the code uses a pretrained checkpoint, but a future version will change this. 5 | # To train the ResNet 101 or ResNet 200 version, simply change the 'resnet_depth' to 101 or 200. 
6 | # Expected accuracy on ResNet 50 when using autoaugment: 38.0 7 | # Expected accuracy on ResNet 50 without using autoaugment: 36.4 8 | train: 9 | total_steps: 277200 10 | learning_rate: 11 | init_learning_rate: 0.08 12 | learning_rate_levels: [0.008, 0.0008] 13 | learning_rate_steps: [220000, 258700] 14 | 15 | resnet: 16 | resnet_depth: 50 17 | 18 | retinanet_parser: 19 | use_autoaugment: true 20 | aug_scale_min: 0.8 21 | aug_scale_max: 1.2 22 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/configs/yaml/retinanet_nasfpn.yaml: -------------------------------------------------------------------------------- 1 | # ---------- RetinaNet + NAS-FPN ---------- 2 | # Expected accuracy when using NAS-FPN l3-l7 and image size 640x640: 39.5 3 | train: 4 | total_steps: 90000 5 | learning_rate: 6 | init_learning_rate: 0.08 7 | learning_rate_levels: [0.008, 0.0008] 8 | learning_rate_steps: [60000, 80000] 9 | 10 | architecture: 11 | multilevel_features: 'nasfpn' 12 | 13 | nasfpn: 14 | fpn_feat_dims: 256 15 | min_level: 3 16 | max_level: 7 17 | num_repeats: 5 18 | use_separable_conv: False 19 | 20 | retinanet_parser: 21 | aug_scale_min: 0.8 22 | aug_scale_max: 1.2 23 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/dataloader/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/dataloader/mode_keys.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Standard names for input dataloader modes. 16 | 17 | The following standard keys are defined: 18 | 19 | * `TRAIN`: training mode. 20 | * `EVAL`: evaluation mode. 21 | * `PREDICT`: prediction mode. 22 | * `PREDICT_WITH_GT`: prediction mode with groundtruths in returned variables. 
23 | """ 24 | 25 | from __future__ import absolute_import 26 | from __future__ import division 27 | from __future__ import print_function 28 | 29 | 30 | TRAIN = 'train' 31 | EVAL = 'eval' 32 | PREDICT = 'predict' 33 | PREDICT_WITH_GT = 'predict_with_gt' 34 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/datasets/coco_label_map.csv: -------------------------------------------------------------------------------- 1 | 1:person 2 | 2:bicycle 3 | 3:car 4 | 4:motorcycle 5 | 5:airplane 6 | 6:bus 7 | 7:train 8 | 8:truck 9 | 9:boat 10 | 10:traffic light 11 | 11:fire hydrant 12 | 13:stop sign 13 | 14:parking meter 14 | 15:bench 15 | 16:bird 16 | 17:cat 17 | 18:dog 18 | 19:horse 19 | 20:sheep 20 | 21:cow 21 | 22:elephant 22 | 23:bear 23 | 24:zebra 24 | 25:giraffe 25 | 27:backpack 26 | 28:umbrella 27 | 31:handbag 28 | 32:tie 29 | 33:suitcase 30 | 34:frisbee 31 | 35:skis 32 | 36:snowboard 33 | 37:sports ball 34 | 38:kite 35 | 39:baseball bat 36 | 40:baseball glove 37 | 41:skateboard 38 | 42:surfboard 39 | 43:tennis racket 40 | 44:bottle 41 | 46:wine glass 42 | 47:cup 43 | 48:fork 44 | 49:knife 45 | 50:spoon 46 | 51:bowl 47 | 52:banana 48 | 53:apple 49 | 54:sandwich 50 | 55:orange 51 | 56:broccoli 52 | 57:carrot 53 | 58:hot dog 54 | 59:pizza 55 | 60:donut 56 | 61:cake 57 | 62:chair 58 | 63:couch 59 | 64:potted plant 60 | 65:bed 61 | 67:dining table 62 | 70:toilet 63 | 72:tv 64 | 73:laptop 65 | 74:mouse 66 | 75:remote 67 | 76:keyboard 68 | 77:cell phone 69 | 78:microwave 70 | 79:oven 71 | 80:toaster 72 | 81:sink 73 | 82:refrigerator 74 | 84:book 75 | 85:clock 76 | 86:vase 77 | 87:scissors 78 | 88:teddy bear 79 | 89:hair drier 80 | 90:toothbrush 81 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. 
All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/evaluation/factory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def evaluator_generator(params):
  """Builds the evaluator selected by `params.type`.

  Args:
    params: evaluation config exposing `type`, `val_json_file` and
      `per_category_metrics`. `score_threshold` is additionally read for
      'box_mask_and_attributes_no_rescale', and `mask_eval_class` for
      'shapemask_box_and_mask'.

  Returns:
    A `coco_evaluator.COCOEvaluator`, or a
    `coco_evaluator.ShapeMaskCOCOEvaluator` for 'shapemask_box_and_mask'.

  Raises:
    ValueError: if `params.type` is not a supported evaluation type.
  """
  if params.type == 'box':
    return coco_evaluator.COCOEvaluator(
        annotation_file=params.val_json_file,
        include_mask=False,
        per_category_metrics=params.per_category_metrics)

  if params.type == 'box_and_mask':
    return coco_evaluator.COCOEvaluator(
        annotation_file=params.val_json_file,
        include_mask=True,
        per_category_metrics=params.per_category_metrics)

  if params.type == 'box_mask_and_attributes_no_rescale':
    return coco_evaluator.COCOEvaluator(
        annotation_file=params.val_json_file,
        include_mask=True,
        per_category_metrics=params.per_category_metrics,
        include_attributes=True,
        use_eval_image_sizes=True,
        score_threshold=params.score_threshold)

  if params.type == 'shapemask_box_and_mask':
    return coco_evaluator.ShapeMaskCOCOEvaluator(
        mask_eval_class=params.mask_eval_class,
        annotation_file=params.val_json_file,
        include_mask=True,
        per_category_metrics=params.per_category_metrics)

  raise ValueError('The detection evaluation type `{}` is not supported.'
                   .format(params.type))


def encode_mask(mask: np.ndarray) -> str:
  """Run-length encodes a mask, scanning it column by column.

  The mask is flattened in column-major order (`mask.T.flatten()`). For a
  binary mask the result is a space-separated list of `start length` pairs,
  one per run of set pixels, where `start` is the 1-based position of the
  run in the flattened mask.

  Args:
    mask: 2-D array; intended for binary masks (neighbouring values are only
      compared for inequality).

  Returns:
    The encoded string; '' for an all-zero or empty mask.
  """
  pixels = mask.T.flatten()

  # Always surround the sequence with a zero on each side. This makes runs
  # that touch either end visible as value changes, and also makes an empty
  # mask well-defined (probing pixels[0] would raise IndexError).
  padded = np.zeros([len(pixels) + 2], dtype=pixels.dtype)
  padded[1:-1] = pixels

  # A change between padded[i] and padded[i + 1] happens at 1-based position
  # i + 1 of the unpadded sequence.
  rle = np.where(padded[1:] != padded[:-1])[0] + 1

  # Turn each (start, end) pair into (start, length).
  rle[1::2] = rle[1::2] - rle[:-1:2]

  return ' '.join(str(x) for x in rle)


def get_metrics(model_dir: str, step: int) -> dict:
  """Collects the scalar evaluation metrics logged at a given step.

  Every `*.tfevents.*` file under `<model_dir>/eval` is scanned; from each
  file the first summary event whose step equals `step` contributes its
  scalar values. If several files contain that step, later files overwrite
  tags written by earlier ones.

  Args:
    model_dir: model directory containing the `eval` sub-directory.
    step: step of the evaluation run to read.

  Returns:
    A dict mapping summary tag to its `simple_value`; empty when no summary
    event exists for `step`.
  """
  eval_result = {}
  event_pattern = os.path.join(model_dir, 'eval', '*.tfevents.*')
  for event_file in gfile.Glob(event_pattern):
    for event in summary_iterator.summary_iterator(event_file):
      # Records at this step that carry no summary payload are skipped rather
      # than asserted on: an assert would crash the scan (and is stripped
      # entirely under `python -O`).
      if event.step != step or not event.HasField('summary'):
        continue

      for value in event.summary.value:
        if value.HasField('simple_value'):
          eval_result[value.tag] = value.simple_value

      # Only the first matching event of each file is used.
      break

  return eval_result


def get_new_image_size(image_size, output_size: int):
  """Scales an image size so that its longer side equals `output_size`.

  Args:
    image_size: `(height, width)` of the original image.
    output_size: desired length of the longer side.

  Returns:
    `(new_height, new_width)`; the shorter side is scaled by the same factor
    and truncated to an int.
  """
  image_height, image_width = image_size

  if image_width > image_height:
    scale = image_width / output_size
    return int(image_height / scale), output_size

  # Portrait and square images are scaled by their height.
  scale = image_height / output_size
  return output_size, int(image_width / scale)
FLAGS = flags.FLAGS

flags.DEFINE_string('saved_model_dir', None, 'The saved model directory.')
flags.DEFINE_string('output_dir', None, 'The export tflite model directory.')


def export(saved_model_dir, tflite_model_dir):
  """Converts a SavedModel to TFLite and writes it as `model.tflite`.

  Args:
    saved_model_dir: directory of the SavedModel to convert.
    tflite_model_dir: directory that receives `model.tflite`; it is created
      if it does not exist yet.
  """
  converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
  tflite_model = converter.convert()

  # Create the output directory only after a successful conversion, so a
  # failed conversion leaves nothing behind. Without this, writing into a
  # not-yet-existing local directory fails when the file is opened.
  tf.gfile.MakeDirs(tflite_model_dir)
  tflite_model_path = os.path.join(tflite_model_dir, 'model.tflite')

  with tf.gfile.GFile(tflite_model_path, 'wb') as f:
    f.write(tflite_model)


def main(argv):
  """Entry point: exports the model named by the command-line flags."""
  del argv  # Unused.
  export(FLAGS.saved_model_dir, FLAGS.output_dir)


if __name__ == '__main__':
  flags.mark_flag_as_required('saved_model_dir')
  flags.mark_flag_as_required('output_dir')
  tf.app.run(main)
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/modeling/architecture/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/modeling/architecture/identity.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
class Identity(object):
  """Pass-through callable that hands its input features back untouched."""

  def __call__(self, features, is_training=False):
    """Returns `features` as-is.

    Args:
      features: the value to forward.
      is_training: accepted but not used by this class.

    Returns:
      The very same `features` object that was passed in.
    """
    del is_training  # Unused.
    return features
def model_generator(params):
  """Builds the model object selected by `params.type`.

  Args:
    params: configuration object; its `type` attribute picks the model and the
      whole object is forwarded to the model constructor.

  Returns:
    An instance of the classification, RetinaNet, Mask R-CNN or ShapeMask
    model class.

  Raises:
    ValueError: if `params.type` is none of 'classification', 'retinanet',
      'mask_rcnn' or 'shapemask'.
  """
  model_type = params.type

  if model_type == 'classification':
    return classification_model.ClassificationModel(params)
  if model_type == 'retinanet':
    return retinanet_model.RetinanetModel(params)
  if model_type == 'mask_rcnn':
    return maskrcnn_model.MaskrcnnModel(params)
  if model_type == 'shapemask':
    return shapemask_model.ShapeMaskModel(params)

  raise ValueError('Model %s is not supported.'% params.type)
class ModelFn(object):
  """Callable adapter exposing a detection model as a tf.Estimator model_fn."""

  def __init__(self, params):
    """Creates the wrapped model through `factory.model_generator`."""
    self._model = factory.model_generator(params)

  def __call__(self, features, labels, mode, params):
    """Dispatches to the wrapped model's train/evaluate/predict by `mode`.

    Args:
      features: the input image tensor and auxiliary information, such as
        `image_info` and `source_ids`. The image tensor has a shape of
        [batch_size, height, width, 3]. The height and width are fixed and
        equal.
      labels: the input labels in a dictionary. The labels include score
        targets and box targets which are dense label maps. The labels are
        generated from inputFn in dataloader/input_reader.py. Not forwarded in
        PREDICT mode.
      mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
      params: the dictionary defines hyperparameters of model. It is not read
        here; the model was already configured in the constructor.

    Returns:
      tpu_spec: the TPUEstimatorSpec to run training, evaluation, or
        prediction, as produced by the wrapped model.

    Raises:
      ValueError: if `mode` is not one of the three supported mode keys.
    """
    mode_keys = tf.estimator.ModeKeys

    if mode == mode_keys.TRAIN:
      return self._model.train(features, labels)
    if mode == mode_keys.EVAL:
      return self._model.evaluate(features, labels)
    if mode == mode_keys.PREDICT:
      return self._model.predict(features)

    raise ValueError('%s mode is not supported.' % mode)
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/serving/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tf_tpu_models/official/detection/utils/benchmark_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def compute_model_statistics(batch_size, json_file_path=None):
  """Logs, optionally saves, and returns size statistics of the default graph.

  Args:
    batch_size: value the profiled float-operation count is divided by to get
      a per-image figure.
    json_file_path: optional path; when set, the two statistics are written
      there as a JSON object.

  Returns:
    A tuple `(num_trainable_params, flops_per_image)`: the raw number of
    trainable parameters (not in millions) and the per-image FLOPS figure in
    billions.
  """
  variable_sizes = [
      np.prod(var.get_shape().as_list()) for var in tf.trainable_variables()
  ]
  num_trainable_params = np.sum(variable_sizes)
  num_trainable_params_million = num_trainable_params * 1. / 10**6
  logging.info('number of trainable params: %f M.',
               num_trainable_params_million)

  profile_options = tf.profiler.ProfileOptionBuilder.float_operation()
  profile_options['output'] = 'none'
  graph_profile = tf.profiler.profile(
      tf.get_default_graph(), options=profile_options)
  total_float_ops = graph_profile.total_float_ops
  # The final division by 2 matches the "multi-adds" wording of the log
  # message and JSON key below (presumably two float ops per multiply-add).
  flops_per_image = total_float_ops * 1. / batch_size / 10**9 / 2
  logging.info('number of FLOPS (multi-adds) per image: %f B.',
               flops_per_image)

  if json_file_path:
    with tf.gfile.Open(json_file_path, 'w') as fp:
      statistics = {
          'multi_add_flops_billion': float(flops_per_image),
          'num_trainable_params_million': float(num_trainable_params_million),
      }
      json.dump(statistics, fp)

  return num_trainable_params, flops_per_image
def coco_split_class_ids(split_name):
  """Looks up the COCO class ids that belong to a named split.

  Args:
    split_name: one of 'all', 'voc' or 'nonvoc'.

  Returns:
    A new python list of integer class ids: 20 ids for 'voc', 60 ids for
    'nonvoc', and an empty list for 'all' (presumably meaning "no
    restriction" -- confirm against the callers).

  Raises:
    ValueError: if `split_name` is not one of the three known names.
  """
  if split_name == 'all':
    return []

  if split_name == 'voc':
    voc_ids = [
        1, 2, 3, 4, 5, 6, 7, 9, 16, 17, 18, 19, 20, 21, 44, 62, 63, 64, 67, 72
    ]
    return voc_ids

  if split_name == 'nonvoc':
    nonvoc_ids = [
        8, 10, 11, 13, 14, 15, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36,
        37, 38, 39, 40, 41, 42, 43, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
        57, 58, 59, 60, 61, 65, 70, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84,
        85, 86, 87, 88, 89, 90
    ]
    return nonvoc_ids

  raise ValueError('Invalid split name {}!!!'.format(split_name))
_PARSERS = [
    'classification_parser',
    'retinanet_parser',
    'maskrcnn_parser',
    'shapemask_parser',
]

_BACKBONES = [
    'resnet',
    'spinenet',
    'spinenet_mbconv',
]

_MULTILEVEL_FEATURES = [
    'fpn',
    'nasfpn',
]


def filter_unused_blocks(params):
  """Returns a copy of `params` without the unselected architecture blocks.

  For each of the `parser`, `backbone` and `multilevel_features` entries found
  in `params.architecture`, every known block of that family which is present
  in `params` but is not the selected one is deleted from the result.

  Args:
    params: config object with an `architecture` block.

  Returns:
    A new `params_dict.ParamsDict` built from `params` with the unused blocks
    removed; `params` itself has no attributes deleted here.
  """
  filtered_params = params_dict.ParamsDict(params)

  # (key inside `architecture`, block names that key chooses between).
  block_families = (
      ('parser', _PARSERS),
      ('backbone', _BACKBONES),
      ('multilevel_features', _MULTILEVEL_FEATURES),
  )
  for selector, candidates in block_families:
    if selector not in params.architecture.as_dict().keys():
      continue
    for block_name in candidates:
      is_present = block_name in params.as_dict().keys()
      if is_present and block_name != getattr(params.architecture, selector):
        delattr(filtered_params, block_name)

  return filtered_params


def save_config(params, model_dir):
  """Writes the filtered config to `<model_dir>/params.yaml`.

  Does nothing when `model_dir` is empty. The directory is created when it
  does not exist yet.

  Args:
    params: config object to save; unused blocks are filtered out first.
    model_dir: target directory, or a falsy value to skip saving.
  """
  if not model_dir:
    return

  pruned_params = filter_unused_blocks(params)
  if not tf.gfile.Exists(model_dir):
    tf.gfile.MakeDirs(model_dir)

  yaml_path = os.path.join(model_dir, 'params.yaml')
  params_dict.save_params_dict_to_yaml(pruned_params, yaml_path)
def process_source_id(source_id):
  """Normalizes a source id tensor.

  A string tensor is parsed with `tf.string_to_number` and cast to int64;
  other dtypes are left as they are. If the resulting tensor has no elements,
  a scalar int64 -1 is returned in its place.

  Args:
    source_id: the source id tensor.

  Returns:
    The processed source id tensor.
  """
  numeric_id = source_id
  if numeric_id.dtype == tf.string:
    numeric_id = tf.cast(tf.string_to_number(numeric_id), tf.int64)

  def _placeholder_id():
    return tf.cast(tf.constant(-1), tf.int64)

  def _unchanged_id():
    return tf.identity(numeric_id)

  with tf.control_dependencies([numeric_id]):
    is_empty = tf.equal(tf.size(numeric_id), 0)
    return tf.cond(is_empty, _placeholder_id, _unchanged_id)


def pad_groundtruths_to_fixed_size(gt, n):
  """Pads the groundtruth entries of `gt` in place via `input_utils`.

  Args:
    gt: dictionary with 'boxes', 'is_crowds', 'areas' and 'classes' entries;
      each entry is replaced by its padded version.
    n: size passed to `input_utils.pad_to_fixed_size` (the fixed size of the
      first dimension, going by the original description).

  Returns:
    The same `gt` dictionary that was passed in.
  """
  # Third argument of pad_to_fixed_size per key -- presumably the fill value;
  # confirm against input_utils.
  per_key_argument = (
      ('boxes', -1),
      ('is_crowds', 0),
      ('areas', -1),
      ('classes', -1),
  )
  for key, argument in per_key_argument:
    gt[key] = input_utils.pad_to_fixed_size(gt[key], n, argument)
  return gt
def root_dir(path: str = ''):
    """Resolve `path` against the directory four levels above this file's.

    Bucket paths (see `is_bucket_path`) and absolute paths are returned
    unchanged. An empty `path` yields the base directory itself.
    """
    if path:
        if is_bucket_path(path) or os.path.isabs(path):
            # Nothing to resolve: the caller already gave a full location.
            return path

    here = os.path.dirname(__file__)
    base_dir = os.path.abspath(os.path.join(here, '..', '..', '..', '..'))
    return os.path.join(base_dir, path) if path else base_dir


def is_bucket_path(path: str):
    """Tell whether `path` starts with the `gs://` or `s3://` scheme."""
    return path.startswith(('gs://', 's3://'))
def generate_coco_annotations_from_images(images_dir: str):
    """Build a COCO-style dict with one placeholder annotation per JPEG.

    Every `*.jpg` file directly inside `images_dir` (sorted by path) gets an
    image record with its real width/height and a single dummy annotation:
    category 1, area 1, bbox `[0, 0, 1, 1]` and no attributes. Ids are
    1-based positions in the sorted listing.

    Args:
        images_dir: directory that is searched for `*.jpg` files.

    Returns:
        A dict with `info`, `images`, `categories` and `annotations` keys.
    """
    images = []
    annotations = []
    pattern = os.path.join(images_dir, '*.jpg')
    for i, file_path in enumerate(sorted(glob(pattern))):
        # Only the dimensions are needed, so close the file handle right away
        # instead of relying on garbage collection while walking a directory
        # that may hold many thousands of images.
        with PIL.Image.open(file_path) as image:
            width, height = image.width, image.height

        images.append({
            'id': i + 1,
            'width': width,
            'height': height,
            'file_name': os.path.basename(file_path),
        })

        annotations.append({
            'id': i + 1,
            'image_id': i + 1,
            'category_id': 1,
            # NOTE(review): `[1, 1]` is passed straight to the project helper;
            # presumably a minimal one-pixel mask -- confirm in imat2020.mask.
            'segmentation': convert_to_coco_rle([1, 1], height, width),
            'area': 1,
            'bbox': [0, 0, 1, 1],
            'iscrowd': 0,
            'attribute_ids': [],
        })

        # Progress output every 1000 images (including the very first one).
        if i % 1000 == 0:
            print(i)

    return {
        'info': {
            'num_attributes': 294,
        },
        'images': images,
        'categories': [{
            'id': 1,
            'name': '',
            'supercategory': '',
        }],
        'annotations': annotations,
    }


if __name__ == '__main__':
    # Usage: <script> IMAGES_DIR OUTPUT_JSON_PATH
    images_dir = sys.argv[1]
    output_path = sys.argv[2]

    res = generate_coco_annotations_from_images(images_dir)
    with open(output_path, 'w') as f:
        json.dump(res, f)
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tf_tpu_models/official/efficientnet/condconv/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tf_tpu_models/official/efficientnet/edgetpu/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tf_tpu_models/official/efficientnet/g3doc/condconv-layer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apls777/kaggle-imaterialist2020-model/6a653615fa48cbeaf34adda7c0545a49739b3189/tf_tpu_models/official/efficientnet/g3doc/condconv-layer.png -------------------------------------------------------------------------------- /tf_tpu_models/official/efficientnet/g3doc/efficientnet-edgetpu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apls777/kaggle-imaterialist2020-model/6a653615fa48cbeaf34adda7c0545a49739b3189/tf_tpu_models/official/efficientnet/g3doc/efficientnet-edgetpu.png -------------------------------------------------------------------------------- /tf_tpu_models/official/efficientnet/g3doc/flops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apls777/kaggle-imaterialist2020-model/6a653615fa48cbeaf34adda7c0545a49739b3189/tf_tpu_models/official/efficientnet/g3doc/flops.png -------------------------------------------------------------------------------- /tf_tpu_models/official/efficientnet/g3doc/lite-float-gpu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apls777/kaggle-imaterialist2020-model/6a653615fa48cbeaf34adda7c0545a49739b3189/tf_tpu_models/official/efficientnet/g3doc/lite-float-gpu.png -------------------------------------------------------------------------------- /tf_tpu_models/official/efficientnet/g3doc/lite-quant-cpu.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/apls777/kaggle-imaterialist2020-model/6a653615fa48cbeaf34adda7c0545a49739b3189/tf_tpu_models/official/efficientnet/g3doc/lite-quant-cpu.png -------------------------------------------------------------------------------- /tf_tpu_models/official/efficientnet/g3doc/lite-quant-size.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apls777/kaggle-imaterialist2020-model/6a653615fa48cbeaf34adda7c0545a49739b3189/tf_tpu_models/official/efficientnet/g3doc/lite-quant-size.png -------------------------------------------------------------------------------- /tf_tpu_models/official/efficientnet/g3doc/params.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apls777/kaggle-imaterialist2020-model/6a653615fa48cbeaf34adda7c0545a49739b3189/tf_tpu_models/official/efficientnet/g3doc/params.png -------------------------------------------------------------------------------- /tf_tpu_models/official/efficientnet/lite/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class EfficientnetBuilderTest(tf.test.TestCase):
  """Checks the trainable-parameter counts of the EfficientNet-Lite models."""

  def _test_model_params(self,
                         model_name,
                         input_size,
                         expected_params,
                         override_params=None,
                         features_only=False,
                         pooled_features_only=False):
    """Builds `model_name` on an all-zero image and checks its size.

    Args:
      model_name: model name passed to `efficientnet_lite_builder.build_model`.
      input_size: height and width of the single zero image fed to the model.
      expected_params: expected total number of trainable parameters.
      override_params: forwarded to `build_model`.
      features_only: forwarded to `build_model`.
      pooled_features_only: forwarded to `build_model`.
    """
    dummy_images = tf.zeros((1, input_size, input_size, 3), dtype=tf.float32)
    efficientnet_lite_builder.build_model(
        dummy_images,
        model_name=model_name,
        override_params=override_params,
        training=True,
        features_only=features_only,
        pooled_features_only=pooled_features_only)

    variable_sizes = [np.prod(v.shape) for v in tf.trainable_variables()]
    total_params = np.sum(variable_sizes)
    self.assertEqual(total_params, expected_params)

  def test_efficientnet_b0(self):
    """Parameter count of efficientnet-lite0 at 224x224."""
    self._test_model_params(
        model_name='efficientnet-lite0', input_size=224,
        expected_params=4652008)

  def test_efficientnet_b1(self):
    """Parameter count of efficientnet-lite1 at 240x240."""
    self._test_model_params(
        model_name='efficientnet-lite1', input_size=240,
        expected_params=5416680)

  def test_efficientnet_b2(self):
    """Parameter count of efficientnet-lite2 at 260x260."""
    self._test_model_params(
        model_name='efficientnet-lite2', input_size=260,
        expected_params=6092072)

  def test_efficientnet_b3(self):
    """Parameter count of efficientnet-lite3 at 280x280."""
    self._test_model_params(
        model_name='efficientnet-lite3', input_size=280,
        expected_params=8197096)

  def test_efficientnet_b4(self):
    """Parameter count of efficientnet-lite4 at 300x300."""
    self._test_model_params(
        model_name='efficientnet-lite4', input_size=300,
        expected_params=13006568)


if __name__ == '__main__':
  tf.test.main()
def get_model_builder(model_name):
  """Get the model_builder module for a given model name.

  The more specific prefixes are tested first because every supported name
  also starts with the generic 'efficientnet-' prefix.

  Args:
    model_name: model name such as 'efficientnet-lite0'.

  Returns:
    The builder module whose name prefix matches `model_name`.

  Raises:
    ValueError: if `model_name` matches none of the supported prefixes.
  """
  if model_name.startswith('efficientnet-lite'):
    return efficientnet_lite_builder
  elif model_name.startswith('efficientnet-edgetpu-'):
    return efficientnet_edgetpu_builder
  elif model_name.startswith('efficientnet-condconv-'):
    return efficientnet_condconv_builder
  elif model_name.startswith('efficientnet-tpu-'):
    return efficientnet_tpu_builder
  elif model_name.startswith('efficientnet-'):
    return efficientnet_builder
  else:
    # Same wording as get_model_input_size: the previous message lacked a
    # space between the two literals ("...orefficientnet-condconv*") and did
    # not mention the supported efficientnet-tpu-* family.
    raise ValueError(
        'Model must be either efficientnet-b* or efficientnet-tpu-b* or '
        'efficientnet-edgetpu* or efficientnet-condconv*, efficientnet-lite*')


def get_model_input_size(model_name):
  """Get model input size for a given model name.

  Args:
    model_name: model name such as 'efficientnet-lite0'.

  Returns:
    The third element of the parameter tuple returned by the matching
    builder's `*_params(model_name)` function.

  Raises:
    ValueError: if `model_name` matches none of the supported prefixes.
  """
  # NOTE(review): the last two prefixes ('efficientnet-tpu', 'efficientnet')
  # have no trailing hyphen, unlike get_model_builder; kept as-is so existing
  # callers see no change -- confirm whether the difference is intended.
  if model_name.startswith('efficientnet-lite'):
    _, _, image_size, _ = (
        efficientnet_lite_builder.efficientnet_lite_params(model_name))
  elif model_name.startswith('efficientnet-edgetpu-'):
    _, _, image_size, _ = (
        efficientnet_edgetpu_builder.efficientnet_edgetpu_params(model_name))
  elif model_name.startswith('efficientnet-condconv-'):
    # The condconv parameter tuple has five elements rather than four.
    _, _, image_size, _, _ = (
        efficientnet_condconv_builder.efficientnet_condconv_params(model_name))
  elif model_name.startswith('efficientnet-tpu'):
    _, _, image_size, _ = efficientnet_tpu_builder.efficientnet_tpu_params(
        model_name)
  elif model_name.startswith('efficientnet'):
    _, _, image_size, _ = efficientnet_builder.efficientnet_params(model_name)
  else:
    raise ValueError(
        'Model must be either efficientnet-b* or efficientnet-tpu-b* or efficientnet-edgetpu* or '
        'efficientnet-condconv*, efficientnet-lite*')
  return image_size
We can install them now: 8 | 9 | ``` 10 | sudo apt-get install -y python-tk && \ 11 | pip install --user Cython matplotlib opencv-python-headless pyyaml Pillow && \ 12 | pip install --user 'git+https://github.com/cocodataset/cocoapi#egg=pycocotools&subdirectory=PythonAPI' 13 | ``` 14 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mask_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apls777/kaggle-imaterialist2020-model/6a653615fa48cbeaf34adda7c0545a49739b3189/tf_tpu_models/official/mask_rcnn/__init__.py -------------------------------------------------------------------------------- /tf_tpu_models/official/mask_rcnn/configs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mask_rcnn/configs/cloud/v2-128.yaml: -------------------------------------------------------------------------------- 1 | # ---------- MODEL PARAMETERS ------------- 2 | backbone: 'resnet50' 3 | num_cores: 128 4 | # ---------- TRAINING PARAMETERS ---------- 5 | train_batch_size: 512 6 | init_learning_rate: 0.24 7 | warmup_learning_rate: 0.0067 8 | warmup_steps: 1600 9 | learning_rate_levels: [0.024, 0.0024, 0.00024] 10 | learning_rate_steps: [6000, 8000, 10000] 11 | total_steps: 11250 12 | global_gradient_clip_ratio: 0.02 13 | num_batch_norm_group: 1 14 | momentum: 0.95 15 | precision: 'bfloat16' 16 | # ---------- EVAL PARAMETERS -------------- 17 | eval_batch_size: 8 18 | eval_samples: 5000 19 | num_steps_per_eval: 2500 20 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mask_rcnn/configs/cloud/v2-32.yaml: -------------------------------------------------------------------------------- 1 | # ---------- MODEL PARAMETERS ------------- 2 | backbone: 'resnet50' 3 | num_cores: 32 4 | # ---------- TRAINING PARAMETERS ---------- 5 | train_batch_size: 128 6 | init_learning_rate: 0.16 7 | warmup_learning_rate: 0.0067 8 | warmup_steps: 1000 9 | learning_rate_levels: [0.016, 0.0016] 10 | learning_rate_steps: [7500, 10000] 11 | total_steps: 11250 12 | global_gradient_clip_ratio: 0.02 13 | num_batch_norm_group: 1 14 | precision: 'bfloat16' 15 | # ---------- EVAL PARAMETERS -------------- 16 | eval_batch_size: 8 17 | eval_samples: 5000 18 | num_steps_per_eval: 2500 19 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mask_rcnn/configs/cloud/v2-8.yaml: -------------------------------------------------------------------------------- 1 | # ---------- MODEL PARAMETERS 
------------- 2 | backbone: 'resnet50' 3 | num_cores: 8 4 | # ---------- TRAINING PARAMETERS ---------- 5 | train_batch_size: 32 6 | init_learning_rate: 0.04 7 | warmup_learning_rate: 0.0067 8 | warmup_steps: 500 9 | learning_rate_levels: [0.004, 0.0004] 10 | learning_rate_steps: [30000, 40000] 11 | total_steps: 45000 12 | precision: 'bfloat16' 13 | # ---------- EVAL PARAMETERS -------------- 14 | eval_batch_size: 8 15 | eval_samples: 5000 16 | num_steps_per_eval: 2500 17 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mask_rcnn/configs/cloud/v3-128.yaml: -------------------------------------------------------------------------------- 1 | # ---------- MODEL PARAMETERS ------------- 2 | backbone: 'resnet50' 3 | num_cores: 128 4 | # ---------- TRAINING PARAMETERS ---------- 5 | train_batch_size: 512 6 | init_learning_rate: 0.24 7 | warmup_learning_rate: 0.0067 8 | warmup_steps: 1600 9 | learning_rate_levels: [0.024, 0.0024, 0.00024] 10 | learning_rate_steps: [6000, 8000, 10000] 11 | total_steps: 11250 12 | global_gradient_clip_ratio: 0.02 13 | num_batch_norm_group: 1 14 | momentum: 0.95 15 | precision: 'bfloat16' 16 | # ---------- EVAL PARAMETERS -------------- 17 | eval_batch_size: 8 18 | eval_samples: 5000 19 | num_steps_per_eval: 2500 20 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mask_rcnn/configs/cloud/v3-32.yaml: -------------------------------------------------------------------------------- 1 | # ---------- MODEL PARAMETERS ------------- 2 | backbone: 'resnet50' 3 | num_cores: 32 4 | # ---------- TRAINING PARAMETERS ---------- 5 | train_batch_size: 128 6 | init_learning_rate: 0.16 7 | warmup_learning_rate: 0.0067 8 | warmup_steps: 1000 9 | learning_rate_levels: [0.016, 0.0016] 10 | learning_rate_steps: [7500, 10000] 11 | total_steps: 11250 12 | global_gradient_clip_ratio: 0.02 13 | num_batch_norm_group: 1 14 | precision: 'bfloat16' 15 | # 
---------- EVAL PARAMETERS -------------- 16 | eval_batch_size: 8 17 | eval_samples: 5000 18 | num_steps_per_eval: 2500 19 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mask_rcnn/configs/cloud/v3-8.yaml: -------------------------------------------------------------------------------- 1 | # ---------- MODEL PARAMETERS ------------- 2 | backbone: 'resnet50' 3 | num_cores: 8 4 | # ---------- TRAINING PARAMETERS ---------- 5 | train_batch_size: 64 6 | init_learning_rate: 0.08 7 | warmup_learning_rate: 0.0067 8 | warmup_steps: 500 9 | learning_rate_levels: [0.008, 0.0008] 10 | learning_rate_steps: [15000, 20000] 11 | total_steps: 22500 12 | precision: 'bfloat16' 13 | # ---------- EVAL PARAMETERS -------------- 14 | eval_batch_size: 8 15 | eval_samples: 5000 16 | num_steps_per_eval: 2500 17 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mask_rcnn/learning_rates.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def step_learning_rate_with_linear_warmup(global_step,
                                          init_learning_rate,
                                          warmup_learning_rate,
                                          warmup_steps,
                                          learning_rate_levels,
                                          learning_rate_steps):
  """Creates the step learning rate tensor with linear warmup.

  While `global_step < warmup_steps` the rate ramps linearly from
  `warmup_learning_rate` towards `init_learning_rate`. After that it is
  `init_learning_rate` until the step reaches an entry of
  `learning_rate_steps`, from which point the paired entry of
  `learning_rate_levels` is used. Pairs are applied in order, so a later pair
  overrides an earlier one once its start step has been reached.

  Args:
    global_step: The current training step tensor.
    init_learning_rate: Rate reached at the end of the warmup.
    warmup_learning_rate: Rate at step 0.
    warmup_steps: Number of steps over which the warmup ramp is applied.
    learning_rate_levels: Rates to switch to, paired with
      `learning_rate_steps`.
    learning_rate_steps: Steps at which the paired level takes effect.

  Returns:
    The learning rate tensor.
  """
  step_as_float = tf.cast(global_step, dtype=tf.float32)
  ramp = (warmup_learning_rate +
          step_as_float / warmup_steps *
          (init_learning_rate - warmup_learning_rate))
  in_warmup = global_step < warmup_steps
  schedule = tf.where(in_warmup, ramp, init_learning_rate)

  # Apply the (level, boundary) pairs in order so later pairs take precedence.
  for level, boundary in zip(learning_rate_levels, learning_rate_steps):
    reached = global_step >= boundary
    schedule = tf.where(reached, level, schedule)
  return schedule


def cosine_learning_rate_with_linear_warmup(global_step,
                                            init_learning_rate,
                                            warmup_learning_rate,
                                            warmup_steps,
                                            total_steps):
  """Creates the cosine learning rate tensor with linear warmup.

  While the step is below `warmup_steps` the rate ramps linearly from
  `warmup_learning_rate` towards `init_learning_rate`. Afterwards it follows
  `init_learning_rate * (cos(pi * (step - warmup_steps) /
  (total_steps - warmup_steps)) + 1) / 2`.

  Args:
    global_step: The current training step tensor.
    init_learning_rate: Rate reached at the end of the warmup.
    warmup_learning_rate: Rate at step 0.
    warmup_steps: Number of steps over which the warmup ramp is applied.
    total_steps: Step count used as the end of the cosine curve.

  Returns:
    The learning rate tensor.
  """
  step_as_float = tf.cast(global_step, dtype=tf.float32)
  ramp = (warmup_learning_rate + step_as_float / warmup_steps *
          (init_learning_rate - warmup_learning_rate))
  # Keep the multiplication by pi before the division, as in the original
  # expression, so the floating-point result is unchanged.
  phase = (
      np.pi * (step_as_float - warmup_steps) / (total_steps - warmup_steps))
  cosine_decay = init_learning_rate * (tf.cos(phase) + 1.0) / 2.0
  return tf.where(step_as_float < warmup_steps, ramp, cosine_decay)
Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mnasnet/configs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mnasnet/configs/cloud/gpu.yaml: -------------------------------------------------------------------------------- 1 | use_tpu: False 2 | train_steps: 3503192 # 1281167 * 350 / train_batch_size 3 | train_batch_size: 128 # 1024 / 8 4 | eval_batch_size: 128 # 1024 / 8 5 | model_name: 'mnasnet-a1' 6 | dropout_rate: null 7 | depth_multiplier: null 8 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mnasnet/configs/cloud/v2-32.yaml: -------------------------------------------------------------------------------- 1 | train_steps: 109474 2 | train_batch_size: 4096 3 | eval_batch_size: 256 4 | iterations_per_loop: 100 5 | skip_host_call: false 6 | model_name: 'mnasnet-a1' 7 | dropout_rate: null 8 | depth_multiplier: null 9 | use_keras: true 10 | precision: 'float32' 11 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mnasnet/configs/cloud/v2-8.yaml: -------------------------------------------------------------------------------- 1 | train_steps: 437899 2 | train_batch_size: 1024 3 | eval_batch_size: 1024 4 | iterations_per_loop: 1251 5 | skip_host_call: True 6 | model_name: 'mnasnet-a1' 7 | dropout_rate: null 8 | depth_multiplier: null 9 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mnasnet/configs/cloud/v3-32.yaml: -------------------------------------------------------------------------------- 1 | train_steps: 109474 2 | train_batch_size: 4096 3 | eval_batch_size: 256 4 | iterations_per_loop: 100 5 | skip_host_call: false 6 | model_name: 'mnasnet-a1' 7 | dropout_rate: null 8 | depth_multiplier: null 9 | use_keras: true 10 | precision: 'float32' 11 | 
-------------------------------------------------------------------------------- /tf_tpu_models/official/mnasnet/configs/cloud/v3-8.yaml: -------------------------------------------------------------------------------- 1 | train_steps: 437899 2 | train_batch_size: 1024 3 | eval_batch_size: 1024 4 | iterations_per_loop: 1251 5 | skip_host_call: True 6 | model_name: 'mnasnet-a1' 7 | dropout_rate: null 8 | depth_multiplier: null 9 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mnasnet/configs/mnasnet_config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
# Default MnasNet configuration, keyed by parameter name. The YAML files under
# configs/cloud/ set a subset of these same keys (for example train_steps,
# train_batch_size, eval_batch_size, model_name, dropout_rate).
MNASNET_CFG = {
    'use_tpu': True,
    'train_batch_size': 1024,
    'eval_batch_size': 1024,
    'num_train_images': 1281167,
    'num_eval_images': 50000,
    'iterations_per_loop': 1251,
    'num_parallel_calls': 64,
    'num_label_classes': 1000,
    'transpose_input': True,
    'base_learning_rate': 0.016,
    'momentum': 0.9,
    'moving_average_decay': 0.9999,
    'weight_decay': 0.00001,
    'label_smoothing': 0.1,
    'dropout_rate': 0.2,
    'use_cache': False,
    'use_async_checkpointing': False,
    'precision': 'float32',
    'use_keras': True,
    'skip_host_call': False,
    'input_image_size': 224,
    # NOTE(review): presumably 1281167 * 350 / 1024 (about 437898.8) rounded
    # down, following the formula noted in configs/cloud/gpu.yaml. The
    # v2-8.yaml and v3-8.yaml configs use 437899 instead - confirm which value
    # is intended.
    'train_steps': 437898,
    'model_name': 'mnasnet-a1',
    'data_format': 'channels_last',
    'batch_norm_momentum': None,
    'batch_norm_epsilon': None,
    'depth_multiplier': None,
    'depth_divisor': None,
    'min_depth': 0,
}

# Empty: this module declares no restriction entries.
MNASNET_RESTRICTIONS = [
]
/tf_tpu_models/official/mnasnet/mixnet/README.md: -------------------------------------------------------------------------------- 1 | # MixNet 2 | 3 | [1] Mingxing Tan and Quoc V. Le. MixConv: Mixed Depthwise Convolutional Kernels. 4 | BMVC 2019. https://arxiv.org/abs/1907.09595 5 | 6 | ## 1. About MixNet 7 | 8 | MixNets are a family of mobile-size image classification models equipped with MixConv, a new type of mixed depthwise convolution. They are developed based on [AutoML MNAS Mobile framework](https://ai.googleblog.com/2018/08/mnasnet-towards-automating-design-of.html), with an extended search space including MixConv. Currently, MixNets achieve better accuracy and efficiency than previous mobile models. In particular, our MixNet-L achieves a new state-of-the-art 78.9% ImageNet top-1 accuracy under the typical mobile FLOPS (<600M) constraint: 9 | 10 | 11 | 12 | 15 | 16 |
13 | 14 |
17 | 18 | 19 | 20 | ## 2. Using Pretrained Checkpoints 21 | 22 | We have provided a list of MixNet checkpoints for [MixNet-S](https://storage.googleapis.com/cloud-tpu-checkpoints/mixnet/mixnet-s.tar.gz), [MixNet-M](https://storage.googleapis.com/cloud-tpu-checkpoints/mixnet/mixnet-m.tar.gz), and [MixNet-L](https://storage.googleapis.com/cloud-tpu-checkpoints/mixnet/mixnet-l.tar.gz). A quick way to use these checkpoints is to run: 23 | 24 | $ export MODEL=mixnet-s 25 | $ wget https://storage.googleapis.com/cloud-tpu-checkpoints/mixnet/${MODEL}.tar.gz 26 | $ tar zxf ${MODEL}.tar.gz 27 | $ wget https://upload.wikimedia.org/wikipedia/commons/f/fe/Giant_Panda_in_Beijing_Zoo_1.JPG -O panda.jpg 28 | $ wget https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/eval_data/labels_map.txt 29 | $ python eval_ckpt_main.py --model_name=$MODEL --ckpt_dir=$MODEL --example_img=panda.jpg --labels_map_file=labels_map.txt 30 | 31 | Please refer to the following colab for more instructions on how to obtain and use those checkpoints. 32 | 33 | * [`mixnet_eval_example.ipynb`](mixnet_eval_example.ipynb): A colab example to load 34 | pretrained checkpoint files and use the restored model to classify images. 35 | 36 | 37 | ## 3. Training and Evaluating MixNets. 38 | 39 | MixNets are trained using the same hyperparameters as MnasNet, except for specifying a different model_name=mixnet-s/m/l. 40 | 41 | For more instructions, please refer to the MnasNet tutorial: https://cloud.google.com/tpu/docs/tutorials/mnasnet 42 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mnasnet/mixnet/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mnasnet/mixnet/g3doc/mixnet-flops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apls777/kaggle-imaterialist2020-model/6a653615fa48cbeaf34adda7c0545a49739b3189/tf_tpu_models/official/mnasnet/mixnet/g3doc/mixnet-flops.png -------------------------------------------------------------------------------- /tf_tpu_models/official/mnasnet/mnas_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
# Import common utils from efficientnet.
# Each name below is a plain alias of the attribute with the same name on
# `efficientnet_utils`, re-exported under this module's namespace. No wrapping
# or behavior change is applied here.
archive_ckpt = efficientnet_utils.archive_ckpt
build_learning_rate = efficientnet_utils.build_learning_rate
build_optimizer = efficientnet_utils.build_optimizer
drop_connect = efficientnet_utils.drop_connect
get_ema_vars = efficientnet_utils.get_ema_vars
DepthwiseConv2D = efficientnet_utils.DepthwiseConv2D
EvalCkptDriver = efficientnet_utils.EvalCkptDriver
7 | 8 | ## Running the model 9 | 10 | Assuming you have a version of ImageNet converted to the tfrecord format located 11 | at `gs://my-cloud-bucket/data/imagenet/`, you can run this model with the 12 | following command: 13 | 14 | ``` 15 | python mobilenet.py\ 16 | --alsologtostderr\ 17 | --master=$TPU_WORKER\ 18 | --data_dir=gs://my-cloud-bucket/data/imagenet\ 19 | --model_dir=gs://my-cloud-bucket/models/mobilenet/v0\ 20 | --num_shards=8\ 21 | --batch_size=1024\ 22 | --use_tpu=1\ 23 | ``` 24 | 25 | Note that the mobilenet network requires a large number of epochs to converge 26 | completely. 27 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mobilenet/configs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mobilenet/configs/cloud/v2-128.yaml: -------------------------------------------------------------------------------- 1 | # DISCLAIMER: These parameters have not been optimized 2 | train_batch_size: 16384 3 | train_steps: 500000 4 | eval_batch_size: 1024 5 | iterations_per_loop: 100 6 | num_cores: 128 7 | train_steps_per_eval: 2000 8 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mobilenet/configs/cloud/v2-256.yaml: -------------------------------------------------------------------------------- 1 | # DISCLAIMER: These parameters have not been optimized 2 | train_batch_size: 32768 3 | train_steps: 250000 4 | eval_batch_size: 1024 5 | iterations_per_loop: 100 6 | num_cores: 256 7 | train_steps_per_eval: 2000 8 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mobilenet/configs/cloud/v2-32.yaml: -------------------------------------------------------------------------------- 1 | # DISCLAIMER: These parameters have not been optimized 2 | train_batch_size: 4096 3 | train_steps: 2000000 4 | eval_batch_size: 1024 5 | iterations_per_loop: 100 6 | num_cores: 32 7 | train_steps_per_eval: 2000 8 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mobilenet/configs/cloud/v2-512.yaml: -------------------------------------------------------------------------------- 1 | # DISCLAIMER: These parameters have not been optimized 2 | train_batch_size: 65536 3 | train_steps: 125000 4 | eval_batch_size: 1024 5 | iterations_per_loop: 100 6 | num_cores: 512 7 | train_steps_per_eval: 2000 8 | -------------------------------------------------------------------------------- 
/tf_tpu_models/official/mobilenet/configs/cloud/v2-8.yaml: -------------------------------------------------------------------------------- 1 | train_batch_size: 1024 2 | train_steps: 8000000 3 | eval_batch_size: 1024 4 | iterations_per_loop: 100 5 | num_cores: 8 6 | train_steps_per_eval: 2000 7 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mobilenet/configs/cloud/v3-1024.yaml: -------------------------------------------------------------------------------- 1 | # DISCLAIMER: These parameters have not been optimized 2 | train_batch_size: 131072 3 | train_steps: 62500 4 | eval_batch_size: 1024 5 | iterations_per_loop: 100 6 | num_cores: 1024 7 | train_steps_per_eval: 2000 8 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mobilenet/configs/cloud/v3-128.yaml: -------------------------------------------------------------------------------- 1 | # DISCLAIMER: These parameters have not been optimized 2 | train_batch_size: 16384 3 | train_steps: 500000 4 | eval_batch_size: 1024 5 | iterations_per_loop: 100 6 | num_cores: 128 7 | train_steps_per_eval: 2000 8 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mobilenet/configs/cloud/v3-2048.yaml: -------------------------------------------------------------------------------- 1 | # DISCLAIMER: These parameters have not been optimized 2 | train_batch_size: 262144 3 | train_steps: 31250 4 | eval_batch_size: 1024 5 | iterations_per_loop: 100 6 | num_cores: 2048 7 | train_steps_per_eval: 2000 8 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mobilenet/configs/cloud/v3-256.yaml: -------------------------------------------------------------------------------- 1 | # DISCLAIMER: These parameters have not been optimized 2 | train_batch_size: 32768 3 | train_steps: 250000 4 | eval_batch_size: 1024 5 | 
iterations_per_loop: 100 6 | num_cores: 256 7 | train_steps_per_eval: 2000 8 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mobilenet/configs/cloud/v3-32.yaml: -------------------------------------------------------------------------------- 1 | # DISCLAIMER: These parameters have not been optimized 2 | train_batch_size: 4096 3 | train_steps: 2000000 4 | eval_batch_size: 1024 5 | iterations_per_loop: 100 6 | num_cores: 32 7 | train_steps_per_eval: 2000 8 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mobilenet/configs/cloud/v3-512.yaml: -------------------------------------------------------------------------------- 1 | # DISCLAIMER: These parameters have not been optimized 2 | train_batch_size: 65536 3 | train_steps: 125000 4 | eval_batch_size: 1024 5 | iterations_per_loop: 100 6 | num_cores: 512 7 | train_steps_per_eval: 2000 8 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mobilenet/configs/cloud/v3-64.yaml: -------------------------------------------------------------------------------- 1 | # DISCLAIMER: These parameters have not been optimized 2 | train_batch_size: 8192 3 | train_steps: 1000000 4 | eval_batch_size: 1024 5 | iterations_per_loop: 100 6 | num_cores: 64 7 | train_steps_per_eval: 2000 8 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mobilenet/configs/cloud/v3-8.yaml: -------------------------------------------------------------------------------- 1 | train_batch_size: 1024 2 | train_steps: 8000000 3 | eval_batch_size: 1024 4 | iterations_per_loop: 100 5 | num_cores: 8 6 | train_steps_per_eval: 2000 7 | -------------------------------------------------------------------------------- /tf_tpu_models/official/mobilenet/configs/mobilenet_config.py: 
# Default MobileNet configuration, keyed by parameter name. The YAML files
# under configs/cloud/ set train_batch_size, train_steps, eval_batch_size,
# iterations_per_loop, num_cores and train_steps_per_eval; the defaults for
# those six keys here equal the values in v2-8.yaml and v3-8.yaml.
MOBILENET_CFG = {
    'use_tpu': True,
    'train_batch_size': 1024,
    'train_steps': 8000000,
    'eval_batch_size': 1024,
    'iterations_per_loop': 100,
    'num_cores': 8,
    'eval_total_size': 0,
    'train_steps_per_eval': 2000,
    'min_eval_interval': 180,
    'learning_rate': 0.165,
    'depth_multiplier': 1.0,
    'optimizer': 'RMS',
    'num_classes': 1001,
    'use_fused_batchnorm': True,
    'moving_average': True,
    'learning_rate_decay': 0.94,
    'learning_rate_decay_epochs': 3,
    'use_logits': True,
    'transpose_enabled': False,
    'serving_image_size': 224,
    'post_quantize': True,
    'num_train_images': 1281167,
    'num_eval_images': 50000,
}

# Empty: this module declares no restriction entries.
MOBILENET_RESTRICTIONS = [
]
-------------------------------------------------------------------------------- /tf_tpu_models/official/resnet/benchmark/README.md: -------------------------------------------------------------------------------- 1 | # ResNet-50 Benchmark on Cloud TPU pods 2 | 3 | Submission for [DAWNBench](https://dawn.cs.stanford.edu/benchmark/index.html). 4 | 5 | This subdirectory contains the code needed to replicate the DAWNBench results 6 | for ResNet-50 on a Cloud TPU pod. The model used here is identical to the model 7 | in the parent directory. The only difference is that `resnet_benchmark.py` will 8 | generate checkpoints at every epoch and evaluate in a separate job. 9 | 10 | ## Instructions for training on single Cloud TPU 11 | 12 | 1. Add the top-level `/models` folder to the Python path with the command 13 | 14 | ``` 15 | export PYTHONPATH="$PYTHONPATH:/path/to/models" 16 | ``` 17 | 18 | 1. Train the model (roughly 90 epochs, 1 checkpoint per epoch): 19 | ``` 20 | python resnet_benchmark.py \ 21 | --tpu=[TPU NAME] \ 22 | --mode=train \ 23 | --data_dir=[PATH TO DATA] \ 24 | --model_dir=[PATH TO MODEL] \ 25 | --train_batch_size=1024 \ 26 | --train_steps=112590 \ 27 | --iterations_per_loop=1251 28 | ``` 29 | 30 | 1. Evaluate the model (run after train completes): 31 | ``` 32 | python resnet_benchmark.py \ 33 | --tpu=[TPU NAME] \ 34 | --mode=eval \ 35 | --data_dir=[PATH TO DATA] \ 36 | --model_dir=[PATH TO MODEL] 37 | ``` 38 | 39 | ## Instructions for training on a half TPU Pod 40 | 41 | Not yet available due to TPU Pod availability in Cloud. 
42 | 43 | -------------------------------------------------------------------------------- /tf_tpu_models/official/resnet/benchmark/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apls777/kaggle-imaterialist2020-model/6a653615fa48cbeaf34adda7c0545a49739b3189/tf_tpu_models/official/resnet/benchmark/__init__.py -------------------------------------------------------------------------------- /tf_tpu_models/official/resnet/configs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tf_tpu_models/official/resnet/configs/cloud/randaugment-32.yaml: -------------------------------------------------------------------------------- 1 | resnet_depth: 50 2 | train_steps: 56304 3 | train_batch_size: 4096 4 | eval_batch_size: 1024 5 | iterations_per_loop: 1000 6 | num_cores: 32 7 | skip_host_call: True 8 | augment_name: 'randaugment' 9 | randaug_num_layers: 2 10 | randaug_magnitude: 9 11 | -------------------------------------------------------------------------------- /tf_tpu_models/official/resnet/configs/cloud/randaugment-8.yaml: -------------------------------------------------------------------------------- 1 | resnet_depth: 50 2 | train_steps: 225216 3 | train_batch_size: 1024 4 | eval_batch_size: 1024 5 | iterations_per_loop: 1000 6 | num_cores: 8 7 | skip_host_call: True 8 | augment_name: 'randaugment' 9 | randaug_num_layers: 2 10 | randaug_magnitude: 9 11 | -------------------------------------------------------------------------------- /tf_tpu_models/official/resnet/configs/cloud/v2-128.yaml: -------------------------------------------------------------------------------- 1 | train_steps: 7116 2 | train_batch_size: 16384 3 | eval_batch_size: 1024 4 | iterations_per_loop: 7116 5 | skip_host_call: True 6 | num_cores: 128 7 | enable_lars: True 8 | label_smoothing: 0.1 9 | -------------------------------------------------------------------------------- /tf_tpu_models/official/resnet/configs/cloud/v2-256.yaml: -------------------------------------------------------------------------------- 1 | train_steps: 3558 2 | train_batch_size: 32768 3 | eval_batch_size: 1024 4 | iterations_per_loop: 3558 5 | skip_host_call: True 6 | num_cores: 256 7 | enable_lars: True 8 | label_smoothing: 0.1 9 | -------------------------------------------------------------------------------- 
/tf_tpu_models/official/resnet/configs/cloud/v2-32.yaml: -------------------------------------------------------------------------------- 1 | train_steps: 28464 2 | train_batch_size: 4096 3 | eval_batch_size: 1024 4 | iterations_per_loop: 28464 5 | skip_host_call: True 6 | num_cores: 32 7 | -------------------------------------------------------------------------------- /tf_tpu_models/official/resnet/configs/cloud/v2-512.yaml: -------------------------------------------------------------------------------- 1 | train_steps: 3558 2 | train_batch_size: 32768 3 | eval_batch_size: 1024 4 | iterations_per_loop: 3558 5 | skip_host_call: True 6 | num_cores: 512 7 | enable_lars: True 8 | label_smoothing: 0.1 9 | -------------------------------------------------------------------------------- /tf_tpu_models/official/resnet/configs/cloud/v2-8.yaml: -------------------------------------------------------------------------------- 1 | train_steps: 113854 2 | train_batch_size: 1024 3 | eval_batch_size: 1024 4 | iterations_per_loop: 113854 5 | skip_host_call: True 6 | num_cores: 8 7 | -------------------------------------------------------------------------------- /tf_tpu_models/official/resnet/configs/cloud/v3-1024.yaml: -------------------------------------------------------------------------------- 1 | # DISCLAIMER: These parameters have not been optimized 2 | train_steps: 3558 3 | train_batch_size: 32768 4 | eval_batch_size: 1024 5 | iterations_per_loop: 3558 6 | skip_host_call: True 7 | num_cores: 1024 8 | enable_lars: True 9 | label_smoothing: 0.1 10 | -------------------------------------------------------------------------------- /tf_tpu_models/official/resnet/configs/cloud/v3-128.yaml: -------------------------------------------------------------------------------- 1 | # DISCLAIMER: These parameters have not been optimized 2 | train_steps: 7116 3 | train_batch_size: 16384 4 | eval_batch_size: 1024 5 | iterations_per_loop: 7116 6 | skip_host_call: True 7 | num_cores: 128 8 
| enable_lars: True 9 | label_smoothing: 0.1 10 | -------------------------------------------------------------------------------- /tf_tpu_models/official/resnet/configs/cloud/v3-2048.yaml: -------------------------------------------------------------------------------- 1 | # DISCLAIMER: These parameters have not been optimized 2 | train_steps: 3558 3 | train_batch_size: 32768 4 | eval_batch_size: 1024 5 | iterations_per_loop: 3558 6 | skip_host_call: True 7 | num_cores: 2048 8 | enable_lars: True 9 | label_smoothing: 0.1 10 | -------------------------------------------------------------------------------- /tf_tpu_models/official/resnet/configs/cloud/v3-256.yaml: -------------------------------------------------------------------------------- 1 | # DISCLAIMER: These parameters have not been optimized 2 | train_steps: 3558 3 | train_batch_size: 32768 4 | eval_batch_size: 1024 5 | iterations_per_loop: 3558 6 | skip_host_call: True 7 | num_cores: 256 8 | enable_lars: True 9 | label_smoothing: 0.1 10 | -------------------------------------------------------------------------------- /tf_tpu_models/official/resnet/configs/cloud/v3-32.yaml: -------------------------------------------------------------------------------- 1 | # DISCLAIMER: These parameters have not been optimized 2 | train_steps: 28464 3 | train_batch_size: 4096 4 | eval_batch_size: 1024 5 | iterations_per_loop: 28464 6 | skip_host_call: True 7 | num_cores: 32 8 | -------------------------------------------------------------------------------- /tf_tpu_models/official/resnet/configs/cloud/v3-512.yaml: -------------------------------------------------------------------------------- 1 | # DISCLAIMER: These parameters have not been optimized 2 | train_steps: 3558 3 | train_batch_size: 32768 4 | eval_batch_size: 1024 5 | iterations_per_loop: 3558 6 | skip_host_call: True 7 | num_cores: 512 8 | enable_lars: True 9 | label_smoothing: 0.1 10 | 
-------------------------------------------------------------------------------- /tf_tpu_models/official/resnet/configs/cloud/v3-64.yaml: -------------------------------------------------------------------------------- 1 | # DISCLAIMER: These parameters have not been optimized 2 | train_steps: 14232 3 | train_batch_size: 8192 4 | eval_batch_size: 1024 5 | iterations_per_loop: 14232 6 | skip_host_call: True 7 | num_cores: 64 8 | -------------------------------------------------------------------------------- /tf_tpu_models/official/resnet/configs/cloud/v3-8.yaml: -------------------------------------------------------------------------------- 1 | train_steps: 113854 2 | train_batch_size: 1024 3 | eval_batch_size: 1024 4 | iterations_per_loop: 113854 5 | skip_host_call: True 6 | num_cores: 8 7 | -------------------------------------------------------------------------------- /tf_tpu_models/official/resnet/configs/resnet_config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
# Default hyperparameters for ResNet training. Key order matches the original
# template; only the literal formatting differs.
RESNET_CFG = {
    # Model size, batch sizes and dataset sizes.
    "resnet_depth": 50,
    "train_batch_size": 1024,
    "eval_batch_size": 1024,
    "num_train_images": 1281167,
    "num_eval_images": 50000,
    # Training schedule.
    "train_steps": 112590,
    "base_learning_rate": 0.1,
    "iterations_per_loop": 1251,
    # Hardware and numeric options.
    "use_tpu": True,
    "num_cores": 8,
    "enable_lars": False,
    "transpose_input": True,
    "precision": "bfloat16",
    "num_label_classes": 1000,
    "use_cache": True,
    "use_async_checkpointing": False,
    "image_size": 224,
    # Optimizer and regularization.
    "momentum": 0.9,
    "weight_decay": 1e-4,
    "label_smoothing": 0.0,
    "poly_rate": 0.0,
    "skip_host_call": False,
    "num_parallel_calls": 8,
    # DropBlock options (disabled by default: no groups, no keep probability).
    "dropblock_groups": "",
    "dropblock_keep_prob": None,
    "dropblock_size": 7,
    "pre_activation": False,
    "data_format": "channels_last",
}

# No restrictions are declared for this config.
RESNET_RESTRICTIONS = []
Change the environment variable MODEL_BUCKET in the Job spec to the 14 | # Google Cloud Storage location where you want to store the output model. 15 | # 3. Run `kubectl create -f resnet_k8s.yaml`. 16 | 17 | apiVersion: batch/v1 18 | kind: Job 19 | metadata: 20 | name: resnet-tpu 21 | spec: 22 | template: 23 | metadata: 24 | annotations: 25 | # The Cloud TPUs that will be created for this Job must support 26 | # TensorFlow 1.11. This version MUST match the TensorFlow version that 27 | # your model is built on. 28 | tf-version.cloud-tpus.google.com: "1.11" 29 | spec: 30 | restartPolicy: Never 31 | containers: 32 | - name: resnet-tpu 33 | # The official TensorFlow 1.11 TPU model image built from https://github.com/tensorflow/tpu/blob/r1.11/tools/docker/Dockerfile. 34 | image: gcr.io/tensorflow/tpu-models:r1.11 35 | command: 36 | - python 37 | - /tensorflow_tpu_models/models/official/resnet/resnet_main.py 38 | - --data_dir=$(DATA_BUCKET) 39 | - --model_dir=$(MODEL_BUCKET) 40 | env: 41 | # The Google Cloud Storage location where the fake ImageNet dataset is 42 | # stored. 43 | - name: DATA_BUCKET 44 | value: "gs://cloud-tpu-test-datasets/fake_imagenet" 45 | # [REQUIRED] Must specify the Google Cloud Storage location where your 46 | # output model will be stored. 47 | - name: MODEL_BUCKET 48 | value: "gs://<my-model-bucket>/resnet" 49 | # Point PYTHONPATH to the top level models folder 50 | - name: PYTHONPATH 51 | value: "/tensorflow_tpu_models/models" 52 | resources: 53 | limits: 54 | # Request a single v2-8 Cloud TPU device to train the model. 55 | # A single v2-8 Cloud TPU device consists of 4 chips, each of which 56 | # has 2 cores, so there are 8 cores in total. 57 | cloud-tpus.google.com/v2: 8 58 | -------------------------------------------------------------------------------- /tf_tpu_models/official/resnet/resnet_model_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
class ResnetModelTest(tf.test.TestCase):
  """Smoke tests: each ResNet variant builds and runs one forward pass."""

  def _run_forward_pass(self, model_fn):
    """Builds an 18-layer network with `model_fn` and runs it once.

    Args:
      model_fn: network factory from `resnet_model` (`resnet_v1` or
        `resnet_v2`), called with `resnet_depth`, `num_classes` and
        `data_format` keyword arguments.
    """
    network = model_fn(resnet_depth=18,
                       num_classes=10,
                       data_format='channels_last')
    input_bhw3 = tf.placeholder(tf.float32, [1, 28, 28, 3])
    resnet_output = network(inputs=input_bhw3, is_training=True)

    # The context manager closes the session even when a run fails; the
    # original code created sessions that were never closed.
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      _ = sess.run(resnet_output,
                   feed_dict={input_bhw3: np.random.randn(1, 28, 28, 3)})

  def test_load_resnet18_v1(self):
    """ResNet-18 v1 builds, initializes and evaluates without error."""
    self._run_forward_pass(resnet_model.resnet_v1)

  def test_load_resnet18_v2(self):
    """ResNet-18 v2 builds, initializes and evaluates without error."""
    self._run_forward_pass(resnet_model.resnet_v2)


if __name__ == '__main__':
  tf.test.main()
54 | -------------------------------------------------------------------------------- /tf_tpu_models/official/retinanet/object_detection/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tf_tpu_models/official/retinanet/object_detection/shape_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def assert_shape_equal(shape_a, shape_b):
  """Asserts that shape_a and shape_b are equal.

  If the shapes are static, raises a ValueError when the shapes
  mismatch.

  If the shapes are dynamic, raises a tf InvalidArgumentError when the shapes
  mismatch.

  Args:
    shape_a: a list containing shape of the first tensor.
    shape_b: a list containing shape of the second tensor.

  Returns:
    Either a tf.no_op() when shapes are all static and a tf.assert_equal() op
    when the shapes are dynamic.

  Raises:
    ValueError: When shapes are both static and unequal.
  """
  all_static = (all(isinstance(dim, int) for dim in shape_a) and
                all(isinstance(dim, int) for dim in shape_b))
  if not all_static:
    return tf.assert_equal(shape_a, shape_b)
  # Compare element-wise as lists: a list and a tuple holding the same
  # dimensions describe the same shape and must not be reported as unequal.
  if list(shape_a) != list(shape_b):
    raise ValueError('Unequal shapes {}, {}'.format(shape_a, shape_b))
  return tf.no_op()


def combined_static_and_dynamic_shape(tensor):
  """Returns a list containing static and dynamic values for the dimensions.

  Returns a list of static and dynamic values for shape dimensions. This is
  useful to preserve static shapes when available in reshape operation.

  Args:
    tensor: A tensor of any type.

  Returns:
    A list of size tensor.shape.ndims containing integers or a scalar tensor.
  """
  static_tensor_shape = tensor.shape.as_list()
  dynamic_tensor_shape = tf.shape(tensor)
  # Prefer the statically known size; fall back to the runtime value only for
  # dimensions that are unknown (None) at graph-construction time.
  return [
      dim if dim is not None else dynamic_tensor_shape[index]
      for index, dim in enumerate(static_tensor_shape)
  ]
"""Config template to train SqueezeNet."""

# pylint: disable=line-too-long
SQUEEZENET_CFG = {
    'model_dir': '',
    'use_tpu': True,
    'use_async_checkpointing': False,
    # Training-time settings.
    'train': {
        'iterations_per_loop': 100,
        'train_batch_size': 1024,
        'num_epochs': 150,
        'num_cores_per_replica': 8,
        'num_examples_per_epoch': 1300 * 1000,
        'optimizer': {
            'type': 'momentum',
            'momentum': 0.9,
        },
        'learning_rate': {
            'init_learning_rate': 0.03,
            'end_learning_rate': 0.005,
        },
    },
    # Evaluation-time settings.
    'eval': {
        'eval_batch_size': 1024,
        'num_evals': 10,
        'num_eval_examples': 50000,
    },
    'num_classes': 1001,
}

# No restrictions are declared for this config.
SQUEEZENET_RESTRICTIONS = [
]

# pylint: enable=line-too-long
network is unstable 2 | loss: 'cross_entropy' 3 | train_batch_size: 32 4 | eval_batch_size: 1 5 | input_partition_dims: [1,16,1,1,1] 6 | use_index_label_in_train: true 7 | 8 | input_image_size: [256,256,256] 9 | label_dtype: 'float32' 10 | -------------------------------------------------------------------------------- /tf_tpu_models/official/unet3d/configs/cloud/v3-128_256x256x256_dice.yaml: -------------------------------------------------------------------------------- 1 | init_learning_rate: 0.0001 # with 0.005 the network is unstable 2 | loss: 'adaptive_dice32' 3 | train_batch_size: 32 4 | eval_batch_size: 1 5 | input_partition_dims: [1,16,1,1,1] 6 | use_index_label_in_train: false 7 | 8 | input_image_size: [256,256,256] 9 | label_dtype: 'float32' 10 | -------------------------------------------------------------------------------- /tf_tpu_models/official/unet3d/configs/cloud/v3-32_256x256x256_ce.yaml: -------------------------------------------------------------------------------- 1 | init_learning_rate: 0.00005 # with 0.005 the network is unstable 2 | loss: 'cross_entropy' 3 | train_batch_size: 8 4 | eval_batch_size: 1 5 | input_partition_dims: [1,16,1,1,1] 6 | use_index_label_in_train: true 7 | 8 | input_image_size: [256,256,256] 9 | label_dtype: 'float32' 10 | -------------------------------------------------------------------------------- /tf_tpu_models/official/unet3d/configs/cloud/v3-32_256x256x256_dice.yaml: -------------------------------------------------------------------------------- 1 | init_learning_rate: 0.00005 # with 0.005 the network is unstable 2 | loss: 'adaptive_dice32' 3 | train_batch_size: 8 4 | eval_batch_size: 1 5 | input_partition_dims: [1,16,1,1,1] 6 | use_index_label_in_train: false 7 | 8 | input_image_size: [256,256,256] 9 | label_dtype: 'float32' 10 | -------------------------------------------------------------------------------- /tf_tpu_models/official/unet3d/configs/cloud/v3-8_128x128x128_ce.yaml: 
-------------------------------------------------------------------------------- 1 | init_learning_rate: 0.0001 2 | loss: 'cross_entropy' 3 | train_batch_size: 16 4 | eval_batch_size: 8 5 | input_partition_dims: [1,8,1,1,1] 6 | use_index_label_in_train: true 7 | 8 | input_image_size: [128,128,128] 9 | label_dtype: 'float32' 10 | -------------------------------------------------------------------------------- /tf_tpu_models/official/unet3d/configs/cloud/v3-8_128x128x128_dice.yaml: -------------------------------------------------------------------------------- 1 | init_learning_rate: 0.0001 # with 0.005 the network is unstable 2 | loss: 'adaptive_dice32' 3 | train_batch_size: 32 4 | eval_batch_size: 8 5 | input_partition_dims: # Yaml reads None as a string. Instead, put empty string here for NoneType. 6 | use_index_label_in_train: false 7 | 8 | input_image_size: [128,128,128] 9 | label_dtype: 'float32' 10 | -------------------------------------------------------------------------------- /tf_tpu_models/official/unet3d/data_preprocess/convert_lits_nii_to_npy.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | r"""Converts .nii files in LiTS dataset to .npy files. 
num_processes = 2
input_path = "Downloads/LiTS/Train/"  # where the .nii files are.
output_path = "Downloads/LiTS/Train_np/"  # where you want to put the npy files.


def process_one_file(image_path):
  """Convert one volume .nii file and its segmentation to .npy files.

  Loads `image_path` and the matching `segmentation-*` file from the same
  directory, casts both arrays to float32 and saves them under `output_path`
  as `volume-<id>.npy` and `segmentation-<id>.npy`.

  Args:
    image_path: path to a file whose name has the form `volume-<id>.nii`.
  """
  directory, file_name = os.path.split(image_path)
  im_id = file_name.split("volume-")[1].split(".nii")[0]
  # Rewrite only the file name: replacing on the whole path would also alter
  # any directory component that happens to contain "volume-".
  label_path = os.path.join(
      directory, file_name.replace("volume-", "segmentation-", 1))

  # `get_data()` is deprecated in nibabel; `np.asanyarray(img.dataobj)` is the
  # documented replacement that returns the same array.
  image = np.asanyarray(nib.load(image_path).dataobj).astype(np.float32)
  label = np.asanyarray(nib.load(label_path).dataobj).astype(np.float32)

  print("image shape: {}, dtype: {}".format(image.shape, image.dtype))
  print("label shape: {}, dtype: {}".format(label.shape, label.dtype))

  np.save(os.path.join(output_path, "volume-{}.npy".format(im_id)), image)
  np.save(os.path.join(output_path, "segmentation-{}.npy".format(im_id)), label)


def main():
  """Converts every `volume-*` file under `input_path` using a process pool."""
  # np.save does not create missing directories.
  if not os.path.isdir(output_path):
    os.makedirs(output_path)

  nii_dir = os.path.join(input_path, "volume-*")
  p = multiprocessing.Pool(num_processes)
  try:
    p.map(process_one_file, glob.glob(nii_dir))
  finally:
    # Always release the worker processes, even when a conversion fails.
    p.close()
    p.join()


# The guard keeps worker processes from re-running the conversion when they
# import this module (required with the "spawn" start method).
if __name__ == "__main__":
  main()
# Default hyperparameters for 3D UNet training. Key order matches the original
# template; only the literal formatting and comments differ.
UNET_CONFIG = {
    # Placeholder for TPU-specific configs.
    "tpu_config": {},
    "model_dir": "",
    "training_file_pattern": "",
    "eval_file_pattern": "",
    # Input files are GZip compressed and must be decompressed when read.
    "compressed_input": True,
    "use_bfloat16": True,
    "label_dtype": "float32",
    "train_batch_size": 8,
    "eval_batch_size": 8,
    "predict_batch_size": 8,
    "train_epochs": 10,
    "train_item_count": 1000,
    "eval_item_count": 100,
    "train_steps": 100000,
    "eval_steps": 10,
    "num_steps_per_eval": 100,
    "min_eval_interval": 180,
    "eval_timeout": None,
    "optimizer": "adam",
    "momentum": 0.9,
    # Spatial dimensions of the input image.
    "input_image_size": [128, 128, 128],
    # Number of channels in the input image.
    "num_channels": 1,
    # Spatial partition dimensions.
    "input_partition_dims": None,
    # Upsample with deconvolution when True; otherwise use plain upsampling.
    "deconvolution": True,
    # Number of regions (classes) to segment.
    "num_classes": 3,
    # Number of filters used by the architecture.
    "num_base_filters": 32,
    # Depth of the network.
    "depth": 4,
    # Dropout rate used across the network.
    "dropout_rate": 0.5,
    # Number of levels that contribute to the output.
    "num_segmentation_levels": 2,
    # Whether to use batch normalization.
    "use_batch_norm": True,
    "init_learning_rate": 5e-5,
    # Learning-rate decay steps.
    "lr_decay_steps": 100000,
    # Learning-rate decay rate.
    "lr_decay_rate": 0.5,
    # Data format: 'channels_last' or 'channels_first'.
    "data_format": "channels_last",
    # Train on class indices when True; otherwise on one-hot encoded labels.
    "use_index_label_in_train": False,
    # Loss name, e.g. softmax cross entropy or adaptive_dice32.
    "loss": "adaptive_dice32",
}

# No restrictions are declared for this config.
UNET_RESTRICTIONS = []
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // ============================================================================== 15 | 16 | package commands 17 | 18 | import ( 19 | "context" 20 | "testing" 21 | 22 | "github.com/google/subcommands" 23 | "google.golang.org/api/compute/v1" 24 | "google.golang.org/api/tpu/v1alpha1" 25 | ) 26 | 27 | func testDeleteWorkflow(t *testing.T, libs *testLibs, expectedGCEAction, expectedTPUAction string, expectedExitCode subcommands.ExitStatus) { 28 | t.Helper() 29 | c := deleteCmd{ 30 | cfg: libs.cfg, 31 | gce: libs.gce, 32 | tpu: libs.tpu, 33 | } 34 | c.tpuCmd.skipConfirmation = true 35 | 36 | exit := c.Execute(context.Background(), nil) 37 | if exit != expectedExitCode { 38 | t.Fatalf("Exit code incorrect: %d", exit) 39 | } 40 | 41 | verifySingleOperation(t, libs.gce.OperationsPerformed, expectedGCEAction) 42 | verifySingleOperation(t, libs.tpu.OperationsPerformed, expectedTPUAction) 43 | } 44 | 45 | func TestDeleteNotExistent(t *testing.T) { 46 | libs := newTestLibs() 47 | testDeleteWorkflow(t, libs, "", "", subcommands.ExitFailure) 48 | } 49 | 50 | func TestDeleteNotRunning(t *testing.T) { 51 | libs := newTestLibs() 52 | libs.gce.instance = &compute.Instance{Status: "STOPPED"} 53 | libs.tpu.instance = &tpu.Node{State: "CREATING"} 54 | testDeleteWorkflow(t, libs, "DELETE", "DELETE", subcommands.ExitSuccess) 55 | } 56 | 57 | func TestDelete(t *testing.T) { 58 | libs := newTestLibs() 59 | libs.gce.instance = &compute.Instance{Status: "RUNNING"} 60 | libs.tpu.instance = &tpu.Node{State: "READY"} 61 | testDeleteWorkflow(t, 
libs, "DELETE", "DELETE", subcommands.ExitSuccess) 62 | } 63 | -------------------------------------------------------------------------------- /tools/ctpu/commands/list_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // ============================================================================== 15 | 16 | package commands 17 | 18 | import ( 19 | "testing" 20 | 21 | "github.com/tensorflow/tpu/tools/ctpu/ctrl" 22 | "google.golang.org/api/compute/v1" 23 | "google.golang.org/api/tpu/v1alpha1" 24 | ) 25 | 26 | func TestListFlockStatus(t *testing.T) { 27 | c := listCmd{} 28 | 29 | type flockStatusTest struct { 30 | flock flock 31 | expected string 32 | } 33 | 34 | flockStatusTests := []flockStatusTest{ 35 | flockStatusTest{ 36 | flock: flock{ 37 | vm: &ctrl.GCEInstance{&compute.Instance{Status: "RUNNING"}}, 38 | tpu: &ctrl.TPUInstance{&tpu.Node{State: "READY"}}, 39 | }, 40 | expected: "running", 41 | }, 42 | flockStatusTest{ 43 | flock: flock{ 44 | vm: &ctrl.GCEInstance{&compute.Instance{Status: "STOPPED"}}, 45 | }, 46 | expected: "paused", 47 | }, 48 | flockStatusTest{ 49 | flock: flock{}, 50 | expected: "--", 51 | }, 52 | flockStatusTest{ 53 | flock: flock{ 54 | vm: &ctrl.GCEInstance{&compute.Instance{Status: "STOPPING"}}, 55 | tpu: 
&ctrl.TPUInstance{&tpu.Node{State: "DELETING"}}, 56 | }, 57 | expected: "unknown", 58 | }, 59 | } 60 | 61 | for _, test := range flockStatusTests { 62 | status := c.flockStatus(&test.flock) 63 | if status != test.expected { 64 | t.Errorf("c.flockStatus(%v) = %q, want: %q", test.flock, status, test.expected) 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /tools/ctpu/commands/pause_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | // ============================================================================== 15 | 16 | package commands 17 | 18 | import ( 19 | "context" 20 | "testing" 21 | 22 | "github.com/google/subcommands" 23 | "google.golang.org/api/compute/v1" 24 | "google.golang.org/api/tpu/v1alpha1" 25 | ) 26 | 27 | func testPauseWorkflow(t *testing.T, libs *testLibs, expectedGCEAction, expectedTPUAction string, expectedExitCode subcommands.ExitStatus) { 28 | t.Helper() 29 | c := pauseCmd{ 30 | cfg: libs.cfg, 31 | gce: libs.gce, 32 | tpu: libs.tpu, 33 | } 34 | c.tpuCmd.skipConfirmation = true 35 | 36 | exit := c.Execute(context.Background(), nil) 37 | if exit != expectedExitCode { 38 | t.Fatalf("Exit code incorrect: %d", exit) 39 | } 40 | 41 | verifySingleOperation(t, libs.gce.OperationsPerformed, expectedGCEAction) 42 | verifySingleOperation(t, libs.tpu.OperationsPerformed, expectedTPUAction) 43 | 44 | } 45 | 46 | func TestPauseNotExistent(t *testing.T) { 47 | libs := newTestLibs() 48 | testPauseWorkflow(t, libs, "", "", subcommands.ExitFailure) 49 | } 50 | 51 | func TestPauseNotRunning(t *testing.T) { 52 | libs := newTestLibs() 53 | libs.gce.instance = &compute.Instance{Status: "STOPPING"} 54 | libs.tpu.instance = &tpu.Node{State: "CREATING"} 55 | testPauseWorkflow(t, libs, "", "DELETE", subcommands.ExitSuccess) 56 | } 57 | 58 | func TestPause(t *testing.T) { 59 | libs := newTestLibs() 60 | libs.gce.instance = &compute.Instance{Status: "RUNNING"} 61 | libs.tpu.instance = &tpu.Node{State: "READY"} 62 | testPauseWorkflow(t, libs, "STOP", "DELETE", subcommands.ExitSuccess) 63 | } 64 | -------------------------------------------------------------------------------- /tools/ctpu/commands/quota.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // ============================================================================== 15 | 16 | package commands 17 | 18 | import ( 19 | "context" 20 | "fmt" 21 | "log" 22 | 23 | "flag" 24 | "github.com/google/subcommands" 25 | "github.com/tensorflow/tpu/tools/ctpu/config" 26 | ) 27 | 28 | type quotaCmd struct { 29 | cfg *config.Config 30 | } 31 | 32 | // QuotaCommand creates the quota command. 33 | func QuotaCommand(config *config.Config) subcommands.Command { 34 | return "aCmd{config} 35 | } 36 | 37 | func (quotaCmd) Name() string { 38 | return "quota" 39 | } 40 | 41 | func (c *quotaCmd) SetFlags(f *flag.FlagSet) { 42 | c.cfg.SetFlags(f) // Allow users to specify cfg flags either before or after the subcommand name. 43 | } 44 | 45 | func (quotaCmd) Synopsis() string { 46 | return "prints URL where quota can be seen" 47 | } 48 | 49 | func (quotaCmd) Usage() string { 50 | return `ctpu quota 51 | ` 52 | } 53 | 54 | func (q *quotaCmd) Execute(ctx context.Context, flags *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { 55 | err := q.cfg.Validate() 56 | if err != nil { 57 | log.Print(err) 58 | return subcommands.ExitFailure 59 | } 60 | 61 | fmt.Printf("Quotas cannot currently be displayed within ctpu. 
// tpuLocationsCmd implements the tpu-locations subcommand.
type tpuLocationsCmd struct {
	// cfg is the ctpu configuration; its flags are registered in SetFlags and
	// it is validated at the start of Execute.
	cfg *config.Config
	// tpus is queried by Execute for the list of Cloud TPU locations.
	tpus TPULocationsCP
}
42 | func TPULocationsCommand(cfg *config.Config, tpus TPULocationsCP) subcommands.Command { 43 | return &tpuLocationsCmd{cfg, tpus} 44 | } 45 | 46 | func (tpuLocationsCmd) Name() string { 47 | return "tpu-locations" 48 | } 49 | 50 | func (c *tpuLocationsCmd) SetFlags(f *flag.FlagSet) { 51 | c.cfg.SetFlags(f) // Allow users to specify cfg flags either before or after the subcommand name. 52 | } 53 | 54 | func (tpuLocationsCmd) Synopsis() string { 55 | return "queries for all locations with TPUs available." 56 | } 57 | 58 | func (tpuLocationsCmd) Usage() string { 59 | return `ctpu tpu-locations 60 | ` 61 | } 62 | 63 | func sortLocations(locations []*tpu.Location) { 64 | sort.Slice(locations, func(i, j int) bool { return locations[i].LocationId < locations[j].LocationId }) 65 | } 66 | 67 | func (t *tpuLocationsCmd) Execute(ctx context.Context, flags *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { 68 | err := t.cfg.Validate() 69 | if err != nil { 70 | log.Print(err) 71 | return subcommands.ExitFailure 72 | } 73 | 74 | locations, err := t.tpus.ListLocations() 75 | if err != nil { 76 | log.Print(err) 77 | return subcommands.ExitFailure 78 | } 79 | if len(locations) == 0 { 80 | fmt.Printf("No available Cloud TPU locations.\n") 81 | return subcommands.ExitFailure 82 | } 83 | 84 | sortLocations(locations) 85 | 86 | fmt.Printf("Cloud TPU Locations:\n") 87 | for _, loc := range locations { 88 | fmt.Printf("\t%s\n", loc.LocationId) 89 | } 90 | 91 | return subcommands.ExitSuccess 92 | } 93 | -------------------------------------------------------------------------------- /tools/ctpu/commands/tpu_locations_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // ============================================================================== 15 | 16 | package commands 17 | 18 | import ( 19 | "testing" 20 | 21 | "github.com/google/go-cmp/cmp" 22 | "google.golang.org/api/tpu/v1alpha1" 23 | ) 24 | 25 | func TestTpuLocationsSort(t *testing.T) { 26 | testcases := []struct { 27 | locations []string 28 | want []string 29 | }{{ 30 | locations: []string{"us-central1-f", "us-central1-c"}, 31 | want: []string{"us-central1-c", "us-central1-f"}, 32 | }, { 33 | locations: []string{"us-central1-f", "us-central1-c", "us-central1-b"}, 34 | want: []string{"us-central1-b", "us-central1-c", "us-central1-f"}, 35 | }, { 36 | locations: []string{"us-central1-b", "us-central1-c", "us-central1-f"}, 37 | want: []string{"us-central1-b", "us-central1-c", "us-central1-f"}, 38 | }} 39 | for _, testcase := range testcases { 40 | input := make([]*tpu.Location, 0, len(testcase.locations)) 41 | for _, locID := range testcase.locations { 42 | input = append(input, &tpu.Location{LocationId: locID}) 43 | } 44 | sortLocations(input) 45 | output := make([]string, 0, len(testcase.locations)) 46 | for _, loc := range input { 47 | output = append(output, loc.LocationId) 48 | } 49 | if !cmp.Equal(output, testcase.want) { 50 | t.Errorf("sort.Sort(byLocID(%#v)) = %#v, want: %#v", testcase.locations, output, testcase.want) 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /tools/ctpu/commands/tpu_size_test.go: 
-------------------------------------------------------------------------------- 1 | // Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // ============================================================================== 15 | 16 | package commands 17 | 18 | import ( 19 | "fmt" 20 | "reflect" 21 | "testing" 22 | 23 | "google.golang.org/api/tpu/v1alpha1" 24 | ) 25 | 26 | func TestTPUSizeSort(t *testing.T) { 27 | testcases := []struct { 28 | input []string 29 | want []string 30 | }{{ 31 | input: []string{"v2-8"}, 32 | want: []string{"v2-8"}, 33 | }, { 34 | input: []string{"v2-8", "v3-8"}, 35 | want: []string{"v2-8", "v3-8"}, 36 | }, { 37 | input: []string{"v3-8", "v2-8"}, 38 | want: []string{"v2-8", "v3-8"}, 39 | }, { 40 | input: []string{"v3-8", "v2-8", "v3-32", "v2-64"}, 41 | want: []string{"v2-8", "v2-64", "v3-8", "v3-32"}, 42 | }} 43 | 44 | for i, tt := range testcases { 45 | if len(tt.input) != len(tt.want) { 46 | t.Fatalf("Invalid test case: %d.", i) 47 | } 48 | input := make([]*tpu.AcceleratorType, len(tt.input)) 49 | for i, name := range tt.input { 50 | input[i] = &tpu.AcceleratorType{Type: name} 51 | } 52 | want := make([]*tpu.AcceleratorType, len(tt.want)) 53 | for i, name := range tt.want { 54 | want[i] = &tpu.AcceleratorType{Type: name} 55 | } 56 | sortTpuSizes(input) 57 | if !reflect.DeepEqual(input, want) { 58 | fmt.Printf("Failure on test case 
%d\n", i) 59 | for i, _ := range tt.want { 60 | fmt.Printf("%d: want: %q, got: %q\n", i, want[i].Type, input[i].Type) 61 | } 62 | t.Errorf("Input: %v, want: %v, got: %v", tt.input, want, input) 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /tools/ctpu/commands/version.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // ============================================================================== 15 | 16 | package commands 17 | 18 | import ( 19 | "context" 20 | "fmt" 21 | 22 | "flag" 23 | "github.com/google/subcommands" 24 | ) 25 | 26 | type versionCmd struct { 27 | version string 28 | } 29 | 30 | // VersionCommand creates the version command. 31 | func VersionCommand(version string) subcommands.Command { 32 | return &versionCmd{version: version} 33 | } 34 | 35 | func (versionCmd) Name() string { 36 | return "version" 37 | } 38 | 39 | func (versionCmd) SetFlags(f *flag.FlagSet) {} 40 | 41 | func (versionCmd) Synopsis() string { 42 | return "prints out the ctpu version." 
43 | } 44 | 45 | func (versionCmd) Usage() string { 46 | return `ctpu version 47 | ` 48 | } 49 | 50 | func (v *versionCmd) Execute(ctx context.Context, flags *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { 51 | fmt.Printf("ctpu version: %s\n", v.version) 52 | return subcommands.ExitSuccess 53 | } 54 | -------------------------------------------------------------------------------- /tools/ctpu/config/config_gce.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // ============================================================================== 15 | 16 | package config 17 | 18 | import ( 19 | "os" 20 | "os/user" 21 | "path" 22 | 23 | "cloud.google.com/go/compute/metadata" 24 | ) 25 | 26 | func gceConfig() (*Config, error) { 27 | cfg := &Config{} 28 | 29 | // Load config from the filesystem if present. 
30 | if user, err := user.Current(); err == nil { 31 | configDir := path.Join(user.HomeDir, ".config", "gcloud") 32 | if _, err := os.Stat(configDir); err == nil { 33 | fsCfg, err := buildGcloudEnvConfig(configDir, false) 34 | if err == nil { 35 | cfg = fsCfg 36 | } 37 | } 38 | } 39 | 40 | cfg.Environment = "gce" 41 | 42 | if cfg.Project == "" { 43 | p, err := metadata.ProjectID() 44 | if err != nil { 45 | return nil, err 46 | } 47 | cfg.Project = p 48 | } 49 | 50 | if cfg.Zone == "" { 51 | z, err := metadata.Zone() 52 | if err != nil { 53 | return nil, err 54 | } 55 | cfg.Zone = z 56 | } 57 | 58 | if cfg.FlockName == "" { 59 | fn, err := metadata.InstanceName() 60 | if err != nil { 61 | return nil, err 62 | } 63 | cfg.FlockName = fn 64 | } 65 | 66 | return cfg, nil 67 | } 68 | -------------------------------------------------------------------------------- /tools/ctpu/config/devshell.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | // ============================================================================== 15 | 16 | package config 17 | 18 | import ( 19 | "fmt" 20 | "log" 21 | "os" 22 | "strings" 23 | ) 24 | 25 | func isDevshell() bool { 26 | for _, e := range os.Environ() { 27 | pair := strings.Split(e, "=") 28 | if pair[0] == "DEVSHELL_CLIENT_PORT" { 29 | return true 30 | } 31 | } 32 | return false 33 | } 34 | 35 | func devshellEnvParseError(e string) error { 36 | return fmt.Errorf("devshell: unexpected environment variable value: %q", e) 37 | } 38 | 39 | func devshellFilesystemConfig(env []string) *Config { 40 | for _, e := range env { 41 | pair := strings.Split(e, "=") 42 | switch pair[0] { 43 | case "CLOUDSDK_CONFIG": 44 | if len(pair) != 2 { 45 | log.Printf("Unable to parse CLOUDSDK_CONFIG environment variable.") 46 | return nil 47 | } 48 | cfg, err := buildGcloudEnvConfig(pair[1], false) 49 | if err != nil { 50 | log.Printf("Error parsing CLOUDSDK_CONFIG at %q: %v.", pair[1], err) 51 | return nil 52 | } 53 | cfg.Environment = "devshell" 54 | return cfg 55 | } 56 | } 57 | return nil 58 | } 59 | 60 | func devshellConfig() (*Config, error) { 61 | cfg := devshellFilesystemConfig(os.Environ()) 62 | if cfg == nil { 63 | cfg = &Config{} 64 | } 65 | cfg.Environment = "devshell" 66 | 67 | // Add environment overrides. 
68 | for _, e := range os.Environ() { 69 | pair := strings.Split(e, "=") 70 | switch pair[0] { 71 | case "DEVSHELL_PROJECT_ID": 72 | if len(pair) != 2 { 73 | return nil, devshellEnvParseError(e) 74 | } 75 | cfg.Project = pair[1] 76 | case "DEVSHELL_GCLOUD_CONFIG": 77 | if len(pair) != 2 { 78 | return nil, devshellEnvParseError(e) 79 | } 80 | cfg.ActiveConfiguration = pair[1] 81 | default: 82 | // Nothing 83 | } 84 | } 85 | 86 | if cfg.Project == "" { 87 | log.Printf("WARNING: devshell: could not find DEVSHELL_PROJECT_ID") 88 | } 89 | return cfg, nil 90 | } 91 | -------------------------------------------------------------------------------- /tools/ctpu/config/devshell_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | // ============================================================================== 15 | 16 | package config 17 | 18 | import ( 19 | "reflect" 20 | "testing" 21 | ) 22 | 23 | func TestDevshellFilesystemConfig(t *testing.T) { 24 | cfgDir := testGcloudConfigDir("no_app_creds") 25 | env := []string{ 26 | "CLOUDSDK_CONFIG=" + cfgDir, 27 | "EXTRA_ENV_VAR=foo", 28 | } 29 | got := devshellFilesystemConfig(env) 30 | want := &Config{ 31 | Environment: "devshell", 32 | Project: "ctpu9-test-project", 33 | account: "saeta@google.com", 34 | Zone: "us-central1-c", 35 | FlockName: "", 36 | ActiveConfiguration: "ctpu9", 37 | } 38 | if !reflect.DeepEqual(got, want) { 39 | t.Errorf("devshellFilesystemConfig(%v) = %#v, want: %#v", env, got, want) 40 | } 41 | } 42 | 43 | func TestDevshellFilesystemConfigEmpty(t *testing.T) { 44 | cfgDir := testGcloudConfigDir("no_config") 45 | env := []string{ 46 | "CLOUDSDK_CONFIG=" + cfgDir, 47 | "EXTRA_ENV_VAR=foo", 48 | } 49 | got := devshellFilesystemConfig(env) 50 | want := &Config{ 51 | Environment: "devshell", 52 | } 53 | if !reflect.DeepEqual(got, want) { 54 | t.Errorf("devshellFilesystemConfig(%v) = %#v, want: %#v", env, got, want) 55 | } 56 | } 57 | 58 | func TestDevshellFilesystemConfigBadEnv(t *testing.T) { 59 | env := []string{ 60 | "CLOUDSDK_CONFIG=", 61 | "EXTRA_ENV_VAR=foo", 62 | } 63 | got := devshellFilesystemConfig(env) 64 | if got != nil { 65 | t.Errorf("devshellFilesystemConfig(%v) = %v, want: nil", env, got) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /tools/ctpu/config/testdata/gcloud/clean/README.md: -------------------------------------------------------------------------------- 1 | # Clean # 2 | 3 | This is a complete working example, validating that everything is parsed correctly. 
-------------------------------------------------------------------------------- /tools/ctpu/config/testdata/gcloud/clean/active_config: -------------------------------------------------------------------------------- 1 | ctpu9 2 | -------------------------------------------------------------------------------- /tools/ctpu/config/testdata/gcloud/clean/application_default_credentials.json: -------------------------------------------------------------------------------- 1 | fake_app_default_file 2 | -------------------------------------------------------------------------------- /tools/ctpu/config/testdata/gcloud/clean/configurations/config_ctpu9: -------------------------------------------------------------------------------- 1 | [core] 2 | account = saeta@google.com 3 | unused = extraneous data 4 | project = ctpu9-test-project 5 | 6 | [compute] 7 | zone = us-central1-c 8 | region = us-central1 9 | 10 | -------------------------------------------------------------------------------- /tools/ctpu/config/testdata/gcloud/corrupted/README.md: -------------------------------------------------------------------------------- 1 | # Corrupted configuration environment # 2 | 3 | The gcloud active config does not exist under configurations/... 
4 | -------------------------------------------------------------------------------- /tools/ctpu/config/testdata/gcloud/corrupted/active_config: -------------------------------------------------------------------------------- 1 | ctpu9 2 | -------------------------------------------------------------------------------- /tools/ctpu/config/testdata/gcloud/corrupted/application_default_credentials.json: -------------------------------------------------------------------------------- 1 | fake_app_default_file 2 | -------------------------------------------------------------------------------- /tools/ctpu/config/testdata/gcloud/corrupted/configurations/config_default: -------------------------------------------------------------------------------- 1 | [core] 2 | account = saeta@google.com 3 | project = ctpu9-test-project 4 | 5 | [compute] 6 | zone = us-central1-c 7 | region = us-central1 8 | 9 | -------------------------------------------------------------------------------- /tools/ctpu/config/testdata/gcloud/corrupted2/README.md: -------------------------------------------------------------------------------- 1 | # Corrupted 2 # 2 | 3 | There is no configurations directory. 
4 | -------------------------------------------------------------------------------- /tools/ctpu/config/testdata/gcloud/corrupted2/active_config: -------------------------------------------------------------------------------- 1 | ctpu9 2 | -------------------------------------------------------------------------------- /tools/ctpu/config/testdata/gcloud/corrupted2/application_default_credentials.json: -------------------------------------------------------------------------------- 1 | fake_app_default_file 2 | -------------------------------------------------------------------------------- /tools/ctpu/config/testdata/gcloud/incomplete/README.md: -------------------------------------------------------------------------------- 1 | # Incomplete # 2 | 3 | This configuration simply does not have all required values set. 4 | -------------------------------------------------------------------------------- /tools/ctpu/config/testdata/gcloud/incomplete/active_config: -------------------------------------------------------------------------------- 1 | ctpu9 2 | -------------------------------------------------------------------------------- /tools/ctpu/config/testdata/gcloud/incomplete/application_default_credentials.json: -------------------------------------------------------------------------------- 1 | fake_app_default_file 2 | -------------------------------------------------------------------------------- /tools/ctpu/config/testdata/gcloud/incomplete/configurations/config_ctpu9: -------------------------------------------------------------------------------- 1 | [core] 2 | account = saeta@google.com 3 | 4 | [compute] 5 | zone = us-central1-c 6 | 7 | -------------------------------------------------------------------------------- /tools/ctpu/config/testdata/gcloud/no_app_creds/README.md: -------------------------------------------------------------------------------- 1 | # No App Creds # 2 | 3 | This is a working example, except there are no application default 
credentials. 4 | When running in the Cloud DevShell environment, this is a possible scenario, and 5 | should be validated to parse correctly. 6 | -------------------------------------------------------------------------------- /tools/ctpu/config/testdata/gcloud/no_app_creds/active_config: -------------------------------------------------------------------------------- 1 | ctpu9 2 | -------------------------------------------------------------------------------- /tools/ctpu/config/testdata/gcloud/no_app_creds/configurations/config_ctpu9: -------------------------------------------------------------------------------- 1 | [core] 2 | account = saeta@google.com 3 | unused = extraneous data 4 | project = ctpu9-test-project 5 | 6 | [compute] 7 | zone = us-central1-c 8 | region = us-central1 9 | 10 | -------------------------------------------------------------------------------- /tools/ctpu/config/testdata/gcloud/no_config/README.md: -------------------------------------------------------------------------------- 1 | # No configuration # 2 | 3 | This scenario is when the user has no configuration at all for gcloud. -------------------------------------------------------------------------------- /tools/ctpu/ctrl/ctrl_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | // ============================================================================== 15 | 16 | package ctrl 17 | 18 | import ( 19 | "testing" 20 | "time" 21 | 22 | "golang.org/x/oauth2" 23 | ) 24 | // TestParseResponse checks that devshellTokenSource.parseResponse returns a non-nil token whose AccessToken and Expiry match the expected values for a sample response string. 25 | func TestParseResponse(t *testing.T) { 26 | addTime, err := time.ParseDuration("1234.2s") 27 | if err != nil { 28 | t.Fatal(err) 29 | } 30 | testcases := []struct { 31 | input string 32 | want oauth2.Token 33 | }{{ 34 | input: "[\"saeta@example.com\",\"ctpu-test-project\",\"abc123\",1234.2]", 35 | want: oauth2.Token{ 36 | AccessToken: "abc123", 37 | Expiry: time.Now().Add(addTime), 38 | }, 39 | }} 40 | 41 | for _, testcase := range testcases { 42 | src := devshellTokenSource{} 43 | got, err := src.parseResponse(testcase.input) 44 | if err != nil { 45 | t.Fatalf("src.parseResponse(%q) = %v, want nil", testcase.input, err) 46 | } 47 | if got == nil { 48 | t.Fatalf("src.parseResponse(%q) = nil, want non-nil", testcase.input) 49 | } 50 | if got.AccessToken != testcase.want.AccessToken { 51 | t.Errorf("src.parseResponse(%q).AccessToken = %q, want %q", testcase.input, got.AccessToken, testcase.want.AccessToken) 52 | } 53 | // Allow 200ms of slop to avoid test flakiness. NOTE(review): this only bounds got.Expiry being later than want.Expiry; an expiry that is too early is not reported. 54 | if got.Expiry.Sub(testcase.want.Expiry) > 200*time.Millisecond { 55 | t.Errorf("src.parseResponse(%q).Expiry = %v, want: %v", testcase.input, got.Expiry.Truncate(100*time.Millisecond), testcase.want.Expiry.Truncate(100*time.Millisecond)) 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /tools/data_converter/README.md: -------------------------------------------------------------------------------- 1 | # About 2 | This folder contains a suite of tools that builds upon [tensorflow/datasets](https://www.tensorflow.org/datasets) 3 | that can be used to easily convert raw data into the TFRecord format on GCS.
4 | This is helpful because data must be stored in [TFRecords](https://www.tensorflow.org/tutorials/load_data/tf_records) 5 | on [GCS](https://cloud.google.com/storage/) to run with TPU models. 6 | 7 | # High-Level Overview 8 | The folder is divided by task, and each task has specific required fields: 9 | the "essential inputs" for that task. 10 | 11 | For example, image classification requires an image and a label. However, models 12 | may require more features, and this tool both facilitates the extraction of 13 | these extra features and converts the data into TFRecords. 14 | 15 | Currently supported tasks: 16 | - Image Classification 17 | 18 | # Usage 19 | To use the tool, create an implementation of one of the abstract BuilderConfigs. 20 | 21 | For example: 22 | ``` 23 | class MyBuilderConfig(ImageClassificationDataConfig): 24 | ... 25 | 26 | config = MyBuilderConfig(name="MyBuilderConfig", 27 | description="MyBuilderConfig") 28 | ds = ImageClassificationData(config) 29 | ds.download_and_prepare() 30 | 31 | ``` 32 | 33 | Each folder also contains simple examples for further reference. 34 | -------------------------------------------------------------------------------- /tools/data_converter/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tools/data_converter/image_classification/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tools/data_converter/object_detection/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tools/dataset_profiler/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The TensorFlow Authors. All Rights Reserved. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tools/datasets/README.md: -------------------------------------------------------------------------------- 1 | # Tools for preparing datasets 2 | 3 | ## imagenet_to_gcs.py 4 | Downloads [Image-Net](http://image-net.org/) dataset, transforms data into 5 | `TFRecords`, and uploads to the specified GCS bucket. The script also has flags to 6 | skip the GCS bucket upload and utilize an existing download of ImageNet. 7 | Common to the various options are the following commands: 8 | 9 | ```bash 10 | pip install gcloud google-cloud-storage 11 | pip install tensorflow 12 | ``` 13 | 14 | **Image-Net to GCS** 15 | 16 | Downloads the files from [Image-Net](http://image-net.org/), processes them into 17 | `TFRecords` and uploads them to the specified GCS bucket. 
18 | 19 | ```bash 20 | python imagenet_to_gcs.py \ 21 | --project="TEST_PROJECT" \ 22 | --gcs_output_path="gs://TEST_BUCKET/IMAGENET_DIR" \ 23 | --local_scratch_dir="./imagenet" \ 24 | --imagenet_username=FILL_ME_IN \ 25 | --imagenet_access_key=FILL_ME_IN 26 | ``` 27 | 28 | **Image-Net to local only** 29 | 30 | Downloads the files from [Image-Net](http://image-net.org/) and processes them 31 | into `TFRecords` but does not upload them to GCS. 32 | 33 | ```bash 34 | # `local_scratch_dir` will be where the TFRecords are stored. 35 | python imagenet_to_gcs.py \ 36 | --local_scratch_dir=/data/imagenet \ 37 | --nogcs_upload 38 | 39 | ``` 40 | 41 | **Image-Net with existing .tar files from Image-Net** 42 | 43 | Utilizes already downloaded .tar files of the images. 44 | 45 | 46 | ```bash 47 | export IMAGENET_HOME=FILL_ME_IN 48 | # Set up folders 49 | mkdir -p $IMAGENET_HOME/validation 50 | mkdir -p $IMAGENET_HOME/train 51 | 52 | # Extract validation and training 53 | tar xf ILSVRC2012_img_val.tar -C $IMAGENET_HOME/validation 54 | tar xf ILSVRC2012_img_train.tar -C $IMAGENET_HOME/train 55 | 56 | # Extract the individual training tar files. This can be pasted 57 | # directly into a bash command line, or saved to a file and executed. 58 | cd $IMAGENET_HOME/train 59 | 60 | for f in *.tar; do 61 | d=`basename $f .tar` 62 | mkdir $d 63 | tar xf $f -C $d 64 | done 65 | 66 | cd $IMAGENET_HOME # Move back to the base folder 67 | 68 | # [Optional] Delete the tar files if desired, as they are no longer needed 69 | rm $IMAGENET_HOME/train/*.tar 70 | 71 | # Download labels file. 72 | wget -O $IMAGENET_HOME/synset_labels.txt \ 73 | https://raw.githubusercontent.com/tensorflow/models/master/research/inception/inception/data/imagenet_2012_validation_synset_labels.txt 74 | 75 | # Process the files. Remember to get the script from GitHub first. The TFRecords 76 | # will end up in the --local_scratch_dir.
To upload to gcs with this method 77 | # leave off `nogcs_upload` and provide gcs flags for project and output_path. 78 | python imagenet_to_gcs.py \ 79 | --raw_data_dir=$IMAGENET_HOME \ 80 | --local_scratch_dir=$IMAGENET_HOME/tf_records \ 81 | --nogcs_upload 82 | ``` 83 | -------------------------------------------------------------------------------- /tools/datasets/download_and_preprocess_coco_k8s.yaml: -------------------------------------------------------------------------------- 1 | # Download and preprocess the COCO dataset. 2 | # 3 | # Instructions: 4 | # 1. Follow the instructions on https://cloud.google.com/tpu/docs/kubernetes-engine-setup 5 | # to create a Kubernetes Engine cluster. The Job must be running at least 6 | # on a n1-standard-4 machine. 7 | # 2. Change the environment variable DATA_BUCKET below to the path of the 8 | # Google Cloud Storage bucket where you want to store the training data. 9 | # 3. Run `kubectl create -f download_and_preprocess_coco_k8s.yaml`. 10 | 11 | apiVersion: batch/v1 12 | kind: Job 13 | metadata: 14 | name: download-and-preprocess-coco 15 | spec: 16 | template: 17 | spec: 18 | restartPolicy: Never 19 | containers: 20 | - name: download-and-preprocess-coco 21 | # The official TensorFlow 1.13 TPU model image built from https://github.com/tensorflow/tpu/blob/r1.13/tools/docker/Dockerfile. 22 | image: gcr.io/tensorflow/tpu-models:r1.13 23 | command: 24 | - /bin/bash 25 | - -c 26 | - > 27 | DEBIAN_FRONTEND=noninteractive apt-get update && 28 | cd /tensorflow_tpu_models/tools/datasets && 29 | bash download_and_preprocess_coco.sh /scratch-dir && 30 | gsutil -m cp /scratch-dir/*.tfrecord ${DATA_BUCKET}/coco && 31 | gsutil cp /scratch-dir/raw-data/annotations/*.json ${DATA_BUCKET}/coco 32 | env: 33 | # [REQUIRED] Must specify the Google Cloud Storage location where the 34 | # COCO dataset will be stored. 
35 | - name: DATA_BUCKET 36 | value: "gs:///data/coco" 37 | volumeMounts: 38 | - mountPath: /scratch-dir 39 | name: scratch-volume 40 | volumes: 41 | - name: scratch-volume 42 | persistentVolumeClaim: 43 | claimName: scratch-disk-coco 44 | --- 45 | apiVersion: v1 46 | kind: PersistentVolumeClaim 47 | metadata: 48 | name: scratch-disk-coco 49 | spec: 50 | accessModes: 51 | - ReadWriteOnce 52 | resources: 53 | requests: 54 | storage: 100Gi 55 | -------------------------------------------------------------------------------- /tools/datasets/imagenet_to_gcs_k8s.yaml: -------------------------------------------------------------------------------- 1 | # Download and preprocess the ImageNet dataset. 2 | # 3 | # Instructions: 4 | # 1. Follow the instructions on https://cloud.google.com/tpu/docs/kubernetes-engine-setup 5 | # to create a Kubernetes Engine cluster. The Job must be running at least 6 | # on a n1-standard-4 machine. 7 | # 2. Change the environment variable 8 | # - PROJECT_NAME to your project name. 9 | # - DATA_BUCKET to the path of the Google Cloud Storage bucket where you 10 | # want to store the training data. 11 | # - IMAGENET_USERNAME and IMAGENET_PASSWORD to the username and password of 12 | # your ImageNet account. 13 | # 3. Run `kubectl create -f imagenet_to_gcs_k8s.yaml`. 14 | 15 | apiVersion: batch/v1 16 | kind: Job 17 | metadata: 18 | name: imagenet-to-gcs 19 | spec: 20 | template: 21 | spec: 22 | restartPolicy: Never 23 | containers: 24 | - name: imagenet-to-gcs 25 | # The official TensorFlow 1.11 TPU model image built from https://github.com/tensorflow/tpu/blob/r1.11/tools/docker/Dockerfile. 
26 | image: gcr.io/tensorflow/tpu-models:r1.11 27 | command: 28 | - python 29 | - /tensorflow_tpu_models/tools/datasets/imagenet_to_gcs.py 30 | - --project=$(PROJECT_NAME) 31 | - --gcs_output_path=$(DATA_BUCKET) 32 | - --local_scratch_dir=/scratch-dir 33 | - --imagenet_username=$(IMAGENET_USERNAME) 34 | - --imagenet_access_key=$(IMAGENET_PASSWORD) 35 | volumeMounts: 36 | - mountPath: /scratch-dir 37 | name: scratch-volume 38 | env: 39 | # [REQUIRED] Must specify your project name. 40 | - name: PROJECT_NAME 41 | value: "" 42 | # [REQUIRED] Must specify the Google Cloud Storage location where the 43 | # ImageNet dataset will be stored. 44 | - name: DATA_BUCKET 45 | value: "gs:///data/imagenet" 46 | # [REQUIRED] Must specify the username of your ImageNet account. 47 | - name: IMAGENET_USERNAME 48 | value: "" 49 | # [REQUIRED] Must specify the password of your ImageNet account. 50 | - name: IMAGENET_PASSWORD 51 | value: "" 52 | volumes: 53 | - name: scratch-volume 54 | persistentVolumeClaim: 55 | claimName: scratch-disk-imagenet 56 | --- 57 | apiVersion: v1 58 | kind: PersistentVolumeClaim 59 | metadata: 60 | name: scratch-disk-imagenet 61 | spec: 62 | accessModes: 63 | - ReadWriteOnce 64 | resources: 65 | requests: 66 | storage: 300Gi 67 | -------------------------------------------------------------------------------- /tools/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Docker image for running TPU tensorflow examples. 
2 | FROM ubuntu:bionic 3 | 4 | RUN apt-get update && apt-get install -y --no-install-recommends \ 5 | curl \ 6 | wget \ 7 | sudo \ 8 | gnupg \ 9 | lsb-release \ 10 | ca-certificates \ 11 | build-essential \ 12 | git \ 13 | python \ 14 | python-pip \ 15 | python-setuptools && \ 16 | export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \ 17 | echo "deb https://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" > /etc/apt/sources.list.d/google-cloud-sdk.list && \ 18 | curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \ 19 | apt-get update && \ 20 | apt-get install -y google-cloud-sdk && \ 21 | pip install pyyaml && \ 22 | pip install wheel && \ 23 | pip install tensorflow==1.13.1 && \ 24 | pip install google-cloud-storage && \ 25 | pip install google-api-python-client && \ 26 | pip install oauth2client 27 | 28 | # Checkout tensorflow/models at the appropriate branch 29 | RUN git clone -b r1.13.0 --depth 1 https://github.com/tensorflow/models.git /tensorflow_models 30 | 31 | # Checkout tensorflow/tpu at the appropriate branch 32 | RUN git clone -b r1.13 --depth 1 https://github.com/tensorflow/tpu.git /tensorflow_tpu_models 33 | -------------------------------------------------------------------------------- /tools/docker/Dockerfile.util: -------------------------------------------------------------------------------- 1 | # Docker image of TensorBoard and TPU Profiler. 
2 | FROM ubuntu:bionic 3 | RUN apt-get update && apt-get install -y --no-install-recommends \ 4 | ca-certificates \ 5 | build-essential \ 6 | git \ 7 | python \ 8 | python-pip \ 9 | python-setuptools && \ 10 | pip install tensorflow==1.11 && \ 11 | pip install google-cloud-storage && \ 12 | pip install google-api-python-client && \ 13 | pip install oauth2client && \ 14 | pip install cloud-tpu-profiler==1.11 -------------------------------------------------------------------------------- /tools/kubernetes/tensorboard_k8s.yaml: -------------------------------------------------------------------------------- 1 | # Run TensorBoard on Google Kubernetes Engine to visualize model learning 2 | # statistics. 3 | # 4 | # https://cloud.google.com/tpu/docs/kubernetes-engine-setup 5 | # 6 | # [Instructions] 7 | # 1. Change the environment variable MODEL_BUCKET below to the Google Cloud 8 | # Storage location where the output model and the TensorFlow events exist. 9 | # 2. Run `kubectl apply -f tensorboard_k8s.yaml`. 10 | # 3. Run `kubectl get service tensorboard-service` to get the service's EXTERNAL-IP. 11 | # NOTE: A Load Balancer will be created to route the requests to 12 | # TensorBoard. This will incur additional cost. See https://cloud.google.com/compute/pricing#lb. 13 | # 4. Access http://EXTERNAL-IP:6006 in your browser, substituting the address from step 3. 14 | 15 | apiVersion: apps/v1 16 | kind: Deployment 17 | metadata: 18 | name: tensorboard 19 | spec: 20 | replicas: 1 21 | selector: 22 | matchLabels: 23 | name: tensorboard 24 | template: 25 | metadata: 26 | labels: 27 | name: tensorboard 28 | spec: 29 | restartPolicy: Always 30 | containers: 31 | - name: tensorboard 32 | # The official TensorFlow 1.11 TPU utility image built from https://github.com/tensorflow/tpu/blob/r1.11/tools/docker/Dockerfile.util.
33 | image: gcr.io/tensorflow/tpu-util:r1.11 34 | command: 35 | - tensorboard 36 | - --logdir=$(MODEL_BUCKET) 37 | env: 38 | # [REQUIRED] Must specify the Google Cloud Storage location where 39 | # your output model and TensorFlow events are stored. 40 | - name: MODEL_BUCKET 41 | value: gs://my-project/my-model 42 | ports: 43 | - containerPort: 6006 44 | --- 45 | apiVersion: v1 46 | kind: Service 47 | metadata: 48 | name: tensorboard-service 49 | spec: 50 | type: LoadBalancer 51 | selector: 52 | name: tensorboard 53 | ports: 54 | - port: 6006 55 | targetPort: 6006 56 | -------------------------------------------------------------------------------- /tools/kubernetes/tpu_profiler_k8s.yaml: -------------------------------------------------------------------------------- 1 | # Run TPU Profiler on Google Kubernetes Engine to generate TPU tracing data. 2 | # 3 | # https://cloud.google.com/tpu/docs/kubernetes-engine-setup 4 | # 5 | # [Instructions] 6 | # 1. Change the environment variable TPU_NAME below to the name of the Cloud 7 | # TPU you want to profile. 8 | # 2. Change the environment variable MODEL_BUCKET below to the Google Cloud 9 | # Storage location where the output model and the TensorFlow events exist. 10 | # 3. Run `kubectl create -f tpu_profiler_k8s.yaml`. 11 | # 4. See the results on TensorBoard. 12 | 13 | apiVersion: batch/v1 14 | kind: Job 15 | metadata: 16 | generateName: tpu-profiler- 17 | spec: 18 | template: 19 | spec: 20 | restartPolicy: Never 21 | containers: 22 | - name: tpu-profiler 23 | # The official TensorFlow 1.11 TPU utility image built from https://github.com/tensorflow/tpu/blob/r1.11/tools/docker/Dockerfile.util. 24 | image: gcr.io/tensorflow/tpu-util:r1.11 25 | command: 26 | - capture_tpu_profile 27 | - --tpu=$(TPU_NAME) 28 | - --logdir=$(MODEL_BUCKET) 29 | - --duration_ms=$(TRACING_DURATION_IN_MS) 30 | env: 31 | # [REQUIRED] Must specify the name of the Cloud TPU. 
32 | # See https://cloud.google.com/tpu/docs/kubernetes-engine-setup to 33 | # get the name of the Cloud TPU used by your pod. 34 | - name: TPU_NAME 35 | value: my-tpu 36 | # [REQUIRED] Must specify the Google Cloud Storage location where 37 | # your output model and TensorFlow events are stored. 38 | - name: MODEL_BUCKET 39 | value: gs://my-project/my-model 40 | # How long the profiling should last (in milliseconds). 41 | - name: TRACING_DURATION_IN_MS 42 | value: "2000" 43 | --------------------------------------------------------------------------------