├── .github └── workflows │ ├── build_docs.sh │ ├── build_test.yml │ ├── gpu_test.sh │ ├── stale.yml │ └── unittest.yml ├── .gitignore ├── .gitmodules ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── docs ├── .gitignore ├── .nojekyll ├── Doxyfile ├── Makefile ├── README.txt ├── _static │ ├── action-recognition.png │ ├── action_basketball_demo.gif │ ├── apache2.svg │ ├── applications.html │ ├── assets │ │ ├── img │ │ │ ├── action_recognition_demo.png │ │ │ ├── background │ │ │ │ ├── img-01.jpg │ │ │ │ ├── img-02.jpg │ │ │ │ ├── img-03.jpg │ │ │ │ ├── img-04.jpg │ │ │ │ ├── img-05.jpg │ │ │ │ ├── img-06.jpg │ │ │ │ ├── img-07.jpg │ │ │ │ ├── img-08.jpg │ │ │ │ ├── img-09.jpg │ │ │ │ ├── img-10.jpg │ │ │ │ └── img-11.jpg │ │ │ ├── gluon_white.png │ │ │ ├── image-classification-demo.png │ │ │ ├── instance_segmentation_demo.png │ │ │ ├── object-detection-demo.png │ │ │ ├── pose_estimation_demo.png │ │ │ └── semantic-segmentation_demo.png │ │ └── svg │ │ │ ├── icons.svg │ │ │ ├── play.svg │ │ │ ├── video-icon-dark.svg │ │ │ └── video-icon.svg │ ├── classification-demo.png │ ├── css │ │ ├── custom.css │ │ ├── material_icon.css │ │ └── slides.min.css │ ├── depth.png │ ├── gluon-logo.png │ ├── gluon-logo.svg │ ├── gluon.ico │ ├── gluon_black.png │ ├── gluon_s2.png │ ├── gluon_white.png │ ├── google_analytics.js │ ├── hidebib.js │ ├── image-classification.png │ ├── imagenet_banner.jpeg │ ├── install-options.js │ ├── instance-segmentation.png │ ├── js │ │ ├── jquery.min.js │ │ └── slides.min.js │ ├── logos │ │ ├── acer_byoc_grad_lockup_rgb.png │ │ ├── acroquest_logo_cmyk_2.png │ │ ├── embed.html │ │ ├── kumiawase_e_1_RGB.jpg │ │ └── pioneer.png │ ├── object-detection.png │ ├── plot_help.png │ ├── pose-estimation.svg │ ├── semantic-segmentation.png │ ├── short_demo.gif │ ├── smot_demo.gif │ ├── smot_multi_demo.gif │ ├── tabs.js │ └── tracking_demo.gif ├── _templates │ ├── index.html │ └── layout.html ├── api │ ├── data.batchify.rst │ ├── data.datasets.rst │ ├── data.transforms.rst │ ├── index.rst │ ├── loss.rst │ ├── model_zoo.rst │ ├── nn.rst │ └── utils.rst ├── build.yml ├── conf.py ├── contents.rst ├── how_to │ ├── contribute.rst │ ├── index.rst │ └── support.rst ├── install.rst ├── install │ ├── install-include.rst │ └── install-more.rst ├── model_zoo │ ├── action_recognition.rst │ ├── action_recognition_mxnet.rst │ ├── action_recognition_torch.rst │ ├── classification.rst │ ├── classification_mxnet.rst │ ├── classification_torch.rst │ ├── csv_tables │ │ ├── Action_Recognitions │ │ │ ├── HMDB51.csv │ │ │ ├── Kinetics400.csv │ │ │ ├── Kinetics400_torch.csv │ │ │ ├── Kinetics700.csv │ │ │ ├── Kinetics700_torch.csv │ │ │ ├── Something-Something-V2.csv │ │ │ ├── Something-Something-V2_torch.csv │ │ │ └── UCF101.csv │ │ ├── Classifications │ │ │ ├── CIFAR10.csv │ │ │ ├── DenseNet.csv │ │ │ ├── MobileNet.csv │ │ │ ├── Others.csv │ │ │ ├── Pruned_ResNet.csv │ │ │ ├── ResNeSt.csv │ │ │ ├── ResNet.csv │ │ │ ├── ResNext.csv │ │ │ ├── SqueezeNet.csv │ │ │ └── VGG.csv │ │ ├── Depths │ │ │ ├── KITTI.csv │ │ │ └── PoseNet.csv │ │ ├── Detections │ │ │ ├── MSCOCO_CenterNet.csv │ │ │ ├── MSCOCO_Faster-RCNN.csv │ │ │ ├── MSCOCO_SSD.csv │ │ │ ├── MSCOCO_YOLO-v3.csv │ │ │ ├── Pascal_CenterNet.csv │ │ │ ├── Pascal_Faster-RCNN.csv │ │ │ ├── Pascal_SSD.csv │ │ │ └── Pascal_YOLO-v3.csv │ │ ├── Poses │ │ │ ├── MSCOCO_Alpha-Pose.csv │ │ │ ├── MSCOCO_Mobile-Pose.csv │ │ │ └── MSCOCO_Simple-Pose.csv │ │ └── Segmentations │ │ │ ├── IS_MS-COCO.csv │ │ │ ├── SS_ADE20K.csv │ │ │ ├── SS_Cityscapes.csv │ │ │ ├── 
SS_MHP-V1.csv │ │ │ ├── SS_MS-COCO.csv │ │ │ └── SS_Pascal-VOC.csv │ ├── depth.rst │ ├── depth_mxnet.rst │ ├── depth_torch.rst │ ├── detection.rst │ ├── detection_mxnet.rst │ ├── detection_torch.rst │ ├── index.rst │ ├── pose.rst │ ├── pose_mxnet.rst │ ├── pose_torch.rst │ ├── segmentation.rst │ ├── segmentation_mxnet.rst │ └── segmentation_torch.rst ├── slides.md ├── tutorials │ ├── action_recognition │ │ ├── README.txt │ │ ├── decord_loader.py │ │ ├── demo_custom.py │ │ ├── demo_i3d_kinetics400.py │ │ ├── demo_slowfast_kinetics400.py │ │ ├── demo_tsn_ucf101.py │ │ ├── dive_deep_i3d_kinetics400.py │ │ ├── dive_deep_slowfast_kinetics400.py │ │ ├── dive_deep_tsn_ucf101.py │ │ ├── feat_custom.py │ │ └── finetune_custom.py │ ├── auto_module │ │ ├── README.txt │ │ ├── demo_auto_data.py │ │ ├── demo_auto_detection.py │ │ └── train_image_classifier_basic.py │ ├── classification │ │ ├── README.txt │ │ ├── demo_cifar10.py │ │ ├── demo_imagenet.py │ │ ├── dive_deep_cifar10.py │ │ ├── dive_deep_imagenet.py │ │ └── transfer_learning_minc.py │ ├── datasets │ │ ├── .gitignore │ │ ├── README.txt │ │ ├── ade20k.py │ │ ├── cityscapes.py │ │ ├── det.py │ │ ├── detection_custom.py │ │ ├── hmdb51.py │ │ ├── imagenet.py │ │ ├── kinetics400.py │ │ ├── mhp_v1.py │ │ ├── mscoco.py │ │ ├── mscoco_tracking.py │ │ ├── otb2015.py │ │ ├── pascal_voc.py │ │ ├── recordio.py │ │ ├── somethingsomethingv2.py │ │ ├── ucf101.py │ │ ├── vid.py │ │ └── youtube_bb.py │ ├── deployment │ │ ├── .gitignore │ │ ├── README.txt │ │ ├── cpp_inference.py │ │ ├── export_network.py │ │ └── int8_inference.py │ ├── depth │ │ ├── README.txt │ │ ├── demo_monodepth2.py │ │ ├── test_monodepth2_posenet.py │ │ ├── train_monodepth2.py │ │ └── videos_monodepth2.py │ ├── detection │ │ ├── .gitignore │ │ ├── README.txt │ │ ├── demo_center_net.py │ │ ├── demo_faster_rcnn.py │ │ ├── demo_jetson.py │ │ ├── demo_ssd.py │ │ ├── demo_webcam.py │ │ ├── demo_yolo.py │ │ ├── finetune_detection.py │ │ ├── skip_fintune.py │ │ ├── train_faster_rcnn_voc.py │ │ ├── train_ssd_advanced.py │ │ ├── train_ssd_voc.py │ │ └── train_yolo_v3.py │ ├── distributed │ │ ├── README.txt │ │ └── distributed_slowfast.py │ ├── index.rst │ ├── instance │ │ ├── .gitignore │ │ ├── README.txt │ │ ├── demo_mask_rcnn.py │ │ └── train_mask_rcnn_coco.py │ ├── pose │ │ ├── README.txt │ │ ├── cam_demo.py │ │ ├── demo_alpha_pose.py │ │ ├── demo_simple_pose.py │ │ └── dive_deep_simple_pose.py │ ├── segmentation │ │ ├── .gitignore │ │ ├── README.txt │ │ ├── demo_deeplab.py │ │ ├── demo_fcn.py │ │ ├── demo_icnet.py │ │ ├── demo_psp.py │ │ ├── train_fcn.py │ │ ├── train_psp.py │ │ └── voc_sota.py │ └── tracking │ │ ├── README.txt │ │ ├── demo_SiamRPN.py │ │ ├── demo_smot.py │ │ └── train_siamrpn.py └── tutorials_torch │ ├── action_recognition │ ├── README.txt │ ├── ddp_pytorch.py │ ├── demo_i3d_kinetics400.py │ ├── extract_feat.py │ ├── finetune_custom.py │ └── speed.py │ └── index.rst ├── gluoncv ├── __init__.py ├── auto │ ├── __init__.py │ ├── data │ │ ├── __init__.py │ │ ├── auto_data.py │ │ ├── data_zoo.py │ │ └── dataset.py │ ├── estimators │ │ ├── __init__.py │ │ ├── base_estimator.py │ │ ├── center_net │ │ │ ├── __init__.py │ │ │ ├── center_net.py │ │ │ └── default.py │ │ ├── conf.py │ │ ├── constants.py │ │ ├── faster_rcnn │ │ │ ├── __init__.py │ │ │ ├── default.py │ │ │ ├── faster_rcnn.py │ │ │ └── utils.py │ │ ├── image_classification │ │ │ ├── __init__.py │ │ │ ├── default.py │ │ │ ├── image_classification.py │ │ │ └── utils.py │ │ ├── mask_rcnn │ │ │ ├── __init__.py │ │ │ ├── 
default.py │ │ │ ├── mask_rcnn.py │ │ │ └── utils.py │ │ ├── ssd │ │ │ ├── __init__.py │ │ │ ├── default.py │ │ │ ├── ssd.py │ │ │ └── utils.py │ │ ├── torch_image_classification │ │ │ ├── __init__.py │ │ │ ├── default.py │ │ │ ├── torch_image_classification.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── constants.py │ │ │ │ ├── metrics.py │ │ │ │ ├── model.py │ │ │ │ ├── optimizer.py │ │ │ │ ├── scheduler.py │ │ │ │ └── utils.py │ │ ├── utils.py │ │ └── yolo │ │ │ ├── __init__.py │ │ │ ├── default.py │ │ │ ├── utils.py │ │ │ └── yolo.py │ └── tasks │ │ ├── __init__.py │ │ ├── image_classification.py │ │ ├── object_detection.py │ │ └── utils.py ├── check.py ├── data │ ├── __init__.py │ ├── ade20k │ │ ├── __init__.py │ │ └── segmentation.py │ ├── base.py │ ├── batchify.py │ ├── cityscapes.py │ ├── dataloader.py │ ├── hmdb51 │ │ ├── __init__.py │ │ └── classification.py │ ├── imagenet │ │ ├── __init__.py │ │ └── classification.py │ ├── kinetics400 │ │ ├── __init__.py │ │ └── classification.py │ ├── kinetics700 │ │ ├── __init__.py │ │ └── classification.py │ ├── kitti │ │ ├── __init__.py │ │ ├── kitti_dataset.py │ │ ├── kitti_utils.py │ │ └── mono_dataset.py │ ├── lst │ │ ├── __init__.py │ │ └── detection.py │ ├── market1501 │ │ ├── __init__.py │ │ ├── data_read.py │ │ └── label_read.py │ ├── mhp.py │ ├── mixup │ │ ├── __init__.py │ │ └── detection.py │ ├── mscoco │ │ ├── __init__.py │ │ ├── detection.py │ │ ├── instance.py │ │ ├── keypoints.py │ │ ├── segmentation.py │ │ └── utils.py │ ├── otb │ │ ├── __init__.py │ │ └── tracking.py │ ├── pascal_aug │ │ ├── __init__.py │ │ └── segmentation.py │ ├── pascal_voc │ │ ├── __init__.py │ │ ├── detection.py │ │ └── segmentation.py │ ├── recordio │ │ ├── __init__.py │ │ └── detection.py │ ├── sampler.py │ ├── segbase.py │ ├── somethingsomethingv2 │ │ ├── __init__.py │ │ └── classification.py │ ├── tracking_data │ │ ├── __init__.py │ │ └── track.py │ ├── transforms │ │ ├── __init__.py │ │ ├── bbox.py │ │ ├── block.py │ │ ├── experimental │ │ │ ├── __init__.py │ │ │ ├── bbox.py │ │ │ └── image.py │ │ ├── image.py │ │ ├── mask.py │ │ ├── pose.py │ │ ├── presets │ │ │ ├── __init__.py │ │ │ ├── alpha_pose.py │ │ │ ├── center_net.py │ │ │ ├── imagenet.py │ │ │ ├── rcnn.py │ │ │ ├── segmentation.py │ │ │ ├── simple_pose.py │ │ │ ├── ssd.py │ │ │ └── yolo.py │ │ ├── track.py │ │ └── video.py │ ├── ucf101 │ │ ├── __init__.py │ │ └── classification.py │ ├── video_custom │ │ ├── __init__.py │ │ └── classification.py │ └── visdrone │ │ ├── __init__.py │ │ └── detection.py ├── loss.py ├── model_zoo │ ├── __init__.py │ ├── action_recognition │ │ ├── __init__.py │ │ ├── actionrec_inceptionv1.py │ │ ├── actionrec_inceptionv3.py │ │ ├── actionrec_resnetv1b.py │ │ ├── actionrec_vgg16.py │ │ ├── c3d.py │ │ ├── i3d_inceptionv1.py │ │ ├── i3d_inceptionv3.py │ │ ├── i3d_resnet.py │ │ ├── i3d_slow.py │ │ ├── non_local.py │ │ ├── p3d.py │ │ ├── r2plus1d.py │ │ └── slowfast.py │ ├── alexnet.py │ ├── alpha_pose │ │ ├── __init__.py │ │ ├── fast_pose.py │ │ └── utils.py │ ├── attention.py │ ├── center_net │ │ ├── __init__.py │ │ ├── center_net.py │ │ ├── deconv_dla.py │ │ ├── deconv_resnet.py │ │ ├── duc_mobilenet.py │ │ └── target_generator.py │ ├── cifarresnet.py │ ├── cifarresnext.py │ ├── cifarwideresnet.py │ ├── danet.py │ ├── deeplabv3.py │ ├── deeplabv3_plus.py │ ├── deeplabv3b_plus.py │ ├── densenet.py │ ├── dla.py │ ├── fastscnn.py │ ├── fcn.py │ ├── googlenet.py │ ├── hrnet.py │ ├── icnet.py │ ├── inception.py │ ├── mobilenet.py │ ├── mobilenetv3.py │ ├── 
model_store.py │ ├── model_zoo.py │ ├── monodepthv2 │ │ ├── __init__.py │ │ ├── depth_decoder.py │ │ ├── layers.py │ │ ├── monodepth2.py │ │ ├── monodepth2_posenet.py │ │ ├── pose_decoder.py │ │ └── resnet_encoder.py │ ├── nasnet.py │ ├── pruned_resnet │ │ ├── __init__.py │ │ ├── resnet101_v1d_1.9x.json │ │ ├── resnet101_v1d_2.2x.json │ │ ├── resnet18_v1b_2.6x.json │ │ ├── resnet50_v1d_1.8x.json │ │ ├── resnet50_v1d_3.6x.json │ │ ├── resnet50_v1d_5.9x.json │ │ ├── resnet50_v1d_8.8x.json │ │ └── resnetv1b_pruned.py │ ├── pspnet.py │ ├── quantized │ │ ├── __init__.py │ │ ├── mobilenet1.0_int8-symbol.json │ │ ├── quantized.py │ │ ├── resnet50_v1_int8-symbol.json │ │ ├── ssd_300_vgg16_atrous_voc_int8-symbol.json │ │ ├── ssd_512_mobilenet1.0_voc_int8-symbol.json │ │ ├── ssd_512_resnet50_v1_voc_int8-symbol.json │ │ └── ssd_512_vgg16_atrous_voc_int8-symbol.json │ ├── rcnn │ │ ├── __init__.py │ │ ├── faster_rcnn │ │ │ ├── __init__.py │ │ │ ├── data_parallel.py │ │ │ ├── doublehead_rcnn.py │ │ │ ├── faster_rcnn.py │ │ │ ├── predefined_models.py │ │ │ └── rcnn_target.py │ │ ├── mask_rcnn │ │ │ ├── __init__.py │ │ │ ├── data_parallel.py │ │ │ ├── mask_rcnn.py │ │ │ ├── predefined_models.py │ │ │ └── rcnn_target.py │ │ ├── rcnn.py │ │ └── rpn │ │ │ ├── __init__.py │ │ │ ├── anchor.py │ │ │ ├── bbox_clip.py │ │ │ ├── cython_rpn_target.pyx │ │ │ ├── proposal.py │ │ │ ├── rpn.py │ │ │ └── rpn_target.py │ ├── residual_attentionnet.py │ ├── resnest.py │ ├── resnet.py │ ├── resnetv1b.py │ ├── resnext.py │ ├── se_resnet.py │ ├── segbase.py │ ├── senet.py │ ├── shufflenet.py │ ├── siamrpn │ │ ├── __init__.py │ │ ├── siam_alexnet.py │ │ ├── siam_net.py │ │ ├── siam_rpn.py │ │ └── siamrpn_tracker.py │ ├── simple_pose │ │ ├── __init__.py │ │ ├── mobile_pose.py │ │ ├── pose_target.py │ │ └── simple_pose_resnet.py │ ├── smot │ │ ├── __init__.py │ │ ├── general_detector.py │ │ ├── motion_estimation.py │ │ ├── presets.py │ │ ├── smot_tracker.py │ │ ├── ssd.py │ │ ├── tracktors.py │ │ └── utils.py │ ├── squeezenet.py │ ├── ssd │ │ ├── __init__.py │ │ ├── anchor.py │ │ ├── presets.py │ │ ├── resnet_v1b_ssd.py │ │ ├── ssd.py │ │ ├── target.py │ │ └── vgg_atrous.py │ ├── vgg.py │ ├── wideresnet.py │ ├── xception.py │ └── yolo │ │ ├── __init__.py │ │ ├── darknet.py │ │ ├── yolo3.py │ │ └── yolo_target.py ├── nn │ ├── __init__.py │ ├── bbox.py │ ├── block.py │ ├── coder.py │ ├── cython_bbox.pyx │ ├── dropblock.py │ ├── feature.py │ ├── gn.py │ ├── matcher.py │ ├── predictor.py │ ├── sampler.py │ └── splat.py ├── torch │ ├── __init__.py │ ├── data │ │ ├── __init__.py │ │ ├── coot │ │ │ └── dataloader.py │ │ ├── detection │ │ │ ├── __init__.py │ │ │ ├── detection_dataset.py │ │ │ ├── detection_utils.py │ │ │ └── samplers │ │ │ │ ├── __init__.py │ │ │ │ ├── distributed_sampler.py │ │ │ │ └── grouped_batch_sampler.py │ │ ├── gluoncv_motion_dataset │ │ │ ├── __init__.py │ │ │ ├── dataset.py │ │ │ ├── dataset_pre_processor.py │ │ │ ├── ingestion │ │ │ │ ├── __init__.py │ │ │ │ ├── duplicate_remover.py │ │ │ │ ├── filename_sanitizer.py │ │ │ │ └── video_chunker.py │ │ │ ├── io │ │ │ │ ├── __init__.py │ │ │ │ └── video_io.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── ingestion_utils.py │ │ │ │ └── serialization_utils.py │ │ ├── pose │ │ │ ├── __init__.py │ │ │ └── dataset_pose.py │ │ ├── registry │ │ │ ├── __init__.py │ │ │ ├── catalog.py │ │ │ ├── metadata.py │ │ │ └── mscoco.py │ │ ├── structures │ │ │ ├── __init__.py │ │ │ ├── beziers.py │ │ │ ├── boxes.py │ │ │ ├── image_list.py │ │ │ ├── instances.py │ │ │ ├── 
keypoints.py │ │ │ └── masks.py │ │ ├── transforms │ │ │ ├── instance_transforms │ │ │ │ ├── __init__.py │ │ │ │ ├── augmentation.py │ │ │ │ ├── transform.py │ │ │ │ └── transform_utils.py │ │ │ ├── transforms.py │ │ │ └── videotransforms │ │ │ │ ├── functional.py │ │ │ │ ├── stack_transforms.py │ │ │ │ ├── tensor_transforms.py │ │ │ │ ├── utils │ │ │ │ ├── functional.py │ │ │ │ └── images.py │ │ │ │ ├── video_transforms.py │ │ │ │ └── volume_transforms.py │ │ └── video_cls │ │ │ ├── __init__.py │ │ │ ├── dataset_classification.py │ │ │ └── multigrid_helper.py │ ├── engine │ │ ├── __init__.py │ │ ├── config │ │ │ ├── __init__.py │ │ │ ├── action_recognition.py │ │ │ ├── coot.py │ │ │ └── directpose.py │ │ └── launch.py │ ├── model_zoo │ │ ├── __init__.py │ │ ├── action_recognition │ │ │ ├── __init__.py │ │ │ ├── actionrec_resnetv1b.py │ │ │ ├── i3d_resnet.py │ │ │ ├── i3d_slow.py │ │ │ ├── ircsnv2.py │ │ │ ├── non_local.py │ │ │ ├── r2plus1dv1.py │ │ │ ├── r2plus1dv2.py │ │ │ ├── slowfast.py │ │ │ └── tpn.py │ │ ├── model_store.py │ │ ├── model_zoo.py │ │ ├── object_detection │ │ │ ├── __init__.py │ │ │ ├── fcos.py │ │ │ └── model_utils.py │ │ ├── pose │ │ │ ├── __init__.py │ │ │ ├── directpose.py │ │ │ ├── directpose_outputs.py │ │ │ └── directpose_resnet_fpn.py │ │ └── video_language │ │ │ └── coot_model.py │ ├── nn │ │ ├── __init__.py │ │ ├── batch_norm.py │ │ ├── deform_conv.py │ │ ├── focal_loss.py │ │ ├── group_norm.py │ │ ├── iou_loss.py │ │ ├── keypoint_loss.py │ │ ├── nms.py │ │ ├── shape_spec.py │ │ └── smooth_l1_loss.py │ └── utils │ │ ├── __init__.py │ │ ├── comm.py │ │ ├── coot_utils.py │ │ ├── eval_utils │ │ ├── __init__.py │ │ └── coco_eval.py │ │ ├── loss.py │ │ ├── lr_policy.py │ │ ├── model_utils.py │ │ ├── optimizer.py │ │ ├── random.py │ │ ├── task_utils │ │ ├── __init__.py │ │ ├── classification.py │ │ ├── coot.py │ │ └── pose.py │ │ ├── tvm_utils │ │ ├── __init__.py │ │ └── nms.py │ │ ├── utils.py │ │ └── visualizer.py └── utils │ ├── __init__.py │ ├── bbox.py │ ├── block.py │ ├── compress_json.py │ ├── data │ └── tracking.py │ ├── download.py │ ├── export_helper.py │ ├── filesystem.py │ ├── lr_scheduler.py │ ├── metrics │ ├── __init__.py │ ├── accuracy.py │ ├── coco_detection.py │ ├── coco_instance.py │ ├── coco_keypoints.py │ ├── heatmap_accuracy.py │ ├── rcnn.py │ ├── segmentation.py │ ├── tracking.py │ └── voc_detection.py │ ├── parallel.py │ ├── plot_history.py │ ├── random.py │ ├── sync_loader_helper.py │ ├── transforms.py │ ├── version.py │ └── viz │ ├── __init__.py │ ├── bbox.py │ ├── image.py │ ├── keypoints.py │ ├── mask.py │ ├── network.py │ └── segmentation.py ├── scripts ├── README.md ├── action-recognition │ ├── ARXIV.md │ ├── CALIBRATION.md │ ├── README.md │ ├── configuration │ │ ├── i3d_nl10_resnet101_v1_kinetics400.yaml │ │ ├── i3d_nl10_resnet50_v1_kinetics400.yaml │ │ ├── i3d_nl5_resnet101_v1_kinetics400.yaml │ │ ├── i3d_nl5_resnet50_v1_kinetics400.yaml │ │ ├── i3d_resnet101_v1_kinetics400.yaml │ │ ├── i3d_resnet50_v1_custom.yaml │ │ ├── i3d_resnet50_v1_feat.yaml │ │ ├── i3d_resnet50_v1_kinetics400.yaml │ │ ├── i3d_resnet50_v1_sthsthv2.yaml │ │ ├── i3d_slow_resnet101_f16s4_kinetics400.yaml │ │ ├── i3d_slow_resnet101_f16s4_kinetics700.yaml │ │ ├── i3d_slow_resnet101_f32s2_kinetics400.yaml │ │ ├── i3d_slow_resnet101_f8s8_kinetics400.yaml │ │ ├── i3d_slow_resnet50_f16s4_kinetics400.yaml │ │ ├── i3d_slow_resnet50_f32s2_custom.yaml │ │ ├── i3d_slow_resnet50_f32s2_feat.yaml │ │ ├── i3d_slow_resnet50_f32s2_kinetics400.yaml │ │ ├── 
i3d_slow_resnet50_f8s8_kinetics400.yaml │ │ ├── ircsn_v2_resnet152_f32s2_kinetics400.yaml │ │ ├── r2plus1d_v1_resnet18_kinetics400.yaml │ │ ├── r2plus1d_v1_resnet34_kinetics400.yaml │ │ ├── r2plus1d_v1_resnet50_custom.yaml │ │ ├── r2plus1d_v1_resnet50_feat.yaml │ │ ├── r2plus1d_v1_resnet50_kinetics400.yaml │ │ ├── r2plus1d_v2_resnet152_kinetics400.yaml │ │ ├── resnet101_v1b_kinetics400.yaml │ │ ├── resnet152_v1b_kinetics400.yaml │ │ ├── resnet18_v1b_kinetics400.yaml │ │ ├── resnet34_v1b_kinetics400.yaml │ │ ├── resnet50_v1b_custom.yaml │ │ ├── resnet50_v1b_feat.yaml │ │ ├── resnet50_v1b_kinetics400.yaml │ │ ├── resnet50_v1b_sthsthv2.yaml │ │ ├── slowfast_16x8_resnet50_sthsthv2.yaml │ │ ├── slowfast_4x16_resnet50_custom.yaml │ │ ├── slowfast_4x16_resnet50_feat.yaml │ │ ├── slowfast_4x16_resnet50_kinetics400.yaml │ │ ├── slowfast_8x8_resnet101_kinetics400.yaml │ │ ├── slowfast_8x8_resnet50_kinetics400.yaml │ │ ├── tpn_resnet101_f16s4_kinetics400.yaml │ │ ├── tpn_resnet101_f32s2_kinetics400.yaml │ │ ├── tpn_resnet101_f8s8_kinetics400.yaml │ │ ├── tpn_resnet50_f16s4_kinetics400.yaml │ │ ├── tpn_resnet50_f32s2_custom.yaml │ │ ├── tpn_resnet50_f32s2_feat.yaml │ │ ├── tpn_resnet50_f32s2_kinetics400.yaml │ │ └── tpn_resnet50_f8s8_kinetics400.yaml │ ├── feat_extract.py │ ├── feat_extract_pytorch.py │ ├── get_flops.py │ ├── get_fps.py │ ├── inference.py │ ├── test_ddp_pytorch.py │ ├── test_recognizer.py │ ├── train_ddp_pytorch.py │ ├── train_ddp_shortonly_pytorch.py │ └── train_recognizer.py ├── classification │ ├── auto_classification │ │ └── train_auto_classification.py │ ├── cifar │ │ ├── README.md │ │ ├── demo_cifar10.py │ │ ├── train_cifar10.py │ │ └── train_mixup_cifar10.py │ ├── finetune │ │ ├── finetune_minc.py │ │ └── prepare_minc.py │ ├── fit_classification.py │ └── imagenet │ │ ├── README.md │ │ ├── dali.py │ │ ├── demo_imagenet.py │ │ ├── imagenet_labels.txt │ │ ├── test.sh │ │ ├── train_horovod.py │ │ ├── train_imagenet.py │ │ ├── train_imagenet_nasnet.py │ │ └── verify_pretrained.py ├── datasets │ ├── README.md │ ├── ade20k.py │ ├── cityscapes.py │ ├── coco_tracking.py │ ├── hmdb51.py │ ├── ilsvrc_det.py │ ├── ilsvrc_vid.py │ ├── imagenet.py │ ├── imagenet_val_maps.pklz │ ├── kinetics400.py │ ├── lsun.py │ ├── market1501.py │ ├── mhp_v1.py │ ├── mscoco.py │ ├── otb2015.py │ ├── pascal_voc.py │ ├── somethingsomethingv2.py │ ├── tiny_motorbike.py │ └── ucf101.py ├── deployment │ ├── README.md │ ├── cpp-inference │ │ ├── .gitignore │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ └── src │ │ │ ├── clipp.hpp │ │ │ ├── common.hpp │ │ │ └── detect.cpp │ └── export │ │ ├── .gitignore │ │ ├── README.md │ │ └── export_pretrained.py ├── depth │ ├── README.md │ ├── demo.py │ ├── options.py │ ├── test.py │ ├── test_pose.py │ ├── train.py │ └── trainer.py ├── detection │ ├── README.md │ ├── auto_detection │ │ ├── train_auto_center_net.py │ │ ├── train_auto_detection.py │ │ ├── train_auto_faster_rcnn.py │ │ ├── train_auto_ssd.py │ │ └── train_auto_yolo.py │ ├── center_net │ │ ├── demo_center_net.py │ │ ├── eval_center_net.py │ │ ├── fit_center_net.py │ │ └── train_center_net.py │ ├── demo_webcam_run.py │ ├── faster_rcnn │ │ ├── README.md │ │ ├── demo_faster_rcnn.py │ │ ├── eval_faster_rcnn.py │ │ ├── fit_faster_rcnn.py │ │ ├── train_doublehead_rcnn.py │ │ └── train_faster_rcnn.py │ ├── ssd │ │ ├── README.md │ │ ├── demo_ssd.py │ │ ├── eval_ssd.py │ │ ├── fit_ssd.py │ │ └── train_ssd.py │ └── yolo │ │ ├── README.md │ │ ├── demo_yolo.py │ │ ├── eval_yolo.py │ │ ├── fit_yolo.py │ │ ├── train_yolo.py │ │ 
└── train_yolo3.py ├── gan │ ├── cycle_gan │ │ ├── README.md │ │ ├── demo_cycle_gan.py │ │ ├── download_dataset.py │ │ ├── images.png │ │ └── train_cgan.py │ ├── srgan │ │ ├── README.md │ │ ├── __init__.py │ │ ├── demo_srgan.py │ │ ├── download_dataset.py │ │ ├── images.png │ │ ├── pred.png │ │ └── train_srgan.py │ ├── stylegan │ │ ├── README.md │ │ ├── demo_stylegan.py │ │ ├── model.py │ │ ├── modules.py │ │ ├── prepare_data.py │ │ ├── sample.jpg │ │ ├── sample_train.png │ │ └── train.py │ └── wgan │ │ ├── README.md │ │ ├── fake_samples_400000.png │ │ ├── lossd.png │ │ ├── lsun.py │ │ └── train_wgan.py ├── instance │ ├── README.md │ └── mask_rcnn │ │ ├── README.md │ │ ├── benchmark │ │ ├── README.md │ │ └── ompi_bind_DGX1.sh │ │ ├── demo_mask_rcnn.py │ │ ├── eval_mask_rcnn.py │ │ ├── fit_mask_rcnn.py │ │ └── train_mask_rcnn.py ├── onnx │ ├── README.md │ ├── exported_models.csv │ └── notebooks │ │ ├── action-recognition │ │ ├── c3d_kinetics400.ipynb │ │ ├── i3d_inceptionv1_kinetics400.ipynb │ │ ├── i3d_inceptionv3_kinetics400.ipynb │ │ ├── i3d_nl10_resnet101_v1_kinetics400.ipynb │ │ ├── i3d_nl10_resnet50_v1_kinetics400.ipynb │ │ ├── i3d_nl5_resnet101_v1_kinetics400.ipynb │ │ ├── i3d_nl5_resnet50_v1_kinetics400.ipynb │ │ ├── i3d_resnet101_v1_kinetics400.ipynb │ │ ├── i3d_resnet50_v1_hmdb51.ipynb │ │ ├── i3d_resnet50_v1_kinetics400.ipynb │ │ ├── i3d_resnet50_v1_sthsthv2.ipynb │ │ ├── i3d_resnet50_v1_ucf101.ipynb │ │ ├── inceptionv1_kinetics400.ipynb │ │ ├── inceptionv3_kinetics400.ipynb │ │ ├── inceptionv3_ucf101.ipynb │ │ ├── p3d_resnet101_kinetics400.ipynb │ │ ├── p3d_resnet50_kinetics400.ipynb │ │ ├── r2plus1d_resnet18_kinetics400.ipynb │ │ ├── r2plus1d_resnet34_kinetics400.ipynb │ │ ├── r2plus1d_resnet50_kinetics400.ipynb │ │ ├── resnet101_v1b_kinetics400.ipynb │ │ ├── resnet152_v1b_kinetics400.ipynb │ │ ├── resnet18_v1b_kinetics400.ipynb │ │ ├── resnet34_v1b_kinetics400.ipynb │ │ ├── resnet50_v1b_hmdb51.ipynb │ │ ├── resnet50_v1b_kinetics400.ipynb │ │ ├── resnet50_v1b_sthsthv2.ipynb │ │ ├── slowfast_4x16_resnet50_kinetics400.ipynb │ │ ├── slowfast_8x8_resnet101_kinetics400.ipynb │ │ ├── slowfast_8x8_resnet50_kinetics400.ipynb │ │ └── vgg16_ucf101.ipynb │ │ ├── classification │ │ ├── alexnet.ipynb │ │ ├── darknet53.ipynb │ │ ├── densenet121.ipynb │ │ ├── densenet161.ipynb │ │ ├── densenet169.ipynb │ │ ├── densenet201.ipynb │ │ ├── googlenet.ipynb │ │ ├── inceptionv3.ipynb │ │ ├── mobilenet0.25.ipynb │ │ ├── mobilenet0.5.ipynb │ │ ├── mobilenet0.75.ipynb │ │ ├── mobilenet1.0.ipynb │ │ ├── mobilenetv2_0.25.ipynb │ │ ├── mobilenetv2_0.5.ipynb │ │ ├── mobilenetv2_0.75.ipynb │ │ ├── mobilenetv2_1.0.ipynb │ │ ├── mobilenetv3_large.ipynb │ │ ├── mobilenetv3_small.ipynb │ │ ├── resnest101.ipynb │ │ ├── resnest14.ipynb │ │ ├── resnest200.ipynb │ │ ├── resnest26.ipynb │ │ ├── resnest269.ipynb │ │ ├── resnest50.ipynb │ │ ├── resnet101_v1.ipynb │ │ ├── resnet101_v1d_0.73.ipynb │ │ ├── resnet101_v1d_0.76.ipynb │ │ ├── resnet101_v2.ipynb │ │ ├── resnet152_v1.ipynb │ │ ├── resnet152_v2.ipynb │ │ ├── resnet18_v1.ipynb │ │ ├── resnet18_v1b_0.89.ipynb │ │ ├── resnet18_v2.ipynb │ │ ├── resnet34_v1.ipynb │ │ ├── resnet34_v2.ipynb │ │ ├── resnet50_v1.ipynb │ │ ├── resnet50_v1d_0.11.ipynb │ │ ├── resnet50_v1d_0.37.ipynb │ │ ├── resnet50_v1d_0.48.ipynb │ │ ├── resnet50_v1d_0.86.ipynb │ │ ├── resnet50_v2.ipynb │ │ ├── resnext101_32x4d.ipynb │ │ ├── resnext101_64x4d.ipynb │ │ ├── resnext50_32x4d.ipynb │ │ ├── se_resnext101_32x4d.ipynb │ │ ├── se_resnext101_64x4d.ipynb │ │ ├── se_resnext50_32x4d.ipynb │ │ ├── 
senet_154.ipynb │ │ ├── squeezenet1.0.ipynb │ │ ├── squeezenet1.1.ipynb │ │ ├── vgg11.ipynb │ │ ├── vgg11_bn.ipynb │ │ ├── vgg13.ipynb │ │ ├── vgg13_bn.ipynb │ │ ├── vgg16.ipynb │ │ ├── vgg16_bn.ipynb │ │ ├── vgg19.ipynb │ │ ├── vgg19_bn.ipynb │ │ └── xception.ipynb │ │ ├── detection │ │ ├── center_net_resnet101_v1b_coco.ipynb │ │ ├── center_net_resnet101_v1b_voc.ipynb │ │ ├── center_net_resnet18_v1b_coco.ipynb │ │ ├── center_net_resnet18_v1b_voc.ipynb │ │ ├── center_net_resnet50_v1b_coco.ipynb │ │ ├── center_net_resnet50_v1b_voc.ipynb │ │ ├── faster_rcnn_resnet50_v1b_voc.ipynb │ │ ├── ssd_300_resnet34_v1b_coco.ipynb │ │ ├── ssd_300_vgg16_atrous_coco.ipynb │ │ ├── ssd_300_vgg16_atrous_voc.ipynb │ │ ├── ssd_512_mobilenet1.0_coco.ipynb │ │ ├── ssd_512_mobilenet1.0_voc.ipynb │ │ ├── ssd_512_resnet50_v1_coco.ipynb │ │ ├── ssd_512_resnet50_v1_voc.ipynb │ │ ├── ssd_512_vgg16_atrous_coco.ipynb │ │ ├── ssd_512_vgg16_atrous_voc.ipynb │ │ ├── yolo3_darknet53_coco.ipynb │ │ ├── yolo3_darknet53_voc.ipynb │ │ ├── yolo3_mobilenet1.0_coco.ipynb │ │ └── yolo3_mobilenet1.0_voc.ipynb │ │ ├── pose │ │ ├── alpha_pose_resnet101_v1b_coco.ipynb │ │ ├── mobile_pose_mobilenet1.0.ipynb │ │ ├── mobile_pose_mobilenetv2_1.0.ipynb │ │ ├── mobile_pose_mobilenetv3_large.ipynb │ │ ├── mobile_pose_mobilenetv3_small.ipynb │ │ ├── mobile_pose_resnet18_v1b.ipynb │ │ ├── mobile_pose_resnet50_v1b.ipynb │ │ ├── simple_pose_resnet101_v1b.ipynb │ │ ├── simple_pose_resnet101_v1d.ipynb │ │ ├── simple_pose_resnet152_v1b.ipynb │ │ ├── simple_pose_resnet152_v1d.ipynb │ │ ├── simple_pose_resnet18_v1b.ipynb │ │ ├── simple_pose_resnet50_v1b.ipynb │ │ └── simple_pose_resnet50_v1d.ipynb │ │ └── segmentation │ │ ├── danet_resnet101_citys.ipynb │ │ ├── danet_resnet50_citys.ipynb │ │ ├── deeplab_resnest101_ade.ipynb │ │ ├── deeplab_resnest200_ade.ipynb │ │ ├── deeplab_resnest269_ade.ipynb │ │ ├── deeplab_resnest50_ade.ipynb │ │ ├── deeplab_resnet101_ade.ipynb │ │ ├── deeplab_resnet101_citys.ipynb │ │ ├── deeplab_resnet101_coco.ipynb │ │ ├── deeplab_resnet101_voc.ipynb │ │ ├── deeplab_resnet152_voc.ipynb │ │ ├── deeplab_resnet50_ade.ipynb │ │ ├── deeplab_resnet50_citys.ipynb │ │ ├── deeplab_v3b_plus_wideresnet_citys.ipynb │ │ ├── fcn_resnet101_ade.ipynb │ │ ├── fcn_resnet101_coco.ipynb │ │ ├── fcn_resnet101_voc.ipynb │ │ └── fcn_resnet50_ade.ipynb ├── pose │ ├── alpha_pose │ │ ├── cam_demo.py │ │ ├── coco.sh │ │ ├── coco_dpg.sh │ │ ├── demo.py │ │ ├── train_alpha_pose.py │ │ ├── validate.py │ │ ├── validate.sh │ │ └── validate_tools.py │ ├── directpose │ │ ├── .gitignore │ │ ├── configurations │ │ │ └── ms_aa_resnet50_4x_syncbn.yaml │ │ ├── demo_directpose.py │ │ ├── export_directpose_tvm.py │ │ ├── train_ddp_directpose.py │ │ └── tvm_evaluation │ │ │ ├── evaluate_pose.py │ │ │ └── pose_model.py │ └── simple_pose │ │ ├── README.md │ │ ├── cam_demo.py │ │ ├── coco.sh │ │ ├── demo.py │ │ ├── train_simple_pose.py │ │ ├── validate.py │ │ └── validate.sh ├── re-id │ └── baseline │ │ ├── README.md │ │ ├── networks │ │ ├── __init__.py │ │ └── resnet.py │ │ ├── test.py │ │ └── train.py ├── segmentation │ ├── README.md │ ├── test.py │ └── train.py ├── tracking │ ├── siamrpn │ │ ├── benchmark.py │ │ ├── demo.py │ │ ├── test.py │ │ └── train.py │ └── smot │ │ ├── README.md │ │ ├── demo.py │ │ ├── eval.py │ │ ├── helper.py │ │ └── preprocess.py └── vision-language │ └── video-language │ └── coot │ ├── README.md │ ├── configuration │ └── youcook2.yaml │ └── train_pytorch.py ├── setup.py ├── tests ├── __init__.py ├── auto │ ├── __init__.py │ ├── 
test_auto_data.py │ ├── test_auto_estimators.py │ ├── test_auto_tasks.py │ ├── test_hybrid_auto_tasks.py │ ├── test_torch_auto_estimators.py │ └── test_torch_auto_tasks.py ├── lint.py ├── model_zoo │ ├── __init__.py │ ├── test_model_zoo.py │ └── test_utils_export.py ├── model_zoo_torch │ ├── __init__.py │ ├── test_model_zoo_torch.py │ └── test_tvm_torch_export.py ├── onnx │ └── test_inference.py ├── py3_auto.yml ├── py3_mxnet.yml ├── py3_mxnet_ci.yml ├── py3_torch.yml ├── pylint.yml ├── pylintrc └── unittests │ ├── __init__.py │ ├── common.py │ ├── test_data_dataloader.py │ ├── test_data_datasets.py │ ├── test_data_transforms.py │ ├── test_lr_scheduler.py │ ├── test_nn.py │ ├── test_utils_bbox.py │ ├── test_utils_block.py │ ├── test_utils_metric.py │ ├── test_utils_parallel.py │ ├── test_utils_segmentation.py │ ├── test_utils_viz.py │ └── tiny_datasets.py └── tools ├── batch ├── README.md ├── batch-test.py ├── docker │ ├── Dockerfile.cpu │ ├── Dockerfile.gpu │ ├── README.md │ ├── docker_deploy.sh │ └── gluon_cv_job.sh ├── submit-job.py └── template │ ├── launch-template-data-cpu.json │ └── launch-template-data-gpu.json └── docker ├── README.md ├── devel_entrypoint.sh ├── dockerfile └── start_jupyter.sh /.github/workflows/gpu_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | COVER_PACKAGE=$1 4 | TESTS_PATH=$2 5 | 6 | EFS=/mnt/efs 7 | 8 | mkdir -p ~/.mxnet/models 9 | for f in $EFS/.mxnet/models/*.params; do 10 | ln -s $f ~/.mxnet/models/$(basename "$f") 11 | done 12 | 13 | export MXNET_CUDNN_AUTOTUNE_DEFAULT=0 14 | export MPLBACKEND=Agg 15 | export KMP_DUPLICATE_LIB_OK=TRUE 16 | 17 | if [[ $TESTS_PATH == *"auto"* ]]; then 18 | echo "Installing autogluon.core and timm for auto module" 19 | pip3 install autogluon.core==0.2.0 20 | pip3 install timm==0.5.4 21 | fi 22 | 23 | nosetests --process-restartworker --with-timer --timer-ok 5 --timer-warning 20 -x --with-coverage --cover-package $COVER_PACKAGE -v $TESTS_PATH 24 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: 'Close stale issues and PRs' 2 | on: 3 | schedule: 4 | - cron: '30 6 * * *' 5 | 6 | jobs: 7 | stale: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/stale@v3 11 | with: 12 | days-before-stale: 90 13 | days-before-pr-stale: -1 14 | days-before-issue-close: 7 15 | days-before-pr-close: -1 16 | stale-issue-message: 'This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.' 
17 | exempt-issue-labels: 'bug, call for contribution, doc, enhancement, good first issue, help wanted, mxnet, question' 18 | ascending: true 19 | operations-per-run: 300 20 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/.gitmodules -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | graft gluoncv 4 | prune docs tests scripts 5 | global-exclude *.py[co] 6 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ROOTDIR = $(CURDIR) 2 | 3 | lint: cpplint pylint 4 | 5 | cpplint: 6 | tests/lint.py gluoncv cpp src 7 | 8 | pylint: 9 | pylint --rcfile=$(ROOTDIR)/tests/pylintrc --ignore-patterns=".*\.so$$,.*\.dll$$,.*\.dylib$$" gluoncv 10 | 11 | doc: docs 12 | 13 | clean: clean_build 14 | 15 | clean_docs: 16 | make -C docs clean 17 | 18 | clean_build: 19 | rm -rf dist gluoncv.egg-info build | true 20 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | doxygen 2 | _build 3 | build 4 | gen_modules 5 | __MACOSX 6 | *.zip 7 | *.png 8 | *.jpg* 9 | *.jpeg 10 | -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/.nojekyll -------------------------------------------------------------------------------- /docs/README.txt: -------------------------------------------------------------------------------- 1 | The documentation of gluoncv is generated with recommonmark and sphinx. 
2 | 3 | - pip install sphinx>=1.5.5 sphinx-gallery sphinx_rtd_theme matplotlib Image recommonmark scipy 4 | -------------------------------------------------------------------------------- /docs/_static/action-recognition.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/action-recognition.png -------------------------------------------------------------------------------- /docs/_static/action_basketball_demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/action_basketball_demo.gif -------------------------------------------------------------------------------- /docs/_static/apache2.svg: -------------------------------------------------------------------------------- 1 | (SVG badge, markup stripped during extraction; visible text: "license", "Apache 2.0") 2 | -------------------------------------------------------------------------------- /docs/_static/assets/img/action_recognition_demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/action_recognition_demo.png -------------------------------------------------------------------------------- /docs/_static/assets/img/background/img-01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/background/img-01.jpg -------------------------------------------------------------------------------- /docs/_static/assets/img/background/img-02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/background/img-02.jpg -------------------------------------------------------------------------------- /docs/_static/assets/img/background/img-03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/background/img-03.jpg -------------------------------------------------------------------------------- /docs/_static/assets/img/background/img-04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/background/img-04.jpg -------------------------------------------------------------------------------- /docs/_static/assets/img/background/img-05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/background/img-05.jpg -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/background/img-06.jpg -------------------------------------------------------------------------------- /docs/_static/assets/img/background/img-07.jpg:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/background/img-07.jpg -------------------------------------------------------------------------------- /docs/_static/assets/img/background/img-08.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/background/img-08.jpg -------------------------------------------------------------------------------- /docs/_static/assets/img/background/img-09.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/background/img-09.jpg -------------------------------------------------------------------------------- /docs/_static/assets/img/background/img-10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/background/img-10.jpg -------------------------------------------------------------------------------- /docs/_static/assets/img/background/img-11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/background/img-11.jpg -------------------------------------------------------------------------------- /docs/_static/assets/img/gluon_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/gluon_white.png -------------------------------------------------------------------------------- /docs/_static/assets/img/image-classification-demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/image-classification-demo.png -------------------------------------------------------------------------------- /docs/_static/assets/img/instance_segmentation_demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/instance_segmentation_demo.png -------------------------------------------------------------------------------- /docs/_static/assets/img/object-detection-demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/object-detection-demo.png -------------------------------------------------------------------------------- /docs/_static/assets/img/pose_estimation_demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/pose_estimation_demo.png -------------------------------------------------------------------------------- /docs/_static/assets/img/semantic-segmentation_demo.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/semantic-segmentation_demo.png -------------------------------------------------------------------------------- /docs/_static/assets/svg/play.svg: -------------------------------------------------------------------------------- 1 | (SVG markup stripped during extraction) -------------------------------------------------------------------------------- /docs/_static/assets/svg/video-icon-dark.svg: -------------------------------------------------------------------------------- 1 | (SVG markup stripped during extraction) -------------------------------------------------------------------------------- /docs/_static/assets/svg/video-icon.svg: -------------------------------------------------------------------------------- 1 | (SVG markup stripped during extraction) -------------------------------------------------------------------------------- /docs/_static/classification-demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/classification-demo.png -------------------------------------------------------------------------------- /docs/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | td { 2 | word-wrap: break-word; 3 | } 4 | 5 | table { 6 | table-layout: fixed; 7 | width: 100% 8 | } 9 | 10 | .tag { 11 | color:grey; 12 | } 13 | 14 | .tag:after { 15 | content: ' (no mixup)'; 16 | } 17 | 18 | .tsntag { 19 | color:grey; 20 | } 21 | 22 | .tsntag:after { 23 | content: ' (no TSN)'; 24 | } 25 | 26 | .greytag { 27 | color:grey; 28 | } 29 | 30 | .gray { 31 | color:#808080; 32 | } 33 | 34 | .Logos { 35 | display: inline; 36 | margin: 1em; 37 | max-width: 120px; 38 | } 39 | 40 | .install { 41 | max-width: 800px; 42 | } 43 | .install .title { 44 | display: inline-block; 45 | min-width: 100px; 46 | text-transform: uppercase; 47 | font-size: 90%; 48 | color: #555; 49 | } 50 | 51 | .install .option { 52 | margin: 5px; 53 | } 54 | 55 | @media (max-width: 650px) { 56 | .install .option, .install .title { 57 | width: 90%; 58 | } 59 | .install .title { 60 | margin-top: 1em; 61 | } 62 | } -------------------------------------------------------------------------------- /docs/_static/depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/depth.png -------------------------------------------------------------------------------- /docs/_static/gluon-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/gluon-logo.png -------------------------------------------------------------------------------- /docs/_static/gluon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/gluon.ico -------------------------------------------------------------------------------- /docs/_static/gluon_black.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/gluon_black.png
-------------------------------------------------------------------------------- /docs/_static/gluon_s2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/gluon_s2.png -------------------------------------------------------------------------------- /docs/_static/gluon_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/gluon_white.png -------------------------------------------------------------------------------- /docs/_static/google_analytics.js: -------------------------------------------------------------------------------- 1 | (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ 2 | (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), 3 | m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) 4 | })(window,document,'script','https://www.google-analytics.com/analytics.js','ga'); 5 | 6 | ga('create', 'UA-96378503-9', 'auto'); 7 | ga('send', 'pageview'); 8 | -------------------------------------------------------------------------------- /docs/_static/hidebib.js: -------------------------------------------------------------------------------- 1 | // adapted from: http://www.robots.ox.ac.uk/~vedaldi/assets/hidebib.js 2 | function hideallbibs() 3 | { 4 | var el = document.getElementsByTagName("div") ; 5 | for (var i = 0 ; i < el.length ; ++i) { 6 | if (el[i].className == "paper") { 7 | var bib = el[i].getElementsByTagName("pre") ; 8 | if (bib.length > 0) { 9 | bib [0] .style.display = 'none' ; 10 | } 11 | } 12 | } 13 | } 14 | 15 | function togglebib(paperid) 16 | { 17 | var paper = document.getElementById(paperid) ; 18 | var bib = paper.getElementsByTagName('pre') ; 19 | if (bib.length > 0) { 20 | if (bib [0] .style.display == 'none') { 21 | bib [0] .style.display = 'block' ; 22 | } else { 23 | bib [0] .style.display = 'none' ; 24 | } 25 | } 26 | } 27 | 28 | function toggleblock(blockId) 29 | { 30 | var block = document.getElementById(blockId); 31 | if (block.style.display == 'none') { 32 | block.style.display = 'block' ; 33 | } else { 34 | block.style.display = 'none' ; 35 | } 36 | } 37 | 38 | function hideblock(blockId) 39 | { 40 | var block = document.getElementById(blockId); 41 | block.style.display = 'none' ; 42 | } 43 | -------------------------------------------------------------------------------- /docs/_static/image-classification.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/image-classification.png -------------------------------------------------------------------------------- /docs/_static/imagenet_banner.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/imagenet_banner.jpeg -------------------------------------------------------------------------------- /docs/_static/instance-segmentation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/instance-segmentation.png 
-------------------------------------------------------------------------------- /docs/_static/logos/acer_byoc_grad_lockup_rgb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/logos/acer_byoc_grad_lockup_rgb.png -------------------------------------------------------------------------------- /docs/_static/logos/acroquest_logo_cmyk_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/logos/acroquest_logo_cmyk_2.png -------------------------------------------------------------------------------- /docs/_static/logos/embed.html: -------------------------------------------------------------------------------- 1 | .. raw:: html 2 | 3 | ACER BYOC 4 | PIONEER 5 | NRI 6 | Acroquest 7 | -------------------------------------------------------------------------------- /docs/_static/logos/kumiawase_e_1_RGB.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/logos/kumiawase_e_1_RGB.jpg -------------------------------------------------------------------------------- /docs/_static/logos/pioneer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/logos/pioneer.png -------------------------------------------------------------------------------- /docs/_static/object-detection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/object-detection.png -------------------------------------------------------------------------------- /docs/_static/plot_help.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/plot_help.png -------------------------------------------------------------------------------- /docs/_static/semantic-segmentation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/semantic-segmentation.png -------------------------------------------------------------------------------- /docs/_static/short_demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/short_demo.gif -------------------------------------------------------------------------------- /docs/_static/smot_demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/smot_demo.gif -------------------------------------------------------------------------------- /docs/_static/smot_multi_demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/smot_multi_demo.gif 
-------------------------------------------------------------------------------- /docs/_static/tracking_demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/tracking_demo.gif -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | 3 | {%- block extrahead %} 4 | 5 | {% endblock %} 6 | -------------------------------------------------------------------------------- /docs/api/data.batchify.rst: -------------------------------------------------------------------------------- 1 | gluoncv.data.batchify 2 | ===================== 3 | 4 | Batchify functions can be used to transform a dataset into mini-batches that can be processed efficiently. 5 | 6 | In computer vision tasks, images/labels often come with different shapes. GluonCV provides a collection of 7 | convenient batchify functions suitable for various situations. 8 | 9 | .. currentmodule:: gluoncv.data.batchify 10 | 11 | Batch Loaders 12 | ------------- 13 | 14 | .. autosummary:: 15 | :nosignatures: 16 | 17 | Stack 18 | Pad 19 | Append 20 | Tuple 21 | 22 | API Reference 23 | ------------- 24 | 25 | .. automodule:: gluoncv.data.batchify 26 | :members: 27 |
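A minimal usage sketch of composing these functions; ``train_dataset`` here is a hypothetical stand-in for any dataset whose samples are ``(image, label)`` pairs with variable-length labels, such as detection targets:

.. code-block:: python

    from mxnet.gluon.data import DataLoader
    from gluoncv.data.batchify import Tuple, Stack, Pad

    # Stack the equally shaped images along a new batch axis, and pad the
    # variable-length label arrays with -1 so the batch is rectangular.
    batchify_fn = Tuple(Stack(), Pad(pad_val=-1))

    # `train_dataset` is a placeholder for any (image, label) dataset.
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True,
                              batchify_fn=batchify_fn)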
-------------------------------------------------------------------------------- /docs/api/index.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | 7 | data.datasets 8 | data.batchify 9 | data.transforms 10 | model_zoo 11 | nn 12 | loss 13 | utils 14 | -------------------------------------------------------------------------------- /docs/api/loss.rst: -------------------------------------------------------------------------------- 1 | gluoncv.loss 2 | ============ 3 | 4 | .. currentmodule:: gluoncv.loss 5 | 6 | .. automodule:: gluoncv.loss 7 | 8 | .. autosummary:: 9 | :nosignatures: 10 | 11 | FocalLoss 12 | 13 | SSDMultiBoxLoss 14 | 15 | 16 | API Reference 17 | ------------- 18 | 19 | .. automodule:: gluoncv.loss 20 | :members: 21 | -------------------------------------------------------------------------------- /docs/api/utils.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | gluoncv.utils 5 | ================= 6 | We implemented a broad range of utility functions which cover visualization, file handling, downloads, and training helpers. 7 | 8 | .. currentmodule:: gluoncv.utils 9 | 10 | Visualization 11 | ------------- 12 | 13 | .. currentmodule:: gluoncv.utils.viz 14 | 15 | .. autosummary:: 16 | :nosignatures: 17 | 18 | plot_image 19 | 20 | get_color_pallete 21 | 22 | plot_bbox 23 | 24 | expand_mask 25 | 26 | plot_mask 27 | 28 | plot_network 29 | 30 | Miscellaneous 31 | ------------- 32 | 33 | .. currentmodule:: gluoncv.utils 34 | 35 | .. autosummary:: 36 | :nosignatures: 37 | 38 | download 39 | 40 | makedirs 41 | 42 | .. currentmodule:: gluoncv.utils.random 43 | 44 | .. autosummary:: 45 | :nosignatures: 46 | 47 | seed 48 | 49 | Training Helpers 50 | ---------------- 51 | 52 | .. currentmodule:: gluoncv.utils 53 | 54 | .. autosummary:: 55 | :nosignatures: 56 | 57 | LRScheduler 58 | 59 | set_lr_mult 60 | 61 | Bounding Box Utils 62 | ------------------ 63 | 64 | .. currentmodule:: gluoncv.utils 65 | 66 | .. autosummary:: 67 | :nosignatures: 68 | 69 | bbox_iou 70 | 71 | 72 | API Reference 73 | ------------- 74 | 75 | .. automodule:: gluoncv.utils 76 | :members: 77 | :imported-members: 78 | 79 | 80 | .. automodule:: gluoncv.utils.viz 81 | :members: 82 | :imported-members: 83 | 84 | .. automodule:: gluoncv.utils.metrics 85 | :members: 86 | :imported-members: 87 | -------------------------------------------------------------------------------- /docs/build.yml: -------------------------------------------------------------------------------- 1 | name: gluon_vision_docs 2 | channels: 3 | - pytorch 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - python=3.7 8 | - sphinx>=1.5.5 9 | - scipy 10 | - numpy 11 | - matplotlib 12 | - sphinx_rtd_theme 13 | - pip=20.2 14 | - pytorch=1.6.0 15 | - torchvision=0.7.0 16 | - pip: 17 | - https://github.com/mli/mx-theme/tarball/0.3.1 18 | - sphinx-gallery 19 | - https://repo.mxnet.io/dist/python/cu100mkl/mxnet_cu100mkl-1.6.0b20191010-py2.py3-none-manylinux1_x86_64.whl 20 | # - guzzle_sphinx_theme 21 | - recommonmark 22 | - Image 23 | - awscli 24 | - tqdm 25 | - requests 26 | - Pillow 27 | - opencv-python 28 | - portalocker 29 | - decord 30 | - cython 31 | - pycocotools 32 | - autocfg 33 | - yacs 34 | -------------------------------------------------------------------------------- /docs/how_to/index.rst: -------------------------------------------------------------------------------- 1 | Community 2 | ========= 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | 7 | support 8 | contribute 9 | -------------------------------------------------------------------------------- /docs/how_to/support.rst: -------------------------------------------------------------------------------- 1 | Community 2 | ========= 3 | 4 | .. card:: 5 | :title: Community 6 | :is_head: true 7 | :link: https://www.apache.org/foundation/policies/conduct 8 | 9 | Welcome to the GluonCV community. We strive to foster a collaborative and welcoming community. We 10 | expect all members to follow the `code of conduct <https://www.apache.org/foundation/policies/conduct>`__. 11 | 12 | 13 | .. container:: cards 14 | 15 | .. card:: 16 | :title: Github Issues 17 | :link: https://github.com/dmlc/gluon-cv/issues 18 | 19 | Feature requests, bug reports, design and roadmap discussion. 20 | 21 | 22 | .. card:: 23 | :title: Github Projects 24 | :link: https://github.com/dmlc/gluon-cv/projects 25 | 26 | Active roadmaps, and current and past projects. 27 | 28 | .. card:: 29 | :title: GluonCV Slack Channel 30 | :link: https://apache-mxnet.slack.com/messages/CED6Y10E5 31 | 32 | #gluon-cv Slack channel. Click the `sign-up link `_ to register. 33 | 34 | .. card:: 35 | :title: Discuss Forum 36 | :link: https://discuss.mxnet.io/c/gluon 37 | 38 | General discussions, usage experiences, and questions. 39 | 40 | 41 | Interested in contributing to GluonCV? Check our `contribution guide `_. 42 | -------------------------------------------------------------------------------- /docs/install.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ------------ 3 | 4 | .. Ignore prerequisites to make the index page concise, which will be shown at 5 | the install page 6 | 7 | .. raw:: html 8 | 9 | 10 | 11 | .. include:: install/install-include.rst 12 | 13 | .. raw:: html 14 | 15 | 16 | 17 | 18 | Check :doc:`install/install-more` for more installation instructions and options.
-------------------------------------------------------------------------------- /docs/install/install-more.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ------------ 3 | 4 | .. include:: install-include.rst 5 | 6 | .. raw:: html 7 | 8 | 9 | 10 | 11 | 12 | Next steps 13 | ---------- 14 | 15 | - Check out `beta.mxnet.io `_ for more options such as ARM devices and Docker images. 16 | - `Verify your MXNet installation `_ 17 | - `Configure MXNet environment variables `_ 18 | - For new users: `60-minute Gluon crash course `_ 19 | - For experienced users: `MXNet Guides. `_ 20 | - For advanced users: `MXNet API `_ and `GluonCV API <../api/index.html>`_. 21 | 22 | .. 23 | TODO: write a new directive `no-local-toc` for it 24 | 25 | .. raw:: html 26 | 27 | -------------------------------------------------------------------------------- /docs/model_zoo/classification_torch.rst: -------------------------------------------------------------------------------- 1 | Models implemented using PyTorch will be added later. Please check out our MXNet implementation instead. 2 | -------------------------------------------------------------------------------- /docs/model_zoo/csv_tables/Action_Recognitions/HMDB51.csv: -------------------------------------------------------------------------------- 1 | Name,Pretrained,Segments,Clip Length,Top-1,Hashtag,Train Command,Train Log 2 | resnet50_v1b_hmdb51 [3]_,ImageNet,3,1,55.2,682591e2,`shell script `_,`log `_ 3 | resnet50_v1b_hmdb51 [1]_,ImageNet,1,1,52.2,ba66ee4b,`shell script `_,`log `_ 4 | i3d_resnet50_v1_hmdb51 [4]_,ImageNet,1,32 (64/2),48.5,0d0ad559,`shell script `_,`log `_ 5 | i3d_resnet50_v1_hmdb51 [4]_,"ImageNet, K400",1,32 (64/2),70.9,2ec6bf01,`shell script `_,`log `_ 6 | -------------------------------------------------------------------------------- /docs/model_zoo/csv_tables/Action_Recognitions/Kinetics700.csv: -------------------------------------------------------------------------------- 1 | Name,Pretrained,Segments,Clip Length,Top-1,Hashtag,Train Command,Train Log 2 | i3d_slow_resnet101_f16s4_kinetics700 [8]_,Scratch,1,16 (64/4),67.65,299b1d9d,NA,NA -------------------------------------------------------------------------------- /docs/model_zoo/csv_tables/Action_Recognitions/Kinetics700_torch.csv: -------------------------------------------------------------------------------- 1 | Name,Pretrained,Segment,Clip Length,Top-1,Hashtag,Config 2 | i3d_slow_resnet101_f16s4_kinetics700 [8]_,Scratch,1,16 (64/4),67.65,b5be1a2e,`config `_ -------------------------------------------------------------------------------- /docs/model_zoo/csv_tables/Action_Recognitions/Something-Something-V2.csv: -------------------------------------------------------------------------------- 1 | Name,Pretrained,Segments,Clip Length,Top-1,Hashtag,Train Command,Train Log 2 | resnet50_v1b_sthsthv2 [3]_,ImageNet,8,1,35.5,80ee0c6b,`shell script `_,`log `_ 3 | i3d_resnet50_v1_sthsthv2 [4]_,ImageNet,1,16 (32/2),50.6,01961e4c,`shell script `_,`log `_ 4 | -------------------------------------------------------------------------------- /docs/model_zoo/csv_tables/Action_Recognitions/Something-Something-V2_torch.csv: -------------------------------------------------------------------------------- 1 | Name,Pretrained,Segment,Clip Length,Top-1,Hashtag,Config 2 | resnet50_v1b_sthsthv2 [3]_,ImageNet,8,1,35.16,cbb9167b,`config `_ 3 | i3d_resnet50_v1_sthsthv2 [4]_,ImageNet,1,16 (32/2),49.61,e975d989,`config `_ 
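The model names in these tables double as model zoo identifiers. A minimal sketch for the MXNet variants (the random clip below is a stand-in for a real preprocessed video; I3D-style networks expect ``N x C x T x H x W`` input, with ``T`` matching the clip length column)::

    import mxnet as mx
    from gluoncv.model_zoo import get_model

    # 'i3d_resnet50_v1_sthsthv2' is one of the rows above (clip length 16).
    net = get_model('i3d_resnet50_v1_sthsthv2', pretrained=True)
    clip = mx.nd.random.uniform(shape=(1, 3, 16, 224, 224))  # dummy video clip
    pred = net(clip)
    print(pred.shape)  # (1, num_classes) class scores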
-------------------------------------------------------------------------------- /docs/model_zoo/csv_tables/Classifications/DenseNet.csv: -------------------------------------------------------------------------------- 1 | Model,Top-1,Top-5,Hashtag,Training Command,Training Log 2 | DenseNet121 [7]_,74.97,92.25,f27dbf2d,, 3 | DenseNet161 [7]_,77.70,93.80,b6c8a957,, 4 | DenseNet169 [7]_,76.17,93.17,2603f878,, 5 | DenseNet201 [7]_,77.32,93.62,1cdbc116,, 6 | -------------------------------------------------------------------------------- /docs/model_zoo/csv_tables/Classifications/Pruned_ResNet.csv: -------------------------------------------------------------------------------- 1 | Model,Top-1,Top-5,Hashtag,Speedup (to original ResNet) 2 | resnet18_v1b_0.89,67.2,87.45,54f7742b,2x 3 | resnet50_v1d_0.86,78.02,93.82,a230c33f,1.68x 4 | resnet50_v1d_0.48,74.66,92.34,0d3e69bb,3.3x 5 | resnet50_v1d_0.37,70.71,89.74,9982ae49,5.01x 6 | resnet50_v1d_0.11,63.22,84.79,6a25eece,8.78x 7 | resnet101_v1d_0.76,79.46,94.69,a872796b,1.8x 8 | resnet101_v1d_0.73,78.89,94.48,712fccb1,2.02x 9 | -------------------------------------------------------------------------------- /docs/model_zoo/csv_tables/Classifications/ResNeSt.csv: -------------------------------------------------------------------------------- 1 | Model,Top-1,Top-5,Hashtag,Training Command,Training Log 2 | ResNeSt14 [17]_,75.75,92.70,7e0b0cae,`shell script `_,`log `_ 3 | ResNeSt26 [17]_,78.68,94.38,36459074,`shell script `_, 4 | ResNeSt50 [17]_,81.04,95.42,bcfefe1d,`shell script `_,`log `_ 5 | ResNeSt101 [17]_,82.83,96.42,5da943b3,`shell script `_,`log `_ 6 | ResNeSt200 [17]_,83.86,96.86,0c5d117d,`shell script `_,`log `_ 7 | ResNeSt269 [17]_,84.53,96.98,11ae7f5d,`shell script `_,`log `_ 8 | -------------------------------------------------------------------------------- /docs/model_zoo/csv_tables/Classifications/SqueezeNet.csv: -------------------------------------------------------------------------------- 1 | Model,Top-1,Top-5,Hashtag,Training Command,Training Log 2 | SqueezeNet1.0 [10]_,56.11,79.09,264ba497,, 3 | SqueezeNet1.1 [10]_,54.96,78.17,33ba0f93,, 4 | -------------------------------------------------------------------------------- /docs/model_zoo/csv_tables/Classifications/VGG.csv: -------------------------------------------------------------------------------- 1 | Model,Top-1,Top-5,Hashtag,Training Command,Training Log 2 | VGG11 [9]_,66.62,87.34,dd221b16,, 3 | VGG13 [9]_,67.74,88.11,6bc5de58,, 4 | VGG16 [9]_,73.23,91.31,e660d456,`shell script `_,`log `_ 5 | VGG19 [9]_,74.11,91.35,ad2f660d,`shell script `_,`log `_ 6 | VGG11_bn [9]_,68.59,88.72,ee79a809,, 7 | VGG13_bn [9]_,68.84,88.82,7d97a06c,, 8 | VGG16_bn [9]_,73.10,91.76,7f01cf05,`shell script `_,`log `_ 9 | VGG19_bn [9]_,74.33,91.85,f360b758,`shell script `_,`log `_ 10 | -------------------------------------------------------------------------------- /docs/model_zoo/csv_tables/Depths/KITTI.csv: -------------------------------------------------------------------------------- 1 | Name,Modality,Resolution,Abs. Rel. 
Error,delta < 1.25,Hashtag,Train Command,Train Log 2 | monodepth2_resnet18_kitti_stereo_640x192 [1]_,Stereo,640x192,0.114,0.860,83eea4a9,`shell script `_,`log `_ 3 | monodepth2_resnet18_kitti_mono_640x192 [1]_,Mono,640x192,0.121,0.858,c881771d,`shell script `_,`log `_ 4 | monodepth2_resnet18_kitti_mono_stereo_640x192 [1]_,Mono + Stereo,640x192,0.109,0.872,9515c219,`shell script `_,`log `_ 5 | -------------------------------------------------------------------------------- /docs/model_zoo/csv_tables/Depths/PoseNet.csv: -------------------------------------------------------------------------------- 1 | Name,Modality,Resolution,Sequence 09,Sequence 10 2 | monodepth2_resnet18_posenet_kitti_mono_640x192 [1]_,Mono,640x192,0.021±0.012,0.018±0.011 3 | monodepth2_resnet18_posenet_kitti_mono_stereo_640x192 [1]_,Mono + Stereo,640x192,0.021±0.010,0.016±0.010 4 | -------------------------------------------------------------------------------- /docs/model_zoo/csv_tables/Detections/MSCOCO_SSD.csv: -------------------------------------------------------------------------------- 1 | Model,Box AP,Training Command,Training Log 2 | ssd_300_vgg16_atrous_coco [1]_,25.1/42.9/25.8,`shell script `_,`log `_ 3 | ssd_512_vgg16_atrous_coco [1]_,28.9/47.9/30.6,`shell script `_,`log `_ 4 | ssd_300_resnet34_v1b_coco [1]_,25.1/41.7/26.2,`shell script `_,`log `_ 5 | ssd_512_resnet50_v1_coco [1]_,30.6/50.0/32.2,`shell script `_,`log `_ 6 | ssd_512_mobilenet1.0_coco [1]_,21.7/39.2/21.3,`shell script `_,`log `_ -------------------------------------------------------------------------------- /docs/model_zoo/csv_tables/Detections/Pascal_Faster-RCNN.csv: -------------------------------------------------------------------------------- 1 | Model,mAP,Training Command,Training log 2 | faster_rcnn_resnet50_v1b_voc [2]_,78.3,`shell script `_,`log `_ 3 | -------------------------------------------------------------------------------- /docs/model_zoo/csv_tables/Detections/Pascal_SSD.csv: -------------------------------------------------------------------------------- 1 | Model,mAP,Training Command,Training log 2 | ssd_300_vgg16_atrous_voc [1]_,77.6,`shell script `_,`log `_ 3 | ssd_300_vgg16_atrous_voc_int8* [1]_,77.46,, 4 | ssd_512_vgg16_atrous_voc [1]_,79.2,`shell script `_,`log `_ 5 | ssd_512_vgg16_atrous_voc_int8* [1]_,78.39,, 6 | ssd_512_resnet50_v1_voc [1]_,80.1,`shell script `_,`log `_ 7 | ssd_512_resnet50_v1_voc_int8* [1]_,80.16,, 8 | ssd_512_mobilenet1.0_voc [1]_,75.4,`shell script `_,`log `_ 9 | ssd_512_mobilenet1.0_voc_int8* [1]_,75.04,, 10 | -------------------------------------------------------------------------------- /docs/model_zoo/csv_tables/Detections/Pascal_YOLO-v3.csv: -------------------------------------------------------------------------------- 1 | Model,mAP,Training Command,Training log 2 | yolo3_darknet53_voc [3]_ :gray:`(320x320)`,79.3,`shell script `_,`log `_ 3 | yolo3_darknet53_voc [3]_ :gray:`(416x416)`,81.5,`shell script `_,`log `_ 4 | yolo3_mobilenet1.0_voc [3]_ :gray:`(320x320)`,73.9,`shell script `_,`log `_ 5 | yolo3_mobilenet1.0_voc [3]_ :gray:`(416x416)`,75.8,`shell script `_,`log `_ 6 | -------------------------------------------------------------------------------- /docs/model_zoo/csv_tables/Poses/MSCOCO_Alpha-Pose.csv: -------------------------------------------------------------------------------- 1 | Model,OKS AP,OKS AP (with flip),Hashtag,Training Command,Training log 2 | alpha_pose_resnet101_v1b_coco [2]_,74.2/91.6/80.7,76.7/92.6/82.9,de56b871,, 3 | 
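Any of the detection or pose entries above can be loaded by name as well; a minimal sketch using one of the Pascal VOC SSD rows (``street.jpg`` is a placeholder path to any test image)::

    from gluoncv import model_zoo, data

    net = model_zoo.get_model('ssd_512_resnet50_v1_voc', pretrained=True)
    # load_test resizes and normalizes the image, returning both the network
    # input tensor and the original image array for visualization.
    x, img = data.transforms.presets.ssd.load_test('street.jpg', short=512)
    class_ids, scores, bboxes = net(x)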
-------------------------------------------------------------------------------- /docs/model_zoo/csv_tables/Segmentations/SS_MHP-V1.csv: -------------------------------------------------------------------------------- 1 | Name,Method,pixAcc,mIoU,Command,log 2 | icnet_resnet50_mhpv1,ICNet [5]_,90.5,44.5,`shell script `_,`log `_ 3 | -------------------------------------------------------------------------------- /docs/model_zoo/csv_tables/Segmentations/SS_MS-COCO.csv: -------------------------------------------------------------------------------- 1 | Name,Method,pixAcc,mIoU,Command,log 2 | fcn_resnet101_coco,FCN [2]_,92.2,66.2,`shell script `_,`log `_ 3 | psp_resnet101_coco,PSP [3]_,92.4,70.4,`shell script `_,`log `_ 4 | deeplab_resnet101_coco,DeepLabV3 [4]_,92.5,70.4,`shell script `_,`log `_ 5 | -------------------------------------------------------------------------------- /docs/model_zoo/csv_tables/Segmentations/SS_Pascal-VOC.csv: -------------------------------------------------------------------------------- 1 | Name,Method,pixAcc,mIoU,Command,log 2 | fcn_resnet101_voc,FCN [2]_,N/A,83.6_,`shell script `_,`log `_ 3 | psp_resnet101_voc,PSP [3]_,N/A,85.1_,`shell script `_,`log `_ 4 | deeplab_resnet101_voc,DeepLabV3 [4]_,N/A,86.2_,`shell script `_,`log `_ 5 | deeplab_resnet152_voc,DeepLabV3 [4]_,N/A,86.7_,`shell script `_,`log `_ 6 | -------------------------------------------------------------------------------- /docs/model_zoo/depth.rst: -------------------------------------------------------------------------------- 1 | .. _gluoncv-model-zoo-depth: 2 | 3 | Depth Prediction 4 | ================ 5 | 6 | .. role:: framework 7 | :class: framework 8 | .. role:: select 9 | :class: selected framework 10 | 11 | .. container:: Frameworks 12 | 13 | .. container:: framework-group 14 | 15 | :framework:`MXNet` 16 | :framework:`Pytorch` 17 | 18 | .. rst-class:: MXNet 19 | 20 | MXNet 21 | ************* 22 | 23 | .. include:: depth_mxnet.rst 24 | 25 | .. rst-class:: Pytorch 26 | 27 | PyTorch 28 | ************* 29 | 30 | .. include:: depth_torch.rst 31 | 32 | Reference 33 | ************* 34 | 35 | .. [1] Clement Godard, Oisin Mac Aodha, Michael Firman and Gabriel J. Brostow. \ 36 | "Digging into Self-Supervised Monocular Depth Prediction." \ 37 | Proceedings of the International Conference on Computer Vision (ICCV), 2019. 38 | -------------------------------------------------------------------------------- /docs/model_zoo/depth_torch.rst: -------------------------------------------------------------------------------- 1 | Models implemented using PyTorch will be added later. Please check out our MXNet implementation instead. 2 | -------------------------------------------------------------------------------- /docs/model_zoo/detection_torch.rst: -------------------------------------------------------------------------------- 1 | Models implemented using PyTorch will be added later. Please check out our MXNet implementation instead. 2 | -------------------------------------------------------------------------------- /docs/model_zoo/index.rst: -------------------------------------------------------------------------------- 1 | Model Zoo 2 | ========= 3 | 4 | .. container:: cards 5 | 6 | .. card:: 7 | :title: Classification 8 | :link: classification.html 9 | 10 | Select your models from charts and tables of the classification models 11 | 12 | .. card:: 13 | :title: Object Detection 14 | :link: detection.html 15 | 16 | Select your models from charts and tables of the detection models 17 | 18 | .. 
card:: 19 | :title: Segmentation 20 | :link: segmentation.html 21 | 22 | Select your models from charts and tables of the segmentation models 23 | 24 | .. card:: 25 | :title: Pose Estimation 26 | :link: pose.html 27 | 28 | Select your models from charts and tables of the pose estimation models 29 | 30 | .. card:: 31 | :title: Action Recognition 32 | :link: action_recognition.html 33 | 34 | Select your models from charts and tables of the action recognition models 35 | 36 | .. card:: 37 | :title: Depth Prediction 38 | :link: depth.html 39 | 40 | Select your models from charts and tables of the depth prediction models 41 | 42 | 43 | .. toctree:: 44 | :hidden: 45 | :maxdepth: 1 46 | 47 | classification 48 | detection 49 | segmentation 50 | pose 51 | action_recognition 52 | depth 53 | 54 | -------------------------------------------------------------------------------- /docs/model_zoo/pose.rst: -------------------------------------------------------------------------------- 1 | .. _gluoncv-model-zoo-pose: 2 | 3 | Pose Estimation 4 | ==================== 5 | 6 | .. role:: framework 7 | :class: framework 8 | .. role:: select 9 | :class: selected framework 10 | 11 | .. container:: Frameworks 12 | 13 | .. container:: framework-group 14 | 15 | :framework:`MXNet` 16 | :framework:`Pytorch` 17 | 18 | .. rst-class:: MXNet 19 | 20 | MXNet 21 | ************* 22 | 23 | .. include:: pose_mxnet.rst 24 | 25 | .. rst-class:: Pytorch 26 | 27 | PyTorch 28 | ************* 29 | 30 | .. include:: pose_torch.rst 31 | 32 | Reference 33 | ************* 34 | 35 | .. [1] Xiao, Bin, Haiping Wu, and Yichen Wei. \ 36 | "Simple baselines for human pose estimation and tracking." \ 37 | Proceedings of the European Conference on Computer Vision (ECCV). 2018. 38 | .. [2] Fang, Hao-Shu, et al. \ 39 | "Rmpe: Regional multi-person pose estimation." \ 40 | Proceedings of the IEEE International Conference on Computer Vision. 2017. 41 | -------------------------------------------------------------------------------- /docs/model_zoo/pose_torch.rst: -------------------------------------------------------------------------------- 1 | Models implemented using PyTorch will be added later. Please check out our MXNet implementation instead. 2 | -------------------------------------------------------------------------------- /docs/model_zoo/segmentation_torch.rst: -------------------------------------------------------------------------------- 1 | Models implemented using PyTorch will be added later. Please check out our MXNet implementation instead. 2 | -------------------------------------------------------------------------------- /docs/slides.md: -------------------------------------------------------------------------------- 1 | Slides 2 | ====== 3 | 4 | ### GluonCV Workshop in Tokyo, 2018 5 | 6 | - [2018.12.17 Introduction to GluonCV](https://github.com/dmlc/web-data/blob/master/gluoncv/slides/IntroToGluonCV.pdf) 7 | - [2018.12.17 GluonCV: Image Classification](https://github.com/dmlc/web-data/blob/master/gluoncv/slides/Classification.pdf) 8 | - [2018.12.17 GluonCV: Segmentation](https://github.com/dmlc/web-data/blob/master/gluoncv/slides/Segmentation.pdf) 9 | - [2018.12.17 GluonCV: Object Detection](https://github.com/dmlc/web-data/blob/master/gluoncv/slides/Detection.pdf) 10 | - [2018.12.18 GluonCV: Data Processing](https://github.com/dmlc/web-data/blob/master/gluoncv/slides/DataProcessing.pdf) 11 | 12 | 13 | Feel free to share any slides that might interest your colleagues. 
14 | -------------------------------------------------------------------------------- /docs/tutorials/action_recognition/README.txt: -------------------------------------------------------------------------------- 1 | Action Recognition 2 | ================== 3 | -------------------------------------------------------------------------------- /docs/tutorials/auto_module/README.txt: -------------------------------------------------------------------------------- 1 | Auto Module 2 | =========== 3 | -------------------------------------------------------------------------------- /docs/tutorials/classification/README.txt: -------------------------------------------------------------------------------- 1 | Image Classification 2 | ==================== 3 | -------------------------------------------------------------------------------- /docs/tutorials/datasets/.gitignore: -------------------------------------------------------------------------------- 1 | VOCtemplate 2 | val.lst 3 | val.rec 4 | val.idx 5 | im2rec.py 6 | -------------------------------------------------------------------------------- /docs/tutorials/datasets/README.txt: -------------------------------------------------------------------------------- 1 | Prepare Datasets 2 | ================ 3 | -------------------------------------------------------------------------------- /docs/tutorials/datasets/youtube_bb.py: -------------------------------------------------------------------------------- 1 | """Prepare Youtube_bb dataset 2 | ========================================= 3 | 4 | The `Youtube_bb dataset `_ is a large-scale video dataset. 5 | It consists of approximately 380,000 15-20s video segments extracted from 240,000 different publicly visible YouTube videos. 6 | 7 | This tutorial helps you download Youtube_bb and set it up for later experiments. 8 | 9 | .. hint:: 10 | 11 | You need 195G free disk space to download and extract this dataset. 12 | SSD hard drives are recommended for faster speed. 13 | The time it takes to prepare the dataset depends on your Internet connection 14 | and disk speed. 
15 | 16 | If you want to download this dataset, please follow the ``yt_bb`` instructions. 17 | 18 | """ 19 | -------------------------------------------------------------------------------- /docs/tutorials/deployment/.gitignore: -------------------------------------------------------------------------------- 1 | *.json 2 | *.params 3 | -------------------------------------------------------------------------------- /docs/tutorials/deployment/README.txt: -------------------------------------------------------------------------------- 1 | Deployment 2 | ========== 3 | -------------------------------------------------------------------------------- /docs/tutorials/depth/README.txt: -------------------------------------------------------------------------------- 1 | Depth Prediction 2 | ===================== -------------------------------------------------------------------------------- /docs/tutorials/detection/.gitignore: -------------------------------------------------------------------------------- 1 | *.jpg 2 | *.rec 3 | *.idx 4 | -------------------------------------------------------------------------------- /docs/tutorials/detection/README.txt: -------------------------------------------------------------------------------- 1 | Object Detection 2 | ================ 3 | -------------------------------------------------------------------------------- /docs/tutorials/distributed/README.txt: -------------------------------------------------------------------------------- 1 | Distributed Training 2 | ==================== 3 | -------------------------------------------------------------------------------- /docs/tutorials/instance/.gitignore: -------------------------------------------------------------------------------- 1 | *.jpg 2 | -------------------------------------------------------------------------------- /docs/tutorials/instance/README.txt: -------------------------------------------------------------------------------- 1 | Instance Segmentation 2 | ===================== 3 | -------------------------------------------------------------------------------- /docs/tutorials/pose/README.txt: -------------------------------------------------------------------------------- 1 | Pose Estimation 2 | ===================== 3 | -------------------------------------------------------------------------------- /docs/tutorials/segmentation/.gitignore: -------------------------------------------------------------------------------- 1 | *.jpg 2 | *.png 3 | -------------------------------------------------------------------------------- /docs/tutorials/segmentation/README.txt: -------------------------------------------------------------------------------- 1 | Semantic Segmentation 2 | ===================== 3 | -------------------------------------------------------------------------------- /docs/tutorials/tracking/README.txt: -------------------------------------------------------------------------------- 1 | Object Tracking 2 | ===================== 3 | -------------------------------------------------------------------------------- /docs/tutorials_torch/action_recognition/README.txt: -------------------------------------------------------------------------------- 1 | Action Recognition 2 | ================== 3 | -------------------------------------------------------------------------------- /docs/tutorials_torch/index.rst: -------------------------------------------------------------------------------- 1 | PyTorch Tutorials 2 | ================= 3 | 4 | Interested in getting started in a new CV area? 
Here are some tutorials to help you get started. 5 | 6 | Action Recognition 7 | --------------------- 8 | 9 | .. container:: cards 10 | 11 | 12 | .. card:: 13 | :title: Pre-trained I3D Models on Kinetics400 14 | :link: ../build/examples_torch_action_recognition/demo_i3d_kinetics400.html 15 | 16 | Recognize human actions in real-world videos with pre-trained I3D models 17 | 18 | .. card:: 19 | :title: Finetuning SOTA Video Models on Your Dataset 20 | :link: ../build/examples_torch_action_recognition/finetune_custom.html 21 | 22 | Hands-on fine-tuning of SOTA video models on your own dataset 23 | 24 | .. card:: 25 | :title: Extracting video features from pre-trained models 26 | :link: ../build/examples_torch_action_recognition/extract_feat.html 27 | 28 | Extracting video features from pre-trained models on your own videos 29 | 30 | .. card:: 31 | :title: Computing FLOPS, latency and fps of a model 32 | :link: ../build/examples_torch_action_recognition/speed.html 33 | 34 | How to compute FLOPS, number of parameters, latency and fps of a video model 35 | 36 | .. card:: 37 | :title: DistributedDataParallel (DDP) framework 38 | :link: ../build/examples_torch_action_recognition/ddp_pytorch.html 39 | 40 | How to use our DistributedDataParallel framework 41 | 42 | 43 | .. toctree:: 44 | :hidden: 45 | :maxdepth: 2 46 | 47 | ../build/examples_torch_action_recognition/index 48 |
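These PyTorch tutorials all drive models through a yacs configuration. A minimal sketch of loading a pre-trained video model this way (the YAML filename is hypothetical; in the tutorials a matching config is provided next to each script, and the ``cfg.CONFIG.MODEL`` fields come from the default action recognition config)::

    import torch
    from gluoncv.torch.engine.config import get_cfg_defaults
    from gluoncv.torch.model_zoo import get_model

    cfg = get_cfg_defaults(name='action_recognition')
    cfg.merge_from_file('i3d_resnet50_v1_kinetics400.yaml')  # hypothetical local config
    cfg.CONFIG.MODEL.PRETRAINED = True  # fetch pre-trained weights
    model = get_model(cfg).eval()

    clip = torch.randn(1, 3, 32, 224, 224)  # dummy N x C x T x H x W clip
    with torch.no_grad():
        scores = model(clip)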
 -------------------------------------------------------------------------------- /gluoncv/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # pylint: disable=wrong-import-position 3 | """GluonCV: a deep learning vision toolkit powered by Gluon.""" 4 | from __future__ import absolute_import 5 | 6 | from .check import _deprecate_python2 7 | from .check import _require_mxnet_version, _require_pytorch_version 8 | 9 | __version__ = '0.11.0' 10 | 11 | _deprecate_python2() 12 | 13 | # optionally depend on mxnet or pytorch 14 | _found_mxnet = _found_pytorch = False 15 | try: 16 | _require_mxnet_version('1.4.0', '2.0.0') 17 | from . import data 18 | from . import model_zoo 19 | from . import nn 20 | from . import utils 21 | from . import loss 22 | _found_mxnet = True 23 | except ImportError: 24 | pass 25 | 26 | try: 27 | _require_pytorch_version('1.4.0', '2.0.0') 28 | _found_pytorch = True 29 | except ImportError: 30 | pass 31 | 32 | if not any((_found_mxnet, _found_pytorch)): 33 | raise ImportError('Unable to import modules due to missing `mxnet` & `torch`. ' 34 | 'You should install at least one deep learning framework.') 35 | 36 | if all((_found_mxnet, _found_pytorch)): 37 | import warnings 38 | import mxnet as mx 39 | import torch 40 | warnings.warn(f'Both `mxnet=={mx.__version__}` and `torch=={torch.__version__}` are installed. ' 41 | 'You might encounter increased GPU memory footprint if both frameworks are used at the same time.') 42 | -------------------------------------------------------------------------------- /gluoncv/auto/__init__.py: -------------------------------------------------------------------------------- 1 | """GluonCV auto""" 2 | from .estimators import * 3 | -------------------------------------------------------------------------------- /gluoncv/auto/data/__init__.py: -------------------------------------------------------------------------------- 1 | """Data Pipelines""" 2 | from .auto_data import url_data, URLs, is_url 3 | -------------------------------------------------------------------------------- /gluoncv/auto/estimators/__init__.py: -------------------------------------------------------------------------------- 1 | """Estimator implementations""" 2 | from .utils import create_dummy_estimator 3 | # FIXME: for quick test purpose only 4 | try: 5 | import mxnet 6 | from .image_classification import ImageClassificationEstimator 7 | from .ssd import SSDEstimator 8 | from .yolo import YOLOv3Estimator 9 | from .faster_rcnn import FasterRCNNEstimator 10 | # from .mask_rcnn import MaskRCNNEstimator 11 | from .center_net import CenterNetEstimator 12 | except ImportError: 13 | # create dummy placeholder estimator classes 14 | reason = 'gluoncv.auto.estimators.{} requires mxnet, which is not installed.' 15 | ImageClassificationEstimator = create_dummy_estimator( 16 | 'ImageClassificationEstimator', reason) 17 | SSDEstimator = create_dummy_estimator( 18 | 'SSDEstimator', reason) 19 | YOLOv3Estimator = create_dummy_estimator( 20 | 'YOLOv3Estimator', reason) 21 | FasterRCNNEstimator = create_dummy_estimator( 22 | 'FasterRCNNEstimator', reason) 23 | CenterNetEstimator = create_dummy_estimator( 24 | 'CenterNetEstimator', reason) 25 | 26 | try: 27 | import timm 28 | import torch 29 | from .torch_image_classification import TorchImageClassificationEstimator 30 | except ImportError: 31 | reason = 'This estimator requires torch/timm, which are not installed.' 
32 | TorchImageClassificationEstimator = create_dummy_estimator( 33 | 'TorchImageClassificationEstimator', reason) 34 | -------------------------------------------------------------------------------- /gluoncv/auto/estimators/center_net/__init__.py: -------------------------------------------------------------------------------- 1 | """CenterNet Estimator implementations""" 2 | 3 | from .center_net import CenterNetEstimator 4 | -------------------------------------------------------------------------------- /gluoncv/auto/estimators/conf.py: -------------------------------------------------------------------------------- 1 | """Shared configs""" 2 | _BEST_CHECKPOINT_FILE = 'best_checkpoint.pkl' 3 | -------------------------------------------------------------------------------- /gluoncv/auto/estimators/constants.py: -------------------------------------------------------------------------------- 1 | BINARY = 'binary' 2 | MULTICLASS = 'multiclass' 3 | REGRESSION = 'regression' 4 | -------------------------------------------------------------------------------- /gluoncv/auto/estimators/faster_rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | """R-CNN Estimator implementations""" 2 | 3 | from .faster_rcnn import FasterRCNNEstimator 4 | -------------------------------------------------------------------------------- /gluoncv/auto/estimators/image_classification/__init__.py: -------------------------------------------------------------------------------- 1 | """Image classification estimator""" 2 | from .image_classification import ImageClassificationEstimator 3 | -------------------------------------------------------------------------------- /gluoncv/auto/estimators/mask_rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | """Mask R-CNN Estimator implementations""" 2 | 3 | from .mask_rcnn import MaskRCNNEstimator 4 | -------------------------------------------------------------------------------- /gluoncv/auto/estimators/ssd/__init__.py: -------------------------------------------------------------------------------- 1 | """SSD Estimator implementations""" 2 | 3 | from .ssd import SSDEstimator 4 | -------------------------------------------------------------------------------- /gluoncv/auto/estimators/torch_image_classification/__init__.py: -------------------------------------------------------------------------------- 1 | """Torch image classification estimator""" 2 | from .torch_image_classification import TorchImageClassificationEstimator 3 | -------------------------------------------------------------------------------- /gluoncv/auto/estimators/torch_image_classification/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import resume_checkpoint 2 | from .utils import resolve_data_config, update_cfg 3 | from .optimizer import optimizer_kwargs, create_optimizer_v2a 4 | from .scheduler import create_scheduler 5 | from .metrics import rmse 6 | -------------------------------------------------------------------------------- /gluoncv/auto/estimators/torch_image_classification/utils/constants.py: -------------------------------------------------------------------------------- 1 | IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406) 2 | IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225) 3 | DEFAULT_CROP_PCT = 0.875 4 | -------------------------------------------------------------------------------- 
/gluoncv/auto/estimators/torch_image_classification/utils/metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.functional import softmax 3 | 4 | def rmse(outputs, target): 5 | return torch.sqrt(torch.mean((softmax(outputs, dim=0)-target)**2))  # root-mean-square error on softmax scores 6 | -------------------------------------------------------------------------------- /gluoncv/auto/estimators/yolo/__init__.py: -------------------------------------------------------------------------------- 1 | """YOLO Estimator implementations""" 2 | 3 | from .yolo import YOLOv3Estimator 4 | -------------------------------------------------------------------------------- /gluoncv/auto/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | """AutoML Tasks""" 2 | import logging 3 | 4 | from .image_classification import * 5 | from .object_detection import * 6 | # from .utils import * 7 | 8 | logger = logging.getLogger(__name__) 9 | msg = ( 10 | "We plan to deprecate auto from gluoncv on release 0.12.0. " 11 | "Please consider using autogluon.vision instead, which provides the same functionality: " 12 | "https://auto.gluon.ai/stable/tutorials/image_prediction/index.html" 13 | ) 14 | logger.warning(msg) 15 | -------------------------------------------------------------------------------- /gluoncv/data/ade20k/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/data/ade20k/__init__.py -------------------------------------------------------------------------------- /gluoncv/data/hmdb51/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wildcard-import 2 | """Video action recognition, HMDB51 dataset. 3 | http://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/ 4 | """ 5 | from __future__ import absolute_import 6 | from .classification import * 7 | -------------------------------------------------------------------------------- /gluoncv/data/imagenet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/data/imagenet/__init__.py -------------------------------------------------------------------------------- /gluoncv/data/kinetics400/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wildcard-import 2 | """Video action recognition, Kinetics400 dataset. 3 | https://deepmind.com/research/open-source/open-source-datasets/kinetics/ 4 | """ 5 | from __future__ import absolute_import 6 | from .classification import * 7 | -------------------------------------------------------------------------------- /gluoncv/data/kinetics700/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wildcard-import 2 | """Video action recognition, Kinetics700 dataset. 
3 | https://deepmind.com/research/open-source/open-source-datasets/kinetics/ 4 | """ 5 | from __future__ import absolute_import 6 | from .classification import * 7 | -------------------------------------------------------------------------------- /gluoncv/data/kitti/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-module-docstring 2 | from .kitti_dataset import * 3 | from .kitti_utils import * 4 | -------------------------------------------------------------------------------- /gluoncv/data/lst/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/data/lst/__init__.py -------------------------------------------------------------------------------- /gluoncv/data/market1501/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/data/market1501/__init__.py -------------------------------------------------------------------------------- /gluoncv/data/market1501/data_read.py: -------------------------------------------------------------------------------- 1 | """Market 1501 Person Re-Identification Dataset.""" 2 | from mxnet.gluon.data import dataset 3 | from mxnet import image 4 | 5 | __all__ = ['ImageTxtDataset'] 6 | 7 | 8 | class ImageTxtDataset(dataset.Dataset): 9 | """Load the Market 1501 dataset. 10 | 11 | Parameters 12 | ---------- 13 | items : list 14 | List of (image path, label) pairs. 15 | flag : int, default 1 16 | Loading flag: 1 for color images, 0 for grayscale. 17 | transform : function, default None 18 | A function that takes data and label and transforms them. 19 | """ 20 | def __init__(self, items, flag=1, transform=None): 21 | self._flag = flag 22 | self._transform = transform 23 | self.items = items 24 | 25 | def __getitem__(self, idx): 26 | fpath = self.items[idx][0] 27 | img = image.imread(fpath, self._flag) 28 | label = self.items[idx][1] 29 | if self._transform is not None: 30 | img = self._transform(img) 31 | return img, label 32 | 33 | def __len__(self): 34 | return len(self.items) 35 | -------------------------------------------------------------------------------- /gluoncv/data/market1501/label_read.py: -------------------------------------------------------------------------------- 1 | """Market 1501 Person Re-Identification Dataset.""" 2 | import random 3 | from os import path as osp 4 | 5 | __all__ = ['LabelList'] 6 | 7 | 8 | def LabelList(ratio=1, root='~/.mxnet/datasets', name='market1501'): 9 | """Load the Label List for Market 1501 dataset. 10 | 11 | Parameters 12 | ---------- 13 | ratio : float, default 1 14 | Fraction of labels assigned to the training split; the rest go to validation. 15 | root : str, default '~/.mxnet/datasets' 16 | Path to the folder that stores the dataset. 17 | name : str, default 'market1501' 18 | Which dataset is used. Only market1501 is supported for now. 
19 | """ 20 | root = osp.expanduser(root) 21 | 22 | if name == "market1501": 23 | path = osp.join(root, "Market-1501-v15.09.15") 24 | train_txt = osp.join(path, "train.txt") 25 | image_path = osp.join(path, "bounding_box_train") 26 | 27 | item_list = [(osp.join(image_path, line.split()[0]), int(line.split()[1])) 28 | for line in open(train_txt).readlines()] 29 | random.shuffle(item_list) 30 | count = len(item_list) 31 | train_count = int(count * ratio) 32 | 33 | train_set = item_list[:train_count] 34 | valid_set = item_list[train_count:] 35 | 36 | return train_set, valid_set 37 | return None, None 38 | -------------------------------------------------------------------------------- /gluoncv/data/mixup/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/data/mixup/__init__.py -------------------------------------------------------------------------------- /gluoncv/data/mscoco/__init__.py: -------------------------------------------------------------------------------- 1 | """MS COCO dataset.""" 2 | -------------------------------------------------------------------------------- /gluoncv/data/mscoco/utils.py: -------------------------------------------------------------------------------- 1 | """Import helper for pycocotools""" 2 | # NOTE: for developers 3 | # please do not import any pycocotools in __init__ because we are trying to lazy 4 | # import pycocotools to avoid install it for other users who may not use it. 5 | # only import when you actually use it 6 | from __future__ import absolute_import 7 | 8 | from ...utils.filesystem import import_try_install 9 | 10 | def try_import_pycocotools(): 11 | """Tricks to optionally install and import pycocotools""" 12 | # first we can try import pycocotools 13 | try: 14 | import pycocotools as _ 15 | except ImportError: 16 | import os 17 | # we need to install pycootools, which is a bit tricky 18 | # pycocotools sdist requires Cython, numpy(already met) 19 | import_try_install('cython') 20 | # pypi pycocotools is not compatible with windows 21 | win_url = 'git+https://github.com/zhreshold/cocoapi.git#subdirectory=PythonAPI' 22 | try: 23 | if os.name == 'nt': 24 | import_try_install('pycocotools', win_url) 25 | else: 26 | import_try_install('pycocotools') 27 | except ImportError: 28 | faq = 'cocoapi FAQ' 29 | raise ImportError('Cannot import or install pycocotools, please refer to %s.' 
% faq) 30 | -------------------------------------------------------------------------------- /gluoncv/data/otb/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wildcard-import 2 | """Object Tracking, Visual Tracker Benchmark 3 | http://www.visual-tracking.net""" 4 | from __future__ import absolute_import 5 | from .tracking import * 6 | -------------------------------------------------------------------------------- /gluoncv/data/pascal_aug/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/data/pascal_aug/__init__.py -------------------------------------------------------------------------------- /gluoncv/data/pascal_voc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/data/pascal_voc/__init__.py -------------------------------------------------------------------------------- /gluoncv/data/recordio/__init__.py: -------------------------------------------------------------------------------- 1 | """Datasets from RecordIO files.""" 2 | -------------------------------------------------------------------------------- /gluoncv/data/somethingsomethingv2/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wildcard-import 2 | """Video action recognition, something-something-v2 dataset. 3 | https://20bn.com/datasets/something-something 4 | """ 5 | from __future__ import absolute_import 6 | from .classification import * 7 | -------------------------------------------------------------------------------- /gluoncv/data/tracking_data/__init__.py: -------------------------------------------------------------------------------- 1 | """Tracking datasets, including YouTube-BB, VID, DET and COCO.""" 2 | from __future__ import absolute_import 3 | from .track import * 4 | -------------------------------------------------------------------------------- /gluoncv/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | """Data transforms""" 2 | from __future__ import absolute_import 3 | 4 | from . import bbox 5 | from . import image 6 | from . import experimental 7 | from . import mask 8 | from . import presets 9 | from .block import RandomCrop 10 | from . import pose 11 | from . import video 12 | -------------------------------------------------------------------------------- /gluoncv/data/transforms/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | """Experimental transforms.""" 2 | from . import bbox 3 | from . import image 4 | -------------------------------------------------------------------------------- /gluoncv/data/transforms/presets/__init__.py: -------------------------------------------------------------------------------- 1 | """Presets of sophisticated data transforms used in various papers.""" 2 | from . import ssd 3 | from . import rcnn 4 | from . import yolo 5 | from . import center_net 6 | from . import imagenet 7 | from . import simple_pose 8 | from . 
import segmentation 9 | -------------------------------------------------------------------------------- /gluoncv/data/transforms/presets/segmentation.py: -------------------------------------------------------------------------------- 1 | """Transforms for Segmentation models.""" 2 | from __future__ import absolute_import 3 | 4 | from mxnet.gluon.data.vision import transforms 5 | 6 | def test_transform(img, ctx): 7 | transform_fn = transforms.Compose([ 8 | transforms.ToTensor(), 9 | transforms.Normalize([.485, .456, .406], [.229, .224, .225]) 10 | ]) 11 | img = transform_fn(img) 12 | img = img.expand_dims(0).as_in_context(ctx) 13 | return img 14 | -------------------------------------------------------------------------------- /gluoncv/data/ucf101/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wildcard-import 2 | """Video action recognition, UCF101 dataset. 3 | https://www.crcv.ucf.edu/data/UCF101.php 4 | """ 5 | from __future__ import absolute_import 6 | from .classification import * 7 | -------------------------------------------------------------------------------- /gluoncv/data/video_custom/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wildcard-import 2 | """ 3 | Customized data loader for video classification related tasks. 4 | """ 5 | from __future__ import absolute_import 6 | from .classification import * 7 | -------------------------------------------------------------------------------- /gluoncv/data/visdrone/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/data/visdrone/__init__.py -------------------------------------------------------------------------------- /gluoncv/model_zoo/__init__.py: -------------------------------------------------------------------------------- 1 | """GluonCV Model Zoo""" 2 | # pylint: disable=wildcard-import 3 | from .model_zoo import get_model, get_model_list 4 | from .model_store import pretrained_model_list 5 | from .rcnn.faster_rcnn import * 6 | from .rcnn.mask_rcnn import * 7 | from .ssd import * 8 | from .yolo import * 9 | from .cifarresnet import * 10 | from .cifarwideresnet import * 11 | from .fcn import * 12 | from .pspnet import * 13 | from .deeplabv3 import * 14 | from .deeplabv3_plus import * 15 | from .deeplabv3b_plus import * 16 | from . 
import segbase 17 | from .resnetv1b import * 18 | from .se_resnet import * 19 | from .nasnet import * 20 | from .simple_pose.simple_pose_resnet import * 21 | from .simple_pose.mobile_pose import * 22 | from .action_recognition import * 23 | from .wideresnet import * 24 | 25 | from .resnest import * 26 | from .resnext import * 27 | from .alexnet import * 28 | from .densenet import * 29 | from .googlenet import * 30 | from .inception import * 31 | from .xception import * 32 | from .resnet import * 33 | from .squeezenet import * 34 | from .vgg import * 35 | from .mobilenet import * 36 | from .residual_attentionnet import * 37 | from .center_net import * 38 | from .hrnet import * 39 | from .siamrpn import * 40 | from .fastscnn import * 41 | from .monodepthv2 import * 42 | from .smot import * 43 | -------------------------------------------------------------------------------- /gluoncv/model_zoo/action_recognition/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wildcard-import 2 | """Video action recognition.""" 3 | from __future__ import absolute_import 4 | from .actionrec_vgg16 import * 5 | from .actionrec_inceptionv1 import * 6 | from .actionrec_inceptionv3 import * 7 | from .actionrec_resnetv1b import * 8 | from .c3d import * 9 | from .p3d import * 10 | from .r2plus1d import * 11 | from .i3d_resnet import * 12 | from .i3d_inceptionv1 import * 13 | from .i3d_inceptionv3 import * 14 | from .slowfast import * 15 | from .i3d_slow import * 16 | -------------------------------------------------------------------------------- /gluoncv/model_zoo/alpha_pose/__init__.py: -------------------------------------------------------------------------------- 1 | """Alpha pose for real time human pose estimation""" 2 | # pylint: disable=wildcard-import 3 | from .fast_pose import * 4 | -------------------------------------------------------------------------------- /gluoncv/model_zoo/center_net/__init__.py: -------------------------------------------------------------------------------- 1 | """CenterNet""" 2 | # pylint: disable=wildcard-import 3 | from __future__ import absolute_import 4 | 5 | from .center_net import * 6 | -------------------------------------------------------------------------------- /gluoncv/model_zoo/monodepthv2/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-module-docstring 2 | from .resnet_encoder import ResnetEncoder 3 | from .depth_decoder import DepthDecoder 4 | from .pose_decoder import PoseDecoder 5 | from .monodepth2 import * 6 | from .monodepth2_posenet import * 7 | -------------------------------------------------------------------------------- /gluoncv/model_zoo/pruned_resnet/__init__.py: -------------------------------------------------------------------------------- 1 | """GluonCV Model Zoo""" 2 | # pylint: disable=wildcard-import 3 | from ..resnetv1b import * 4 | -------------------------------------------------------------------------------- /gluoncv/model_zoo/quantized/__init__.py: -------------------------------------------------------------------------------- 1 | """Quantized versions of GluonCV models.""" 2 | # pylint: disable=wildcard-import 3 | from .quantized import * 4 | -------------------------------------------------------------------------------- /gluoncv/model_zoo/rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | """Fast RCNN.""" 2 | from __future__ import 
absolute_import 3 | 4 | from .rcnn import RCNN 5 | -------------------------------------------------------------------------------- /gluoncv/model_zoo/rcnn/faster_rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wildcard-import 2 | """Faster-RCNN Object Detection.""" 3 | from __future__ import absolute_import 4 | 5 | from .faster_rcnn import * 6 | from .doublehead_rcnn import * 7 | from .predefined_models import * 8 | from .rcnn_target import RCNNTargetGenerator, RCNNTargetSampler 9 | from .data_parallel import ForwardBackwardTask 10 | -------------------------------------------------------------------------------- /gluoncv/model_zoo/rcnn/mask_rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wildcard-import 2 | """Mask RCNN Instance Segmentation""" 3 | from __future__ import absolute_import 4 | 5 | from .mask_rcnn import * 6 | from .predefined_models import * 7 | from .data_parallel import ForwardBackwardTask 8 | -------------------------------------------------------------------------------- /gluoncv/model_zoo/rcnn/rpn/__init__.py: -------------------------------------------------------------------------------- 1 | """Region Proposal Network.""" 2 | from __future__ import absolute_import 3 | 4 | from .rpn import RPN 5 | from . import bbox_clip 6 | from .anchor import RPNAnchorGenerator 7 | from .proposal import RPNProposal 8 | -------------------------------------------------------------------------------- /gluoncv/model_zoo/siamrpn/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wildcard-import 2 | """SiamRPN tracking""" 3 | from __future__ import absolute_import 4 | from .siam_net import * 5 | -------------------------------------------------------------------------------- /gluoncv/model_zoo/simple_pose/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/model_zoo/simple_pose/__init__.py -------------------------------------------------------------------------------- /gluoncv/model_zoo/smot/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wildcard-import 2 | """ 3 | SMOT: Single-Shot Multi Object Tracking 4 | https://arxiv.org/abs/2010.16031 5 | """ 6 | from __future__ import absolute_import 7 | from .smot_tracker import * 8 | from .tracktors import * 9 | -------------------------------------------------------------------------------- /gluoncv/model_zoo/ssd/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wildcard-import 2 | """Single-shot Object Detection.""" 3 | from __future__ import absolute_import 4 | from .ssd import * 5 | from .presets import * 6 | from .vgg_atrous import * 7 | -------------------------------------------------------------------------------- /gluoncv/model_zoo/yolo/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wildcard-import 2 | """YOLO Object Detection""" 3 | from __future__ import absolute_import 4 | 5 | from .darknet import * 6 | from .yolo3 import * 7 | -------------------------------------------------------------------------------- /gluoncv/nn/__init__.py: 
-------------------------------------------------------------------------------- 1 | """GluonCV neural network layers""" 2 | # pylint: disable=wildcard-import 3 | from __future__ import absolute_import 4 | 5 | from . import bbox 6 | from . import coder 7 | from . import feature 8 | from . import matcher 9 | from . import predictor 10 | from . import sampler 11 | from .block import * 12 | from .gn import * 13 | -------------------------------------------------------------------------------- /gluoncv/torch/__init__.py: -------------------------------------------------------------------------------- 1 | """GluonCV-Torch.""" 2 | from . import data 3 | from . import model_zoo 4 | from . import nn 5 | from . import utils 6 | -------------------------------------------------------------------------------- /gluoncv/torch/data/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module provides data loaders and transformers for popular vision datasets. 3 | """ 4 | 5 | from .video_cls.dataset_classification import VideoClsDataset 6 | from .video_cls.dataset_classification import build_dataloader, build_dataloader_test 7 | from .video_cls.multigrid_helper import multiGridHelper, MultiGridBatchSampler 8 | from .coot.dataloader import create_datasets, create_loaders 9 | from . import registry 10 | -------------------------------------------------------------------------------- /gluoncv/torch/data/detection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/torch/data/detection/__init__.py -------------------------------------------------------------------------------- /gluoncv/torch/data/detection/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .distributed_sampler import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler 2 | from .grouped_batch_sampler import GroupedBatchSampler 3 | 4 | __all__ = [ 5 | "GroupedBatchSampler", 6 | "TrainingSampler", 7 | "InferenceSampler", 8 | "RepeatFactorTrainingSampler", 9 | ] 10 | -------------------------------------------------------------------------------- /gluoncv/torch/data/gluoncv_motion_dataset/__init__.py: -------------------------------------------------------------------------------- 1 | """GluonCV motion dataset, supports multiple video tasks including 2 | video action recognition/detection, object tracking, pose tracking, etc.""" 3 | from .dataset import GluonCVMotionDataset, FieldNames 4 | -------------------------------------------------------------------------------- /gluoncv/torch/data/gluoncv_motion_dataset/ingestion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/torch/data/gluoncv_motion_dataset/ingestion/__init__.py -------------------------------------------------------------------------------- /gluoncv/torch/data/gluoncv_motion_dataset/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/torch/data/gluoncv_motion_dataset/io/__init__.py -------------------------------------------------------------------------------- /gluoncv/torch/data/gluoncv_motion_dataset/utils/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/torch/data/gluoncv_motion_dataset/utils/__init__.py -------------------------------------------------------------------------------- /gluoncv/torch/data/pose/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset_pose import * 2 | -------------------------------------------------------------------------------- /gluoncv/torch/data/structures/__init__.py: -------------------------------------------------------------------------------- 1 | from .boxes import Boxes, BoxMode, pairwise_iou 2 | from .instances import Instances 3 | from .masks import BitMasks, PolygonMasks, polygons_to_bitmask 4 | from .image_list import ImageList 5 | from .keypoints import Keypoints 6 | from .beziers import Beziers 7 | -------------------------------------------------------------------------------- /gluoncv/torch/data/transforms/instance_transforms/__init__.py: -------------------------------------------------------------------------------- 1 | """Transform and augmentation for instance level manipulations""" 2 | from .augmentation import * 3 | from .transform import * 4 | -------------------------------------------------------------------------------- /gluoncv/torch/data/transforms/videotransforms/utils/functional.py: -------------------------------------------------------------------------------- 1 | def normalize(tensor, mean, std): 2 | """ 3 | Args: 4 | tensor (Tensor): Tensor to normalize (modified in place) 5 | mean, std: statistics to subtract and divide by 6 | Returns: 7 | Tensor: Normalized tensor 8 | """ 9 | tensor.sub_(mean).div_(std) 10 | return tensor 11 | -------------------------------------------------------------------------------- /gluoncv/torch/data/transforms/videotransforms/utils/images.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def convert_img(img): 5 | """Converts (H, W, C) numpy.ndarray to (C, H, W) format 6 | """ 7 | if len(img.shape) == 3: 8 | img = img.transpose(2, 0, 1) 9 | if len(img.shape) == 2: 10 | img = np.expand_dims(img, 0) 11 | return img 12 | -------------------------------------------------------------------------------- /gluoncv/torch/data/video_cls/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Customized data loader for video classification related tasks. 
3 | """ 4 | from __future__ import absolute_import 5 | from .dataset_classification import * 6 | from .multigrid_helper import * 7 | -------------------------------------------------------------------------------- /gluoncv/torch/engine/__init__.py: -------------------------------------------------------------------------------- 1 | """GluonCV-Torch engine.""" 2 | -------------------------------------------------------------------------------- /gluoncv/torch/engine/config/__init__.py: -------------------------------------------------------------------------------- 1 | """The global configs registry""" 2 | from .action_recognition import _C as _C_action_recognition 3 | from .coot import _C as _C_coot 4 | from .directpose import _C as _C_directpose 5 | 6 | __all__ = ['get_cfg_defaults'] 7 | 8 | _CONFIG_REG = { 9 | "action_recognition": _C_action_recognition, 10 | "coot": _C_coot, 11 | "directpose": _C_directpose 12 | } 13 | 14 | def get_cfg_defaults(name='action_recognition'): 15 | """Get a yacs CfgNode object with default values for by name. 16 | 17 | Parameters 18 | ---------- 19 | name : str 20 | The name of the root config, e.g. action_recognition, coot, directpose... 21 | 22 | Returns 23 | ------- 24 | yacs.CfgNode object 25 | 26 | """ 27 | assert isinstance(name, str), f"{name} must be a str" 28 | name = name.lower() 29 | if name not in _CONFIG_REG.keys(): 30 | raise ValueError(f"Unknown root config with name: {name}") 31 | return _CONFIG_REG[name].clone() 32 | -------------------------------------------------------------------------------- /gluoncv/torch/model_zoo/__init__.py: -------------------------------------------------------------------------------- 1 | """GluonCV-Torch model zoo""" 2 | 3 | from .model_zoo import get_model, get_model_list 4 | from .action_recognition import * 5 | from .pose import * 6 | -------------------------------------------------------------------------------- /gluoncv/torch/model_zoo/action_recognition/__init__.py: -------------------------------------------------------------------------------- 1 | """GluonCV-Torch video action recognition.""" 2 | from __future__ import absolute_import 3 | from .actionrec_resnetv1b import * 4 | from .i3d_resnet import * 5 | from .i3d_slow import * 6 | from .slowfast import * 7 | from .r2plus1dv1 import * 8 | from .r2plus1dv2 import * 9 | from .tpn import * 10 | from .ircsnv2 import * 11 | -------------------------------------------------------------------------------- /gluoncv/torch/model_zoo/object_detection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/torch/model_zoo/object_detection/__init__.py -------------------------------------------------------------------------------- /gluoncv/torch/model_zoo/pose/__init__.py: -------------------------------------------------------------------------------- 1 | """GluonCV-Torch pose estimation.""" 2 | from __future__ import absolute_import 3 | from .directpose_resnet_fpn import * 4 | -------------------------------------------------------------------------------- /gluoncv/torch/nn/__init__.py: -------------------------------------------------------------------------------- 1 | """GluonCV-Torch neural network layers""" 2 | -------------------------------------------------------------------------------- /gluoncv/torch/nn/keypoint_loss.py: -------------------------------------------------------------------------------- 1 | """Loss layers 
for keypoints that can be inserted into modules""" 2 | import torch 3 | import torch.nn as nn 4 | 5 | __all__ = ['WeightedMSELoss', 'HMFocalLoss'] 6 | 7 | def _sigmoid(x): 8 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 9 | return y 10 | 11 | class WeightedMSELoss(nn.Module): 12 | """MSE loss modulated by a per-element weight mask""" 13 | def __init__(self): 14 | super().__init__() 15 | 16 | def forward(self, pred, gt, mask): 17 | assert pred.size() == gt.size() 18 | loss = ((pred - gt) ** 2) * mask 19 | loss = loss.mean() 20 | return loss 21 | 22 | class HMFocalLoss(nn.Module): 23 | """Heatmap focal loss layer (penalty-reduced focal loss for keypoint heatmaps)""" 24 | def __init__(self, alpha, beta): 25 | super(HMFocalLoss, self).__init__() 26 | self.alpha = alpha 27 | self.beta = beta 28 | 29 | def forward(self, pred, gt): 30 | pos_inds = gt.eq(1).float() 31 | neg_inds = gt.lt(1).float() 32 | 33 | pred = _sigmoid(pred) 34 | neg_weights = torch.pow(1 - gt, self.beta) 35 | 36 | pos_loss = torch.log(pred) * torch.pow(1 - pred, self.alpha) * pos_inds 37 | neg_loss = torch.log(1 - pred) * torch.pow(pred, self.alpha) * neg_weights * neg_inds 38 | 39 | num_pos = pos_inds.float().sum() 40 | pos_loss = pos_loss.sum() 41 | neg_loss = neg_loss.sum() 42 | 43 | if num_pos == 0: 44 | return -neg_loss 45 | else: 46 | return -(pos_loss + neg_loss) / num_pos 47 | -------------------------------------------------------------------------------- /gluoncv/torch/nn/shape_spec.py: -------------------------------------------------------------------------------- 1 | """Internal structure for shape""" 2 | from collections import namedtuple 3 | 4 | 5 | class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): 6 | """ 7 | A simple structure that contains basic shape specification about a tensor. 8 | It is often used as an auxiliary input/output of models, 9 | so that shape information can be propagated between pytorch modules. 10 | 11 | Attributes: 12 | channels: 13 | height: 14 | width: 15 | stride: 16 | """ 17 | 18 | def __new__(cls, *, channels=None, height=None, width=None, stride=None): 19 | return super().__new__(cls, channels, height, width, stride) 20 | -------------------------------------------------------------------------------- /gluoncv/torch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """GluonCV-Torch utility functions.""" 2 | -------------------------------------------------------------------------------- /gluoncv/torch/utils/eval_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/torch/utils/eval_utils/__init__.py -------------------------------------------------------------------------------- /gluoncv/torch/utils/random.py: -------------------------------------------------------------------------------- 1 | """Utils for random states""" 2 | import os 3 | from datetime import datetime 4 | import random 5 | import logging 6 | 7 | import numpy as np 8 | import torch 9 | 10 | __all__ = ['seed_all_rng'] 11 | 12 | 13 | def seed_all_rng(seed=None): 14 | """ 15 | Set the random seed for the RNG in torch, numpy and python. 16 | 17 | Args: 18 | seed (int): if None, will use a strong random seed.
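19 | 
20 |     Example (illustrative)::
21 | 
22 |         seed_all_rng(42)  # deterministic seeding of torch, numpy and python's random
23 |         seed_all_rng()    # derive a strong seed from pid, wall clock and os.urandom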
19 | """ 20 | if seed is None: 21 | seed = ( 22 | os.getpid() 23 | + int(datetime.now().strftime("%S%f")) 24 | + int.from_bytes(os.urandom(2), "big") 25 | ) 26 | logger = logging.getLogger(__name__) 27 | logger.info("Using a generated random seed {}".format(seed)) 28 | np.random.seed(seed) 29 | torch.set_rng_state(torch.manual_seed(seed).get_state()) 30 | random.seed(seed) 31 | -------------------------------------------------------------------------------- /gluoncv/torch/utils/task_utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Task utils""" 2 | from .classification import train_classification, validation_classification, test_classification 3 | from .coot import train_coot, validate_coot 4 | from .pose import DirectposePipeline, build_pose_optimizer 5 | -------------------------------------------------------------------------------- /gluoncv/torch/utils/tvm_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/torch/utils/tvm_utils/__init__.py -------------------------------------------------------------------------------- /gluoncv/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """GluonCV Utility functions.""" 2 | # pylint: disable=wildcard-import,exec-used,wrong-import-position 3 | from __future__ import absolute_import 4 | 5 | import types 6 | 7 | def import_dummy_module(code, name): 8 | # create blank module 9 | module = types.ModuleType(name) 10 | # populate the module with code 11 | exec(code, module.__dict__) 12 | return module 13 | 14 | dummy_module = """ 15 | def __getattr__(name): 16 | raise AttributeError(f"gluoncv.utils.{__name__} module requires mxnet which is missing.") 17 | """ 18 | 19 | 20 | from . import bbox 21 | from . import random 22 | from . import filesystem 23 | try: 24 | import mxnet 25 | from . import viz 26 | from . import metrics 27 | from . 
 import parallel 28 | from .lr_scheduler import LRSequential, LRScheduler 29 | from .export_helper import export_block, export_tvm 30 | from .sync_loader_helper import split_data, split_and_load 31 | except ImportError: 32 | viz = import_dummy_module(dummy_module, 'viz') 33 | metrics = import_dummy_module(dummy_module, 'metrics') 34 | parallel = import_dummy_module(dummy_module, 'parallel') 35 | LRSequential, LRScheduler = None, None 36 | export_block, export_tvm = None, None 37 | split_data, split_and_load = None, None 38 | 39 | from .download import download, check_sha1 40 | from .filesystem import makedirs, try_import_dali, try_import_cv2 41 | from .bbox import bbox_iou 42 | from .block import recursive_visit, set_lr_mult, freeze_bn 43 | from .plot_history import TrainingHistory 44 | from .version import * 45 | -------------------------------------------------------------------------------- /gluoncv/utils/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | """Custom evaluation metrics""" 2 | from __future__ import absolute_import 3 | 4 | from .coco_detection import COCODetectionMetric 5 | from .coco_keypoints import COCOKeyPointsMetric 6 | from .voc_detection import VOCMApMetric, VOC07MApMetric 7 | from .segmentation import SegmentationMetric 8 | from .heatmap_accuracy import HeatmapAccuracy 9 | -------------------------------------------------------------------------------- /gluoncv/utils/random.py: -------------------------------------------------------------------------------- 1 | """Random wrapper.""" 2 | from __future__ import absolute_import 3 | import random as pyrandom 4 | import numpy as np 5 | try: 6 | import mxnet as mx 7 | except ImportError: 8 | mx = None 9 | 10 | 11 | def seed(a=None): 12 | """Seed the generators for python builtin random, numpy.random and mxnet.random. 13 | 14 | This method seeds the python, numpy and mxnet random states in a single call. 15 | 16 | Note that this function cannot guarantee 100 percent reproducibility due to 17 | hardware settings. 18 | 19 | Parameters 20 | ---------- 21 | a : int or 1-d array_like, optional 22 | Initialize internal state of the random number generator. 23 | If `a` is neither None nor an int, then ``hash(a)`` is used instead. 24 | Note that the hash values for some types are nondeterministic. 25 | 26 | """ 27 | pyrandom.seed(a) 28 | np.random.seed(a) 29 | if mx is not None: 30 | mx.random.seed(a) 31 | -------------------------------------------------------------------------------- /gluoncv/utils/version.py: -------------------------------------------------------------------------------- 1 | """Utility functions for version checking.""" 2 | import warnings 3 | 4 | __all__ = ['check_version'] 5 | 6 | def check_version(min_version, warning_only=False): 7 | """Check that the installed gluoncv version satisfies the provided minimum version. 8 | An exception is thrown if the check does not pass. 9 | 10 | Parameters 11 | ---------- 12 | min_version : str 13 | Minimum version 14 | warning_only : bool 15 | If True, print a warning instead of throwing an exception. 16 | """ 17 | from ..
 import __version__ 18 | from distutils.version import LooseVersion 19 | bad_version = LooseVersion(__version__) < LooseVersion(min_version) 20 | if bad_version: 21 | msg = 'Installed GluonCV version (%s) does not satisfy the ' \ 22 | 'minimum required version (%s)'%(__version__, min_version) 23 | if warning_only: 24 | warnings.warn(msg) 25 | else: 26 | raise AssertionError(msg) 27 | -------------------------------------------------------------------------------- /gluoncv/utils/viz/__init__.py: -------------------------------------------------------------------------------- 1 | """Visualization tools""" 2 | from __future__ import absolute_import 3 | 4 | from .image import plot_image, cv_plot_image 5 | from .bbox import plot_bbox, cv_plot_bbox 6 | from .keypoints import plot_keypoints, cv_plot_keypoints 7 | from .mask import expand_mask, plot_mask, cv_merge_two_images 8 | from .segmentation import get_color_pallete, DeNormalize 9 | from .network import plot_network, plot_mxboard 10 | -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | # Launch scripts 2 | This folder contains the training, evaluation and demo Python scripts. 3 | Please refer to the [GluonCV](http://gluon-cv.mxnet.io/index.html) website for further instructions if needed. 4 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/i3d_nl10_resnet101_v1_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # i3d_nl10_resnet101_v1_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 100
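    # Editor's note (an illustrative reading of the keys below, not an
    # authoritative description of the trainer): with LR_POLICY 'Step', the base
    # LR of 0.01 is multiplied by STEP (0.1) at each epoch in LR_MILESTONE,
    # i.e. 0.01 until epoch 40, 0.001 until epoch 80, then 0.0001; MOMENTUM and
    # W_DECAY are assumed to be passed to a momentum-SGD optimizer.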
18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [40, 80] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 32 38 | FRAME_RATE: 2 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'i3d_nl10_resnet50_v1_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/i3d_nl10_resnet50_v1_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/i3d_nl10_resnet50_v1_kinetics400/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/i3d_nl5_resnet101_v1_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # i3d_nl5_resnet101_v1_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 100 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [40, 80] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 32 38 | FRAME_RATE: 2 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'i3d_nl5_resnet101_v1_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/i3d_nl5_resnet101_v1_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/i3d_nl5_resnet101_v1_kinetics400/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/i3d_nl5_resnet50_v1_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # i3d_nl5_resnet50_v1_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.69.134:23456' 9 | WOLRD_URLS: ['172.31.69.134'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 100 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [40, 80] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 5 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: 
'/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 32 38 | FRAME_RATE: 2 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'i3d_nl5_resnet50_v1_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/i3d_nl5_resnet50_v1_kinetics400/' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/i3d_nl5_resnet50_v1_kinetics400/eval' 55 | SAVE_FREQ: 5 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/i3d_resnet101_v1_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # i3d_resnet101_v1_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 100 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [40, 80] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 32 38 | FRAME_RATE: 2 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'i3d_resnet101_v1_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/i3d_resnet101_v1_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/i3d_resnet101_v1_kinetics400/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/i3d_resnet50_v1_custom.yaml: -------------------------------------------------------------------------------- 1 | # i3d_resnet50_v1_custom 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.252:23456' 9 | WOLRD_URLS: ['172.31.72.252'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 100 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [40, 80] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/' 36 | NUM_CLASSES: 174 37 | CLIP_LEN: 32 38 | FRAME_RATE: 2 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'i3d_resnet50_v1_custom' 48 | PRETRAINED: True # Default True, use a backbone pretrained on 
K400. If set to False, the model is just inflated from 2D ImageNet weights. 49 | 50 | LOG: 51 | BASE_PATH: './logs/i3d_resnet50_v1_custom' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/i3d_resnet50_v1_custom/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/i3d_resnet50_v1_feat.yaml: -------------------------------------------------------------------------------- 1 | # i3d_resnet50_v1_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 1 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.252:23456' 9 | WOLRD_URLS: ['172.31.72.252'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | INFERENCE: 17 | FEAT: True 18 | 19 | DATA: 20 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 21 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 22 | NUM_CLASSES: 400 23 | CLIP_LEN: 32 24 | FRAME_RATE: 2 25 | NUM_SEGMENT: 1 26 | NUM_CROP: 1 27 | MULTIGRID: False 28 | KEEP_ASPECT_RATIO: False 29 | 30 | MODEL: 31 | NAME: 'i3d_resnet50_v1_kinetics400' 32 | PRETRAINED: True 33 | 34 | LOG: 35 | BASE_PATH: './logs/i3d_resnet50_v1_kinetics400' 36 | SAVE_DIR: 'features' 37 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/i3d_resnet50_v1_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # i3d_resnet50_v1_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.69.242:23456' 9 | WOLRD_URLS: ['172.31.69.242'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 100 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [40, 80] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 5 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 32 38 | FRAME_RATE: 2 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'i3d_resnet50_v1_kinetics400' 48 | PRETRAINED: True 49 | 50 | LOG: 51 | BASE_PATH: './logs/i3d_resnet50_v1_kinetics400/' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/i3d_resnet50_v1_kinetics400/eval' 55 | SAVE_FREQ: 5 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/i3d_resnet50_v1_sthsthv2.yaml: -------------------------------------------------------------------------------- 1 | # i3d_resnet50_v1_sthsthv2 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 100 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | LR_POLICY: 'Step' 21 | MOMENTUM: 0.9 22 | W_DECAY: 1e-5 23 | USE_WARMUP: 
False 24 | LR_MILESTONE: [40, 80] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/' 36 | NUM_CLASSES: 174 37 | CLIP_LEN: 16 38 | FRAME_RATE: 2 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 2 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'i3d_resnet50_v1_sthsthv2' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/i3d_resnet50_v1_sthsthv2' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/i3d_resnet50_v1_sthsthv2/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/i3d_slow_resnet101_f16s4_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # i3d_slow_resnet101_f16s4_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 196 18 | BATCH_SIZE: 8 19 | LR: 0.1 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-4 22 | USE_WARMUP: True 23 | WARMUP_EPOCHS: 34 24 | LR_POLICY: 'Cosine' 25 | RESUME_EPOCH: -1 26 | 27 | VAL: 28 | FREQ: 5 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 16 38 | FRAME_RATE: 4 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'i3d_slow_resnet101_f16s4_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/i3d_slow_resnet101_f16s4_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/i3d_slow_resnet101_f16s4_kinetics400/eval' 55 | DISPLAY_FREQ: 50 56 | SAVE_FREQ: 5 57 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/i3d_slow_resnet101_f16s4_kinetics700.yaml: -------------------------------------------------------------------------------- 1 | # i3d_slow_resnet101_f16s4_kinetics700 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.69.242:23456' 9 | WOLRD_URLS: ['172.31.69.242'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 196 18 | BATCH_SIZE: 8 19 | LR: 0.1 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-4 22 | USE_WARMUP: True 23 | WARMUP_EPOCHS: 34 24 | LR_POLICY: 'Cosine' 25 | RESUME_EPOCH: -1 26 | 27 | VAL: 28 | FREQ: 5 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics700_v1/k700_v1_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics700_v1/k700_v1_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics700_v1/train_256/' 35 | VAL_DATA_PATH: 
'/home/ubuntu/data/kinetics700_v1/val_256/' 36 | NUM_CLASSES: 700 37 | CLIP_LEN: 16 38 | FRAME_RATE: 4 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: True 45 | 46 | MODEL: 47 | NAME: 'i3d_slow_resnet101_f16s4_kinetics700' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/i3d_slow_resnet101_f16s4_kinetics700' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/i3d_slow_resnet101_f16s4_kinetics700/eval' 55 | DISPLAY_FREQ: 50 56 | SAVE_FREQ: 5 57 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/i3d_slow_resnet101_f32s2_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # i3d_slow_resnet101_f32s2_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 196 18 | BATCH_SIZE: 8 19 | LR: 0.1 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-4 22 | USE_WARMUP: True 23 | WARMUP_EPOCHS: 34 24 | LR_POLICY: 'Cosine' 25 | RESUME_EPOCH: -1 26 | 27 | VAL: 28 | FREQ: 5 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 32 38 | FRAME_RATE: 2 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'i3d_slow_resnet101_f32s2_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/i3d_slow_resnet101_f32s2_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/i3d_slow_resnet101_f32s2_kinetics400/eval' 55 | DISPLAY_FREQ: 50 56 | SAVE_FREQ: 5 57 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/i3d_slow_resnet101_f8s8_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # i3d_slow_resnet101_f8s8_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 196 18 | BATCH_SIZE: 8 19 | LR: 0.1 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-4 22 | USE_WARMUP: True 23 | WARMUP_EPOCHS: 34 24 | LR_POLICY: 'Cosine' 25 | RESUME_EPOCH: -1 26 | 27 | VAL: 28 | FREQ: 5 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 8 38 | FRAME_RATE: 8 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'i3d_slow_resnet101_f8s8_kinetics400' 48 | PRETRAINED: False 
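  # Editor's note: PRETRAINED: False means no released video weights are loaded
  # before training; per the comment in i3d_resnet50_v1_custom.yaml above, an
  # i3d network is then inflated from 2D ImageNet weights instead.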
49 | 50 | LOG: 51 | BASE_PATH: './logs/i3d_slow_resnet101_f8s8_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/i3d_slow_resnet101_f8s8_kinetics400/eval' 55 | DISPLAY_FREQ: 50 56 | SAVE_FREQ: 5 57 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/i3d_slow_resnet50_f16s4_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # i3d_slow_resnet50_f16s4_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 196 18 | BATCH_SIZE: 8 19 | LR: 0.1 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-4 22 | USE_WARMUP: True 23 | WARMUP_EPOCHS: 34 24 | LR_POLICY: 'Cosine' 25 | RESUME_EPOCH: -1 26 | 27 | VAL: 28 | FREQ: 5 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 16 38 | FRAME_RATE: 4 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'i3d_slow_resnet50_f16s4_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/i3d_slow_resnet50_f16s4_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/i3d_slow_resnet50_f16s4_kinetics400/eval' 55 | DISPLAY_FREQ: 50 56 | SAVE_FREQ: 5 57 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/i3d_slow_resnet50_f32s2_custom.yaml: -------------------------------------------------------------------------------- 1 | # i3d_slow_resnet50_f32s2_custom 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.252:23456' 9 | WOLRD_URLS: ['172.31.72.252'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 196 18 | BATCH_SIZE: 8 19 | LR: 0.1 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-4 22 | USE_WARMUP: True 23 | WARMUP_EPOCHS: 34 24 | LR_POLICY: 'Cosine' 25 | RESUME_EPOCH: -1 26 | 27 | VAL: 28 | FREQ: 5 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/' 36 | NUM_CLASSES: 174 37 | CLIP_LEN: 32 38 | FRAME_RATE: 2 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'i3d_slow_resnet50_f32s2_custom' 48 | PRETRAINED: True 49 | 50 | LOG: 51 | BASE_PATH: './logs/i3d_slow_resnet50_f32s2_custom' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/i3d_slow_resnet50_f32s2_custom/eval' 55 | DISPLAY_FREQ: 50 56 | SAVE_FREQ: 5 57 | -------------------------------------------------------------------------------- 
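Editor's note: a minimal sketch of how these YAML files are typically consumed, placed here between two related configs. It assumes the config is overlaid onto the registered defaults via yacs' merge_from_file, that gluoncv.torch.model_zoo.get_model builds the network named by cfg.CONFIG.MODEL.NAME, and that 224x224 crops are a valid input size for this model; the file path and tensor shapes are placeholders, not shipped defaults.

    import torch
    from gluoncv.torch.engine.config import get_cfg_defaults
    from gluoncv.torch.model_zoo import get_model

    # Start from the registered action-recognition defaults, then overlay one
    # of the YAML files in this folder (path is a placeholder).
    cfg = get_cfg_defaults(name='action_recognition')
    cfg.merge_from_file('./configuration/i3d_slow_resnet50_f32s2_kinetics400.yaml')

    # Assumption: get_model dispatches on cfg.CONFIG.MODEL.NAME and honors
    # cfg.CONFIG.MODEL.PRETRAINED.
    model = get_model(cfg)
    model.eval()

    # One dummy clip: batch x channels x CLIP_LEN frames x height x width.
    clip = torch.randn(1, 3, cfg.CONFIG.DATA.CLIP_LEN, 224, 224)
    with torch.no_grad():
        logits = model(clip)  # expected shape: (1, cfg.CONFIG.DATA.NUM_CLASSES)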
/scripts/action-recognition/configuration/i3d_slow_resnet50_f32s2_feat.yaml: -------------------------------------------------------------------------------- 1 | # i3d_slow_resnet50_f32s2_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | INFERENCE: 17 | FEAT: True 18 | 19 | DATA: 20 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 21 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 22 | NUM_CLASSES: 400 23 | CLIP_LEN: 32 24 | FRAME_RATE: 2 25 | NUM_SEGMENT: 1 26 | NUM_CROP: 1 27 | TEST_NUM_SEGMENT: 10 28 | TEST_NUM_CROP: 3 29 | MULTIGRID: False 30 | KEEP_ASPECT_RATIO: False 31 | 32 | MODEL: 33 | NAME: 'i3d_slow_resnet50_f32s2_kinetics400' 34 | PRETRAINED: True 35 | 36 | LOG: 37 | BASE_PATH: './logs/i3d_slow_resnet50_f32s2_kinetics400' 38 | SAVE_DIR: 'features' 39 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/i3d_slow_resnet50_f32s2_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # i3d_slow_resnet50_f32s2_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 196 18 | BATCH_SIZE: 8 19 | LR: 0.1 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-4 22 | USE_WARMUP: True 23 | WARMUP_EPOCHS: 34 24 | LR_POLICY: 'Cosine' 25 | RESUME_EPOCH: -1 26 | 27 | VAL: 28 | FREQ: 5 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 |
VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 8 38 | FRAME_RATE: 8 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'i3d_slow_resnet50_f8s8_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/i3d_slow_resnet50_f8s8_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/i3d_slow_resnet50_f8s8_kinetics400/eval' 55 | DISPLAY_FREQ: 50 56 | SAVE_FREQ: 5 57 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/ircsn_v2_resnet152_f32s2_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # ircsn_v2_resnet152_f32s2_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.252:23456' 9 | WOLRD_URLS: ['172.31.72.252'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 58 # finetune from a pretrained model, hence small lr 18 | BATCH_SIZE: 8 19 | LR: 0.000125 20 | LR_POLICY: 'Step' 21 | MOMENTUM: 0.9 22 | W_DECAY: 1e-5 23 | USE_WARMUP: False 24 | LR_MILESTONE: [32, 48] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 32 38 | FRAME_RATE: 2 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: True 45 | 46 | MODEL: 47 | NAME: 'ircsn_v2_resnet152_f32s2_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/ircsn_v2_resnet152_f32s2_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/ircsn_v2_resnet152_f32s2_kinetics400/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/r2plus1d_v1_resnet18_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # r2plus1d_v1_resnet18_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 196 18 | BATCH_SIZE: 8 19 | LR: 0.001 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | USE_WARMUP: True 23 | WARMUP_EPOCHS: 34 24 | LR_POLICY: 'Cosine' 25 | WARMUP_END_LR: 0.1 26 | RESUME_EPOCH: -1 27 | 28 | VAL: 29 | FREQ: 2 30 | BATCH_SIZE: 8 31 | 32 | DATA: 33 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 34 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 35 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 36 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 37 | NUM_CLASSES: 400 38 | CLIP_LEN: 16 39 | FRAME_RATE: 2 40 | NUM_SEGMENT: 1 41 | NUM_CROP: 1 42 
| TEST_NUM_SEGMENT: 10 43 | TEST_NUM_CROP: 3 44 | MULTIGRID: False 45 | KEEP_ASPECT_RATIO: False 46 | CROP_SIZE: 112 47 | SHORT_SIDE_SIZE: 128 48 | NEW_HEIGHT: 128 49 | NEW_WIDTH: 171 50 | 51 | MODEL: 52 | NAME: 'r2plus1d_v1_resnet18_kinetics400' 53 | PRETRAINED: False 54 | 55 | LOG: 56 | BASE_PATH: './logs/r2plus1d_v1_resnet18_kinetics400' 57 | LOG_DIR: 'tb_log' 58 | SAVE_DIR: 'checkpoints' 59 | EVAL_DIR: './logs/r2plus1d_v1_resnet18_kinetics400/eval' 60 | SAVE_FREQ: 2 61 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/r2plus1d_v1_resnet34_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # r2plus1d_v1_resnet34_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 196 18 | BATCH_SIZE: 8 19 | LR: 0.001 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | USE_WARMUP: True 23 | WARMUP_EPOCHS: 34 24 | LR_POLICY: 'Cosine' 25 | WARMUP_END_LR: 0.1 26 | RESUME_EPOCH: -1 27 | 28 | VAL: 29 | FREQ: 2 30 | BATCH_SIZE: 8 31 | 32 | DATA: 33 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 34 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 35 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 36 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 37 | NUM_CLASSES: 400 38 | CLIP_LEN: 16 39 | FRAME_RATE: 2 40 | NUM_SEGMENT: 1 41 | NUM_CROP: 1 42 | TEST_NUM_SEGMENT: 10 43 | TEST_NUM_CROP: 3 44 | MULTIGRID: False 45 | KEEP_ASPECT_RATIO: False 46 | CROP_SIZE: 112 47 | SHORT_SIDE_SIZE: 128 48 | NEW_HEIGHT: 128 49 | NEW_WIDTH: 171 50 | 51 | MODEL: 52 | NAME: 'r2plus1d_v1_resnet34_kinetics400' 53 | PRETRAINED: False 54 | 55 | LOG: 56 | BASE_PATH: './logs/r2plus1d_v1_resnet34_kinetics400' 57 | LOG_DIR: 'tb_log' 58 | SAVE_DIR: 'checkpoints' 59 | EVAL_DIR: './logs/r2plus1d_v1_resnet34_kinetics400/eval' 60 | SAVE_FREQ: 2 61 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/r2plus1d_v1_resnet50_feat.yaml: -------------------------------------------------------------------------------- 1 | # r2plus1d_v1_resnet50_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | INFERENCE: 17 | FEAT: True 18 | 19 | DATA: 20 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 21 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 22 | NUM_CLASSES: 400 23 | CLIP_LEN: 16 24 | FRAME_RATE: 2 25 | NUM_SEGMENT: 1 26 | NUM_CROP: 1 27 | MULTIGRID: False 28 | KEEP_ASPECT_RATIO: False 29 | CROP_SIZE: 112 30 | SHORT_SIDE_SIZE: 128 31 | NEW_HEIGHT: 128 32 | NEW_WIDTH: 171 33 | 34 | MODEL: 35 | NAME: 'r2plus1d_v1_resnet50_kinetics400' 36 | PRETRAINED: True 37 | 38 | LOG: 39 | BASE_PATH: './logs/r2plus1d_v1_resnet50_kinetics400' 40 | SAVE_DIR: 'features' 41 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/resnet101_v1b_kinetics400.yaml: 
-------------------------------------------------------------------------------- 1 | # resnet101_v1b_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 100 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [40, 80] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 1 38 | FRAME_RATE: 1 39 | NUM_SEGMENT: 7 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'resnet101_v1b_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/resnet101_v1b_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/resnet101_v1b_kinetics400/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/resnet152_v1b_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # resnet152_v1b_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 100 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [40, 80] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 1 38 | FRAME_RATE: 1 39 | NUM_SEGMENT: 7 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'resnet152_v1b_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/resnet152_v1b_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/resnet152_v1b_kinetics400/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/resnet18_v1b_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # resnet18_v1b_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 100 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | 
W_DECAY: 1e-5 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [40, 80] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 1 38 | FRAME_RATE: 1 39 | NUM_SEGMENT: 7 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'resnet18_v1b_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/resnet18_v1b_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/resnet18_v1b_kinetics400/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/resnet34_v1b_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # resnet34_v1b_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 100 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [40, 80] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 1 38 | FRAME_RATE: 1 39 | NUM_SEGMENT: 7 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'resnet34_v1b_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/resnet34_v1b_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/resnet34_v1b_kinetics400/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/resnet50_v1b_custom.yaml: -------------------------------------------------------------------------------- 1 | # resnet50_v1b_custom 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.252:23456' 9 | WOLRD_URLS: ['172.31.72.252'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 100 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [40, 80] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/' 36 | NUM_CLASSES: 174 37 | CLIP_LEN: 1 38 | 
FRAME_RATE: 1 39 | NUM_SEGMENT: 7 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'resnet50_v1b_custom' 48 | PRETRAINED: True 49 | 50 | LOG: 51 | BASE_PATH: './logs/resnet50_v1b_custom' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/resnet50_v1b_custom/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/resnet50_v1b_feat.yaml: -------------------------------------------------------------------------------- 1 | # resnet50_v1b_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | INFERENCE: 17 | FEAT: True 18 | 19 | DATA: 20 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 21 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 22 | NUM_CLASSES: 400 23 | CLIP_LEN: 1 24 | FRAME_RATE: 1 25 | NUM_SEGMENT: 1 26 | NUM_CROP: 1 27 | MULTIGRID: False 28 | KEEP_ASPECT_RATIO: False 29 | 30 | MODEL: 31 | NAME: 'resnet50_v1b_kinetics400' 32 | PRETRAINED: True 33 | 34 | LOG: 35 | BASE_PATH: './logs/resnet50_v1b_kinetics400' 36 | SAVE_DIR: 'features' 37 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/resnet50_v1b_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # resnet50_v1b_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 100 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [40, 80] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 1 38 | FRAME_RATE: 1 39 | NUM_SEGMENT: 7 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'resnet50_v1b_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/resnet50_v1b_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/resnet50_v1b_kinetics400/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/resnet50_v1b_sthsthv2.yaml: -------------------------------------------------------------------------------- 1 | # resnet50_v1b_sthsthv2 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 30 18 | 
BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [10, 20] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/' 36 | NUM_CLASSES: 174 37 | CLIP_LEN: 1 38 | FRAME_RATE: 1 39 | NUM_SEGMENT: 8 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 8 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'resnet50_v1b_sthsthv2' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/resnet50_v1b_sthsthv2' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/resnet50_v1b_sthsthv2/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/slowfast_16x8_resnet50_sthsthv2.yaml: -------------------------------------------------------------------------------- 1 | # slowfast_16x8_resnet50_sthsthv2 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 30 18 | BATCH_SIZE: 8 19 | LR: 0.0001 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-6 22 | USE_WARMUP: True 23 | WARMUP_EPOCHS: 5 24 | LR_POLICY: 'Cosine' 25 | WARMUP_END_LR: 0.1 26 | RESUME_EPOCH: -1 27 | 28 | VAL: 29 | FREQ: 2 30 | BATCH_SIZE: 8 31 | 32 | DATA: 33 | TRAIN_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_train.txt' 34 | VAL_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_val.txt' 35 | TRAIN_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/' 36 | VAL_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/' 37 | NUM_CLASSES: 174 38 | CLIP_LEN: 64 39 | FRAME_RATE: 2 40 | NUM_SEGMENT: 1 41 | NUM_CROP: 1 42 | TEST_NUM_SEGMENT: 2 43 | TEST_NUM_CROP: 3 44 | MULTIGRID: False 45 | KEEP_ASPECT_RATIO: False 46 | 47 | MODEL: 48 | NAME: 'slowfast_16x8_resnet50_sthsthv2' 49 | PRETRAINED: False 50 | 51 | LOG: 52 | BASE_PATH: './logs/slowfast_16x8_resnet50_sthsthv2' 53 | LOG_DIR: 'tb_log' 54 | SAVE_DIR: 'checkpoints' 55 | EVAL_DIR: './logs/slowfast_16x8_resnet50_sthsthv2/eval' 56 | SAVE_FREQ: 2 57 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/slowfast_4x16_resnet50_custom.yaml: -------------------------------------------------------------------------------- 1 | # slowfast_4x16_resnet50_custom 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.252:23456' 9 | WOLRD_URLS: ['172.31.72.252'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 196 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | USE_WARMUP: True 23 | WARMUP_EPOCHS: 34 24 | LR_POLICY: 'Cosine' 25 | WARMUP_END_LR: 0.1 26 | RESUME_EPOCH: -1 27 | 28 | VAL: 29 | FREQ: 2 30 | BATCH_SIZE: 8 31 | 32 | DATA: 33 | TRAIN_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_train.txt' 34 | VAL_ANNO_PATH: 
'/home/ubuntu/data/sthsthv2/sthsthv2_val.txt' 35 | TRAIN_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/' 36 | VAL_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/' 37 | NUM_CLASSES: 174 38 | CLIP_LEN: 32 39 | FRAME_RATE: 2 40 | NUM_SEGMENT: 1 41 | NUM_CROP: 1 42 | TEST_NUM_SEGMENT: 10 43 | TEST_NUM_CROP: 3 44 | MULTIGRID: False 45 | KEEP_ASPECT_RATIO: False 46 | 47 | MODEL: 48 | NAME: 'slowfast_4x16_resnet50_custom' 49 | PRETRAINED: True 50 | 51 | LOG: 52 | BASE_PATH: './logs/slowfast_4x16_resnet50_custom' 53 | LOG_DIR: 'tb_log' 54 | SAVE_DIR: 'checkpoints' 55 | EVAL_DIR: './logs/slowfast_4x16_resnet50_custom/eval' 56 | SAVE_FREQ: 2 57 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/slowfast_4x16_resnet50_feat.yaml: -------------------------------------------------------------------------------- 1 | # slowfast_4x16_resnet50_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | INFERENCE: 17 | FEAT: True 18 | 19 | DATA: 20 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 21 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 22 | NUM_CLASSES: 400 23 | CLIP_LEN: 32 24 | FRAME_RATE: 2 25 | NUM_SEGMENT: 1 26 | NUM_CROP: 1 27 | MULTIGRID: False 28 | KEEP_ASPECT_RATIO: False 29 | 30 | MODEL: 31 | NAME: 'slowfast_4x16_resnet50_kinetics400' 32 | PRETRAINED: True # load the released Kinetics400 weights; random weights would make extracted features meaningless 33 | 34 | LOG: 35 | BASE_PATH: './logs/slowfast_4x16_resnet50_kinetics400' 36 | SAVE_DIR: 'features' 37 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/slowfast_4x16_resnet50_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # slowfast_4x16_resnet50_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 196 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | USE_WARMUP: True 23 | WARMUP_EPOCHS: 34 24 | LR_POLICY: 'Cosine' 25 | RESUME_EPOCH: -1 26 | 27 | VAL: 28 | FREQ: 5 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 32 38 | FRAME_RATE: 2 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'slowfast_4x16_resnet50_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/slowfast_4x16_resnet50_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/slowfast_4x16_resnet50_kinetics400/eval' 55 | SAVE_FREQ: 5 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/slowfast_8x8_resnet101_kinetics400.yaml:
-------------------------------------------------------------------------------- 1 | # slowfast_8x8_resnet101_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 196 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | USE_WARMUP: True 23 | WARMUP_EPOCHS: 34 24 | LR_POLICY: 'Cosine' 25 | WARMUP_END_LR: 0.1 26 | RESUME_EPOCH: -1 27 | 28 | VAL: 29 | FREQ: 2 30 | BATCH_SIZE: 8 31 | 32 | DATA: 33 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 34 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 35 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 36 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 37 | NUM_CLASSES: 400 38 | CLIP_LEN: 32 39 | FRAME_RATE: 2 40 | NUM_SEGMENT: 1 41 | NUM_CROP: 1 42 | TEST_NUM_SEGMENT: 10 43 | TEST_NUM_CROP: 3 44 | MULTIGRID: False 45 | KEEP_ASPECT_RATIO: False 46 | 47 | MODEL: 48 | NAME: 'slowfast_8x8_resnet101_kinetics400' 49 | PRETRAINED: False 50 | 51 | LOG: 52 | BASE_PATH: './logs/slowfast_8x8_resnet101_kinetics400' 53 | LOG_DIR: 'tb_log' 54 | SAVE_DIR: 'checkpoints' 55 | EVAL_DIR: './logs/slowfast_8x8_resnet101_kinetics400/eval' 56 | SAVE_FREQ: 2 57 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/slowfast_8x8_resnet50_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # slowfast_8x8_resnet50_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 196 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | USE_WARMUP: True 23 | WARMUP_EPOCHS: 34 24 | LR_POLICY: 'Cosine' 25 | WARMUP_END_LR: 0.1 26 | RESUME_EPOCH: -1 27 | 28 | VAL: 29 | FREQ: 2 30 | BATCH_SIZE: 8 31 | 32 | DATA: 33 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 34 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 35 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 36 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 37 | NUM_CLASSES: 400 38 | CLIP_LEN: 32 39 | FRAME_RATE: 2 40 | NUM_SEGMENT: 1 41 | NUM_CROP: 1 42 | TEST_NUM_SEGMENT: 10 43 | TEST_NUM_CROP: 3 44 | MULTIGRID: False 45 | KEEP_ASPECT_RATIO: False 46 | 47 | MODEL: 48 | NAME: 'slowfast_8x8_resnet50_kinetics400' 49 | PRETRAINED: False 50 | 51 | LOG: 52 | BASE_PATH: './logs/slowfast_8x8_resnet50_kinetics400' 53 | LOG_DIR: 'tb_log' 54 | SAVE_DIR: 'checkpoints' 55 | EVAL_DIR: './logs/slowfast_8x8_resnet50_kinetics400/eval' 56 | SAVE_FREQ: 2 57 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/tpn_resnet101_f16s4_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # tpn_resnet101_f16s4_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 
'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 150 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [75, 125] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 16 38 | FRAME_RATE: 4 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'tpn_resnet101_f16s4_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/tpn_resnet101_f16s4_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/tpn_resnet101_f16s4_kinetics400/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/tpn_resnet101_f32s2_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # tpn_resnet101_f32s2_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.252:23456' 9 | WOLRD_URLS: ['172.31.72.252'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 150 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [75, 125] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 32 38 | FRAME_RATE: 2 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'tpn_resnet101_f32s2_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/tpn_resnet101_f32s2_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/tpn_resnet101_f32s2_kinetics400/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/tpn_resnet101_f8s8_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # tpn_resnet101_f8s8_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 150 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [75, 125] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: 
'/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 8 38 | FRAME_RATE: 8 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'tpn_resnet101_f8s8_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/tpn_resnet101_f8s8_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/tpn_resnet101_f8s8_kinetics400/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/tpn_resnet50_f16s4_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # tpn_resnet50_f16s4_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 150 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [75, 125] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 16 38 | FRAME_RATE: 4 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'tpn_resnet50_f16s4_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/tpn_resnet50_f16s4_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/tpn_resnet50_f16s4_kinetics400/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/tpn_resnet50_f32s2_custom.yaml: -------------------------------------------------------------------------------- 1 | # tpn_resnet50_f32s2_custom 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.252:23456' 9 | WOLRD_URLS: ['172.31.72.252'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 150 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | LR_POLICY: 'Step' 21 | MOMENTUM: 0.9 22 | W_DECAY: 1e-5 23 | USE_WARMUP: False 24 | LR_MILESTONE: [75, 125] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 32 38 | FRAME_RATE: 2 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'tpn_resnet50_f32s2_custom' 48 | PRETRAINED: 
True 49 | 50 | LOG: 51 | BASE_PATH: './logs/tpn_resnet50_f32s2_custom' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/tpn_resnet50_f32s2_custom/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/tpn_resnet50_f32s2_feat.yaml: -------------------------------------------------------------------------------- 1 | # tpn_resnet50_f32s2_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | 17 | INFERENCE: 18 | FEAT: True 19 | 20 | DATA: 21 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 22 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 23 | NUM_CLASSES: 400 24 | CLIP_LEN: 32 25 | FRAME_RATE: 2 26 | NUM_SEGMENT: 1 27 | NUM_CROP: 1 28 | MULTIGRID: False 29 | KEEP_ASPECT_RATIO: False 30 | 31 | MODEL: 32 | NAME: 'tpn_resnet50_f32s2_kinetics400' 33 | PRETRAINED: True 34 | 35 | LOG: 36 | BASE_PATH: './logs/tpn_resnet50_f32s2_kinetics400' 37 | SAVE_DIR: 'features' 38 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/tpn_resnet50_f32s2_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # tpn_resnet50_f32s2_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 150 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [75, 125] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 32 38 | FRAME_RATE: 2 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'tpn_resnet50_f32s2_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/tpn_resnet50_f32s2_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/tpn_resnet50_f32s2_kinetics400/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/configuration/tpn_resnet50_f8s8_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # tpn_resnet50_f8s8_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.0.32:23456' 9 | WOLRD_URLS: ['172.31.0.32'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 150 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-4 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [75, 
125] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 10 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 8 38 | FRAME_RATE: 8 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'tpn_resnet50_f8s8_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/tpn_resnet50_f8s8_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/tpn_resnet50_f8s8_kinetics400/eval' 55 | SAVE_FREQ: 10 56 | -------------------------------------------------------------------------------- /scripts/action-recognition/get_flops.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script to compute FLOPs of a model 3 | """ 4 | import os 5 | import argparse 6 | 7 | import torch 8 | from gluoncv.torch.model_zoo import get_model 9 | from gluoncv.torch.engine.config import get_cfg_defaults 10 | 11 | from thop import profile, clever_format 12 | 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser(description='Compute FLOPs of a model.') 16 | parser.add_argument('--config-file', type=str, help='path to config file.') 17 | parser.add_argument('--num-frames', type=int, default=32, help='temporal clip length.') 18 | parser.add_argument('--input-size', type=int, default=224, 19 | help='size of the input image. default is 224') 20 | 21 | args = parser.parse_args() 22 | cfg = get_cfg_defaults() 23 | cfg.merge_from_file(args.config_file) 24 | 25 | model = get_model(cfg) 26 | input_tensor = torch.rand(1, 3, args.num_frames, args.input_size, args.input_size) # dummy clip of shape (batch, channels, frames, H, W); torch.autograd.Variable is deprecated, a plain tensor suffices 27 | 28 | macs, params = profile(model, inputs=(input_tensor,)) 29 | macs, params = clever_format([macs, params], "%.3f") 30 | print("FLOPs: ", macs, "; #params: ", params) 31 | -------------------------------------------------------------------------------- /scripts/classification/cifar/README.md: -------------------------------------------------------------------------------- 1 | # Image Classification on CIFAR10 2 | 3 | Please refer to [GluonCV Model Zoo](http://gluon-cv.mxnet.io/model_zoo/index.html#image-classification) 4 | for available pretrained models, training hyper-parameters, etc.
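For a quick smoke test, the pretrained CIFAR-10 models can also be driven directly from Python. A minimal sketch, assuming the standard CIFAR-10 normalization constants and `cifar_resnet20_v1` as the model zoo name; the input file name is a placeholder:

```python
from mxnet import image, nd
from mxnet.gluon.data.vision import transforms
from gluoncv.model_zoo import get_model

# standard CIFAR-10 eval pipeline: 32x32 input, per-channel normalization
transform_fn = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
])

net = get_model('cifar_resnet20_v1', pretrained=True)
img = transform_fn(image.imread('some_image.jpg'))  # hypothetical input picture
pred = net(img.expand_dims(axis=0))                 # add a batch dimension
print('predicted class index:', nd.argmax(pred, axis=1).astype('int').asscalar())
```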
5 | -------------------------------------------------------------------------------- /scripts/classification/fit_classification.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import gluoncv as gcv 4 | gcv.utils.check_version('0.8.0') 5 | 6 | from gluoncv.auto.estimators import ImageClassificationEstimator 7 | from gluoncv.auto.tasks.utils import config_to_nested 8 | from d8.image_classification import Dataset 9 | 10 | 11 | if __name__ == '__main__': 12 | # specify hyperparameters 13 | config = { 14 | 'dataset': 'boat', 15 | 'gpus': [0, 1, 2, 3, 4, 5, 6, 7], 16 | 'estimator': 'img_cls', 17 | 'model': 'resnet50_v1b', 18 | 'batch_size': 128, # range [16, 32, 64, 128] 19 | 'epochs': 3 20 | } 21 | config = config_to_nested(config) 22 | config.pop('estimator') 23 | 24 | # specify dataset 25 | dataset = Dataset.get('boat') 26 | train_data, valid_data = dataset.split(0.8) 27 | 28 | # specify estimator 29 | estimator = ImageClassificationEstimator(config) 30 | 31 | # fit estimator 32 | estimator.fit(train_data, valid_data) 33 | 34 | # evaluate auto estimator 35 | top1, top5 = estimator.evaluate(valid_data) 36 | logging.info('evaluation: top1={}, top5={}'.format(top1, top5)) 37 | -------------------------------------------------------------------------------- /scripts/classification/imagenet/demo_imagenet.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from mxnet import nd, image 4 | 5 | import gluoncv as gcv 6 | gcv.utils.check_version('0.6.0') 7 | from gluoncv.data import ImageNet1kAttr 8 | from gluoncv.data.transforms.presets.imagenet import transform_eval 9 | from gluoncv.model_zoo import get_model 10 | 11 | parser = argparse.ArgumentParser(description='Predict ImageNet classes from a given image') 12 | parser.add_argument('--model', type=str, required=True, 13 | help='name of the model to use') 14 | parser.add_argument('--saved-params', type=str, default='', 15 | help='path to the saved model parameters') 16 | parser.add_argument('--input-pic', type=str, required=True, 17 | help='path to the input picture') 18 | opt = parser.parse_args() 19 | 20 | # Load Model 21 | model_name = opt.model 22 | pretrained = opt.saved_params == ''  # use model zoo weights unless a params file is given 23 | net = get_model(model_name, pretrained=pretrained) 24 | 25 | if not pretrained: 26 | net.load_parameters(opt.saved_params) 27 | attrib = ImageNet1kAttr() 28 | classes = attrib.classes 29 | else: 30 | classes = net.classes 31 | 32 | # Load Images 33 | img = image.imread(opt.input_pic) 34 | 35 | # Transform 36 | img = transform_eval(img) 37 | pred = net(img) 38 | 39 | topK = 5 40 | ind = nd.topk(pred, k=topK)[0].astype('int') 41 | print('The input picture is classified as') 42 | for i in range(topK): 43 | print('\t[%s], with probability %.3f.'% 44 | (classes[ind[i].asscalar()], nd.softmax(pred)[0][ind[i]].asscalar())) 45 | -------------------------------------------------------------------------------- /scripts/classification/imagenet/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -z "$MODEL" ]; then 4 | export MODEL=resnet18_v1 5 | fi 6 | 7 | if [ -z "$NUM_TRAINING_SAMPLES" ]; then 8 | export NUM_TRAINING_SAMPLES=1281167 9 | fi 10 | 11 | if [ -z "$NUM_EPOCHS" ]; then 12 | export NUM_EPOCHS=3 13 | fi 14 | 15 | if [ -z "$NUM_GPUS" ] || [ "$NUM_GPUS" -lt 0 ]; then 16 | export NUM_GPUS=0 17 | fi 18 | 19 | if [ -z "$DATA_BACKEND" ]; then 20 | export
DATA_BACKEND='mxnet' # Options are: dali-gpu, dali-cpu, mxnet 21 | fi 22 | 23 | if [ -z "$TRAIN_DATA_DIR" ]; then 24 | export TRAIN_DATA_DIR=~/.mxnet/datasets/imagenet 25 | fi 26 | 27 | if [ -z "$DALI_VER" ]; then 28 | export DALI_VER=nvidia-dali-cuda100 29 | fi 30 | 31 | python train_imagenet.py --model $MODEL --data-backend $DATA_BACKEND --num-gpus $NUM_GPUS \ 32 | --num-epochs $NUM_EPOCHS --num-training-samples $NUM_TRAINING_SAMPLES --use-rec \ 33 | --rec-train $TRAIN_DATA_DIR/train.rec --rec-train-idx $TRAIN_DATA_DIR/train.idx \ 34 | --rec-val $TRAIN_DATA_DIR/val.rec --rec-val-idx $TRAIN_DATA_DIR/val.idx --data-dir $TRAIN_DATA_DIR 35 | 36 | 37 | -------------------------------------------------------------------------------- /scripts/datasets/README.md: -------------------------------------------------------------------------------- 1 | # Prepare large datasets for vision 2 | [Gluon](https://mxnet.incubator.apache.org/gluon/) itself provides self-managed 3 | tiny datasets such as MNIST, CIFAR-10/100, and Fashion-MNIST. 4 | Downloading and unpacking large-scale datasets, however, is a time-consuming 5 | process that should not happen during class instantiation. 6 | We therefore provide convenient example scripts for preparing such datasets. 7 | 8 | Each dataset requires a one-time setup, after which it is automatically recognized by the `gluoncv` 9 | package. 10 | 11 | ## Instructions 12 | Please refer to our official [tutorials](http://gluon-cv.mxnet.io/build/examples_datasets/index.html) -------------------------------------------------------------------------------- /scripts/datasets/imagenet_val_maps.pklz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/scripts/datasets/imagenet_val_maps.pklz -------------------------------------------------------------------------------- /scripts/datasets/tiny_motorbike.py: -------------------------------------------------------------------------------- 1 | """Prepare PASCAL VOC tiny motorbike datasets""" 2 | import os 3 | import autogluon as ag 4 | 5 | 6 | if __name__ == '__main__': 7 | root = os.path.expanduser('~/.mxnet/datasets/') 8 | if not os.path.exists(root): 9 | os.makedirs(root) 10 | 11 | filename_zip = ag.download('https://autogluon.s3.amazonaws.com/datasets/tiny_motorbike.zip', path=root) 12 | filename = ag.unzip(filename_zip, root=root) 13 | data_root = os.path.join(root, filename) 14 | os.remove(filename_zip) 15 | 16 | print("dataset saved to: {}".format(data_root)) -------------------------------------------------------------------------------- /scripts/deployment/README.md: -------------------------------------------------------------------------------- 1 | # Deploy GluonCV models 2 | 3 | This folder includes deployment scripts and examples for pre-trained models. 4 | 5 | Please refer to [GluonCV Deployment Tutorials](https://gluon-cv.mxnet.io/build/examples_deployment/index.html) for detailed instructions.
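The typical flow is to export a pretrained network to a symbol/params pair, then reload it with plain MXNet so the serving side needs no GluonCV import. A minimal sketch (the `<name>-symbol.json` / `<name>-0000.params` file names follow the usual `export` convention, and the input name `'data'` and the sample image path are assumptions):

```python
import mxnet as mx
import gluoncv as gcv

# export: writes resnet18_v1-symbol.json and resnet18_v1-0000.params
net = gcv.model_zoo.get_model('resnet18_v1', pretrained=True)
gcv.utils.export_block('resnet18_v1', net, preprocess=True, layout='HWC')

# reload with plain MXNet; preprocessing was baked in at export time,
# so a raw HWC image tensor can be fed directly
deployed = mx.gluon.nn.SymbolBlock.imports(
    'resnet18_v1-symbol.json', ['data'], 'resnet18_v1-0000.params')
img = mx.image.imread('some_image.jpg').astype('float32')  # hypothetical input
out = deployed(img.expand_dims(axis=0))
```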
-------------------------------------------------------------------------------- /scripts/deployment/cpp-inference/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | bin/ 3 | install/ 4 | *.json 5 | *.param 6 | *.jpg 7 | *.names 8 | -------------------------------------------------------------------------------- /scripts/deployment/export/.gitignore: -------------------------------------------------------------------------------- 1 | *.json 2 | *.params 3 | -------------------------------------------------------------------------------- /scripts/deployment/export/README.md: -------------------------------------------------------------------------------- 1 | # Export pre-trained models from the GluonCV [model zoo](https://gluon-cv.mxnet.io/model_zoo/index.html) 2 | 3 | ### Usage 4 | 5 | ```bash 6 | python export_pretrained.py -m resnet18_v1 7 | ``` 8 | 9 | ### Check out supported arguments 10 | 11 | ```bash 12 | python export_pretrained.py -h 13 | ``` 14 | -------------------------------------------------------------------------------- /scripts/deployment/export/export_pretrained.py: -------------------------------------------------------------------------------- 1 | """Script for exporting pre-trained models from the GluonCV model zoo.""" 2 | from __future__ import print_function 3 | import argparse 4 | import gluoncv as gcv 5 | gcv.utils.check_version('0.6.0') 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser("Export model helper.") 9 | parser.add_argument('--model', '-m', required=True, type=str, help='Name of the model') 10 | parser.add_argument('--no-preprocess', action='store_true', help='Do not include standard preprocess.') 11 | args = parser.parse_args() 12 | return args 13 | 14 | args = parse_args() 15 | net = gcv.model_zoo.get_model(args.model, pretrained=True) 16 | gcv.utils.export_block(args.model, net, preprocess=(not args.no_preprocess), layout='HWC') 17 | print('Done...') 18 | -------------------------------------------------------------------------------- /scripts/depth/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/scripts/depth/README.md -------------------------------------------------------------------------------- /scripts/depth/train.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file.
6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import os 10 | import time 11 | import logging 12 | 13 | from trainer import Trainer 14 | from options import MonodepthOptions 15 | 16 | options = MonodepthOptions() 17 | opts = options.parse() 18 | 19 | if __name__ == "__main__": 20 | # build logger 21 | # logging and checkpoint saving 22 | log_path = os.path.join(opts.log_dir, opts.model_zoo) 23 | if not os.path.exists(log_path): 24 | os.makedirs(log_path) 25 | file_handler = logging.FileHandler(os.path.join(log_path, "train.log")) 26 | stream_handler = logging.StreamHandler() 27 | logger = logging.getLogger('') 28 | logger.setLevel(logging.INFO) 29 | logger.addHandler(file_handler) 30 | logger.addHandler(stream_handler) 31 | logger.info(opts) 32 | 33 | trainer = Trainer(opts, logger) 34 | 35 | tic = time.time() 36 | trainer.train() 37 | logger.info("Training Finished! Total training time is %dh %dm" % 38 | (int((time.time() - tic) / 3600), int((time.time() - tic) % 3600 / 60))) 39 | -------------------------------------------------------------------------------- /scripts/detection/README.md: -------------------------------------------------------------------------------- 1 | # Object Detection Models 2 | 3 | Please refer to [GluonCV Model Zoo](http://gluon-cv.mxnet.io/model_zoo/index.html#object-detection) 4 | for available pretrained models, training hyper-parameters, etc. 5 | -------------------------------------------------------------------------------- /scripts/detection/center_net/fit_center_net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import gluoncv as gcv 4 | gcv.utils.check_version('0.8.0') 5 | 6 | from gluoncv.auto.estimators import CenterNetEstimator 7 | from gluoncv.auto.tasks.utils import config_to_nested 8 | from d8.object_detection import Dataset 9 | 10 | 11 | if __name__ == '__main__': 12 | # specify hyperparameters 13 | config = { 14 | 'dataset': 'sheep', 15 | 'gpus': [0, 1, 2, 3, 4, 5, 6, 7], 16 | 'estimator': 'center_net', 17 | 'base_network': 'resnet50_v1b', 18 | 'batch_size': 64, # range [8, 16, 32, 64] 19 | 'epochs': 3 20 | } 21 | config = config_to_nested(config) 22 | config.pop('estimator') 23 | 24 | # specify dataset 25 | dataset = Dataset.get('sheep') 26 | train_data, valid_data = dataset.split(0.8) 27 | 28 | # specify estimator 29 | estimator = CenterNetEstimator(config) 30 | 31 | # fit estimator 32 | estimator.fit(train_data, valid_data) 33 | 34 | # evaluate auto estimator 35 | eval_map = estimator.evaluate(valid_data) 36 | logging.info('evaluation: mAP={}'.format(eval_map[-1][-1])) 37 | -------------------------------------------------------------------------------- /scripts/detection/faster_rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Faster R-CNN: Towards real-time object detection with region proposal networks. [1] 2 | 3 | [GluonCV Model Zoo](http://gluon-cv.mxnet.io/model_zoo/index.html#object-detection) 4 | 5 | - `--amp` Use [Automatic Mixed Precision training](https://mxnet.incubator.apache.org/versions/master/tutorials/amp/amp_tutorial.html), automatically casting to FP16 where safe. 6 | - `--horovod` Use [Horovod](https://github.com/horovod/horovod) for distributed training, with a network-agnostic wrapper for the optimizer, allowing efficient allreduce using OpenMPI and NCCL. 7 | 8 | ## References 9 | 1. Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun.
"Faster R-CNN: Towards real-time object detection with region proposal networks." In IEEE Transactions on Pattern Analysis and Machine Intelligence, 2016. 10 | 2. Ross Girshick. "Fast R-CNN." In Proceedings of the IEEE International Conference on Computer Vision, 2015. 11 | -------------------------------------------------------------------------------- /scripts/detection/faster_rcnn/fit_faster_rcnn.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import gluoncv as gcv 4 | gcv.utils.check_version('0.8.0') 5 | 6 | from gluoncv.auto.estimators import FasterRCNNEstimator 7 | from gluoncv.auto.tasks.utils import config_to_nested 8 | from d8.object_detection import Dataset 9 | 10 | 11 | if __name__ == '__main__': 12 | # specify hyperparameters 13 | config = { 14 | 'dataset': 'sheep', 15 | 'gpus': [0, 1, 2, 3, 4, 5, 6, 7], 16 | 'estimator': 'faster_rcnn', 17 | 'base_network': 'resnet50_v1b', 18 | 'batch_size': 8, # range [8, 16, 32, 64] 19 | 'epochs': 3 20 | } 21 | config = config_to_nested(config) 22 | config.pop('estimator') 23 | 24 | # specify dataset 25 | dataset = Dataset.get('sheep') 26 | train_data, valid_data = dataset.split(0.8) 27 | 28 | # specify estimator 29 | estimator = FasterRCNNEstimator(config) 30 | 31 | # fit estimator 32 | estimator.fit(train_data, valid_data) 33 | 34 | # evaluate auto estimator 35 | eval_map = estimator.evaluate(valid_data) 36 | logging.info('evaluation: mAP={}'.format(eval_map[-1][-1])) 37 | -------------------------------------------------------------------------------- /scripts/detection/ssd/fit_ssd.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import gluoncv as gcv 4 | gcv.utils.check_version('0.8.0') 5 | 6 | from gluoncv.auto.estimators import SSDEstimator 7 | from gluoncv.auto.tasks.utils import config_to_nested 8 | from d8.object_detection import Dataset 9 | 10 | 11 | if __name__ == '__main__': 12 | # specify hyperparameters 13 | config = { 14 | 'dataset': 'sheep', 15 | 'gpus': [0, 1, 2, 3, 4, 5, 6, 7], 16 | 'estimator': 'ssd', 17 | 'base_network': 'resnet50_v1', 18 | 'data_shape': 512, 19 | 'batch_size': 64, # range [8, 16, 32, 64] 20 | 'epochs': 3 21 | } 22 | config = config_to_nested(config) 23 | config.pop('estimator') 24 | 25 | # specify dataset 26 | dataset = Dataset.get('sheep') 27 | train_data, valid_data = dataset.split(0.8) 28 | 29 | # specify estimator 30 | estimator = SSDEstimator(config) 31 | 32 | # fit estimator 33 | estimator.fit(train_data, valid_data) 34 | 35 | # evaluate auto estimator 36 | eval_map = estimator.evaluate(valid_data) 37 | logging.info('evaluation: mAP={}'.format(eval_map[-1][-1])) 38 | -------------------------------------------------------------------------------- /scripts/detection/yolo/fit_yolo.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import gluoncv as gcv 4 | gcv.utils.check_version('0.8.0') 5 | 6 | from gluoncv.auto.estimators import YOLOv3Estimator 7 | from gluoncv.auto.tasks.utils import config_to_nested 8 | from d8.object_detection import Dataset 9 | 10 | 11 | if __name__ == '__main__': 12 | # specify hyperparameters 13 | config = { 14 | 'dataset': 'sheep', 15 | 'gpus': [0, 1, 2, 3, 4, 5, 6, 7], 16 | 'estimator': 'yolo3', 17 | 'base_network': 'darknet53', 18 | 'batch_size': 64, # range [8, 16, 32, 64] 19 | 'epochs': 3 20 | } 21 | config = config_to_nested(config) 22 | config.pop('estimator') 23 | 24 | # specify 
dataset 25 | dataset = Dataset.get('sheep') 26 | train_data, valid_data = dataset.split(0.8) 27 | 28 | # specify estimator 29 | estimator = YOLOv3Estimator(config) 30 | 31 | # fit estimator 32 | estimator.fit(train_data, valid_data) 33 | 34 | # evaluate auto estimator 35 | eval_map = estimator.evaluate(valid_data) 36 | logging.info('evaluation: mAP={}'.format(eval_map[-1][-1])) 37 | -------------------------------------------------------------------------------- /scripts/gan/cycle_gan/README.md: -------------------------------------------------------------------------------- 1 | ## Reproducing Cycle GAN experiments 2 | 3 | 4 | **Download horse2zebra dataset** 5 | ```bash 6 | python ./download_dataset.py --download-dir . --file horse2zebra 7 | ``` 8 | 9 | **Monitoring loss values and images during training** 10 | ```bash 11 | pip install mxboard 12 | tensorboard --logdir=./logs --host=127.0.0.1 --port=8888 13 | ``` 14 | Details about mxboard are in [mxboard](https://github.com/awslabs/mxboard) 15 | 16 | **Train Cycle GAN** 17 | ```bash 18 | python train_cgan.py --dataroot ./horse2zebra 19 | ``` 20 | 21 | **Test Cycle GAN** 22 | 23 | for the horse-to-zebra model: 24 | ```bash 25 | python demo_cycle_gan.py --images ./horse2zebra/testA/n02391049_10160.jpg --pretrained ./samples/netG_A_epoch_200.params --gpu_id -1 26 | ``` 27 | for the zebra-to-horse model, use a zebra test image and the corresponding generator parameters: 28 | ```bash 29 | python demo_cycle_gan.py --images ./horse2zebra/testB/<zebra_image>.jpg --pretrained ./samples/netG_B_epoch_200.params --gpu_id -1 30 | ``` 31 | ![images](images.png "images during training") 32 | 33 | 34 | The meaning of those images is as follows: 35 | 36 | | | | | | 37 | |-|-|-|-| 38 | | real_A (from dataset) | fake_B (generated from real_A) | rec_A (reconstructed from fake_B) | idt_A (generated from real_B) | 39 | | real_B (from dataset) | fake_A (generated from real_B) | rec_B (reconstructed from fake_A) | idt_B (generated from real_A) | 40 | 41 | ## References 42 | ["Cycle GAN"](https://arxiv.org/abs/1703.10593) 43 | -------------------------------------------------------------------------------- /scripts/gan/cycle_gan/images.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/scripts/gan/cycle_gan/images.png -------------------------------------------------------------------------------- /scripts/gan/srgan/README.md: -------------------------------------------------------------------------------- 1 | ## Reproducing SRGAN experiments 2 | 3 | ![images](pred.png "predictions") 4 | 5 | **Download DIV2K dataset** 6 | ```bash 7 | python download_dataset.py --file DIV2K_train_HR 8 | ``` 9 | 10 | **Train SRGAN** 11 | ```bash 12 | python train_srgan.py --dataroot ./DIV2K_train_HR 13 | ``` 14 | 15 | **Monitoring loss values and images during training** 16 | ```bash 17 | pip install mxboard 18 | tensorboard --logdir=./logs --host=127.0.0.1 --port=8888 19 | ``` 20 | Details about mxboard are in [mxboard](https://github.com/awslabs/mxboard) 21 | 22 | ![images](images.png "images during training") 23 | 24 | **Test SRGAN** 25 | 26 | ```bash 27 | python demo_sr_gan.py --images ./ --pretrained ./samples/netG_epoch_20000.params --gpu_id -1 28 | ``` 29 | 30 | ## References 31 | ["Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network"](https://arxiv.org/abs/1609.04802) --------------------------------------------------------------------------------
/scripts/gan/srgan/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/scripts/gan/srgan/__init__.py -------------------------------------------------------------------------------- /scripts/gan/srgan/images.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/scripts/gan/srgan/images.png -------------------------------------------------------------------------------- /scripts/gan/srgan/pred.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/scripts/gan/srgan/pred.png -------------------------------------------------------------------------------- /scripts/gan/stylegan/sample.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/scripts/gan/stylegan/sample.jpg -------------------------------------------------------------------------------- /scripts/gan/stylegan/sample_train.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/scripts/gan/stylegan/sample_train.png -------------------------------------------------------------------------------- /scripts/gan/wgan/README.md: -------------------------------------------------------------------------------- 1 | ## Reproducing LSUN experiments 2 | 3 | 4 | **Download LSUN dataset** 5 | ```bash 6 | cd ../../../scripts/datasets/ 7 | python lsun.py -c bedroom 8 | ``` 9 | 10 | **Monitoring `-Loss_D` values during training** 11 | ```bash 12 | pip install mxboard 13 | tensorboard --logdir=./logs --host=127.0.0.1 --port=8888 14 | ``` 15 | Details about mxboard are in [mxboard](https://github.com/awslabs/mxboard) 16 | 17 | 18 | **With DCGAN:** 19 | 20 | ```bash 21 | python train_wgan.py --dataset lsun --dataroot [lsun-train-folder] --cuda 22 | ``` 23 | 24 | **With MLP:** 25 | 26 | ```bash 27 | python train_wgan.py --mlp_G --ngf 512 28 | ``` 29 | 30 | **Generated fake samples after 400000 iterations** 31 | 32 | ![gensample](fake_samples_400000.png "fake samples after 400000 iterations") 33 | 34 | **Plot the value `-Loss_D`** 35 | 36 | ![lossD](lossd.png "-loss D") 37 | 38 | ## References 39 | ["Wasserstein GAN"](https://arxiv.org/abs/1701.07875) 40 | -------------------------------------------------------------------------------- /scripts/gan/wgan/fake_samples_400000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/scripts/gan/wgan/fake_samples_400000.png -------------------------------------------------------------------------------- /scripts/gan/wgan/lossd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/scripts/gan/wgan/lossd.png -------------------------------------------------------------------------------- /scripts/instance/README.md: -------------------------------------------------------------------------------- 1 | # Instance Segmentation Models 2 | 3 | Please refer to [GluonCV Model
Zoo](https://gluon-cv.mxnet.io/model_zoo/index.html#instance-segmentation) 4 | for available pretrained models, training hyper-parameters, etc. 5 | -------------------------------------------------------------------------------- /scripts/instance/mask_rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Mask R-CNN [1] 2 | 3 | [GluonCV Model Zoo](http://gluon-cv.mxnet.io/model_zoo/index.html#object-detection) 4 | 5 | ## References 6 | 1. Kaiming He, Georgia Gkioxari, Piotr Dollár, and Ross Girshick. "Mask R-CNN." IEEE International Conference on Computer Vision (ICCV), 2017. 7 | 2. Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. "Deep Residual Learning for Image Recognition." IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2016. 8 | 3. Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. "Faster R-CNN: Towards real-time object detection with region proposal networks." In IEEE Transactions on Pattern Analysis and Machine Intelligence, 2016. 9 | 4. Ross Girshick. "Fast R-CNN." In IEEE International Conference on Computer Vision (ICCV), 2015. 10 | -------------------------------------------------------------------------------- /scripts/instance/mask_rcnn/benchmark/ompi_bind_DGX1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | case "${OMPI_COMM_WORLD_LOCAL_RANK}" in 4 | 0) 5 | exec numactl --physcpubind=0-5,48-53 --membind=0 "${@}" 6 | ;; 7 | 1) 8 | exec numactl --physcpubind=6-11,54-59 --membind=0 "${@}" 9 | ;; 10 | 2) 11 | exec numactl --physcpubind=12-17,60-65 --membind=0 "${@}" 12 | ;; 13 | 3) 14 | exec numactl --physcpubind=18-23,66-71 --membind=0 "${@}" 15 | ;; 16 | 4) 17 | exec numactl --physcpubind=24-29,72-77 --membind=1 "${@}" 18 | ;; 19 | 5) 20 | exec numactl --physcpubind=30-35,78-83 --membind=1 "${@}" 21 | ;; 22 | 6) 23 | exec numactl --physcpubind=36-41,84-89 --membind=1 "${@}" 24 | ;; 25 | 7) 26 | exec numactl --physcpubind=42-47,90-95 --membind=1 "${@}" 27 | ;; 28 | *) 29 | echo ============================================================== 30 | echo "ERROR: Unknown local rank ${OMPI_COMM_WORLD_LOCAL_RANK}" 31 | echo ============================================================== 32 | exit 1 33 | ;; 34 | esac 35 | 36 | -------------------------------------------------------------------------------- /scripts/instance/mask_rcnn/fit_mask_rcnn.py: -------------------------------------------------------------------------------- 1 | """Train Mask-RCNN end to end.""" 2 | import os 3 | 4 | # disable autotune 5 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 6 | os.environ['MXNET_GPU_MEM_POOL_TYPE'] = 'Round' 7 | os.environ['MXNET_GPU_MEM_POOL_ROUND_LINEAR_CUTOFF'] = '26' 8 | os.environ['MXNET_EXEC_BULK_EXEC_MAX_NODE_TRAIN_FWD'] = '999' 9 | os.environ['MXNET_EXEC_BULK_EXEC_MAX_NODE_TRAIN_BWD'] = '25' 10 | os.environ['MXNET_GPU_COPY_NTHREADS'] = '1' 11 | os.environ['MXNET_OPTIMIZER_AGGREGATION_SIZE'] = '54' 12 | 13 | import gluoncv as gcv 14 | 15 | gcv.utils.check_version('0.7.0') 16 | from gluoncv.auto.estimators.mask_rcnn import MaskRCNNEstimator 17 | from gluoncv.auto.estimators.mask_rcnn import ex 18 | 19 | 20 | @ex.automain 21 | def main(_config, _log): 22 | # main is the commandline entry for user w/o coding 23 | c = MaskRCNNEstimator(_config, _log) 24 | c.fit() 25 | -------------------------------------------------------------------------------- /scripts/pose/alpha_pose/coco.sh: 
-------------------------------------------------------------------------------- 1 | python train_alpha_pose.py --dataset coco \ 2 | --model alpha_pose_resnet101_v1b --mode hybrid --num-joints 17 \ 3 | --lr 0.001 --wd 0.0 --lr-mode step --lr-decay-epoch 90,120 \ 4 | --num-epochs 140 --batch-size 32 --num-gpus 4 -j 60 \ 5 | --dtype float32 --warmup-epochs 0 --use-pretrained-base \ 6 | --save-dir params_alpha_pose_resnet101_v1b_coco \ 7 | --logging-file alpha_pose_resnet101_v1b_coco.log --log-interval 100 --flip-test 8 | -------------------------------------------------------------------------------- /scripts/pose/alpha_pose/coco_dpg.sh: -------------------------------------------------------------------------------- 1 | python train_alpha_pose.py --dataset coco \ 2 | --model alpha_pose_resnet101_v1b --mode hybrid --num-joints 17 \ 3 | --lr 0.001 --wd 0.0 --lr-mode step --lr-decay-epoch 30,60 \ 4 | --num-epochs 90 --batch-size 32 --num-gpus 4 -j 60 \ 5 | --dtype float32 --warmup-epochs 0 --use-pretrained-base \ 6 | --save-dir params_alpha_pose_resnet101_v1b_coco_dpg \ 7 | --logging-file alpha_pose_resnet101_v1b_coco.log --log-interval 100 --flip-test \ 8 | --addDPG --load-model final.params 9 | -------------------------------------------------------------------------------- /scripts/pose/alpha_pose/validate.sh: -------------------------------------------------------------------------------- 1 | python validate.py \ 2 | --model alpha_pose_resnet101_v1b --dataset coco --num-joints 17 \ 3 | --batch-size 128 --num-gpus 4 -j 60 \ 4 | --params-file duc_se_coco.params \ 5 | --input-size 320,256 --flip-test 6 | -------------------------------------------------------------------------------- /scripts/pose/directpose/.gitignore: -------------------------------------------------------------------------------- 1 | *.json 2 | *.pth 3 | *.so 4 | -------------------------------------------------------------------------------- /scripts/pose/simple_pose/coco.sh: -------------------------------------------------------------------------------- 1 | python train_simple_pose.py \ 2 | --model simple_pose_resnet50_v1b --mode hybrid --num-joints 17 \ 3 | --lr 0.001 --wd 0.0 --lr-mode step --lr-decay-epoch 90,120 \ 4 | --num-epochs 140 --batch-size 32 --num-gpus 8 -j 60 \ 5 | --dtype float32 --warmup-epochs 0 --use-pretrained-base \ 6 | --save-dir params_simple_pose_resnet50_v1b \ 7 | --logging-file simple_pose_resnet50_v1b.log --log-interval 100 8 | -------------------------------------------------------------------------------- /scripts/pose/simple_pose/validate.sh: -------------------------------------------------------------------------------- 1 | python validate.py \ 2 | --model simple_pose_resnet18_v1b --num-joints 17 \ 3 | --batch-size 128 --num-gpus 8 -j 60 4 | -------------------------------------------------------------------------------- /scripts/re-id/baseline/networks/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .resnet import resnet18, resnet34, resnet50 4 | -------------------------------------------------------------------------------- /scripts/tracking/smot/eval.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | from multiprocessing import Pool 5 | from terminaltables import AsciiTable 6 | 7 | from helper import * 8 | 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument('--model-name', type=str, 
default='smot') 12 | parser.add_argument('--gt-dir', type=str, required=True) 13 | parser.add_argument('--pred-dir', type=str, required=True) 14 | parser.add_argument('--min-iou', type=float, default=0.5) 15 | parser.add_argument('--num-worker', type=int, default=32) 16 | 17 | 18 | if __name__ == '__main__': 19 | args = parser.parse_args() 20 | 21 | gt_pred_pairs = get_gt_pred_pairs(args.gt_dir, args.pred_dir, iou_thresh=args.min_iou) 22 | 23 | pool = Pool(args.num_worker) 24 | results = dict(pool.starmap(run_video, gt_pred_pairs)) 25 | pool.close() 26 | pool.join() 27 | print("run on {} videos".format(len(gt_pred_pairs))) 28 | 29 | headers = ['MOTA', 'IDF1', 'IDR', 'IDP', 'N. Trans', 'FP','FN', 'IDsw.', 'MT/GT', 'Prec.', 'Rec.', 'F1'] 30 | data = [mota(results), idf1(results), idr(results), idp(results), num_transfer(results), num_fp(results), num_misses(results), num_sw(results), 31 | '{}/{}'.format(mt(results), num_tracks(results)), precision(results), recall(results), f1(results)] 32 | 33 | table = AsciiTable([headers, data], title='Tracking Results: {}'.format(args.model_name)) 34 | print(table.table) 35 | 36 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/tests/__init__.py -------------------------------------------------------------------------------- /tests/auto/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/tests/auto/__init__.py -------------------------------------------------------------------------------- /tests/auto/test_hybrid_auto_tasks.py: -------------------------------------------------------------------------------- 1 | from gluoncv.auto.tasks import ImageClassification 2 | import autogluon.core as ag 3 | from nose.tools import nottest 4 | 5 | IMAGE_CLASS_DATASET, _, IMAGE_CLASS_TEST = ImageClassification.Dataset.from_folders( 6 | 'https://autogluon.s3.amazonaws.com/datasets/shopee-iet.zip') 7 | 8 | def test_hybrid_image_classification(): 9 | from gluoncv.auto.tasks import ImageClassification 10 | model = ag.Categorical('resnet18_v1b', 'resnet18') 11 | task = ImageClassification({'model': model, 'num_trials': 4, 'epochs': 1, 'batch_size': 8}) 12 | classifier = task.fit(IMAGE_CLASS_DATASET) 13 | assert task.fit_summary().get('valid_acc', 0) > 0 14 | test_result = classifier.predict(IMAGE_CLASS_TEST) 15 | 16 | if __name__ == '__main__': 17 | import nose 18 | nose.runmodule() 19 | -------------------------------------------------------------------------------- /tests/auto/test_torch_auto_tasks.py: -------------------------------------------------------------------------------- 1 | from gluoncv.auto.tasks import ImageClassification 2 | import autogluon.core as ag 3 | from nose.tools import nottest 4 | 5 | IMAGE_CLASS_DATASET, _, IMAGE_CLASS_TEST = ImageClassification.Dataset.from_folders( 6 | 'https://autogluon.s3.amazonaws.com/datasets/shopee-iet.zip') 7 | 8 | def test_torch_image_classification(): 9 | from gluoncv.auto.tasks import ImageClassification 10 | task = ImageClassification({'model': 'resnet18', 'num_trials': 1, 'epochs': 1, 'batch_size': 8}) 11 | classifier = task.fit(IMAGE_CLASS_DATASET) 12 | assert task.fit_summary().get('valid_acc', 0) > 0 13 | test_result = classifier.predict(IMAGE_CLASS_TEST) 14 | 15 | 
def test_torch_image_classification_custom_net(): 16 | from gluoncv.auto.tasks import ImageClassification 17 | from timm import create_model 18 | import torch.nn as nn 19 | net = create_model('resnet18') 20 | net.fc = nn.Linear(512, 4) 21 | task = ImageClassification({'num_trials': 1, 'epochs': 1, 'custom_net': net, 'batch_size': 8}) 22 | classifier = task.fit(IMAGE_CLASS_DATASET) 23 | assert task.fit_summary().get('valid_acc', 0) > 0 24 | test_result = classifier.predict(IMAGE_CLASS_TEST) 25 | 26 | if __name__ == '__main__': 27 | import nose 28 | nose.runmodule() 29 | -------------------------------------------------------------------------------- /tests/model_zoo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/tests/model_zoo/__init__.py -------------------------------------------------------------------------------- /tests/model_zoo_torch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/tests/model_zoo_torch/__init__.py -------------------------------------------------------------------------------- /tests/py3_auto.yml: -------------------------------------------------------------------------------- 1 | name: gluon_cv_py3_mxnet 2 | channels: 3 | - conda-forge 4 | - defaults 5 | - pytorch 6 | dependencies: 7 | - python=3.6 8 | - perl 9 | - sphinx=1.7.2 10 | - nose 11 | - coverage=4.5.4 12 | - scipy 13 | - cython 14 | - pip=20.2.4 15 | - requests 16 | - matplotlib 17 | - tqdm 18 | - pillow 19 | - pandas==1.3 20 | - pytorch==1.6.0 21 | - torchvision==0.7.0 22 | - pip: 23 | - https://repo.mxnet.io/dist/python/cu100mkl/mxnet_cu100mkl-1.6.0b20191010-py2.py3-none-manylinux1_x86_64.whl 24 | - coverage-badge 25 | - awscli 26 | - nose-timer 27 | - opencv-python 28 | - git+https://github.com/zhanghang1989/detail-api.git#subdirectory=PythonAPI 29 | - portalocker 30 | - autocfg>=0.0.6 31 | - autogluon.core==0.2.0 32 | - timm==0.5.4 33 | -------------------------------------------------------------------------------- /tests/py3_mxnet.yml: -------------------------------------------------------------------------------- 1 | name: gluon_cv_py3_mxnet 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python=3.6 7 | - perl 8 | - sphinx=1.7.2 9 | - nose 10 | - coverage=4.5.4 11 | - scipy 12 | - cython 13 | - pip=20.2.4 14 | - requests 15 | - matplotlib 16 | - tqdm 17 | - pillow 18 | - pip: 19 | - https://repo.mxnet.io/dist/python/cu100mkl/mxnet_cu100mkl-1.6.0b20191010-py2.py3-none-manylinux1_x86_64.whl 20 | - coverage-badge 21 | - awscli 22 | - nose-timer 23 | - opencv-python 24 | - git+https://github.com/zhanghang1989/detail-api.git#subdirectory=PythonAPI 25 | - portalocker 26 | - autocfg -------------------------------------------------------------------------------- /tests/py3_mxnet_ci.yml: -------------------------------------------------------------------------------- 1 | name: gluon_cv_py3_mxnet 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python=3.7 7 | - nose 8 | - coverage=4.5.4 9 | - pip: 10 | - mxnet 11 | - coverage-badge 12 | - nose-timer 13 | -------------------------------------------------------------------------------- /tests/py3_torch.yml: -------------------------------------------------------------------------------- 1 | name: gluon_cv_py3_pytorch 2 | channels: 3 | - pytorch 4 | 
- conda-forge 5 | - defaults 6 | dependencies: 7 | - python=3.6 8 | - perl 9 | - sphinx=1.7.2 10 | - nose 11 | - coverage=4.5.4 12 | - scipy 13 | - cython 14 | - pip=20.2.4 15 | - requests 16 | - matplotlib 17 | - tqdm 18 | - pillow 19 | - pytorch=1.6.0 20 | - torchvision=0.7.0 21 | - pip: 22 | - coverage-badge 23 | - awscli 24 | - nose-timer 25 | - opencv-python 26 | - git+https://github.com/zhanghang1989/detail-api.git#subdirectory=PythonAPI 27 | - portalocker 28 | - tensorboardx 29 | - decord 30 | - opencv-python-headless 31 | - yacs 32 | -------------------------------------------------------------------------------- /tests/pylint.yml: -------------------------------------------------------------------------------- 1 | name: gluon_cv_pylint 2 | dependencies: 3 | - python=3.7 4 | - pip=20.2.4 5 | - pip: 6 | - pylint==2.4.4 7 | -------------------------------------------------------------------------------- /tests/unittests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/tests/unittests/__init__.py -------------------------------------------------------------------------------- /tests/unittests/test_nn.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | from gluoncv.nn import GroupNorm 3 | 4 | def test_groupnorm(): 5 | ctx=mx.context.current_context() 6 | x = mx.nd.random.uniform(1, 2, (4, 16, 8, 8), ctx=ctx) 7 | gn = GroupNorm(4, 16) 8 | gn.initialize(ctx=ctx) 9 | y = gn(x) 10 | y = y.reshape(0, 4, -1) 11 | print('y.mean(2) =', y.mean(2)) 12 | mx.test_utils.assert_almost_equal(y.mean(2).asnumpy(), 13 | mx.nd.zeros_like(y.mean(2)).asnumpy(), 14 | rtol=1e-3, atol=1e-3) 15 | 16 | if __name__ == '__main__': 17 | import nose 18 | nose.runmodule() 19 | -------------------------------------------------------------------------------- /tests/unittests/test_utils_bbox.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import gluoncv as gcv 5 | 6 | def test_bbox_xywh_to_xyxy(): 7 | # test list 8 | a = [20, 30, 100.2, 300.4] 9 | expected = [20, 30, 119.2, 329.4] 10 | np.testing.assert_allclose(gcv.utils.bbox.bbox_xywh_to_xyxy(a), expected) 11 | aa = np.array([a, a]) 12 | bb = np.array([expected, expected]) 13 | np.testing.assert_allclose(gcv.utils.bbox.bbox_xywh_to_xyxy(aa), bb) 14 | 15 | if __name__ == '__main__': 16 | import nose 17 | nose.runmodule() 18 | -------------------------------------------------------------------------------- /tests/unittests/test_utils_block.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import gluoncv as gcv 5 | from mxnet.gluon.nn import BatchNorm 6 | 7 | def check_bn_frozen_callback(net, value): 8 | if isinstance(net, BatchNorm): 9 | assert value == net._kwargs['use_global_stats'] 10 | 11 | def test_block_freeze_bn(): 12 | net = gcv.model_zoo.get_model('resnet18_v1') 13 | gcv.utils.recursive_visit(net, check_bn_frozen_callback, value=False) 14 | gcv.utils.freeze_bn(net, True) 15 | gcv.utils.recursive_visit(net, check_bn_frozen_callback, value=True) 16 | gcv.utils.freeze_bn(net, False) 17 | gcv.utils.recursive_visit(net, check_bn_frozen_callback, value=False) 18 | 19 | if __name__ == '__main__': 20 | import nose 21 | nose.runmodule() 22 | 
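# Typical use outside the tests (a sketch, assuming a pretrained backbone whose
# BatchNorm statistics should stay fixed while fine-tuning):
#   net = gcv.model_zoo.get_model('resnet18_v1', pretrained=True)
#   gcv.utils.freeze_bn(net, True)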
-------------------------------------------------------------------------------- /tests/unittests/tiny_datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | from gluoncv import data 3 | 4 | class COCODetectionTiny(data.COCODetection): 5 | CLASSES = ['bicycle', 'motorcycle'] 6 | 7 | def __init__(self, root=os.path.join('~', '.mxnet', 'datasets', 'tiny_coco'), 8 | splits=('instances_val2017_tiny',), **kwargs): 9 | super().__init__(root=root, splits=splits, **kwargs) 10 | 11 | class COCOInstanceTiny(data.COCOInstance): 12 | CLASSES = ['bicycle', 'motorcycle'] 13 | 14 | def __init__(self, root=os.path.join('~', '.mxnet', 'datasets', 'tiny_coco'), 15 | splits=('instances_val2017_tiny',), **kwargs): 16 | super().__init__(root=root, splits=splits, **kwargs) 17 | 18 | class VOCDetectionTiny(data.VOCDetection): 19 | CLASSES = ['motorbike', 'person'] 20 | 21 | def __init__(self, root=os.path.join('~', '.mxnet', 'datasets', 'tiny_motorbike'), 22 | splits=(('tiny_motorbike', 'trainval'),), **kwargs): 23 | super().__init__(root=root, splits=splits, **kwargs) 24 | 25 | class VOCSegmentationTiny(data.VOCSegmentation): 26 | CLASSES = ['motorbike', 'person'] 27 | BASE_DIR = 'tiny_motorbike' 28 | 29 | def __init__(self, root=os.path.expanduser(os.path.join('~', '.mxnet', 'datasets', 'tiny_motorbike')), 30 | split='train', **kwargs): 31 | super().__init__(root=root, split=split, **kwargs) 32 | -------------------------------------------------------------------------------- /tools/batch/README.md: -------------------------------------------------------------------------------- 1 | # Launch AWS Batch Jobs 2 | 3 | Once you've correctly configured the AWS CLI, you may use submit-job.py to deploy your job. 4 | 5 | #### Requirements 6 | 7 | **boto3** is required. 
To install it: 8 | 9 | ```shell 10 | pip install boto3 11 | ``` 12 | 13 | You'll also need to configure it so that the script can authenticate you successfully: 14 | 15 | https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html#configuration 16 | 17 | #### Some arguments 18 | 19 | * --job-type: the instance type you want your job to run on 20 | * --source-ref: the branch name 21 | * --remote: the repository URL 22 | * --command: the command you want to execute 23 | * --wait: keep the script running and display the status of the submitted job 24 | 25 | Example: 26 | 27 | ```shell 28 | python3 submit-job.py \ 29 | --job-type c4.2x \ 30 | --source-ref master \ 31 | --work-dir docs/tutorials/classification \ 32 | --remote https://github.com/dmlc/gluon-cv \ 33 | --command "python3 demo_cifar10.py" \ 34 | --wait 35 | ``` 36 | 37 | For a full list of arguments and their default values: 38 | 39 | ```shell 40 | python3 submit-job.py -h 41 | ``` 42 | 43 | -------------------------------------------------------------------------------- /tools/batch/batch-test.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import boto3 3 | 4 | batch = boto3.client('batch', region_name='us-east-1') 5 | response = batch.describe_job_definitions(status='ACTIVE')['jobDefinitions'] 6 | instance_type_info = {} 7 | for res in response: 8 | jobDefinition = res['jobDefinitionName'] # example: gluon-cv-p2_8xlarge:1 9 | instance = jobDefinition.split('-')[-1].split(':')[0].replace('large', '') # example: p2_8x 10 | job_queue = jobDefinition.split('-')[-1].split('_')[0] # example: p2 11 | instance_type_info[instance] = {'job_definition': jobDefinition, 'job_queue': job_queue} 12 | 13 | for instance in instance_type_info: 14 | command = ['python3', \ 15 | 'submit-job.py', \ 16 | '--name', instance+'-test', \ 17 | '--job-type', instance.replace('large', ''), \ 18 | '--source-ref', 'master', \ 19 | '--work-dir', 'docs/tutorials/classification', \ 20 | '--remote', 'https://github.com/dmlc/gluon-cv', \ 21 | '--command', 'python3 demo_cifar10.py' 22 | ] 23 | subprocess.run(command) 24 | -------------------------------------------------------------------------------- /tools/batch/docker/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | RUN apt-get update && apt-get install -y --no-install-recommends \ 4 | build-essential \ 5 | locales \ 6 | cmake \ 7 | wget \ 8 | subversion \ 9 | git \ 10 | curl \ 11 | vim \ 12 | unzip \ 13 | sudo \ 14 | ca-certificates \ 15 | libjpeg-dev \ 16 | libpng-dev \ 17 | libfreetype6-dev \ 18 | libopenblas-dev \ 19 | python3-dev \ 20 | python3-pip \ 21 | python3-setuptools \ 22 | pandoc \ 23 | libxft-dev &&\ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | RUN pip3 install --upgrade pip 27 | RUN pip3 install --no-cache --upgrade \ 28 | wheel \ 29 | cmake \ 30 | awscli \ 31 | pypandoc 32 | RUN git clone https://github.com/dmlc/gluon-cv 33 | WORKDIR gluon-cv 34 | ADD gluon_cv_job.sh . 35 | RUN chmod +x gluon_cv_job.sh 36 | -------------------------------------------------------------------------------- /tools/batch/docker/README.md: -------------------------------------------------------------------------------- 1 | # Updating the Docker Image for AWS Batch 2 | 3 | To update the Docker image: 4 | 5 | - Update the Dockerfile 6 | - Make sure Docker and docker-compose, as well as the docker Python package, are installed.
7 | - Export the AWS account credentials as environment variables 8 | - Change to the same folder as the Dockerfile and execute the following: 9 | 10 | ```shell 11 | # First export your ECR repo address as an environment variable 12 | export AWS_ECR_REPO=${your_repo} 13 | 14 | # This executes a command that logs into ECR. 15 | aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $AWS_ECR_REPO 16 | 17 | # The following script will build, tag, and push the image 18 | # For cpu 19 | ./docker_deploy.sh cpu 20 | # For gpu 21 | ./docker_deploy.sh gpu 22 | 23 | ``` 24 | 25 | -------------------------------------------------------------------------------- /tools/batch/docker/docker_deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TYPE=$1 4 | 5 | if [ -z $TYPE ]; then 6 | echo "No type detected. Choices: cpu, gpu" 7 | exit 1 8 | fi; 9 | 10 | if [ $TYPE == cpu ] || [ $TYPE == CPU ]; then 11 | docker build --no-cache -f Dockerfile.cpu -t gluon-cv-1:cpu-latest . 12 | docker tag gluon-cv-1:cpu-latest $AWS_ECR_REPO:cpu-latest 13 | docker push $AWS_ECR_REPO:cpu-latest 14 | elif [ $TYPE == gpu ] || [ $TYPE == GPU ]; then 15 | docker build --no-cache -f Dockerfile.gpu -t gluon-cv-1:latest . 16 | docker tag gluon-cv-1:latest $AWS_ECR_REPO:latest 17 | docker push $AWS_ECR_REPO:latest 18 | else 19 | echo "Invalid type detected. Choices: cpu, gpu" 20 | exit 1 21 | fi; 22 | -------------------------------------------------------------------------------- /tools/batch/docker/gluon_cv_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | date 3 | echo "Args: $@" 4 | env 5 | echo "jobId: $AWS_BATCH_JOB_ID" 6 | echo "jobQueue: $AWS_BATCH_JQ_NAME" 7 | echo "computeEnvironment: $AWS_BATCH_CE_NAME" 8 | 9 | SOURCE_REF=$1 10 | WORK_DIR=$2 11 | COMMAND=$3 12 | SAVED_OUTPUT=$4 13 | SAVE_PATH=$5 14 | REMOTE=$6 15 | DEVICE=${7:-gpu} 16 | 17 | if [ ! -z $REMOTE ]; then 18 | git remote set-url origin $REMOTE 19 | fi; 20 | 21 | git fetch origin $SOURCE_REF:working 22 | git checkout working 23 | if [ $DEVICE == "cpu" ]; then 24 | python3 -m pip install -U --quiet "mxnet==1.7.0.post1" 25 | python3 -m pip install -U --quiet torch==1.6.0+cpu torchvision==0.7.0+cpu 26 | else 27 | python3 -m pip install -U --quiet "mxnet-cu102==1.7.0" 28 | python3 -m pip install -U --quiet torch==1.6.0 torchvision==0.7.0 29 | fi; 30 | 31 | python3 -m pip install --quiet -e . 32 | python3 -m pip install --quiet timm==0.5.4 33 | 34 | cd $WORK_DIR 35 | /bin/bash -o pipefail -c "$COMMAND" 36 | COMMAND_EXIT_CODE=$?
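# Persist any artifacts the command produced: a single file is copied as-is,
# a directory is copied recursively, both into a job-specific S3 prefix.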
37 | if [[ -f $SAVED_OUTPUT ]]; then 38 | aws s3 cp $SAVED_OUTPUT s3://gluon-cv-dev/batch/$AWS_BATCH_JOB_ID/$SAVE_PATH; 39 | elif [[ -d $SAVED_OUTPUT ]]; then 40 | aws s3 cp --recursive $SAVED_OUTPUT s3://gluon-cv-dev/batch/$AWS_BATCH_JOB_ID/$SAVE_PATH; 41 | fi; 42 | exit $COMMAND_EXIT_CODE 43 | -------------------------------------------------------------------------------- /tools/batch/template/launch-template-data-cpu.json: -------------------------------------------------------------------------------- 1 | { 2 | "LaunchTemplateName":"increase-volume-batch-linux2", 3 | "LaunchTemplateData": { 4 | "BlockDeviceMappings": [ 5 | { 6 | "Ebs": { 7 | "DeleteOnTermination": true, 8 | "VolumeSize": 256, 9 | "VolumeType": "gp2" 10 | }, 11 | "DeviceName": "/dev/xvdcz" 12 | } 13 | ] 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /tools/batch/template/launch-template-data-gpu.json: -------------------------------------------------------------------------------- 1 | { 2 | "LaunchTemplateName":"increase-volume-batch-linux1", 3 | "LaunchTemplateData": { 4 | "BlockDeviceMappings": [ 5 | { 6 | "Ebs": { 7 | "DeleteOnTermination": true, 8 | "VolumeSize": 100, 9 | "VolumeType": "gp2" 10 | }, 11 | "DeviceName": "/dev/xvda" 12 | }, 13 | { 14 | "Ebs": { 15 | "DeleteOnTermination": true, 16 | "VolumeSize": 100, 17 | "VolumeType": "gp2" 18 | }, 19 | "DeviceName": "/dev/xvdcz" 20 | } 21 | ] 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /tools/docker/README.md: -------------------------------------------------------------------------------- 1 | # Docker Support in GluonCV 2 | 3 | We provide a [Docker](https://www.docker.com/) container with everything set up to run GluonCV. With the prebuilt Docker image, there is no need to worry about operating systems or system dependencies. You can launch a [JupyterLab](https://jupyterlab.readthedocs.io/en/stable/) development environment and try out GluonCV on your own problem. 4 | 5 | ## Run Docker 6 | 7 | You can pull and run the Docker image with the following commands. 8 | 9 | ``` 10 | docker pull gluonai/gluon-cv:gpu-latest 11 | docker run --gpus all --rm -it -p 8888:8888 -p 8787:8787 -p 8786:8786 --shm-size=2g gluonai/gluon-cv:gpu-latest 12 | ``` 13 | 14 | Here, we open the ports 8888, 8787, 8786, which are used for connecting to JupyterLab. Also, we set `--shm-size` to `2g`. This sets the shared memory storage to 2GB. Since NCCL will create shared memory segments, this argument is essential for the Jupyter notebook to work with NCCL. (See also https://github.com/NVIDIA/nccl/issues/290). 15 | 16 | ## Build your own Docker Image 17 | 18 | To build a Docker image from the Dockerfile, you may use the following command: 19 | 20 | ``` 21 | docker build -t gluonai/gluon-cv:gpu-latest .
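# (optional sanity check, not part of the build) confirm the image was
# built and tagged as expected:
docker images gluonai/gluon-cv:gpu-latest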
22 | ``` -------------------------------------------------------------------------------- /tools/docker/devel_entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source /start_jupyter.sh 4 | 5 | exec "$@" 6 | -------------------------------------------------------------------------------- /tools/docker/start_jupyter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Run Jupyter in the foreground if $JUPYTER_FG is set to "true" 4 | if [[ "${JUPYTER_FG}" == "true" ]]; then 5 | jupyter-lab --allow-root --ip=0.0.0.0 --no-browser --NotebookApp.token='' 6 | exit 0 7 | else 8 | nohup jupyter-lab --allow-root --ip=0.0.0.0 --no-browser --NotebookApp.token='' > /dev/null 2>&1 & 9 | 10 | echo "Notebook server successfully started; a JupyterLab instance is now running!" 11 | echo "Make local folders visible by volume mounting them to /workspace/notebook" 12 | echo "To access it, visit http://localhost:8888 on your host machine." 13 | echo 'Ensure the following arguments to "docker run" are added to expose the server ports to your host machine: 14 | -p 8888:8888 -p 8787:8787 -p 8786:8786' 15 | fi 16 | --------------------------------------------------------------------------------