├── .github
└── workflows
│ ├── build_docs.sh
│ ├── build_test.yml
│ ├── gpu_test.sh
│ ├── stale.yml
│ └── unittest.yml
├── .gitignore
├── .gitmodules
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.md
├── docs
├── .gitignore
├── .nojekyll
├── Doxyfile
├── Makefile
├── README.txt
├── _static
│ ├── action-recognition.png
│ ├── action_basketball_demo.gif
│ ├── apache2.svg
│ ├── applications.html
│ ├── assets
│ │ ├── img
│ │ │ ├── action_recognition_demo.png
│ │ │ ├── background
│ │ │ │ ├── img-01.jpg
│ │ │ │ ├── img-02.jpg
│ │ │ │ ├── img-03.jpg
│ │ │ │ ├── img-04.jpg
│ │ │ │ ├── img-05.jpg
│ │ │ │ ├── img-06.jpg
│ │ │ │ ├── img-07.jpg
│ │ │ │ ├── img-08.jpg
│ │ │ │ ├── img-09.jpg
│ │ │ │ ├── img-10.jpg
│ │ │ │ └── img-11.jpg
│ │ │ ├── gluon_white.png
│ │ │ ├── image-classification-demo.png
│ │ │ ├── instance_segmentation_demo.png
│ │ │ ├── object-detection-demo.png
│ │ │ ├── pose_estimation_demo.png
│ │ │ └── semantic-segmentation_demo.png
│ │ └── svg
│ │ │ ├── icons.svg
│ │ │ ├── play.svg
│ │ │ ├── video-icon-dark.svg
│ │ │ └── video-icon.svg
│ ├── classification-demo.png
│ ├── css
│ │ ├── custom.css
│ │ ├── material_icon.css
│ │ └── slides.min.css
│ ├── depth.png
│ ├── gluon-logo.png
│ ├── gluon-logo.svg
│ ├── gluon.ico
│ ├── gluon_black.png
│ ├── gluon_s2.png
│ ├── gluon_white.png
│ ├── google_analytics.js
│ ├── hidebib.js
│ ├── image-classification.png
│ ├── imagenet_banner.jpeg
│ ├── install-options.js
│ ├── instance-segmentation.png
│ ├── js
│ │ ├── jquery.min.js
│ │ └── slides.min.js
│ ├── logos
│ │ ├── acer_byoc_grad_lockup_rgb.png
│ │ ├── acroquest_logo_cmyk_2.png
│ │ ├── embed.html
│ │ ├── kumiawase_e_1_RGB.jpg
│ │ └── pioneer.png
│ ├── object-detection.png
│ ├── plot_help.png
│ ├── pose-estimation.svg
│ ├── semantic-segmentation.png
│ ├── short_demo.gif
│ ├── smot_demo.gif
│ ├── smot_multi_demo.gif
│ ├── tabs.js
│ └── tracking_demo.gif
├── _templates
│ ├── index.html
│ └── layout.html
├── api
│ ├── data.batchify.rst
│ ├── data.datasets.rst
│ ├── data.transforms.rst
│ ├── index.rst
│ ├── loss.rst
│ ├── model_zoo.rst
│ ├── nn.rst
│ └── utils.rst
├── build.yml
├── conf.py
├── contents.rst
├── how_to
│ ├── contribute.rst
│ ├── index.rst
│ └── support.rst
├── install.rst
├── install
│ ├── install-include.rst
│ └── install-more.rst
├── model_zoo
│ ├── action_recognition.rst
│ ├── action_recognition_mxnet.rst
│ ├── action_recognition_torch.rst
│ ├── classification.rst
│ ├── classification_mxnet.rst
│ ├── classification_torch.rst
│ ├── csv_tables
│ │ ├── Action_Recognitions
│ │ │ ├── HMDB51.csv
│ │ │ ├── Kinetics400.csv
│ │ │ ├── Kinetics400_torch.csv
│ │ │ ├── Kinetics700.csv
│ │ │ ├── Kinetics700_torch.csv
│ │ │ ├── Something-Something-V2.csv
│ │ │ ├── Something-Something-V2_torch.csv
│ │ │ └── UCF101.csv
│ │ ├── Classifications
│ │ │ ├── CIFAR10.csv
│ │ │ ├── DenseNet.csv
│ │ │ ├── MobileNet.csv
│ │ │ ├── Others.csv
│ │ │ ├── Pruned_ResNet.csv
│ │ │ ├── ResNeSt.csv
│ │ │ ├── ResNet.csv
│ │ │ ├── ResNext.csv
│ │ │ ├── SqueezeNet.csv
│ │ │ └── VGG.csv
│ │ ├── Depths
│ │ │ ├── KITTI.csv
│ │ │ └── PoseNet.csv
│ │ ├── Detections
│ │ │ ├── MSCOCO_CenterNet.csv
│ │ │ ├── MSCOCO_Faster-RCNN.csv
│ │ │ ├── MSCOCO_SSD.csv
│ │ │ ├── MSCOCO_YOLO-v3.csv
│ │ │ ├── Pascal_CenterNet.csv
│ │ │ ├── Pascal_Faster-RCNN.csv
│ │ │ ├── Pascal_SSD.csv
│ │ │ └── Pascal_YOLO-v3.csv
│ │ ├── Poses
│ │ │ ├── MSCOCO_Alpha-Pose.csv
│ │ │ ├── MSCOCO_Mobile-Pose.csv
│ │ │ └── MSCOCO_Simple-Pose.csv
│ │ └── Segmentations
│ │ │ ├── IS_MS-COCO.csv
│ │ │ ├── SS_ADE20K.csv
│ │ │ ├── SS_Cityscapes.csv
│ │ │ ├── SS_MHP-V1.csv
│ │ │ ├── SS_MS-COCO.csv
│ │ │ └── SS_Pascal-VOC.csv
│ ├── depth.rst
│ ├── depth_mxnet.rst
│ ├── depth_torch.rst
│ ├── detection.rst
│ ├── detection_mxnet.rst
│ ├── detection_torch.rst
│ ├── index.rst
│ ├── pose.rst
│ ├── pose_mxnet.rst
│ ├── pose_torch.rst
│ ├── segmentation.rst
│ ├── segmentation_mxnet.rst
│ └── segmentation_torch.rst
├── slides.md
├── tutorials
│ ├── action_recognition
│ │ ├── README.txt
│ │ ├── decord_loader.py
│ │ ├── demo_custom.py
│ │ ├── demo_i3d_kinetics400.py
│ │ ├── demo_slowfast_kinetics400.py
│ │ ├── demo_tsn_ucf101.py
│ │ ├── dive_deep_i3d_kinetics400.py
│ │ ├── dive_deep_slowfast_kinetics400.py
│ │ ├── dive_deep_tsn_ucf101.py
│ │ ├── feat_custom.py
│ │ └── finetune_custom.py
│ ├── auto_module
│ │ ├── README.txt
│ │ ├── demo_auto_data.py
│ │ ├── demo_auto_detection.py
│ │ └── train_image_classifier_basic.py
│ ├── classification
│ │ ├── README.txt
│ │ ├── demo_cifar10.py
│ │ ├── demo_imagenet.py
│ │ ├── dive_deep_cifar10.py
│ │ ├── dive_deep_imagenet.py
│ │ └── transfer_learning_minc.py
│ ├── datasets
│ │ ├── .gitignore
│ │ ├── README.txt
│ │ ├── ade20k.py
│ │ ├── cityscapes.py
│ │ ├── det.py
│ │ ├── detection_custom.py
│ │ ├── hmdb51.py
│ │ ├── imagenet.py
│ │ ├── kinetics400.py
│ │ ├── mhp_v1.py
│ │ ├── mscoco.py
│ │ ├── mscoco_tracking.py
│ │ ├── otb2015.py
│ │ ├── pascal_voc.py
│ │ ├── recordio.py
│ │ ├── somethingsomethingv2.py
│ │ ├── ucf101.py
│ │ ├── vid.py
│ │ └── youtube_bb.py
│ ├── deployment
│ │ ├── .gitignore
│ │ ├── README.txt
│ │ ├── cpp_inference.py
│ │ ├── export_network.py
│ │ └── int8_inference.py
│ ├── depth
│ │ ├── README.txt
│ │ ├── demo_monodepth2.py
│ │ ├── test_monodepth2_posenet.py
│ │ ├── train_monodepth2.py
│ │ └── videos_monodepth2.py
│ ├── detection
│ │ ├── .gitignore
│ │ ├── README.txt
│ │ ├── demo_center_net.py
│ │ ├── demo_faster_rcnn.py
│ │ ├── demo_jetson.py
│ │ ├── demo_ssd.py
│ │ ├── demo_webcam.py
│ │ ├── demo_yolo.py
│ │ ├── finetune_detection.py
│ │ ├── skip_fintune.py
│ │ ├── train_faster_rcnn_voc.py
│ │ ├── train_ssd_advanced.py
│ │ ├── train_ssd_voc.py
│ │ └── train_yolo_v3.py
│ ├── distributed
│ │ ├── README.txt
│ │ └── distributed_slowfast.py
│ ├── index.rst
│ ├── instance
│ │ ├── .gitignore
│ │ ├── README.txt
│ │ ├── demo_mask_rcnn.py
│ │ └── train_mask_rcnn_coco.py
│ ├── pose
│ │ ├── README.txt
│ │ ├── cam_demo.py
│ │ ├── demo_alpha_pose.py
│ │ ├── demo_simple_pose.py
│ │ └── dive_deep_simple_pose.py
│ ├── segmentation
│ │ ├── .gitignore
│ │ ├── README.txt
│ │ ├── demo_deeplab.py
│ │ ├── demo_fcn.py
│ │ ├── demo_icnet.py
│ │ ├── demo_psp.py
│ │ ├── train_fcn.py
│ │ ├── train_psp.py
│ │ └── voc_sota.py
│ └── tracking
│ │ ├── README.txt
│ │ ├── demo_SiamRPN.py
│ │ ├── demo_smot.py
│ │ └── train_siamrpn.py
└── tutorials_torch
│ ├── action_recognition
│ ├── README.txt
│ ├── ddp_pytorch.py
│ ├── demo_i3d_kinetics400.py
│ ├── extract_feat.py
│ ├── finetune_custom.py
│ └── speed.py
│ └── index.rst
├── gluoncv
├── __init__.py
├── auto
│ ├── __init__.py
│ ├── data
│ │ ├── __init__.py
│ │ ├── auto_data.py
│ │ ├── data_zoo.py
│ │ └── dataset.py
│ ├── estimators
│ │ ├── __init__.py
│ │ ├── base_estimator.py
│ │ ├── center_net
│ │ │ ├── __init__.py
│ │ │ ├── center_net.py
│ │ │ └── default.py
│ │ ├── conf.py
│ │ ├── constants.py
│ │ ├── faster_rcnn
│ │ │ ├── __init__.py
│ │ │ ├── default.py
│ │ │ ├── faster_rcnn.py
│ │ │ └── utils.py
│ │ ├── image_classification
│ │ │ ├── __init__.py
│ │ │ ├── default.py
│ │ │ ├── image_classification.py
│ │ │ └── utils.py
│ │ ├── mask_rcnn
│ │ │ ├── __init__.py
│ │ │ ├── default.py
│ │ │ ├── mask_rcnn.py
│ │ │ └── utils.py
│ │ ├── ssd
│ │ │ ├── __init__.py
│ │ │ ├── default.py
│ │ │ ├── ssd.py
│ │ │ └── utils.py
│ │ ├── torch_image_classification
│ │ │ ├── __init__.py
│ │ │ ├── default.py
│ │ │ ├── torch_image_classification.py
│ │ │ └── utils
│ │ │ │ ├── __init__.py
│ │ │ │ ├── constants.py
│ │ │ │ ├── metrics.py
│ │ │ │ ├── model.py
│ │ │ │ ├── optimizer.py
│ │ │ │ ├── scheduler.py
│ │ │ │ └── utils.py
│ │ ├── utils.py
│ │ └── yolo
│ │ │ ├── __init__.py
│ │ │ ├── default.py
│ │ │ ├── utils.py
│ │ │ └── yolo.py
│ └── tasks
│ │ ├── __init__.py
│ │ ├── image_classification.py
│ │ ├── object_detection.py
│ │ └── utils.py
├── check.py
├── data
│ ├── __init__.py
│ ├── ade20k
│ │ ├── __init__.py
│ │ └── segmentation.py
│ ├── base.py
│ ├── batchify.py
│ ├── cityscapes.py
│ ├── dataloader.py
│ ├── hmdb51
│ │ ├── __init__.py
│ │ └── classification.py
│ ├── imagenet
│ │ ├── __init__.py
│ │ └── classification.py
│ ├── kinetics400
│ │ ├── __init__.py
│ │ └── classification.py
│ ├── kinetics700
│ │ ├── __init__.py
│ │ └── classification.py
│ ├── kitti
│ │ ├── __init__.py
│ │ ├── kitti_dataset.py
│ │ ├── kitti_utils.py
│ │ └── mono_dataset.py
│ ├── lst
│ │ ├── __init__.py
│ │ └── detection.py
│ ├── market1501
│ │ ├── __init__.py
│ │ ├── data_read.py
│ │ └── label_read.py
│ ├── mhp.py
│ ├── mixup
│ │ ├── __init__.py
│ │ └── detection.py
│ ├── mscoco
│ │ ├── __init__.py
│ │ ├── detection.py
│ │ ├── instance.py
│ │ ├── keypoints.py
│ │ ├── segmentation.py
│ │ └── utils.py
│ ├── otb
│ │ ├── __init__.py
│ │ └── tracking.py
│ ├── pascal_aug
│ │ ├── __init__.py
│ │ └── segmentation.py
│ ├── pascal_voc
│ │ ├── __init__.py
│ │ ├── detection.py
│ │ └── segmentation.py
│ ├── recordio
│ │ ├── __init__.py
│ │ └── detection.py
│ ├── sampler.py
│ ├── segbase.py
│ ├── somethingsomethingv2
│ │ ├── __init__.py
│ │ └── classification.py
│ ├── tracking_data
│ │ ├── __init__.py
│ │ └── track.py
│ ├── transforms
│ │ ├── __init__.py
│ │ ├── bbox.py
│ │ ├── block.py
│ │ ├── experimental
│ │ │ ├── __init__.py
│ │ │ ├── bbox.py
│ │ │ └── image.py
│ │ ├── image.py
│ │ ├── mask.py
│ │ ├── pose.py
│ │ ├── presets
│ │ │ ├── __init__.py
│ │ │ ├── alpha_pose.py
│ │ │ ├── center_net.py
│ │ │ ├── imagenet.py
│ │ │ ├── rcnn.py
│ │ │ ├── segmentation.py
│ │ │ ├── simple_pose.py
│ │ │ ├── ssd.py
│ │ │ └── yolo.py
│ │ ├── track.py
│ │ └── video.py
│ ├── ucf101
│ │ ├── __init__.py
│ │ └── classification.py
│ ├── video_custom
│ │ ├── __init__.py
│ │ └── classification.py
│ └── visdrone
│ │ ├── __init__.py
│ │ └── detection.py
├── loss.py
├── model_zoo
│ ├── __init__.py
│ ├── action_recognition
│ │ ├── __init__.py
│ │ ├── actionrec_inceptionv1.py
│ │ ├── actionrec_inceptionv3.py
│ │ ├── actionrec_resnetv1b.py
│ │ ├── actionrec_vgg16.py
│ │ ├── c3d.py
│ │ ├── i3d_inceptionv1.py
│ │ ├── i3d_inceptionv3.py
│ │ ├── i3d_resnet.py
│ │ ├── i3d_slow.py
│ │ ├── non_local.py
│ │ ├── p3d.py
│ │ ├── r2plus1d.py
│ │ └── slowfast.py
│ ├── alexnet.py
│ ├── alpha_pose
│ │ ├── __init__.py
│ │ ├── fast_pose.py
│ │ └── utils.py
│ ├── attention.py
│ ├── center_net
│ │ ├── __init__.py
│ │ ├── center_net.py
│ │ ├── deconv_dla.py
│ │ ├── deconv_resnet.py
│ │ ├── duc_mobilenet.py
│ │ └── target_generator.py
│ ├── cifarresnet.py
│ ├── cifarresnext.py
│ ├── cifarwideresnet.py
│ ├── danet.py
│ ├── deeplabv3.py
│ ├── deeplabv3_plus.py
│ ├── deeplabv3b_plus.py
│ ├── densenet.py
│ ├── dla.py
│ ├── fastscnn.py
│ ├── fcn.py
│ ├── googlenet.py
│ ├── hrnet.py
│ ├── icnet.py
│ ├── inception.py
│ ├── mobilenet.py
│ ├── mobilenetv3.py
│ ├── model_store.py
│ ├── model_zoo.py
│ ├── monodepthv2
│ │ ├── __init__.py
│ │ ├── depth_decoder.py
│ │ ├── layers.py
│ │ ├── monodepth2.py
│ │ ├── monodepth2_posenet.py
│ │ ├── pose_decoder.py
│ │ └── resnet_encoder.py
│ ├── nasnet.py
│ ├── pruned_resnet
│ │ ├── __init__.py
│ │ ├── resnet101_v1d_1.9x.json
│ │ ├── resnet101_v1d_2.2x.json
│ │ ├── resnet18_v1b_2.6x.json
│ │ ├── resnet50_v1d_1.8x.json
│ │ ├── resnet50_v1d_3.6x.json
│ │ ├── resnet50_v1d_5.9x.json
│ │ ├── resnet50_v1d_8.8x.json
│ │ └── resnetv1b_pruned.py
│ ├── pspnet.py
│ ├── quantized
│ │ ├── __init__.py
│ │ ├── mobilenet1.0_int8-symbol.json
│ │ ├── quantized.py
│ │ ├── resnet50_v1_int8-symbol.json
│ │ ├── ssd_300_vgg16_atrous_voc_int8-symbol.json
│ │ ├── ssd_512_mobilenet1.0_voc_int8-symbol.json
│ │ ├── ssd_512_resnet50_v1_voc_int8-symbol.json
│ │ └── ssd_512_vgg16_atrous_voc_int8-symbol.json
│ ├── rcnn
│ │ ├── __init__.py
│ │ ├── faster_rcnn
│ │ │ ├── __init__.py
│ │ │ ├── data_parallel.py
│ │ │ ├── doublehead_rcnn.py
│ │ │ ├── faster_rcnn.py
│ │ │ ├── predefined_models.py
│ │ │ └── rcnn_target.py
│ │ ├── mask_rcnn
│ │ │ ├── __init__.py
│ │ │ ├── data_parallel.py
│ │ │ ├── mask_rcnn.py
│ │ │ ├── predefined_models.py
│ │ │ └── rcnn_target.py
│ │ ├── rcnn.py
│ │ └── rpn
│ │ │ ├── __init__.py
│ │ │ ├── anchor.py
│ │ │ ├── bbox_clip.py
│ │ │ ├── cython_rpn_target.pyx
│ │ │ ├── proposal.py
│ │ │ ├── rpn.py
│ │ │ └── rpn_target.py
│ ├── residual_attentionnet.py
│ ├── resnest.py
│ ├── resnet.py
│ ├── resnetv1b.py
│ ├── resnext.py
│ ├── se_resnet.py
│ ├── segbase.py
│ ├── senet.py
│ ├── shufflenet.py
│ ├── siamrpn
│ │ ├── __init__.py
│ │ ├── siam_alexnet.py
│ │ ├── siam_net.py
│ │ ├── siam_rpn.py
│ │ └── siamrpn_tracker.py
│ ├── simple_pose
│ │ ├── __init__.py
│ │ ├── mobile_pose.py
│ │ ├── pose_target.py
│ │ └── simple_pose_resnet.py
│ ├── smot
│ │ ├── __init__.py
│ │ ├── general_detector.py
│ │ ├── motion_estimation.py
│ │ ├── presets.py
│ │ ├── smot_tracker.py
│ │ ├── ssd.py
│ │ ├── tracktors.py
│ │ └── utils.py
│ ├── squeezenet.py
│ ├── ssd
│ │ ├── __init__.py
│ │ ├── anchor.py
│ │ ├── presets.py
│ │ ├── resnet_v1b_ssd.py
│ │ ├── ssd.py
│ │ ├── target.py
│ │ └── vgg_atrous.py
│ ├── vgg.py
│ ├── wideresnet.py
│ ├── xception.py
│ └── yolo
│ │ ├── __init__.py
│ │ ├── darknet.py
│ │ ├── yolo3.py
│ │ └── yolo_target.py
├── nn
│ ├── __init__.py
│ ├── bbox.py
│ ├── block.py
│ ├── coder.py
│ ├── cython_bbox.pyx
│ ├── dropblock.py
│ ├── feature.py
│ ├── gn.py
│ ├── matcher.py
│ ├── predictor.py
│ ├── sampler.py
│ └── splat.py
├── torch
│ ├── __init__.py
│ ├── data
│ │ ├── __init__.py
│ │ ├── coot
│ │ │ └── dataloader.py
│ │ ├── detection
│ │ │ ├── __init__.py
│ │ │ ├── detection_dataset.py
│ │ │ ├── detection_utils.py
│ │ │ └── samplers
│ │ │ │ ├── __init__.py
│ │ │ │ ├── distributed_sampler.py
│ │ │ │ └── grouped_batch_sampler.py
│ │ ├── gluoncv_motion_dataset
│ │ │ ├── __init__.py
│ │ │ ├── dataset.py
│ │ │ ├── dataset_pre_processor.py
│ │ │ ├── ingestion
│ │ │ │ ├── __init__.py
│ │ │ │ ├── duplicate_remover.py
│ │ │ │ ├── filename_sanitizer.py
│ │ │ │ └── video_chunker.py
│ │ │ ├── io
│ │ │ │ ├── __init__.py
│ │ │ │ └── video_io.py
│ │ │ └── utils
│ │ │ │ ├── __init__.py
│ │ │ │ ├── ingestion_utils.py
│ │ │ │ └── serialization_utils.py
│ │ ├── pose
│ │ │ ├── __init__.py
│ │ │ └── dataset_pose.py
│ │ ├── registry
│ │ │ ├── __init__.py
│ │ │ ├── catalog.py
│ │ │ ├── metadata.py
│ │ │ └── mscoco.py
│ │ ├── structures
│ │ │ ├── __init__.py
│ │ │ ├── beziers.py
│ │ │ ├── boxes.py
│ │ │ ├── image_list.py
│ │ │ ├── instances.py
│ │ │ ├── keypoints.py
│ │ │ └── masks.py
│ │ ├── transforms
│ │ │ ├── instance_transforms
│ │ │ │ ├── __init__.py
│ │ │ │ ├── augmentation.py
│ │ │ │ ├── transform.py
│ │ │ │ └── transform_utils.py
│ │ │ ├── transforms.py
│ │ │ └── videotransforms
│ │ │ │ ├── functional.py
│ │ │ │ ├── stack_transforms.py
│ │ │ │ ├── tensor_transforms.py
│ │ │ │ ├── utils
│ │ │ │ ├── functional.py
│ │ │ │ └── images.py
│ │ │ │ ├── video_transforms.py
│ │ │ │ └── volume_transforms.py
│ │ └── video_cls
│ │ │ ├── __init__.py
│ │ │ ├── dataset_classification.py
│ │ │ └── multigrid_helper.py
│ ├── engine
│ │ ├── __init__.py
│ │ ├── config
│ │ │ ├── __init__.py
│ │ │ ├── action_recognition.py
│ │ │ ├── coot.py
│ │ │ └── directpose.py
│ │ └── launch.py
│ ├── model_zoo
│ │ ├── __init__.py
│ │ ├── action_recognition
│ │ │ ├── __init__.py
│ │ │ ├── actionrec_resnetv1b.py
│ │ │ ├── i3d_resnet.py
│ │ │ ├── i3d_slow.py
│ │ │ ├── ircsnv2.py
│ │ │ ├── non_local.py
│ │ │ ├── r2plus1dv1.py
│ │ │ ├── r2plus1dv2.py
│ │ │ ├── slowfast.py
│ │ │ └── tpn.py
│ │ ├── model_store.py
│ │ ├── model_zoo.py
│ │ ├── object_detection
│ │ │ ├── __init__.py
│ │ │ ├── fcos.py
│ │ │ └── model_utils.py
│ │ ├── pose
│ │ │ ├── __init__.py
│ │ │ ├── directpose.py
│ │ │ ├── directpose_outputs.py
│ │ │ └── directpose_resnet_fpn.py
│ │ └── video_language
│ │ │ └── coot_model.py
│ ├── nn
│ │ ├── __init__.py
│ │ ├── batch_norm.py
│ │ ├── deform_conv.py
│ │ ├── focal_loss.py
│ │ ├── group_norm.py
│ │ ├── iou_loss.py
│ │ ├── keypoint_loss.py
│ │ ├── nms.py
│ │ ├── shape_spec.py
│ │ └── smooth_l1_loss.py
│ └── utils
│ │ ├── __init__.py
│ │ ├── comm.py
│ │ ├── coot_utils.py
│ │ ├── eval_utils
│ │ ├── __init__.py
│ │ └── coco_eval.py
│ │ ├── loss.py
│ │ ├── lr_policy.py
│ │ ├── model_utils.py
│ │ ├── optimizer.py
│ │ ├── random.py
│ │ ├── task_utils
│ │ ├── __init__.py
│ │ ├── classification.py
│ │ ├── coot.py
│ │ └── pose.py
│ │ ├── tvm_utils
│ │ ├── __init__.py
│ │ └── nms.py
│ │ ├── utils.py
│ │ └── visualizer.py
└── utils
│ ├── __init__.py
│ ├── bbox.py
│ ├── block.py
│ ├── compress_json.py
│ ├── data
│ └── tracking.py
│ ├── download.py
│ ├── export_helper.py
│ ├── filesystem.py
│ ├── lr_scheduler.py
│ ├── metrics
│ ├── __init__.py
│ ├── accuracy.py
│ ├── coco_detection.py
│ ├── coco_instance.py
│ ├── coco_keypoints.py
│ ├── heatmap_accuracy.py
│ ├── rcnn.py
│ ├── segmentation.py
│ ├── tracking.py
│ └── voc_detection.py
│ ├── parallel.py
│ ├── plot_history.py
│ ├── random.py
│ ├── sync_loader_helper.py
│ ├── transforms.py
│ ├── version.py
│ └── viz
│ ├── __init__.py
│ ├── bbox.py
│ ├── image.py
│ ├── keypoints.py
│ ├── mask.py
│ ├── network.py
│ └── segmentation.py
├── scripts
├── README.md
├── action-recognition
│ ├── ARXIV.md
│ ├── CALIBRATION.md
│ ├── README.md
│ ├── configuration
│ │ ├── i3d_nl10_resnet101_v1_kinetics400.yaml
│ │ ├── i3d_nl10_resnet50_v1_kinetics400.yaml
│ │ ├── i3d_nl5_resnet101_v1_kinetics400.yaml
│ │ ├── i3d_nl5_resnet50_v1_kinetics400.yaml
│ │ ├── i3d_resnet101_v1_kinetics400.yaml
│ │ ├── i3d_resnet50_v1_custom.yaml
│ │ ├── i3d_resnet50_v1_feat.yaml
│ │ ├── i3d_resnet50_v1_kinetics400.yaml
│ │ ├── i3d_resnet50_v1_sthsthv2.yaml
│ │ ├── i3d_slow_resnet101_f16s4_kinetics400.yaml
│ │ ├── i3d_slow_resnet101_f16s4_kinetics700.yaml
│ │ ├── i3d_slow_resnet101_f32s2_kinetics400.yaml
│ │ ├── i3d_slow_resnet101_f8s8_kinetics400.yaml
│ │ ├── i3d_slow_resnet50_f16s4_kinetics400.yaml
│ │ ├── i3d_slow_resnet50_f32s2_custom.yaml
│ │ ├── i3d_slow_resnet50_f32s2_feat.yaml
│ │ ├── i3d_slow_resnet50_f32s2_kinetics400.yaml
│ │ ├── i3d_slow_resnet50_f8s8_kinetics400.yaml
│ │ ├── ircsn_v2_resnet152_f32s2_kinetics400.yaml
│ │ ├── r2plus1d_v1_resnet18_kinetics400.yaml
│ │ ├── r2plus1d_v1_resnet34_kinetics400.yaml
│ │ ├── r2plus1d_v1_resnet50_custom.yaml
│ │ ├── r2plus1d_v1_resnet50_feat.yaml
│ │ ├── r2plus1d_v1_resnet50_kinetics400.yaml
│ │ ├── r2plus1d_v2_resnet152_kinetics400.yaml
│ │ ├── resnet101_v1b_kinetics400.yaml
│ │ ├── resnet152_v1b_kinetics400.yaml
│ │ ├── resnet18_v1b_kinetics400.yaml
│ │ ├── resnet34_v1b_kinetics400.yaml
│ │ ├── resnet50_v1b_custom.yaml
│ │ ├── resnet50_v1b_feat.yaml
│ │ ├── resnet50_v1b_kinetics400.yaml
│ │ ├── resnet50_v1b_sthsthv2.yaml
│ │ ├── slowfast_16x8_resnet50_sthsthv2.yaml
│ │ ├── slowfast_4x16_resnet50_custom.yaml
│ │ ├── slowfast_4x16_resnet50_feat.yaml
│ │ ├── slowfast_4x16_resnet50_kinetics400.yaml
│ │ ├── slowfast_8x8_resnet101_kinetics400.yaml
│ │ ├── slowfast_8x8_resnet50_kinetics400.yaml
│ │ ├── tpn_resnet101_f16s4_kinetics400.yaml
│ │ ├── tpn_resnet101_f32s2_kinetics400.yaml
│ │ ├── tpn_resnet101_f8s8_kinetics400.yaml
│ │ ├── tpn_resnet50_f16s4_kinetics400.yaml
│ │ ├── tpn_resnet50_f32s2_custom.yaml
│ │ ├── tpn_resnet50_f32s2_feat.yaml
│ │ ├── tpn_resnet50_f32s2_kinetics400.yaml
│ │ └── tpn_resnet50_f8s8_kinetics400.yaml
│ ├── feat_extract.py
│ ├── feat_extract_pytorch.py
│ ├── get_flops.py
│ ├── get_fps.py
│ ├── inference.py
│ ├── test_ddp_pytorch.py
│ ├── test_recognizer.py
│ ├── train_ddp_pytorch.py
│ ├── train_ddp_shortonly_pytorch.py
│ └── train_recognizer.py
├── classification
│ ├── auto_classification
│ │ └── train_auto_classification.py
│ ├── cifar
│ │ ├── README.md
│ │ ├── demo_cifar10.py
│ │ ├── train_cifar10.py
│ │ └── train_mixup_cifar10.py
│ ├── finetune
│ │ ├── finetune_minc.py
│ │ └── prepare_minc.py
│ ├── fit_classification.py
│ └── imagenet
│ │ ├── README.md
│ │ ├── dali.py
│ │ ├── demo_imagenet.py
│ │ ├── imagenet_labels.txt
│ │ ├── test.sh
│ │ ├── train_horovod.py
│ │ ├── train_imagenet.py
│ │ ├── train_imagenet_nasnet.py
│ │ └── verify_pretrained.py
├── datasets
│ ├── README.md
│ ├── ade20k.py
│ ├── cityscapes.py
│ ├── coco_tracking.py
│ ├── hmdb51.py
│ ├── ilsvrc_det.py
│ ├── ilsvrc_vid.py
│ ├── imagenet.py
│ ├── imagenet_val_maps.pklz
│ ├── kinetics400.py
│ ├── lsun.py
│ ├── market1501.py
│ ├── mhp_v1.py
│ ├── mscoco.py
│ ├── otb2015.py
│ ├── pascal_voc.py
│ ├── somethingsomethingv2.py
│ ├── tiny_motorbike.py
│ └── ucf101.py
├── deployment
│ ├── README.md
│ ├── cpp-inference
│ │ ├── .gitignore
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── src
│ │ │ ├── clipp.hpp
│ │ │ ├── common.hpp
│ │ │ └── detect.cpp
│ └── export
│ │ ├── .gitignore
│ │ ├── README.md
│ │ └── export_pretrained.py
├── depth
│ ├── README.md
│ ├── demo.py
│ ├── options.py
│ ├── test.py
│ ├── test_pose.py
│ ├── train.py
│ └── trainer.py
├── detection
│ ├── README.md
│ ├── auto_detection
│ │ ├── train_auto_center_net.py
│ │ ├── train_auto_detection.py
│ │ ├── train_auto_faster_rcnn.py
│ │ ├── train_auto_ssd.py
│ │ └── train_auto_yolo.py
│ ├── center_net
│ │ ├── demo_center_net.py
│ │ ├── eval_center_net.py
│ │ ├── fit_center_net.py
│ │ └── train_center_net.py
│ ├── demo_webcam_run.py
│ ├── faster_rcnn
│ │ ├── README.md
│ │ ├── demo_faster_rcnn.py
│ │ ├── eval_faster_rcnn.py
│ │ ├── fit_faster_rcnn.py
│ │ ├── train_doublehead_rcnn.py
│ │ └── train_faster_rcnn.py
│ ├── ssd
│ │ ├── README.md
│ │ ├── demo_ssd.py
│ │ ├── eval_ssd.py
│ │ ├── fit_ssd.py
│ │ └── train_ssd.py
│ └── yolo
│ │ ├── README.md
│ │ ├── demo_yolo.py
│ │ ├── eval_yolo.py
│ │ ├── fit_yolo.py
│ │ ├── train_yolo.py
│ │ └── train_yolo3.py
├── gan
│ ├── cycle_gan
│ │ ├── README.md
│ │ ├── demo_cycle_gan.py
│ │ ├── download_dataset.py
│ │ ├── images.png
│ │ └── train_cgan.py
│ ├── srgan
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── demo_srgan.py
│ │ ├── download_dataset.py
│ │ ├── images.png
│ │ ├── pred.png
│ │ └── train_srgan.py
│ ├── stylegan
│ │ ├── README.md
│ │ ├── demo_stylegan.py
│ │ ├── model.py
│ │ ├── modules.py
│ │ ├── prepare_data.py
│ │ ├── sample.jpg
│ │ ├── sample_train.png
│ │ └── train.py
│ └── wgan
│ │ ├── README.md
│ │ ├── fake_samples_400000.png
│ │ ├── lossd.png
│ │ ├── lsun.py
│ │ └── train_wgan.py
├── instance
│ ├── README.md
│ └── mask_rcnn
│ │ ├── README.md
│ │ ├── benchmark
│ │ ├── README.md
│ │ └── ompi_bind_DGX1.sh
│ │ ├── demo_mask_rcnn.py
│ │ ├── eval_mask_rcnn.py
│ │ ├── fit_mask_rcnn.py
│ │ └── train_mask_rcnn.py
├── onnx
│ ├── README.md
│ ├── exported_models.csv
│ └── notebooks
│ │ ├── action-recognition
│ │ ├── c3d_kinetics400.ipynb
│ │ ├── i3d_inceptionv1_kinetics400.ipynb
│ │ ├── i3d_inceptionv3_kinetics400.ipynb
│ │ ├── i3d_nl10_resnet101_v1_kinetics400.ipynb
│ │ ├── i3d_nl10_resnet50_v1_kinetics400.ipynb
│ │ ├── i3d_nl5_resnet101_v1_kinetics400.ipynb
│ │ ├── i3d_nl5_resnet50_v1_kinetics400.ipynb
│ │ ├── i3d_resnet101_v1_kinetics400.ipynb
│ │ ├── i3d_resnet50_v1_hmdb51.ipynb
│ │ ├── i3d_resnet50_v1_kinetics400.ipynb
│ │ ├── i3d_resnet50_v1_sthsthv2.ipynb
│ │ ├── i3d_resnet50_v1_ucf101.ipynb
│ │ ├── inceptionv1_kinetics400.ipynb
│ │ ├── inceptionv3_kinetics400.ipynb
│ │ ├── inceptionv3_ucf101.ipynb
│ │ ├── p3d_resnet101_kinetics400.ipynb
│ │ ├── p3d_resnet50_kinetics400.ipynb
│ │ ├── r2plus1d_resnet18_kinetics400.ipynb
│ │ ├── r2plus1d_resnet34_kinetics400.ipynb
│ │ ├── r2plus1d_resnet50_kinetics400.ipynb
│ │ ├── resnet101_v1b_kinetics400.ipynb
│ │ ├── resnet152_v1b_kinetics400.ipynb
│ │ ├── resnet18_v1b_kinetics400.ipynb
│ │ ├── resnet34_v1b_kinetics400.ipynb
│ │ ├── resnet50_v1b_hmdb51.ipynb
│ │ ├── resnet50_v1b_kinetics400.ipynb
│ │ ├── resnet50_v1b_sthsthv2.ipynb
│ │ ├── slowfast_4x16_resnet50_kinetics400.ipynb
│ │ ├── slowfast_8x8_resnet101_kinetics400.ipynb
│ │ ├── slowfast_8x8_resnet50_kinetics400.ipynb
│ │ └── vgg16_ucf101.ipynb
│ │ ├── classification
│ │ ├── alexnet.ipynb
│ │ ├── darknet53.ipynb
│ │ ├── densenet121.ipynb
│ │ ├── densenet161.ipynb
│ │ ├── densenet169.ipynb
│ │ ├── densenet201.ipynb
│ │ ├── googlenet.ipynb
│ │ ├── inceptionv3.ipynb
│ │ ├── mobilenet0.25.ipynb
│ │ ├── mobilenet0.5.ipynb
│ │ ├── mobilenet0.75.ipynb
│ │ ├── mobilenet1.0.ipynb
│ │ ├── mobilenetv2_0.25.ipynb
│ │ ├── mobilenetv2_0.5.ipynb
│ │ ├── mobilenetv2_0.75.ipynb
│ │ ├── mobilenetv2_1.0.ipynb
│ │ ├── mobilenetv3_large.ipynb
│ │ ├── mobilenetv3_small.ipynb
│ │ ├── resnest101.ipynb
│ │ ├── resnest14.ipynb
│ │ ├── resnest200.ipynb
│ │ ├── resnest26.ipynb
│ │ ├── resnest269.ipynb
│ │ ├── resnest50.ipynb
│ │ ├── resnet101_v1.ipynb
│ │ ├── resnet101_v1d_0.73.ipynb
│ │ ├── resnet101_v1d_0.76.ipynb
│ │ ├── resnet101_v2.ipynb
│ │ ├── resnet152_v1.ipynb
│ │ ├── resnet152_v2.ipynb
│ │ ├── resnet18_v1.ipynb
│ │ ├── resnet18_v1b_0.89.ipynb
│ │ ├── resnet18_v2.ipynb
│ │ ├── resnet34_v1.ipynb
│ │ ├── resnet34_v2.ipynb
│ │ ├── resnet50_v1.ipynb
│ │ ├── resnet50_v1d_0.11.ipynb
│ │ ├── resnet50_v1d_0.37.ipynb
│ │ ├── resnet50_v1d_0.48.ipynb
│ │ ├── resnet50_v1d_0.86.ipynb
│ │ ├── resnet50_v2.ipynb
│ │ ├── resnext101_32x4d.ipynb
│ │ ├── resnext101_64x4d.ipynb
│ │ ├── resnext50_32x4d.ipynb
│ │ ├── se_resnext101_32x4d.ipynb
│ │ ├── se_resnext101_64x4d.ipynb
│ │ ├── se_resnext50_32x4d.ipynb
│ │ ├── senet_154.ipynb
│ │ ├── squeezenet1.0.ipynb
│ │ ├── squeezenet1.1.ipynb
│ │ ├── vgg11.ipynb
│ │ ├── vgg11_bn.ipynb
│ │ ├── vgg13.ipynb
│ │ ├── vgg13_bn.ipynb
│ │ ├── vgg16.ipynb
│ │ ├── vgg16_bn.ipynb
│ │ ├── vgg19.ipynb
│ │ ├── vgg19_bn.ipynb
│ │ └── xception.ipynb
│ │ ├── detection
│ │ ├── center_net_resnet101_v1b_coco.ipynb
│ │ ├── center_net_resnet101_v1b_voc.ipynb
│ │ ├── center_net_resnet18_v1b_coco.ipynb
│ │ ├── center_net_resnet18_v1b_voc.ipynb
│ │ ├── center_net_resnet50_v1b_coco.ipynb
│ │ ├── center_net_resnet50_v1b_voc.ipynb
│ │ ├── faster_rcnn_resnet50_v1b_voc.ipynb
│ │ ├── ssd_300_resnet34_v1b_coco.ipynb
│ │ ├── ssd_300_vgg16_atrous_coco.ipynb
│ │ ├── ssd_300_vgg16_atrous_voc.ipynb
│ │ ├── ssd_512_mobilenet1.0_coco.ipynb
│ │ ├── ssd_512_mobilenet1.0_voc.ipynb
│ │ ├── ssd_512_resnet50_v1_coco.ipynb
│ │ ├── ssd_512_resnet50_v1_voc.ipynb
│ │ ├── ssd_512_vgg16_atrous_coco.ipynb
│ │ ├── ssd_512_vgg16_atrous_voc.ipynb
│ │ ├── yolo3_darknet53_coco.ipynb
│ │ ├── yolo3_darknet53_voc.ipynb
│ │ ├── yolo3_mobilenet1.0_coco.ipynb
│ │ └── yolo3_mobilenet1.0_voc.ipynb
│ │ ├── pose
│ │ ├── alpha_pose_resnet101_v1b_coco.ipynb
│ │ ├── mobile_pose_mobilenet1.0.ipynb
│ │ ├── mobile_pose_mobilenetv2_1.0.ipynb
│ │ ├── mobile_pose_mobilenetv3_large.ipynb
│ │ ├── mobile_pose_mobilenetv3_small.ipynb
│ │ ├── mobile_pose_resnet18_v1b.ipynb
│ │ ├── mobile_pose_resnet50_v1b.ipynb
│ │ ├── simple_pose_resnet101_v1b.ipynb
│ │ ├── simple_pose_resnet101_v1d.ipynb
│ │ ├── simple_pose_resnet152_v1b.ipynb
│ │ ├── simple_pose_resnet152_v1d.ipynb
│ │ ├── simple_pose_resnet18_v1b.ipynb
│ │ ├── simple_pose_resnet50_v1b.ipynb
│ │ └── simple_pose_resnet50_v1d.ipynb
│ │ └── segmentation
│ │ ├── danet_resnet101_citys.ipynb
│ │ ├── danet_resnet50_citys.ipynb
│ │ ├── deeplab_resnest101_ade.ipynb
│ │ ├── deeplab_resnest200_ade.ipynb
│ │ ├── deeplab_resnest269_ade.ipynb
│ │ ├── deeplab_resnest50_ade.ipynb
│ │ ├── deeplab_resnet101_ade.ipynb
│ │ ├── deeplab_resnet101_citys.ipynb
│ │ ├── deeplab_resnet101_coco.ipynb
│ │ ├── deeplab_resnet101_voc.ipynb
│ │ ├── deeplab_resnet152_voc.ipynb
│ │ ├── deeplab_resnet50_ade.ipynb
│ │ ├── deeplab_resnet50_citys.ipynb
│ │ ├── deeplab_v3b_plus_wideresnet_citys.ipynb
│ │ ├── fcn_resnet101_ade.ipynb
│ │ ├── fcn_resnet101_coco.ipynb
│ │ ├── fcn_resnet101_voc.ipynb
│ │ └── fcn_resnet50_ade.ipynb
├── pose
│ ├── alpha_pose
│ │ ├── cam_demo.py
│ │ ├── coco.sh
│ │ ├── coco_dpg.sh
│ │ ├── demo.py
│ │ ├── train_alpha_pose.py
│ │ ├── validate.py
│ │ ├── validate.sh
│ │ └── validate_tools.py
│ ├── directpose
│ │ ├── .gitignore
│ │ ├── configurations
│ │ │ └── ms_aa_resnet50_4x_syncbn.yaml
│ │ ├── demo_directpose.py
│ │ ├── export_directpose_tvm.py
│ │ ├── train_ddp_directpose.py
│ │ └── tvm_evaluation
│ │ │ ├── evaluate_pose.py
│ │ │ └── pose_model.py
│ └── simple_pose
│ │ ├── README.md
│ │ ├── cam_demo.py
│ │ ├── coco.sh
│ │ ├── demo.py
│ │ ├── train_simple_pose.py
│ │ ├── validate.py
│ │ └── validate.sh
├── re-id
│ └── baseline
│ │ ├── README.md
│ │ ├── networks
│ │ ├── __init__.py
│ │ └── resnet.py
│ │ ├── test.py
│ │ └── train.py
├── segmentation
│ ├── README.md
│ ├── test.py
│ └── train.py
├── tracking
│ ├── siamrpn
│ │ ├── benchmark.py
│ │ ├── demo.py
│ │ ├── test.py
│ │ └── train.py
│ └── smot
│ │ ├── README.md
│ │ ├── demo.py
│ │ ├── eval.py
│ │ ├── helper.py
│ │ └── preprocess.py
└── vision-language
│ └── video-language
│ └── coot
│ ├── README.md
│ ├── configuration
│ └── youcook2.yaml
│ └── train_pytorch.py
├── setup.py
├── tests
├── __init__.py
├── auto
│ ├── __init__.py
│ ├── test_auto_data.py
│ ├── test_auto_estimators.py
│ ├── test_auto_tasks.py
│ ├── test_hybrid_auto_tasks.py
│ ├── test_torch_auto_estimators.py
│ └── test_torch_auto_tasks.py
├── lint.py
├── model_zoo
│ ├── __init__.py
│ ├── test_model_zoo.py
│ └── test_utils_export.py
├── model_zoo_torch
│ ├── __init__.py
│ ├── test_model_zoo_torch.py
│ └── test_tvm_torch_export.py
├── onnx
│ └── test_inference.py
├── py3_auto.yml
├── py3_mxnet.yml
├── py3_mxnet_ci.yml
├── py3_torch.yml
├── pylint.yml
├── pylintrc
└── unittests
│ ├── __init__.py
│ ├── common.py
│ ├── test_data_dataloader.py
│ ├── test_data_datasets.py
│ ├── test_data_transforms.py
│ ├── test_lr_scheduler.py
│ ├── test_nn.py
│ ├── test_utils_bbox.py
│ ├── test_utils_block.py
│ ├── test_utils_metric.py
│ ├── test_utils_parallel.py
│ ├── test_utils_segmentation.py
│ ├── test_utils_viz.py
│ └── tiny_datasets.py
└── tools
├── batch
├── README.md
├── batch-test.py
├── docker
│ ├── Dockerfile.cpu
│ ├── Dockerfile.gpu
│ ├── README.md
│ ├── docker_deploy.sh
│ └── gluon_cv_job.sh
├── submit-job.py
└── template
│ ├── launch-template-data-cpu.json
│ └── launch-template-data-gpu.json
└── docker
├── README.md
├── devel_entrypoint.sh
├── dockerfile
└── start_jupyter.sh
/.github/workflows/gpu_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | COVER_PACKAGE=$1
4 | TESTS_PATH=$2
5 |
6 | EFS=/mnt/efs  # shared mount holding cached pretrained weights
7 |
8 | mkdir -p ~/.mxnet/models
9 | for f in $EFS/.mxnet/models/*.params; do  # reuse cached weights instead of re-downloading
10 | ln -s $f ~/.mxnet/models/$(basename "$f")
11 | done
12 |
13 | export MXNET_CUDNN_AUTOTUNE_DEFAULT=0  # disable cuDNN autotune for stable test runtimes
14 | export MPLBACKEND=Agg                  # headless matplotlib
15 | export KMP_DUPLICATE_LIB_OK=TRUE       # tolerate duplicate OpenMP runtimes
16 |
17 | if [[ $TESTS_PATH == *"auto"* ]]; then
18 | echo "Installing autogluon.core and timm for auto module"
19 | pip3 install autogluon.core==0.2.0
20 | pip3 install timm==0.5.4
21 | fi
22 |
23 | nosetests --process-restartworker --with-timer --timer-ok 5 --timer-warning 20 -x --with-coverage --cover-package $COVER_PACKAGE -v $TESTS_PATH
24 |
--------------------------------------------------------------------------------
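Usage note: the script above takes the coverage package as $1 and the tests path as $2, so a typical CI invocation (the exact arguments here are an assumption) would be `bash .github/workflows/gpu_test.sh gluoncv tests/unittests`. Test paths matching *auto* additionally install pinned autogluon.core and timm before the run.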
/.github/workflows/stale.yml:
--------------------------------------------------------------------------------
1 | name: 'Close stale issues and PRs'
2 | on:
3 | schedule:
4 | - cron: '30 6 * * *'
5 |
6 | jobs:
7 | stale:
8 | runs-on: ubuntu-latest
9 | steps:
10 | - uses: actions/stale@v3
11 | with:
12 | days-before-stale: 90
13 | days-before-pr-stale: -1
14 | days-before-issue-close: 7
15 | days-before-pr-close: -1
16 | stale-issue-message: 'This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.'
17 | exempt-issue-labels: 'bug, call for contribution, doc, enhancement, good first issue, help wanted, mxnet, question'
18 | ascending: true
19 | operations-per-run: 300
20 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/.gitmodules
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include LICENSE
3 | graft gluoncv
4 | prune docs tests scripts
5 | global-exclude *.py[co]
6 |
--------------------------------------------------------------------------------
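These rules shape the source distribution: README.md and LICENSE are included, the entire gluoncv package tree is grafted in, the docs/tests/scripts trees are pruned, and compiled bytecode (*.pyc/*.pyo) is excluded. They take effect when building an sdist, e.g. via `python setup.py sdist` with the setup.py listed in the tree above.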
/Makefile:
--------------------------------------------------------------------------------
1 | ROOTDIR = $(CURDIR)
2 |
3 | lint: cpplint pylint
4 |
5 | cpplint:
6 | tests/lint.py gluoncv cpp src
7 |
8 | pylint:
9 | pylint --rcfile=$(ROOTDIR)/tests/pylintrc --ignore-patterns=".*\.so$$,.*\.dll$$,.*\.dylib$$" gluoncv
10 |
11 | doc: docs
12 |
13 | clean: clean_build
14 |
15 | clean_docs:
16 | make -C docs clean
17 |
18 | clean_build:
19 | rm -rf dist gluoncv.egg-info build || true
20 |
--------------------------------------------------------------------------------
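For reference, `make lint` chains the cpplint and pylint targets above, and `make clean_docs` delegates to the Sphinx Makefile under docs/.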
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | doxygen
2 | _build
3 | build
4 | gen_modules
5 | __MACOSX
6 | *.zip
7 | *.png
8 | *.jpg*
9 | *.jpeg
10 |
--------------------------------------------------------------------------------
/docs/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/.nojekyll
--------------------------------------------------------------------------------
/docs/README.txt:
--------------------------------------------------------------------------------
1 | The GluonCV documentation is generated with Sphinx and recommonmark.
2 |
3 | - pip install "sphinx>=1.5.5" sphinx-gallery sphinx_rtd_theme matplotlib Image recommonmark scipy
4 |
--------------------------------------------------------------------------------
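With the dependencies from the pip line above installed, the HTML docs are presumably built through the Sphinx Makefile in docs/ (the root Makefile's clean_docs target already delegates there), e.g. `make -C docs html`, assuming the standard Sphinx html target.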
/docs/_static/action-recognition.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/action-recognition.png
--------------------------------------------------------------------------------
/docs/_static/action_basketball_demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/action_basketball_demo.gif
--------------------------------------------------------------------------------
/docs/_static/apache2.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/docs/_static/assets/img/action_recognition_demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/action_recognition_demo.png
--------------------------------------------------------------------------------
/docs/_static/assets/img/background/img-01.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/background/img-01.jpg
--------------------------------------------------------------------------------
/docs/_static/assets/img/background/img-02.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/background/img-02.jpg
--------------------------------------------------------------------------------
/docs/_static/assets/img/background/img-03.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/background/img-03.jpg
--------------------------------------------------------------------------------
/docs/_static/assets/img/background/img-04.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/background/img-04.jpg
--------------------------------------------------------------------------------
/docs/_static/assets/img/background/img-05.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/background/img-05.jpg
--------------------------------------------------------------------------------
/docs/_static/assets/img/background/img-06.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/background/img-06.jpg
--------------------------------------------------------------------------------
/docs/_static/assets/img/background/img-07.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/background/img-07.jpg
--------------------------------------------------------------------------------
/docs/_static/assets/img/background/img-08.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/background/img-08.jpg
--------------------------------------------------------------------------------
/docs/_static/assets/img/background/img-09.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/background/img-09.jpg
--------------------------------------------------------------------------------
/docs/_static/assets/img/background/img-10.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/background/img-10.jpg
--------------------------------------------------------------------------------
/docs/_static/assets/img/background/img-11.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/background/img-11.jpg
--------------------------------------------------------------------------------
/docs/_static/assets/img/gluon_white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/gluon_white.png
--------------------------------------------------------------------------------
/docs/_static/assets/img/image-classification-demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/image-classification-demo.png
--------------------------------------------------------------------------------
/docs/_static/assets/img/instance_segmentation_demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/instance_segmentation_demo.png
--------------------------------------------------------------------------------
/docs/_static/assets/img/object-detection-demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/object-detection-demo.png
--------------------------------------------------------------------------------
/docs/_static/assets/img/pose_estimation_demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/pose_estimation_demo.png
--------------------------------------------------------------------------------
/docs/_static/assets/img/semantic-segmentation_demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/assets/img/semantic-segmentation_demo.png
--------------------------------------------------------------------------------
/docs/_static/assets/svg/play.svg:
--------------------------------------------------------------------------------
1 |
4 |
--------------------------------------------------------------------------------
/docs/_static/assets/svg/video-icon-dark.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/_static/assets/svg/video-icon.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/_static/classification-demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/classification-demo.png
--------------------------------------------------------------------------------
/docs/_static/css/custom.css:
--------------------------------------------------------------------------------
1 | td {
2 | word-wrap: break-word;
3 | }
4 |
5 | table {
6 | table-layout: fixed;
7 | width: 100%
8 | }
9 |
10 | .tag {
11 | color:grey;
12 | }
13 |
14 | .tag:after {
15 | content: ' (no mixup)';
16 | }
17 |
18 | .tsntag {
19 | color:grey;
20 | }
21 |
22 | .tsntag:after {
23 | content: ' (no TSN)';
24 | }
25 |
26 | .greytag {
27 | color:grey;
28 | }
29 |
30 | .gray {
31 | color:#808080;
32 | }
33 |
34 | .Logos {
35 | display: inline;
36 | margin: 1em;
37 | max-width: 120px;
38 | }
39 |
40 | .install {
41 | max-width: 800px;
42 | }
43 | .install .title {
44 | display: inline-block;
45 | min-width: 100px;
46 | text-transform: uppercase;
47 | font-size: 90%;
48 | color: #555;
49 | }
50 |
51 | .install .option {
52 | margin: 5px;
53 | }
54 |
55 | @media (max-width: 650px) {
56 | .install .option, .install .title {
57 | width: 90%;
58 | }
59 | .install .title {
60 | margin-top: 1em;
61 | }
62 | }
--------------------------------------------------------------------------------
/docs/_static/depth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/depth.png
--------------------------------------------------------------------------------
/docs/_static/gluon-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/gluon-logo.png
--------------------------------------------------------------------------------
/docs/_static/gluon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/gluon.ico
--------------------------------------------------------------------------------
/docs/_static/gluon_black.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/gluon_black.png
--------------------------------------------------------------------------------
/docs/_static/gluon_s2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/gluon_s2.png
--------------------------------------------------------------------------------
/docs/_static/gluon_white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/gluon_white.png
--------------------------------------------------------------------------------
/docs/_static/google_analytics.js:
--------------------------------------------------------------------------------
1 | (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
2 | (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
3 | m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
4 | })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
5 |
6 | ga('create', 'UA-96378503-9', 'auto');
7 | ga('send', 'pageview');
8 |
--------------------------------------------------------------------------------
/docs/_static/hidebib.js:
--------------------------------------------------------------------------------
1 | // adapted from: http://www.robots.ox.ac.uk/~vedaldi/assets/hidebib.js
2 | function hideallbibs()
3 | {
4 | var el = document.getElementsByTagName("div") ;
5 | for (var i = 0 ; i < el.length ; ++i) {
6 | if (el[i].className == "paper") {
7 | var bib = el[i].getElementsByTagName("pre") ;
8 | if (bib.length > 0) {
9 | bib[0].style.display = 'none';
10 | }
11 | }
12 | }
13 | }
14 |
15 | function togglebib(paperid)
16 | {
17 | var paper = document.getElementById(paperid) ;
18 | var bib = paper.getElementsByTagName('pre') ;
19 | if (bib.length > 0) {
20 | if (bib[0].style.display == 'none') {
21 | bib[0].style.display = 'block';
22 | } else {
23 | bib[0].style.display = 'none';
24 | }
25 | }
26 | }
27 |
28 | function toggleblock(blockId)
29 | {
30 | var block = document.getElementById(blockId);
31 | if (block.style.display == 'none') {
32 | block.style.display = 'block' ;
33 | } else {
34 | block.style.display = 'none' ;
35 | }
36 | }
37 |
38 | function hideblock(blockId)
39 | {
40 | var block = document.getElementById(blockId);
41 | block.style.display = 'none' ;
42 | }
43 |
--------------------------------------------------------------------------------
/docs/_static/image-classification.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/image-classification.png
--------------------------------------------------------------------------------
/docs/_static/imagenet_banner.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/imagenet_banner.jpeg
--------------------------------------------------------------------------------
/docs/_static/instance-segmentation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/instance-segmentation.png
--------------------------------------------------------------------------------
/docs/_static/logos/acer_byoc_grad_lockup_rgb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/logos/acer_byoc_grad_lockup_rgb.png
--------------------------------------------------------------------------------
/docs/_static/logos/acroquest_logo_cmyk_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/logos/acroquest_logo_cmyk_2.png
--------------------------------------------------------------------------------
/docs/_static/logos/embed.html:
--------------------------------------------------------------------------------
1 | .. raw:: html
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/docs/_static/logos/kumiawase_e_1_RGB.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/logos/kumiawase_e_1_RGB.jpg
--------------------------------------------------------------------------------
/docs/_static/logos/pioneer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/logos/pioneer.png
--------------------------------------------------------------------------------
/docs/_static/object-detection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/object-detection.png
--------------------------------------------------------------------------------
/docs/_static/plot_help.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/plot_help.png
--------------------------------------------------------------------------------
/docs/_static/semantic-segmentation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/semantic-segmentation.png
--------------------------------------------------------------------------------
/docs/_static/short_demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/short_demo.gif
--------------------------------------------------------------------------------
/docs/_static/smot_demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/smot_demo.gif
--------------------------------------------------------------------------------
/docs/_static/smot_multi_demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/smot_multi_demo.gif
--------------------------------------------------------------------------------
/docs/_static/tracking_demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/docs/_static/tracking_demo.gif
--------------------------------------------------------------------------------
/docs/_templates/layout.html:
--------------------------------------------------------------------------------
1 | {% extends "!layout.html" %}
2 |
3 | {%- block extrahead %}
4 |
5 | {% endblock %}
6 |
--------------------------------------------------------------------------------
/docs/api/data.batchify.rst:
--------------------------------------------------------------------------------
1 | gluoncv.data.batchify
2 | =====================
3 |
4 | Batchify functions can be used to transform a dataset into mini-batches that can be processed efficiently.
5 |
6 | In computer vision tasks, images/labels often come with different shapes. GluonCV provides a collection of
7 | convenient batchify functions suitable for various situations.
8 |
9 | .. currentmodule:: gluoncv.data.batchify
10 |
11 | Batch Loaders
12 | -------------
13 |
14 | .. autosummary::
15 | :nosignatures:
16 |
17 | Stack
18 | Pad
19 | Append
20 | Tuple
21 |
22 | API Reference
23 | -------------
24 |
25 | .. automodule:: gluoncv.data.batchify
26 | :members:
27 |
--------------------------------------------------------------------------------
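As a concrete illustration of the batch loaders listed above, a minimal sketch; it assumes the documented Stack/Pad/Tuple semantics, and the toy shapes are invented:

    import numpy as np
    from gluoncv.data.batchify import Pad, Stack, Tuple

    # One batchify function per element of an (image, label) sample:
    # Stack collates the fixed-shape images, Pad right-pads the
    # variable-length object labels with -1 along axis 0.
    batchify_fn = Tuple(Stack(), Pad(pad_val=-1))

    samples = [
        (np.zeros((3, 4, 4)), np.array([[0., 10., 10., 20., 20.]])),   # one box
        (np.zeros((3, 4, 4)), np.array([[1., 5., 5., 15., 15.],
                                        [0., 2., 2., 8., 8.]])),       # two boxes
    ]
    images, labels = batchify_fn(samples)
    print(images.shape, labels.shape)   # (2, 3, 4, 4) and (2, 2, 5)

The same batchify_fn can then be handed to mxnet.gluon.data.DataLoader through its batchify_fn argument, which is how the detection tutorials batch images with differing numbers of ground-truth boxes.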
/docs/api/index.rst:
--------------------------------------------------------------------------------
1 | API Reference
2 | =============
3 |
4 | .. toctree::
5 | :maxdepth: 1
6 |
7 | data.datasets
8 | data.batchify
9 | data.transforms
10 | model_zoo
11 | nn
12 | loss
13 | utils
14 |
--------------------------------------------------------------------------------
/docs/api/loss.rst:
--------------------------------------------------------------------------------
1 | gluoncv.loss
2 | ============
3 |
4 | .. currentmodule:: gluoncv.loss
5 |
6 | .. automodule:: gluoncv.loss
7 |
8 | .. autosummary::
9 | :nosignatures:
10 |
11 | FocalLoss
12 |
13 | SSDMultiBoxLoss
14 |
15 |
16 | API Reference
17 | -------------
18 |
19 | .. automodule:: gluoncv.loss
20 | :members:
21 |
--------------------------------------------------------------------------------
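A short sketch of using FocalLoss from the summary above; the constructor arguments shown (num_class for sparse labels, alpha, gamma) reflect the GluonCV signature as I understand it, and the toy tensors are invented:

    import mxnet as mx
    from gluoncv.loss import FocalLoss

    # Focal loss down-weights well-classified examples via the gamma term,
    # so training focuses on hard examples.
    loss_fn = FocalLoss(num_class=4, alpha=0.25, gamma=2)
    pred = mx.nd.random.uniform(shape=(2, 4))   # scores for 2 samples, 4 classes
    label = mx.nd.array([1, 3])                 # sparse class indices
    print(loss_fn(pred, label))                 # one loss value per sample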
/docs/api/utils.rst:
--------------------------------------------------------------------------------
1 | .. role:: hidden
2 | :class: hidden-section
3 |
4 | gluoncv.utils
5 | =================
6 | We implement a broad range of utility functions covering visualization, file handling, downloads, and training helpers.
7 |
8 | .. currentmodule:: gluoncv.utils
9 |
10 | Visualization
11 | -------------
12 |
13 | .. currentmodule:: gluoncv.utils.viz
14 |
15 | .. autosummary::
16 | :nosignatures:
17 |
18 | plot_image
19 |
20 | get_color_pallete
21 |
22 | plot_bbox
23 |
24 | expand_mask
25 |
26 | plot_mask
27 |
28 | plot_network
29 |
30 | Miscellaneous
31 | -------------
32 |
33 | .. currentmodule:: gluoncv.utils
34 |
35 | .. autosummary::
36 | :nosignatures:
37 |
38 | download
39 |
40 | makedirs
41 |
42 | .. currentmodule:: gluoncv.utils.random
43 |
44 | .. autosummary::
45 | :nosignatures:
46 |
47 | seed
48 |
49 | Training Helpers
50 | ----------------
51 |
52 | .. currentmodule:: gluoncv.utils
53 |
54 | .. autosummary::
55 | :nosignatures:
56 |
57 | LRScheduler
58 |
59 | set_lr_mult
60 |
61 | Bounding Box Utils
62 | ------------------
63 |
64 | .. currentmodule:: gluoncv.utils
65 |
66 | .. autosummary::
67 | :nosignatures:
68 |
69 | bbox_iou
70 |
71 |
72 | API Reference
73 | -------------
74 |
75 | .. automodule:: gluoncv.utils
76 | :members:
77 | :imported-members:
78 |
79 |
80 | .. automodule:: gluoncv.utils.viz
81 | :members:
82 | :imported-members:
83 |
84 | .. automodule:: gluoncv.utils.metrics
85 | :members:
86 | :imported-members:
87 |
--------------------------------------------------------------------------------
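To make the visualization summary above concrete, a minimal sketch of plot_bbox; the placeholder image and box coordinates are invented:

    import numpy as np
    from matplotlib import pyplot as plt
    from gluoncv.utils import viz

    img = np.zeros((240, 320, 3), dtype=np.uint8)   # placeholder (H, W, C) image
    bboxes = np.array([[40., 50., 180., 200.]])     # xmin, ymin, xmax, ymax
    scores = np.array([0.9])
    labels = np.array([0])
    ax = viz.plot_bbox(img, bboxes, scores=scores, labels=labels,
                       class_names=['person'])
    plt.show()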
/docs/build.yml:
--------------------------------------------------------------------------------
1 | name: gluon_vision_docs
2 | channels:
3 | - pytorch
4 | - conda-forge
5 | - defaults
6 | dependencies:
7 | - python=3.7
8 | - sphinx>=1.5.5
9 | - scipy
10 | - numpy
11 | - matplotlib
12 | - sphinx_rtd_theme
13 | - pip=20.2
14 | - pytorch=1.6.0
15 | - torchvision=0.7.0
16 | - pip:
17 | - https://github.com/mli/mx-theme/tarball/0.3.1
18 | - sphinx-gallery
19 | - https://repo.mxnet.io/dist/python/cu100mkl/mxnet_cu100mkl-1.6.0b20191010-py2.py3-none-manylinux1_x86_64.whl
20 | # - guzzle_sphinx_theme
21 | - recommonmark
22 | - Image
23 | - awscli
24 | - tqdm
25 | - requests
26 | - Pillow
27 | - opencv-python
28 | - portalocker
29 | - decord
30 | - cython
31 | - pycocotools
32 | - autocfg
33 | - yacs
34 |
--------------------------------------------------------------------------------
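This environment file pins the doc-build toolchain (Sphinx, PyTorch 1.6, a nightly MXNet cu100mkl wheel, decord, pycocotools, etc.); it is presumably consumed with `conda env create -f docs/build.yml` followed by activating the gluon_vision_docs environment.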
/docs/how_to/index.rst:
--------------------------------------------------------------------------------
1 | Community
2 | =========
3 |
4 | .. toctree::
5 | :maxdepth: 1
6 |
7 | support
8 | contribute
9 |
--------------------------------------------------------------------------------
/docs/how_to/support.rst:
--------------------------------------------------------------------------------
1 | Community
2 | =========
3 |
4 | .. card::
5 | :title: Community
6 | :is_head: true
7 | :link: https://www.apache.org/foundation/policies/conduct
8 |
9 | Welcome to the GluonCV community. We strive to foster a collaborative and welcoming community. We
10 | expect all members to follow the `code of conduct <https://www.apache.org/foundation/policies/conduct>`__.
11 |
12 |
13 | .. container:: cards
14 |
15 | .. card::
16 | :title: Github Issues
17 | :link: https://github.com/dmlc/gluon-cv/issues
18 |
19 | Feature requests, bug reports, design and roadmap discussion.
20 |
21 |
22 | .. card::
23 | :title: Github Projects
24 | :link: https://github.com/dmlc/gluon-cv/projects
25 |
26 | Active roadmaps, and current and past projects.
27 |
28 | .. card::
29 | :title: GluonCV Slack Channel
30 | :link: https://apache-mxnet.slack.com/messages/CED6Y10E5
31 |
32 | #gluon-cv Slack channel. Click the `sign-up link `_ to register.
33 |
34 | .. card::
35 | :title: Discuss Forum
36 | :link: https://discuss.mxnet.io/c/gluon
37 |
38 | General discussions, usage experiences, and questions.
39 |
40 |
41 | Interested in contributing to GluonCV? Check our `contribution guide `_.
42 |
--------------------------------------------------------------------------------
/docs/install.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | ------------
3 |
4 | .. Ignore prerequisites to make the index page concise, which will be shown at
5 | the install page
6 |
7 | .. raw:: html
8 |
9 |
10 |
11 | .. include:: install/install-include.rst
12 |
13 | .. raw:: html
14 |
15 |
16 |
17 |
18 | Check :doc:`install/install-more` for more installation instructions and options.
19 |
--------------------------------------------------------------------------------
/docs/install/install-more.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | ------------
3 |
4 | .. include:: install-include.rst
5 |
6 | .. raw:: html
7 |
8 |
9 |
10 |
11 |
12 | Next steps
13 | ----------
14 |
15 | - Check out `beta.mxnet.io `_ for more options such as ARM devices and Docker images.
16 | - `Verify your MXNet installation `_
17 | - `Configure MXNet environment variables `_
18 | - For new users: `60-minute Gluon crash course `_
19 | - For experienced users: `MXNet Guides. `_
20 | - For advanced users: `MXNet API `_ and `GluonCV API <../api/index.html>`_.
21 |
22 | ..
23 |     TODO: write a new directive `no-local-toc` for it
24 |
25 | .. raw:: html
26 |
27 |
28 |
--------------------------------------------------------------------------------
/docs/model_zoo/classification_torch.rst:
--------------------------------------------------------------------------------
1 | Models implemented using PyTorch will be added later. Please check out our MXNet implementation instead.
2 |
--------------------------------------------------------------------------------
/docs/model_zoo/csv_tables/Action_Recognitions/HMDB51.csv:
--------------------------------------------------------------------------------
1 | Name,Pretrained,Segments,Clip Length,Top-1,Hashtag,Train Command,Train Log
2 | resnet50_v1b_hmdb51 [3]_,ImageNet,3,1,55.2,682591e2,`shell script `_,`log `_
3 | resnet50_v1b_hmdb51 [1]_,ImageNet,1,1,52.2,ba66ee4b,`shell script `_,`log `_
4 | i3d_resnet50_v1_hmdb51 [4]_,ImageNet,1,32 (64/2),48.5,0d0ad559,`shell script `_,`log `_
5 | i3d_resnet50_v1_hmdb51 [4]_,"ImageNet, K400",1,32 (64/2),70.9,2ec6bf01,`shell script `_,`log `_
6 |
--------------------------------------------------------------------------------
/docs/model_zoo/csv_tables/Action_Recognitions/Kinetics700.csv:
--------------------------------------------------------------------------------
1 | Name,Pretrained,Segments,Clip Length,Top-1,Hashtag,Train Command,Train Log
2 | i3d_slow_resnet101_f16s4_kinetics700 [8]_,Scratch,1,16 (64/4),67.65,299b1d9d,NA,NA
--------------------------------------------------------------------------------
/docs/model_zoo/csv_tables/Action_Recognitions/Kinetics700_torch.csv:
--------------------------------------------------------------------------------
1 | Name,Pretrained,Segment,Clip Length,Top-1,Hashtag,Config
2 | i3d_slow_resnet101_f16s4_kinetics700 [8]_,Scratch,1,16 (64/4),67.65,b5be1a2e,`config `_
--------------------------------------------------------------------------------
/docs/model_zoo/csv_tables/Action_Recognitions/Something-Something-V2.csv:
--------------------------------------------------------------------------------
1 | Name,Pretrained,Segments,Clip Length,Top-1,Hashtag,Train Command,Train Log
2 | resnet50_v1b_sthsthv2 [3]_,ImageNet,8,1,35.5,80ee0c6b,`shell script `_,`log `_
3 | i3d_resnet50_v1_sthsthv2 [4]_,ImageNet,1,16 (32/2),50.6,01961e4c,`shell script `_,`log `_
4 |
--------------------------------------------------------------------------------
/docs/model_zoo/csv_tables/Action_Recognitions/Something-Something-V2_torch.csv:
--------------------------------------------------------------------------------
1 | Name,Pretrained,Segment,Clip Length,Top-1,Hashtag,Config
2 | resnet50_v1b_sthsthv2 [3]_,ImageNet,8,1,35.16,cbb9167b,`config `_
3 | i3d_resnet50_v1_sthsthv2 [4]_,ImageNet,1,16 (32/2),49.61,e975d989,`config `_
--------------------------------------------------------------------------------
/docs/model_zoo/csv_tables/Classifications/DenseNet.csv:
--------------------------------------------------------------------------------
1 | Model,Top-1,Top-5,Hashtag,Training Command,Training Log
2 | DenseNet121 [7]_,74.97,92.25,f27dbf2d,,
3 | DenseNet161 [7]_,77.70,93.80,b6c8a957,,
4 | DenseNet169 [7]_,76.17,93.17,2603f878,,
5 | DenseNet201 [7]_,77.32,93.62,1cdbc116,,
6 |
--------------------------------------------------------------------------------
/docs/model_zoo/csv_tables/Classifications/Pruned_ResNet.csv:
--------------------------------------------------------------------------------
1 | Model,Top-1,Top-5,Hashtag,Speedup (to original ResNet)
2 | resnet18_v1b_0.89,67.2,87.45,54f7742b,2x
3 | resnet50_v1d_0.86,78.02,93.82,a230c33f,1.68x
4 | resnet50_v1d_0.48,74.66,92.34,0d3e69bb,3.3x
5 | resnet50_v1d_0.37,70.71,89.74,9982ae49,5.01x
6 | resnet50_v1d_0.11,63.22,84.79,6a25eece,8.78x
7 | resnet101_v1d_0.76,79.46,94.69,a872796b,1.8x
8 | resnet101_v1d_0.73,78.89,94.48,712fccb1,2.02x
9 |
--------------------------------------------------------------------------------
/docs/model_zoo/csv_tables/Classifications/ResNeSt.csv:
--------------------------------------------------------------------------------
1 | Model,Top-1,Top-5,Hashtag,Training Command,Training Log
2 | ResNeSt14 [17]_,75.75,92.70,7e0b0cae,`shell script `_,`log `_
3 | ResNeSt26 [17]_,78.68,94.38,36459074,`shell script `_,
4 | ResNeSt50 [17]_,81.04,95.42,bcfefe1d,`shell script `_,`log `_
5 | ResNeSt101 [17]_,82.83,96.42,5da943b3,`shell script `_,`log `_
6 | ResNeSt200 [17]_,83.86,96.86,0c5d117d,`shell script `_,`log `_
7 | ResNeSt269 [17]_,84.53,96.98,11ae7f5d,`shell script `_,`log `_
8 |
--------------------------------------------------------------------------------
/docs/model_zoo/csv_tables/Classifications/SqueezeNet.csv:
--------------------------------------------------------------------------------
1 | Model,Top-1,Top-5,Hashtag,Training Command,Training Log
2 | SqueezeNet1.0 [10]_,56.11,79.09,264ba497,,
3 | SqueezeNet1.1 [10]_,54.96,78.17,33ba0f93,,
4 |
--------------------------------------------------------------------------------
/docs/model_zoo/csv_tables/Classifications/VGG.csv:
--------------------------------------------------------------------------------
1 | Model,Top-1,Top-5,Hashtag,Training Command,Training Log
2 | VGG11 [9]_,66.62,87.34,dd221b16,,
3 | VGG13 [9]_,67.74,88.11,6bc5de58,,
4 | VGG16 [9]_,73.23,91.31,e660d456,`shell script `_,`log `_
5 | VGG19 [9]_,74.11,91.35,ad2f660d,`shell script `_,`log `_
6 | VGG11_bn [9]_,68.59,88.72,ee79a809,,
7 | VGG13_bn [9]_,68.84,88.82,7d97a06c,,
8 | VGG16_bn [9]_,73.10,91.76,7f01cf05,`shell script `_,`log `_
9 | VGG19_bn [9]_,74.33,91.85,f360b758,`shell script `_,`log `_
10 |
--------------------------------------------------------------------------------
/docs/model_zoo/csv_tables/Depths/KITTI.csv:
--------------------------------------------------------------------------------
1 | Name,Modality,Resolution,Abs. Rel. Error,delta < 1.25,Hashtag,Train Command,Train Log
2 | monodepth2_resnet18_kitti_stereo_640x192 [1]_,Stereo,640x192,0.114,0.860,83eea4a9,`shell script `_,`log `_
3 | monodepth2_resnet18_kitti_mono_640x192 [1]_,Mono,640x192,0.121,0.858,c881771d,`shell script `_,`log `_
4 | monodepth2_resnet18_kitti_mono_stereo_640x192 [1]_,Mono + Stereo,640x192,0.109,0.872,9515c219,`shell script `_,`log `_
5 |
--------------------------------------------------------------------------------
/docs/model_zoo/csv_tables/Depths/PoseNet.csv:
--------------------------------------------------------------------------------
1 | Name,Modality,Resolution,Sequence 09,Sequence 10
2 | monodepth2_resnet18_posenet_kitti_mono_640x192 [1]_,Mono,640x192,0.021±0.012,0.018±0.011
3 | monodepth2_resnet18_posenet_kitti_mono_stereo_640x192 [1]_,Mono + Stereo,640x192,0.021±0.010,0.016±0.010
4 |
--------------------------------------------------------------------------------
/docs/model_zoo/csv_tables/Detections/MSCOCO_SSD.csv:
--------------------------------------------------------------------------------
1 | Model,Box AP,Training Command,Training Log
2 | ssd_300_vgg16_atrous_coco [1]_,25.1/42.9/25.8,`shell script `_,`log `_
3 | ssd_512_vgg16_atrous_coco [1]_,28.9/47.9/30.6,`shell script `_,`log `_
4 | ssd_300_resnet34_v1b_coco [1]_,25.1/41.7/26.2,`shell script `_,`log `_
5 | ssd_512_resnet50_v1_coco [1]_,30.6/50.0/32.2,`shell script `_,`log `_
6 | ssd_512_mobilenet1.0_coco [1]_,21.7/39.2/21.3,`shell script `_,`log `_
--------------------------------------------------------------------------------
/docs/model_zoo/csv_tables/Detections/Pascal_Faster-RCNN.csv:
--------------------------------------------------------------------------------
1 | Model,mAP,Training Command,Training log
2 | faster_rcnn_resnet50_v1b_voc [2]_,78.3,`shell script `_,`log `_
3 |
--------------------------------------------------------------------------------
/docs/model_zoo/csv_tables/Detections/Pascal_SSD.csv:
--------------------------------------------------------------------------------
1 | Model,mAP,Training Command,Training log
2 | ssd_300_vgg16_atrous_voc [1]_,77.6,`shell script `_,`log `_
3 | ssd_300_vgg16_atrous_voc_int8* [1]_,77.46,,
4 | ssd_512_vgg16_atrous_voc [1]_,79.2,`shell script `_,`log `_
5 | ssd_512_vgg16_atrous_voc_int8* [1]_,78.39,,
6 | ssd_512_resnet50_v1_voc [1]_,80.1,`shell script `_,`log `_
7 | ssd_512_resnet50_v1_voc_int8* [1]_,80.16,,
8 | ssd_512_mobilenet1.0_voc [1]_,75.4,`shell script `_,`log `_
9 | ssd_512_mobilenet1.0_voc_int8* [1]_,75.04,,
10 |
--------------------------------------------------------------------------------
/docs/model_zoo/csv_tables/Detections/Pascal_YOLO-v3.csv:
--------------------------------------------------------------------------------
1 | Model,mAP,Training Command,Training log
2 | yolo3_darknet53_voc [3]_ :gray:`(320x320)`,79.3,`shell script `_,`log `_
3 | yolo3_darknet53_voc [3]_ :gray:`(416x416)`,81.5,`shell script `_,`log `_
4 | yolo3_mobilenet1.0_voc [3]_ :gray:`(320x320)`,73.9,`shell script `_,`log `_
5 | yolo3_mobilenet1.0_voc [3]_ :gray:`(416x416)`,75.8,`shell script `_,`log `_
6 |
--------------------------------------------------------------------------------
/docs/model_zoo/csv_tables/Poses/MSCOCO_Alpha-Pose.csv:
--------------------------------------------------------------------------------
1 | Model,OKS AP,OKS AP (with flip),Hashtag,Training Command,Training log
2 | alpha_pose_resnet101_v1b_coco [2]_,74.2/91.6/80.7,76.7/92.6/82.9,de56b871,,
3 |
--------------------------------------------------------------------------------
/docs/model_zoo/csv_tables/Segmentations/SS_MHP-V1.csv:
--------------------------------------------------------------------------------
1 | Name,Method,pixAcc,mIoU,Command,log
2 | icnet_resnet50_mhpv1,ICNet [5]_,90.5,44.5,`shell script `_,`log `_
3 |
--------------------------------------------------------------------------------
/docs/model_zoo/csv_tables/Segmentations/SS_MS-COCO.csv:
--------------------------------------------------------------------------------
1 | Name,Method,pixAcc,mIoU,Command,log
2 | fcn_resnet101_coco,FCN [2]_,92.2,66.2,`shell script `_,`log `_
3 | psp_resnet101_coco,PSP [3]_,92.4,70.4,`shell script `_,`log `_
4 | deeplab_resnet101_coco,DeepLabV3 [4]_,92.5,70.4,`shell script `_,`log `_
5 |
--------------------------------------------------------------------------------
/docs/model_zoo/csv_tables/Segmentations/SS_Pascal-VOC.csv:
--------------------------------------------------------------------------------
1 | Name,Method,pixAcc,mIoU,Command,log
2 | fcn_resnet101_voc,FCN [2]_,N/A,83.6_,`shell script `_,`log `_
3 | psp_resnet101_voc,PSP [3]_,N/A,85.1_,`shell script `_,`log `_
4 | deeplab_resnet101_voc,DeepLabV3 [4]_,N/A,86.2_,`shell script `_,`log `_
5 | deeplab_resnet152_voc,DeepLabV3 [4]_,N/A,86.7_,`shell script `_,`log `_
6 |
--------------------------------------------------------------------------------
/docs/model_zoo/depth.rst:
--------------------------------------------------------------------------------
1 | .. _gluoncv-model-zoo-depth:
2 |
3 | Depth Prediction
4 | ================
5 |
6 | .. role:: framework
7 | :class: framework
8 | .. role:: select
9 | :class: selected framework
10 |
11 | .. container:: Frameworks
12 |
13 | .. container:: framework-group
14 |
15 | :framework:`MXNet`
16 | :framework:`Pytorch`
17 |
18 | .. rst-class:: MXNet
19 |
20 | MXNet
21 | *************
22 |
23 | .. include:: depth_mxnet.rst
24 |
25 | .. rst-class:: Pytorch
26 |
27 | PyTorch
28 | *************
29 |
30 | .. include:: depth_torch.rst
31 |
32 | Reference
33 | *************
34 |
35 | .. [1] Clement Godard, Oisin Mac Aodha, Michael Firman and Gabriel J. Brostow. \
36 | "Digging into Self-Supervised Monocular Depth Prediction." \
37 | Proceedings of the International Conference on Computer Vision (ICCV), 2019.
38 |
--------------------------------------------------------------------------------
/docs/model_zoo/depth_torch.rst:
--------------------------------------------------------------------------------
1 | Models implemented using PyTorch will be added later. Please check out our MXNet implementation instead.
2 |
--------------------------------------------------------------------------------
/docs/model_zoo/detection_torch.rst:
--------------------------------------------------------------------------------
1 | Models implemented using PyTorch will be added later. Please check out our MXNet implementation instead.
2 |
--------------------------------------------------------------------------------
/docs/model_zoo/index.rst:
--------------------------------------------------------------------------------
1 | Model Zoo
2 | =========
3 |
4 | .. container:: cards
5 |
6 | .. card::
7 | :title: Classification
8 | :link: classification.html
9 |
10 | Select your models from charts and tables of the classification models
11 |
12 | .. card::
13 | :title: Object Detection
14 | :link: detection.html
15 |
16 | Select your models from charts and tables of the detection models
17 |
18 | .. card::
19 | :title: Segmentation
20 | :link: segmentation.html
21 |
22 | Select your models from charts and tables of the segmentation models
23 |
24 | .. card::
25 | :title: Pose Estimation
26 | :link: pose.html
27 |
28 | Select your models from charts and tables of the pose estimation models
29 |
30 | .. card::
31 | :title: Action Recognition
32 | :link: action_recognition.html
33 |
34 | Select your models from charts and tables of the action recognition models
35 |
36 | .. card::
37 | :title: Depth Prediction
38 | :link: depth.html
39 |
40 | Select your models from charts and tables of the depth prediction models
41 |
42 |
43 | .. toctree::
44 | :hidden:
45 | :maxdepth: 1
46 |
47 | classification
48 | detection
49 | segmentation
50 | pose
51 | action_recognition
52 | depth
53 |
54 |
--------------------------------------------------------------------------------
/docs/model_zoo/pose.rst:
--------------------------------------------------------------------------------
1 | .. _gluoncv-model-zoo-pose:
2 |
3 | Pose Estimation
4 | ====================
5 |
6 | .. role:: framework
7 | :class: framework
8 | .. role:: select
9 | :class: selected framework
10 |
11 | .. container:: Frameworks
12 |
13 | .. container:: framework-group
14 |
15 | :framework:`MXNet`
16 | :framework:`Pytorch`
17 |
18 | .. rst-class:: MXNet
19 |
20 | MXNet
21 | *************
22 |
23 | .. include:: pose_mxnet.rst
24 |
25 | .. rst-class:: Pytorch
26 |
27 | PyTorch
28 | *************
29 |
30 | .. include:: pose_torch.rst
31 |
32 | Reference
33 | *************
34 |
35 | .. [1] Xiao, Bin, Haiping Wu, and Yichen Wei. \
36 | "Simple baselines for human pose estimation and tracking." \
37 | Proceedings of the European Conference on Computer Vision (ECCV). 2018.
38 | .. [2] Fang, Hao-Shu, et al. \
39 |        "RMPE: Regional Multi-person Pose Estimation." \
40 | Proceedings of the IEEE International Conference on Computer Vision. 2017.
41 |
--------------------------------------------------------------------------------
/docs/model_zoo/pose_torch.rst:
--------------------------------------------------------------------------------
1 | Models implemented using PyTorch will be added later. Please check out our MXNet implementation instead.
2 |
--------------------------------------------------------------------------------
/docs/model_zoo/segmentation_torch.rst:
--------------------------------------------------------------------------------
1 | Models implemented using PyTorch will be added later. Please check out our MXNet implementation instead.
2 |
--------------------------------------------------------------------------------
/docs/slides.md:
--------------------------------------------------------------------------------
1 | Slides
2 | ======
3 |
4 | ### GluonCV Workshop in Tokyo, 2018
5 |
6 | - [2018.12.17 Introduction to GluonCV](https://github.com/dmlc/web-data/blob/master/gluoncv/slides/IntroToGluonCV.pdf)
7 | - [2018.12.17 GluonCV: Image Classification](https://github.com/dmlc/web-data/blob/master/gluoncv/slides/Classification.pdf)
8 | - [2018.12.17 GluonCV: Segmentation](https://github.com/dmlc/web-data/blob/master/gluoncv/slides/Segmentation.pdf)
9 | - [2018.12.17 GluonCV: Object Detection](https://github.com/dmlc/web-data/blob/master/gluoncv/slides/Detection.pdf)
10 | - [2018.12.18 GluonCV: Data Processing](https://github.com/dmlc/web-data/blob/master/gluoncv/slides/DataProcessing.pdf)
11 |
12 |
13 | Feel free to share these slides with anyone who might find them useful.
14 |
--------------------------------------------------------------------------------
/docs/tutorials/action_recognition/README.txt:
--------------------------------------------------------------------------------
1 | Action Recognition
2 | ==================
3 |
--------------------------------------------------------------------------------
/docs/tutorials/auto_module/README.txt:
--------------------------------------------------------------------------------
1 | Auto Module
2 | ===========
3 |
--------------------------------------------------------------------------------
/docs/tutorials/classification/README.txt:
--------------------------------------------------------------------------------
1 | Image Classification
2 | ====================
3 |
--------------------------------------------------------------------------------
/docs/tutorials/datasets/.gitignore:
--------------------------------------------------------------------------------
1 | VOCtemplate
2 | val.lst
3 | val.rec
4 | val.idx
5 | im2rec.py
6 |
--------------------------------------------------------------------------------
/docs/tutorials/datasets/README.txt:
--------------------------------------------------------------------------------
1 | Prepare Datasets
2 | ================
3 |
--------------------------------------------------------------------------------
/docs/tutorials/datasets/youtube_bb.py:
--------------------------------------------------------------------------------
1 | """Prepare Youtube_bb dataset
2 | =========================================
3 |
4 | `Youtube_bb dataset `_ consists of approximately 380,000
5 | 15-20s video segments extracted from 240,000 different publicly visible YouTube videos.
6 |
7 | This tutorial helps you to download Youtube_bb and set it up for later experiments.
8 |
9 | .. hint::
10 |
11 | You need 195G free disk space to download and extract this dataset.
12 |    SSD hard drives are recommended for faster disk I/O.
13 | The time it takes to prepare the dataset depends on your Internet connection
14 | and disk speed.
15 |
16 | If you want to download this dataset, please follow the instructions in ``yt_bb``.
17 |
18 | """
19 |
--------------------------------------------------------------------------------
/docs/tutorials/deployment/.gitignore:
--------------------------------------------------------------------------------
1 | *.json
2 | *.params
3 |
--------------------------------------------------------------------------------
/docs/tutorials/deployment/README.txt:
--------------------------------------------------------------------------------
1 | Deployment
2 | ==========
3 |
--------------------------------------------------------------------------------
/docs/tutorials/depth/README.txt:
--------------------------------------------------------------------------------
1 | Depth Prediction
2 | =====================
--------------------------------------------------------------------------------
/docs/tutorials/detection/.gitignore:
--------------------------------------------------------------------------------
1 | *.jpg
2 | *.rec
3 | *.idx
4 |
--------------------------------------------------------------------------------
/docs/tutorials/detection/README.txt:
--------------------------------------------------------------------------------
1 | Object Detection
2 | ================
3 |
--------------------------------------------------------------------------------
/docs/tutorials/distributed/README.txt:
--------------------------------------------------------------------------------
1 | Distributed Training
2 | ====================
3 |
--------------------------------------------------------------------------------
/docs/tutorials/instance/.gitignore:
--------------------------------------------------------------------------------
1 | *.jpg
2 |
--------------------------------------------------------------------------------
/docs/tutorials/instance/README.txt:
--------------------------------------------------------------------------------
1 | Instance Segmentation
2 | =====================
3 |
--------------------------------------------------------------------------------
/docs/tutorials/pose/README.txt:
--------------------------------------------------------------------------------
1 | Pose Estimation
2 | =====================
3 |
--------------------------------------------------------------------------------
/docs/tutorials/segmentation/.gitignore:
--------------------------------------------------------------------------------
1 | *.jpg
2 | *.png
3 |
--------------------------------------------------------------------------------
/docs/tutorials/segmentation/README.txt:
--------------------------------------------------------------------------------
1 | Semantic Segmentation
2 | =====================
3 |
--------------------------------------------------------------------------------
/docs/tutorials/tracking/README.txt:
--------------------------------------------------------------------------------
1 | Object Tracking
2 | =====================
3 |
--------------------------------------------------------------------------------
/docs/tutorials_torch/action_recognition/README.txt:
--------------------------------------------------------------------------------
1 | Action Recognition
2 | ==================
3 |
--------------------------------------------------------------------------------
/docs/tutorials_torch/index.rst:
--------------------------------------------------------------------------------
1 | PyTorch Tutorials
2 | =================
3 |
4 | Interested in a new CV area? Here are some tutorials to help you get started.
5 |
6 | Action Recognition
7 | ---------------------
8 |
9 | .. container:: cards
10 |
11 |
12 | .. card::
13 | :title: Pre-trained I3D Models on Kinetics400
14 | :link: ../build/examples_torch_action_recognition/demo_i3d_kinetics400.html
15 |
16 | Recognize human actions in real-world videos with pre-trained I3D models
17 |
18 | .. card::
19 | :title: Finetuning SOTA Video Models on Your Dataset
20 | :link: ../build/examples_torch_action_recognition/finetune_custom.html
21 |
22 |       Hands-on fine-tuning of SOTA video models on your own dataset
23 |
24 | .. card::
25 | :title: Extracting video features from pre-trained models
26 | :link: ../build/examples_torch_action_recognition/extract_feat.html
27 |
28 | Extracting video features from pre-trained models on your own videos
29 |
30 | .. card::
31 | :title: Computing FLOPS, latency and fps of a model
32 | :link: ../build/examples_torch_action_recognition/speed.html
33 |
34 | How to compute FLOPS, number of parameters, latency and fps of a video model
35 |
36 | .. card::
37 | :title: DistributedDataParallel (DDP) framework
38 | :link: ../build/examples_torch_action_recognition/ddp_pytorch.html
39 |
40 | How to use our DistributedDataParallel framework
41 |
42 |
43 | .. toctree::
44 | :hidden:
45 | :maxdepth: 2
46 |
47 | ../build/examples_torch_action_recognition/index
48 |
--------------------------------------------------------------------------------
/gluoncv/__init__.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | # pylint: disable=wrong-import-position
3 | """GluonCV: a deep learning vision toolkit powered by Gluon."""
4 | from __future__ import absolute_import
5 |
6 | from .check import _deprecate_python2
7 | from .check import _require_mxnet_version, _require_pytorch_version
8 |
9 | __version__ = '0.11.0'
10 |
11 | _deprecate_python2()
12 |
13 | # optionally depend on mxnet or pytorch
14 | _found_mxnet = _found_pytorch = False
15 | try:
16 | _require_mxnet_version('1.4.0', '2.0.0')
17 | from . import data
18 | from . import model_zoo
19 | from . import nn
20 | from . import utils
21 | from . import loss
22 | _found_mxnet = True
23 | except ImportError:
24 | pass
25 |
26 | try:
27 | _require_pytorch_version('1.4.0', '2.0.0')
28 | _found_pytorch = True
29 | except ImportError:
30 | pass
31 |
32 | if not any((_found_mxnet, _found_pytorch)):
33 | raise ImportError('Unable to import modules due to missing `mxnet` & `torch`. '
34 | 'You should install at least one deep learning framework.')
35 |
36 | if all((_found_mxnet, _found_pytorch)):
37 | import warnings
38 | import mxnet as mx
39 | import torch
40 | warnings.warn(f'Both `mxnet=={mx.__version__}` and `torch=={torch.__version__}` are installed. '
41 |                   'You might encounter increased GPU memory footprint if both frameworks are used at the same time.')
42 |
--------------------------------------------------------------------------------
/gluoncv/auto/__init__.py:
--------------------------------------------------------------------------------
1 | """GluonCV auto"""
2 | from .estimators import *
3 |
--------------------------------------------------------------------------------
/gluoncv/auto/data/__init__.py:
--------------------------------------------------------------------------------
1 | """Data Pipelines"""
2 | from .auto_data import url_data, URLs, is_url
3 |
--------------------------------------------------------------------------------
/gluoncv/auto/estimators/__init__.py:
--------------------------------------------------------------------------------
1 | """Estimator implementations"""
2 | from .utils import create_dummy_estimator
3 | # FIXME: for quick test purpose only
4 | try:
5 | import mxnet
6 | from .image_classification import ImageClassificationEstimator
7 | from .ssd import SSDEstimator
8 | from .yolo import YOLOv3Estimator
9 | from .faster_rcnn import FasterRCNNEstimator
10 | # from .mask_rcnn import MaskRCNNEstimator
11 | from .center_net import CenterNetEstimator
12 | except ImportError:
13 | # create dummy placeholder estimator classes
14 |     reason = 'gluoncv.auto.estimators.{} requires mxnet, which is not installed.'
15 | ImageClassificationEstimator = create_dummy_estimator(
16 | 'ImageClassificationEstimator', reason)
17 | SSDEstimator = create_dummy_estimator(
18 | 'SSDEstimator', reason)
19 | YOLOv3Estimator = create_dummy_estimator(
20 | 'YOLOv3Estimator', reason)
21 | FasterRCNNEstimator = create_dummy_estimator(
22 | 'FasterRCNNEstimator', reason)
23 | CenterNetEstimator = create_dummy_estimator(
24 | 'CenterNetEstimator', reason)
25 |
26 | try:
27 | import timm
28 | import torch
29 | from .torch_image_classification import TorchImageClassificationEstimator
30 | except ImportError:
31 |     reason = 'This estimator requires torch/timm, which are not installed.'
32 | TorchImageClassificationEstimator = create_dummy_estimator(
33 | 'TorchImageClassificationEstimator', reason)
34 |
--------------------------------------------------------------------------------
/gluoncv/auto/estimators/center_net/__init__.py:
--------------------------------------------------------------------------------
1 | """SSD Estimator implementations"""
2 |
3 | from .center_net import CenterNetEstimator
4 |
--------------------------------------------------------------------------------
/gluoncv/auto/estimators/conf.py:
--------------------------------------------------------------------------------
1 | """Shared configs"""
2 | _BEST_CHECKPOINT_FILE = 'best_checkpoint.pkl'
3 |
--------------------------------------------------------------------------------
/gluoncv/auto/estimators/constants.py:
--------------------------------------------------------------------------------
1 | BINARY = 'binary'
2 | MULTICLASS = 'multiclass'
3 | REGRESSION = 'regression'
4 |
--------------------------------------------------------------------------------
/gluoncv/auto/estimators/faster_rcnn/__init__.py:
--------------------------------------------------------------------------------
1 | """R-CNN Estimator implementations"""
2 |
3 | from .faster_rcnn import FasterRCNNEstimator
4 |
--------------------------------------------------------------------------------
/gluoncv/auto/estimators/image_classification/__init__.py:
--------------------------------------------------------------------------------
1 | """Image classification estimator"""
2 | from .image_classification import ImageClassificationEstimator
3 |
--------------------------------------------------------------------------------
/gluoncv/auto/estimators/mask_rcnn/__init__.py:
--------------------------------------------------------------------------------
1 | """Mask R-CNN Estimator implementations"""
2 |
3 | from .mask_rcnn import MaskRCNNEstimator
4 |
--------------------------------------------------------------------------------
/gluoncv/auto/estimators/ssd/__init__.py:
--------------------------------------------------------------------------------
1 | """SSD Estimator implementations"""
2 |
3 | from .ssd import SSDEstimator
4 |
--------------------------------------------------------------------------------
/gluoncv/auto/estimators/torch_image_classification/__init__.py:
--------------------------------------------------------------------------------
1 | """Torch image classification estimator"""
2 | from .torch_image_classification import TorchImageClassificationEstimator
3 |
--------------------------------------------------------------------------------
/gluoncv/auto/estimators/torch_image_classification/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .model import resume_checkpoint
2 | from .utils import resolve_data_config, update_cfg
3 | from .optimizer import optimizer_kwargs, create_optimizer_v2a
4 | from .scheduler import create_scheduler
5 | from .metrics import rmse
6 |
--------------------------------------------------------------------------------
/gluoncv/auto/estimators/torch_image_classification/utils/constants.py:
--------------------------------------------------------------------------------
1 | IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
2 | IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
3 | DEFAULT_CROP_PCT = 0.875
4 |
--------------------------------------------------------------------------------
/gluoncv/auto/estimators/torch_image_classification/utils/metrics.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn.functional import softmax
3 |
4 | def rmse(outputs, target):  # root-mean-square error between softmax(outputs) and target
5 | return torch.sqrt(torch.mean((softmax(outputs, dim=0)-target)**2))
6 |
--------------------------------------------------------------------------------
/gluoncv/auto/estimators/yolo/__init__.py:
--------------------------------------------------------------------------------
1 | """YOLO Estimator implementations"""
2 |
3 | from .yolo import YOLOv3Estimator
4 |
--------------------------------------------------------------------------------
/gluoncv/auto/tasks/__init__.py:
--------------------------------------------------------------------------------
1 | """AutoML Tasks"""
2 | import logging
3 |
4 | from .image_classification import *
5 | from .object_detection import *
6 | # from .utils import *
7 |
8 | logger = logging.getLogger(__name__)
9 | msg = (
10 |     "We plan to deprecate auto from gluoncv in release 0.12.0. "
11 |     "Please consider using autogluon.vision instead, which provides the same functionality: "
12 |     "https://auto.gluon.ai/stable/tutorials/image_prediction/index.html"
13 | )
14 | logger.warning(msg)
15 |
--------------------------------------------------------------------------------
/gluoncv/data/ade20k/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/data/ade20k/__init__.py
--------------------------------------------------------------------------------
/gluoncv/data/hmdb51/__init__.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=wildcard-import
2 | """Video action recognition, HMDB51 dataset.
3 | http://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/
4 | """
5 | from __future__ import absolute_import
6 | from .classification import *
7 |
--------------------------------------------------------------------------------
/gluoncv/data/imagenet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/data/imagenet/__init__.py
--------------------------------------------------------------------------------
/gluoncv/data/kinetics400/__init__.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=wildcard-import
2 | """Video action recognition, Kinetics400 dataset.
3 | https://deepmind.com/research/open-source/open-source-datasets/kinetics/
4 | """
5 | from __future__ import absolute_import
6 | from .classification import *
7 |
--------------------------------------------------------------------------------
/gluoncv/data/kinetics700/__init__.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=wildcard-import
2 | """Video action recognition, Kinetics700 dataset.
3 | https://deepmind.com/research/open-source/open-source-datasets/kinetics/
4 | """
5 | from __future__ import absolute_import
6 | from .classification import *
7 |
--------------------------------------------------------------------------------
/gluoncv/data/kitti/__init__.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=missing-module-docstring
2 | from .kitti_dataset import *
3 | from .kitti_utils import *
4 |
--------------------------------------------------------------------------------
/gluoncv/data/lst/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/data/lst/__init__.py
--------------------------------------------------------------------------------
/gluoncv/data/market1501/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/data/market1501/__init__.py
--------------------------------------------------------------------------------
/gluoncv/data/market1501/data_read.py:
--------------------------------------------------------------------------------
1 | """Market 1501 Person Re-Identification Dataset."""
2 | from mxnet.gluon.data import dataset
3 | from mxnet import image
4 |
5 | __all__ = ['ImageTxtDataset']
6 |
7 |
8 | class ImageTxtDataset(dataset.Dataset):
9 | """Load the Market 1501 dataset.
10 |
11 | Parameters
12 | ----------
13 |     items : list
14 |         List of (image path, label) pairs.
15 |     flag : int, default 1
16 |         If 1, load the color image; if 0, load a grayscale image.
17 |     transform : function, default None
18 |         A function applied to the image before it is returned.
19 | """
20 | def __init__(self, items, flag=1, transform=None):
21 | self._flag = flag
22 | self._transform = transform
23 | self.items = items
24 |
25 | def __getitem__(self, idx):
26 | fpath = self.items[idx][0]
27 | img = image.imread(fpath, self._flag)
28 | label = self.items[idx][1]
29 | if self._transform is not None:
30 | img = self._transform(img)
31 | return img, label
32 |
33 | def __len__(self):
34 | return len(self.items)
35 |
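# A minimal usage sketch (illustrative only; the path and label are made up):
#
#   items = [('bounding_box_train/0002_c1s1_000451_03.jpg', 2)]
#   dataset = ImageTxtDataset(items)
#   img, label = dataset[0]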
--------------------------------------------------------------------------------
/gluoncv/data/market1501/label_read.py:
--------------------------------------------------------------------------------
1 | """Market 1501 Person Re-Identification Dataset."""
2 | import random
3 | from os import path as osp
4 |
5 | __all__ = ['LabelList']
6 |
7 |
8 | def LabelList(ratio=1, root='~/.mxnet/datasets', name='market1501'):
9 | """Load the Label List for Market 1501 dataset.
10 |
11 | Parameters
12 | ----------
13 |     ratio : float, default 1
14 |         Fraction of the labels assigned to the training set; the rest form the validation set.
15 |     root : str, default '~/.mxnet/datasets'
16 |         Path to the folder storing the dataset.
17 |     name : str, default 'market1501'
18 |         Which dataset to use. Only 'market1501' is supported for now.
19 | """
20 | root = osp.expanduser(root)
21 |
22 | if name == "market1501":
23 | path = osp.join(root, "Market-1501-v15.09.15")
24 | train_txt = osp.join(path, "train.txt")
25 | image_path = osp.join(path, "bounding_box_train")
26 |
27 |         item_list = [(osp.join(image_path, line.split()[0]), int(line.split()[1]))
28 |                      for line in open(train_txt)]
29 | random.shuffle(item_list)
30 | count = len(item_list)
31 | train_count = int(count * ratio)
32 |
33 | train_set = item_list[:train_count]
34 | valid_set = item_list[train_count:]
35 |
36 | return train_set, valid_set
37 | return None, None
38 |
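# A minimal usage sketch (assumes the Market-1501 archive is already extracted
# under ~/.mxnet/datasets with a train.txt index file):
#
#   train_set, valid_set = LabelList(ratio=0.8)
#   # both are lists of (image_path, identity_label) tuples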
--------------------------------------------------------------------------------
/gluoncv/data/mixup/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/data/mixup/__init__.py
--------------------------------------------------------------------------------
/gluoncv/data/mscoco/__init__.py:
--------------------------------------------------------------------------------
1 | """MS COCO dataset."""
2 |
--------------------------------------------------------------------------------
/gluoncv/data/mscoco/utils.py:
--------------------------------------------------------------------------------
1 | """Import helper for pycocotools"""
2 | # NOTE: for developers
3 | # please do not import pycocotools in __init__; we lazily import it so that
4 | # users who never touch COCO functionality are not forced to install it.
5 | # only import it where it is actually used.
6 | from __future__ import absolute_import
7 |
8 | from ...utils.filesystem import import_try_install
9 |
10 | def try_import_pycocotools():
11 | """Tricks to optionally install and import pycocotools"""
12 | # first we can try import pycocotools
13 | try:
14 | import pycocotools as _
15 | except ImportError:
16 | import os
17 |         # we need to install pycocotools, which is a bit tricky
18 | # pycocotools sdist requires Cython, numpy(already met)
19 | import_try_install('cython')
20 | # pypi pycocotools is not compatible with windows
21 | win_url = 'git+https://github.com/zhreshold/cocoapi.git#subdirectory=PythonAPI'
22 | try:
23 | if os.name == 'nt':
24 | import_try_install('pycocotools', win_url)
25 | else:
26 | import_try_install('pycocotools')
27 | except ImportError:
28 | faq = 'cocoapi FAQ'
29 | raise ImportError('Cannot import or install pycocotools, please refer to %s.' % faq)
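
# A minimal usage sketch (illustrative only): call the helper once before the
# first real pycocotools import.
#
#   try_import_pycocotools()
#   from pycocotools.coco import COCO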
30 |
--------------------------------------------------------------------------------
/gluoncv/data/otb/__init__.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=wildcard-import
2 | """Object Tracking, Visual Tracker Benchmark
3 | http://www.visual-tracking.net"""
4 | from __future__ import absolute_import
5 | from .tracking import *
6 |
--------------------------------------------------------------------------------
/gluoncv/data/pascal_aug/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/data/pascal_aug/__init__.py
--------------------------------------------------------------------------------
/gluoncv/data/pascal_voc/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/data/pascal_voc/__init__.py
--------------------------------------------------------------------------------
/gluoncv/data/recordio/__init__.py:
--------------------------------------------------------------------------------
1 | """Datasets from RecordIO files."""
2 |
--------------------------------------------------------------------------------
/gluoncv/data/somethingsomethingv2/__init__.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=wildcard-import
2 | """Video action recognition, something-something-v2 dataset.
3 | https://20bn.com/datasets/something-something
4 | """
5 | from __future__ import absolute_import
6 | from .classification import *
7 |
--------------------------------------------------------------------------------
/gluoncv/data/tracking_data/__init__.py:
--------------------------------------------------------------------------------
1 | """tracking dataset,include youtube-bb,VID,DET,COCO dataset"""
2 | from __future__ import absolute_import
3 | from .track import *
4 |
--------------------------------------------------------------------------------
/gluoncv/data/transforms/__init__.py:
--------------------------------------------------------------------------------
1 | """Data transforms"""
2 | from __future__ import absolute_import
3 |
4 | from . import bbox
5 | from . import image
6 | from . import experimental
7 | from . import mask
8 | from . import presets
9 | from .block import RandomCrop
10 | from . import pose
11 | from . import video
12 |
--------------------------------------------------------------------------------
/gluoncv/data/transforms/experimental/__init__.py:
--------------------------------------------------------------------------------
1 | """Experimental transforms."""
2 | from . import bbox
3 | from . import image
4 |
--------------------------------------------------------------------------------
/gluoncv/data/transforms/presets/__init__.py:
--------------------------------------------------------------------------------
1 | """Presets of sophisticated data transforms used in various papers."""
2 | from . import ssd
3 | from . import rcnn
4 | from . import yolo
5 | from . import center_net
6 | from . import imagenet
7 | from . import simple_pose
8 | from . import segmentation
9 |
--------------------------------------------------------------------------------
/gluoncv/data/transforms/presets/segmentation.py:
--------------------------------------------------------------------------------
1 | """Transforms for Segmentation models."""
2 | from __future__ import absolute_import
3 |
4 | from mxnet.gluon.data.vision import transforms
5 |
6 | def test_transform(img, ctx):
7 | transform_fn = transforms.Compose([
8 | transforms.ToTensor(),
9 | transforms.Normalize([.485, .456, .406], [.229, .224, .225])
10 | ])
11 | img = transform_fn(img)
12 | img = img.expand_dims(0).as_in_context(ctx)
13 | return img
14 |
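# A minimal usage sketch (illustrative only; 'street.jpg' is a placeholder):
#
#   import mxnet as mx
#   img = mx.image.imread('street.jpg')      # (H, W, 3) uint8 NDArray
#   batch = test_transform(img, mx.cpu())    # (1, 3, H, W), normalized floats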
--------------------------------------------------------------------------------
/gluoncv/data/ucf101/__init__.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=wildcard-import
2 | """Video action recognition, UCF101 dataset.
3 | https://www.crcv.ucf.edu/data/UCF101.php
4 | """
5 | from __future__ import absolute_import
6 | from .classification import *
7 |
--------------------------------------------------------------------------------
/gluoncv/data/video_custom/__init__.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=wildcard-import
2 | """
3 | Customized data loader for video classification related tasks.
4 | """
5 | from __future__ import absolute_import
6 | from .classification import *
7 |
--------------------------------------------------------------------------------
/gluoncv/data/visdrone/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/data/visdrone/__init__.py
--------------------------------------------------------------------------------
/gluoncv/model_zoo/__init__.py:
--------------------------------------------------------------------------------
1 | """GluonCV Model Zoo"""
2 | # pylint: disable=wildcard-import
3 | from .model_zoo import get_model, get_model_list
4 | from .model_store import pretrained_model_list
5 | from .rcnn.faster_rcnn import *
6 | from .rcnn.mask_rcnn import *
7 | from .ssd import *
8 | from .yolo import *
9 | from .cifarresnet import *
10 | from .cifarwideresnet import *
11 | from .fcn import *
12 | from .pspnet import *
13 | from .deeplabv3 import *
14 | from .deeplabv3_plus import *
15 | from .deeplabv3b_plus import *
16 | from . import segbase
17 | from .resnetv1b import *
18 | from .se_resnet import *
19 | from .nasnet import *
20 | from .simple_pose.simple_pose_resnet import *
21 | from .simple_pose.mobile_pose import *
22 | from .action_recognition import *
23 | from .wideresnet import *
24 |
25 | from .resnest import *
26 | from .resnext import *
27 | from .alexnet import *
28 | from .densenet import *
29 | from .googlenet import *
30 | from .inception import *
31 | from .xception import *
32 | from .resnet import *
33 | from .squeezenet import *
34 | from .vgg import *
35 | from .mobilenet import *
36 | from .residual_attentionnet import *
37 | from .center_net import *
38 | from .hrnet import *
39 | from .siamrpn import *
40 | from .fastscnn import *
41 | from .monodepthv2 import *
42 | from .smot import *
43 |
--------------------------------------------------------------------------------
/gluoncv/model_zoo/action_recognition/__init__.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=wildcard-import
2 | """Video action recognition."""
3 | from __future__ import absolute_import
4 | from .actionrec_vgg16 import *
5 | from .actionrec_inceptionv1 import *
6 | from .actionrec_inceptionv3 import *
7 | from .actionrec_resnetv1b import *
8 | from .c3d import *
9 | from .p3d import *
10 | from .r2plus1d import *
11 | from .i3d_resnet import *
12 | from .i3d_inceptionv1 import *
13 | from .i3d_inceptionv3 import *
14 | from .slowfast import *
15 | from .i3d_slow import *
16 |
--------------------------------------------------------------------------------
/gluoncv/model_zoo/alpha_pose/__init__.py:
--------------------------------------------------------------------------------
1 | """Alpha pose for real time human pose estimation"""
2 | # pylint: disable=wildcard-import
3 | from .fast_pose import *
4 |
--------------------------------------------------------------------------------
/gluoncv/model_zoo/center_net/__init__.py:
--------------------------------------------------------------------------------
1 | """CenterNet"""
2 | # pylint: disable=wildcard-import
3 | from __future__ import absolute_import
4 |
5 | from .center_net import *
6 |
--------------------------------------------------------------------------------
/gluoncv/model_zoo/monodepthv2/__init__.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=missing-module-docstring
2 | from .resnet_encoder import ResnetEncoder
3 | from .depth_decoder import DepthDecoder
4 | from .pose_decoder import PoseDecoder
5 | from .monodepth2 import *
6 | from .monodepth2_posenet import *
7 |
--------------------------------------------------------------------------------
/gluoncv/model_zoo/pruned_resnet/__init__.py:
--------------------------------------------------------------------------------
1 | """GluonCV Model Zoo"""
2 | # pylint: disable=wildcard-import
3 | from ..resnetv1b import *
4 |
--------------------------------------------------------------------------------
/gluoncv/model_zoo/quantized/__init__.py:
--------------------------------------------------------------------------------
1 | """Quantized versions of GluonCV models."""
2 | # pylint: disable=wildcard-import
3 | from .quantized import *
4 |
--------------------------------------------------------------------------------
/gluoncv/model_zoo/rcnn/__init__.py:
--------------------------------------------------------------------------------
1 | """Fast RCNN."""
2 | from __future__ import absolute_import
3 |
4 | from .rcnn import RCNN
5 |
--------------------------------------------------------------------------------
/gluoncv/model_zoo/rcnn/faster_rcnn/__init__.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=wildcard-import
2 | """Faster-RCNN Object Detection."""
3 | from __future__ import absolute_import
4 |
5 | from .faster_rcnn import *
6 | from .doublehead_rcnn import *
7 | from .predefined_models import *
8 | from .rcnn_target import RCNNTargetGenerator, RCNNTargetSampler
9 | from .data_parallel import ForwardBackwardTask
10 |
--------------------------------------------------------------------------------
/gluoncv/model_zoo/rcnn/mask_rcnn/__init__.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=wildcard-import
2 | """Mask RCNN Instance Segmentation"""
3 | from __future__ import absolute_import
4 |
5 | from .mask_rcnn import *
6 | from .predefined_models import *
7 | from .data_parallel import ForwardBackwardTask
8 |
--------------------------------------------------------------------------------
/gluoncv/model_zoo/rcnn/rpn/__init__.py:
--------------------------------------------------------------------------------
1 | """Region Proposal Network."""
2 | from __future__ import absolute_import
3 |
4 | from .rpn import RPN
5 | from . import bbox_clip
6 | from .anchor import RPNAnchorGenerator
7 | from .proposal import RPNProposal
8 |
--------------------------------------------------------------------------------
/gluoncv/model_zoo/siamrpn/__init__.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=wildcard-import
2 | """SiamRPN tracking"""
3 | from __future__ import absolute_import
4 | from .siam_net import *
5 |
--------------------------------------------------------------------------------
/gluoncv/model_zoo/simple_pose/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/model_zoo/simple_pose/__init__.py
--------------------------------------------------------------------------------
/gluoncv/model_zoo/smot/__init__.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=wildcard-import
2 | """
3 | SMOT: Single-Shot Multi Object Tracking
4 | https://arxiv.org/abs/2010.16031
5 | """
6 | from __future__ import absolute_import
7 | from .smot_tracker import *
8 | from .tracktors import *
9 |
--------------------------------------------------------------------------------
/gluoncv/model_zoo/ssd/__init__.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=wildcard-import
2 | """Single-shot Object Detection."""
3 | from __future__ import absolute_import
4 | from .ssd import *
5 | from .presets import *
6 | from .vgg_atrous import *
7 |
--------------------------------------------------------------------------------
/gluoncv/model_zoo/yolo/__init__.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=wildcard-import
2 | """YOLO Object Detection"""
3 | from __future__ import absolute_import
4 |
5 | from .darknet import *
6 | from .yolo3 import *
7 |
--------------------------------------------------------------------------------
/gluoncv/nn/__init__.py:
--------------------------------------------------------------------------------
1 | """GluonCV neural network layers"""
2 | # pylint: disable=wildcard-import
3 | from __future__ import absolute_import
4 |
5 | from . import bbox
6 | from . import coder
7 | from . import feature
8 | from . import matcher
9 | from . import predictor
10 | from . import sampler
11 | from .block import *
12 | from .gn import *
13 |
--------------------------------------------------------------------------------
/gluoncv/torch/__init__.py:
--------------------------------------------------------------------------------
1 | """GluonCV-Torch."""
2 | from . import data
3 | from . import model_zoo
4 | from . import nn
5 | from . import utils
6 |
--------------------------------------------------------------------------------
/gluoncv/torch/data/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | This module provides data loaders and transformers for popular vision datasets.
3 | """
4 |
5 | from .video_cls.dataset_classification import VideoClsDataset
6 | from .video_cls.dataset_classification import build_dataloader, build_dataloader_test
7 | from .video_cls.multigrid_helper import multiGridHelper, MultiGridBatchSampler
8 | from .coot.dataloader import create_datasets, create_loaders
9 | from . import registry
10 |
--------------------------------------------------------------------------------
/gluoncv/torch/data/detection/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/torch/data/detection/__init__.py
--------------------------------------------------------------------------------
/gluoncv/torch/data/detection/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | from .distributed_sampler import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler
2 | from .grouped_batch_sampler import GroupedBatchSampler
3 |
4 | __all__ = [
5 | "GroupedBatchSampler",
6 | "TrainingSampler",
7 | "InferenceSampler",
8 | "RepeatFactorTrainingSampler",
9 | ]
10 |
--------------------------------------------------------------------------------
/gluoncv/torch/data/gluoncv_motion_dataset/__init__.py:
--------------------------------------------------------------------------------
1 | """GluonCV motion dataset, supports multiple video tasks including
2 | video action recognition/detection, object tracking, pose tracking, etc."""
3 | from .dataset import GluonCVMotionDataset, FieldNames
4 |
--------------------------------------------------------------------------------
/gluoncv/torch/data/gluoncv_motion_dataset/ingestion/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/torch/data/gluoncv_motion_dataset/ingestion/__init__.py
--------------------------------------------------------------------------------
/gluoncv/torch/data/gluoncv_motion_dataset/io/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/torch/data/gluoncv_motion_dataset/io/__init__.py
--------------------------------------------------------------------------------
/gluoncv/torch/data/gluoncv_motion_dataset/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/torch/data/gluoncv_motion_dataset/utils/__init__.py
--------------------------------------------------------------------------------
/gluoncv/torch/data/pose/__init__.py:
--------------------------------------------------------------------------------
1 | from .dataset_pose import *
2 |
--------------------------------------------------------------------------------
/gluoncv/torch/data/structures/__init__.py:
--------------------------------------------------------------------------------
1 | from .boxes import Boxes, BoxMode, pairwise_iou
2 | from .instances import Instances
3 | from .masks import BitMasks, PolygonMasks, polygons_to_bitmask
4 | from .image_list import ImageList
5 | from .keypoints import Keypoints
6 | from .beziers import Beziers
7 |
--------------------------------------------------------------------------------
/gluoncv/torch/data/transforms/instance_transforms/__init__.py:
--------------------------------------------------------------------------------
1 | """Transform and augmentation for instance level manipulations"""
2 | from .augmentation import *
3 | from .transform import *
4 |
--------------------------------------------------------------------------------
/gluoncv/torch/data/transforms/videotransforms/utils/functional.py:
--------------------------------------------------------------------------------
1 | def normalize(tensor, mean, std):
2 | """
3 |     Args:
4 |         tensor (Tensor): Tensor to normalize (modified in place)
5 |         mean (Tensor or float): Mean to subtract
6 |         std (Tensor or float): Standard deviation to divide by
7 | 
8 |     Returns:
9 |         Tensor: The normalized input tensor
10 |     """
11 |     tensor.sub_(mean).div_(std)
12 |     return tensor
13 | 
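A minimal usage sketch, assuming the `normalize` above is in scope: because the implementation uses `sub_`/`div_`, the input is normalized in place and the returned tensor is the same object, and `mean`/`std` must broadcast against the input.

    import torch

    clip = torch.rand(3, 8, 112, 112)                             # (C, T, H, W) video clip
    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1, 1)   # per-channel mean
    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1, 1)    # per-channel std
    out = normalize(clip, mean, std)
    assert out is clip                                            # mutated in place, same object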
--------------------------------------------------------------------------------
/gluoncv/torch/data/transforms/videotransforms/utils/images.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def convert_img(img):
5 |     """Converts (H, W, C) numpy.ndarray to (C, H, W) format
6 | """
7 | if len(img.shape) == 3:
8 | img = img.transpose(2, 0, 1)
9 | if len(img.shape) == 2:
10 | img = np.expand_dims(img, 0)
11 | return img
12 |
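A minimal usage sketch covering both branches, assuming the `convert_img` above is in scope:

    import numpy as np

    rgb = np.zeros((224, 224, 3), dtype=np.uint8)    # (H, W, C) color image
    assert convert_img(rgb).shape == (3, 224, 224)   # transposed to channels-first
    gray = np.zeros((224, 224), dtype=np.uint8)      # (H, W) grayscale image
    assert convert_img(gray).shape == (1, 224, 224)  # singleton channel prepended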
--------------------------------------------------------------------------------
/gluoncv/torch/data/video_cls/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Customized data loader for video classification related tasks.
3 | """
4 | from __future__ import absolute_import
5 | from .dataset_classification import *
6 | from .multigrid_helper import *
7 |
--------------------------------------------------------------------------------
/gluoncv/torch/engine/__init__.py:
--------------------------------------------------------------------------------
1 | """GluonCV-Torch engine."""
2 |
--------------------------------------------------------------------------------
/gluoncv/torch/engine/config/__init__.py:
--------------------------------------------------------------------------------
1 | """The global configs registry"""
2 | from .action_recognition import _C as _C_action_recognition
3 | from .coot import _C as _C_coot
4 | from .directpose import _C as _C_directpose
5 |
6 | __all__ = ['get_cfg_defaults']
7 |
8 | _CONFIG_REG = {
9 | "action_recognition": _C_action_recognition,
10 | "coot": _C_coot,
11 | "directpose": _C_directpose
12 | }
13 |
14 | def get_cfg_defaults(name='action_recognition'):
15 |     """Get a yacs CfgNode object with default values, selected by name.
16 |
17 | Parameters
18 | ----------
19 | name : str
20 | The name of the root config, e.g. action_recognition, coot, directpose...
21 |
22 | Returns
23 | -------
24 | yacs.CfgNode object
25 |
26 | """
27 | assert isinstance(name, str), f"{name} must be a str"
28 | name = name.lower()
29 | if name not in _CONFIG_REG.keys():
30 | raise ValueError(f"Unknown root config with name: {name}")
31 | return _CONFIG_REG[name].clone()
32 |
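A minimal usage sketch, assuming the `get_cfg_defaults` above is in scope: the lookup is case-insensitive, each call returns an independent clone of the defaults, and unknown names raise ValueError.

    cfg = get_cfg_defaults('DirectPose')    # lowered to 'directpose' before lookup
    cfg2 = get_cfg_defaults('directpose')
    assert cfg is not cfg2                  # clones: mutating one leaves the registered defaults intact
    # get_cfg_defaults('nonexistent')       # would raise ValueError: Unknown root config ...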
--------------------------------------------------------------------------------
/gluoncv/torch/model_zoo/__init__.py:
--------------------------------------------------------------------------------
1 | """GluonCV-Torch model zoo"""
2 |
3 | from .model_zoo import get_model, get_model_list
4 | from .action_recognition import *
5 | from .pose import *
6 |
--------------------------------------------------------------------------------
/gluoncv/torch/model_zoo/action_recognition/__init__.py:
--------------------------------------------------------------------------------
1 | """GluonCV-Torch video action recognition."""
2 | from __future__ import absolute_import
3 | from .actionrec_resnetv1b import *
4 | from .i3d_resnet import *
5 | from .i3d_slow import *
6 | from .slowfast import *
7 | from .r2plus1dv1 import *
8 | from .r2plus1dv2 import *
9 | from .tpn import *
10 | from .ircsnv2 import *
11 |
--------------------------------------------------------------------------------
/gluoncv/torch/model_zoo/object_detection/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/torch/model_zoo/object_detection/__init__.py
--------------------------------------------------------------------------------
/gluoncv/torch/model_zoo/pose/__init__.py:
--------------------------------------------------------------------------------
1 | """GluonCV-Torch pose estimation."""
2 | from __future__ import absolute_import
3 | from .directpose_resnet_fpn import *
4 |
--------------------------------------------------------------------------------
/gluoncv/torch/nn/__init__.py:
--------------------------------------------------------------------------------
1 | """GluonCV-Torch neural network layers"""
2 |
--------------------------------------------------------------------------------
/gluoncv/torch/nn/keypoint_loss.py:
--------------------------------------------------------------------------------
1 | """Loss layers for keypoints that can be inserted to modules"""
2 | import torch
3 | import torch.nn as nn
4 |
5 | __all__ = ['WeightedMSELoss', 'HMFocalLoss']
6 |
7 | def _sigmoid(x):
8 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4)
9 | return y
10 |
11 | class WeightedMSELoss(nn.Module):
12 | """Weighted MSE loss layer"""
13 | def __init__(self):
14 | super().__init__()
15 |
16 | def forward(self, pred, gt, mask):
17 | assert pred.size() == gt.size()
18 |         loss = ((pred - gt) ** 2) * mask
19 | loss = loss.mean()
20 | return loss
21 |
22 | class HMFocalLoss(nn.Module):
23 | """Heatmap Focal Loss layer"""
24 | def __init__(self, alpha, beta):
25 | super(HMFocalLoss, self).__init__()
26 | self.alpha = alpha
27 | self.beta = beta
28 |
29 | def forward(self, pred, gt):
30 | pos_inds = gt.eq(1).float()
31 | neg_inds = gt.lt(1).float()
32 |
33 | pred = _sigmoid(pred)
34 | neg_weights = torch.pow(1 - gt, self.beta)
35 |
36 | pos_loss = torch.log(pred) * torch.pow(1 - pred, self.alpha) * pos_inds
37 | neg_loss = torch.log(1 - pred) * torch.pow(pred, self.alpha) * neg_weights * neg_inds
38 |
39 | num_pos = pos_inds.float().sum()
40 | pos_loss = pos_loss.sum()
41 | neg_loss = neg_loss.sum()
42 |
43 | if num_pos == 0:
44 | return -neg_loss
45 | else:
46 | return -(pos_loss + neg_loss) / num_pos
47 |
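A minimal usage sketch with hypothetical shapes: `HMFocalLoss` implements a CornerNet-style penalty-reduced focal loss, so ground-truth heatmaps are expected to be Gaussian-splatted with peaks exactly equal to 1, and `alpha`/`beta` are conventionally 2 and 4. Note that `_sigmoid` calls the in-place `sigmoid_`, so the caller's prediction tensor is modified; clone it first if the raw logits are still needed.

    import torch

    pred = torch.randn(2, 17, 64, 64)              # batch of 17-channel 64x64 heatmaps
    gt = torch.rand(2, 17, 64, 64)
    mask = (gt > 0.5).float()                      # weight only a subset of pixels
    print(WeightedMSELoss()(pred, gt, mask))       # scalar loss

    gt_hm = torch.zeros(2, 17, 64, 64)
    gt_hm[:, :, 32, 32] = 1.0                      # peaks only, for brevity
    print(HMFocalLoss(alpha=2, beta=4)(pred.clone(), gt_hm))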
--------------------------------------------------------------------------------
/gluoncv/torch/nn/shape_spec.py:
--------------------------------------------------------------------------------
1 | """Internal structure for shape"""
2 | from collections import namedtuple
3 |
4 |
5 | class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])):
6 |     """
7 |     A simple structure that contains the basic shape specification of a tensor.
8 |     It is often used as an auxiliary input/output of models
9 |     to obtain shape inference ability among PyTorch modules.
10 | 
11 |     Attributes:
12 |         channels: number of channels, or None
13 |         height: spatial height, or None
14 |         width: spatial width, or None
15 |         stride: stride relative to the input, or None
16 |     """
17 |
18 | def __new__(cls, *, channels=None, height=None, width=None, stride=None):
19 | return super().__new__(cls, channels, height, width, stride)
20 |
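A minimal usage sketch: construction is keyword-only, unspecified fields default to None, and as a namedtuple the result also unpacks positionally.

    spec = ShapeSpec(channels=256, stride=16)
    print(spec.channels, spec.height, spec.width, spec.stride)   # 256 None None 16
    channels, height, width, stride = spec                       # tuple-style unpacking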
--------------------------------------------------------------------------------
/gluoncv/torch/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """GluonCV-Torch utility functions."""
2 |
--------------------------------------------------------------------------------
/gluoncv/torch/utils/eval_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/torch/utils/eval_utils/__init__.py
--------------------------------------------------------------------------------
/gluoncv/torch/utils/random.py:
--------------------------------------------------------------------------------
1 | """Utils for random states"""
2 | import os
3 | from datetime import datetime
4 | import random
5 | import logging
6 |
7 | import numpy as np
8 | import torch
9 |
10 | __all__ = ['seed_all_rng']
11 |
12 |
13 | def seed_all_rng(seed=None):
14 | """
15 | Set the random seed for the RNG in torch, numpy and python.
16 |
17 | Args:
18 | seed (int): if None, will use a strong random seed.
19 | """
20 | if seed is None:
21 | seed = (
22 | os.getpid()
23 | + int(datetime.now().strftime("%S%f"))
24 | + int.from_bytes(os.urandom(2), "big")
25 | )
26 | logger = logging.getLogger(__name__)
27 | logger.info("Using a generated random seed {}".format(seed))
28 | np.random.seed(seed)
29 | torch.set_rng_state(torch.manual_seed(seed).get_state())
30 | random.seed(seed)
31 |
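A minimal usage sketch of the two code paths: an explicit seed makes torch/numpy/python draws reproducible, while passing None derives a one-off seed from the pid, wall clock, and os.urandom and records it in the log.

    import torch

    seed_all_rng(42)
    a = torch.rand(3)
    seed_all_rng(42)
    assert torch.equal(a, torch.rand(3))   # identical draws after re-seeding
    seed_all_rng()                         # strong seed; value appears in the log output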
--------------------------------------------------------------------------------
/gluoncv/torch/utils/task_utils/__init__.py:
--------------------------------------------------------------------------------
1 | """Task utils"""
2 | from .classification import train_classification, validation_classification, test_classification
3 | from .coot import train_coot, validate_coot
4 | from .pose import DirectposePipeline, build_pose_optimizer
5 |
--------------------------------------------------------------------------------
/gluoncv/torch/utils/tvm_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/gluoncv/torch/utils/tvm_utils/__init__.py
--------------------------------------------------------------------------------
/gluoncv/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """GluonCV Utility functions."""
2 | # pylint: disable=wildcard-import,exec-used,wrong-import-position
3 | from __future__ import absolute_import
4 |
5 | import types
6 |
7 | def import_dummy_module(code, name):
8 | # create blank module
9 | module = types.ModuleType(name)
10 | # populate the module with code
11 | exec(code, module.__dict__)
12 | return module
13 |
14 | dummy_module = """
15 | def __getattr__(name):
16 | raise AttributeError(f"gluoncv.utils.{__name__} module requires mxnet which is missing.")
17 | """
18 |
19 |
20 | from . import bbox
21 | from . import random
22 | from . import filesystem
23 | try:
24 | import mxnet
25 | from . import viz
26 | from . import metrics
27 | from . import parallel
28 | from .lr_scheduler import LRSequential, LRScheduler
29 | from .export_helper import export_block, export_tvm
30 | from .sync_loader_helper import split_data, split_and_load
31 | except ImportError:
32 | viz = import_dummy_module(dummy_module, 'viz')
33 | metrics = import_dummy_module(dummy_module, 'metrics')
34 | parallel = import_dummy_module(dummy_module, 'parallel')
35 | LRSequential, LRScheduler = None, None
36 | export_block, export_tvm = None, None
37 | split_data, split_and_load = None, None
38 |
39 | from .download import download, check_sha1
40 | from .filesystem import makedirs, try_import_dali, try_import_cv2
41 | from .bbox import bbox_iou
42 | from .block import recursive_visit, set_lr_mult, freeze_bn
43 | from .plot_history import TrainingHistory
44 | from .version import *
45 |
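The generated stand-ins rely on module-level `__getattr__` (PEP 562, Python 3.7+), so the import itself succeeds and only attribute access fails with a descriptive error. A sketch of the resulting behavior in an environment without mxnet:

    from gluoncv.utils import viz    # succeeds: `viz` is the dummy module
    viz.plot_bbox                    # raises AttributeError:
    # "gluoncv.utils.viz module requires mxnet which is missing."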
--------------------------------------------------------------------------------
/gluoncv/utils/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | """Custom evaluation metrics"""
2 | from __future__ import absolute_import
3 |
4 | from .coco_detection import COCODetectionMetric
5 | from .coco_keypoints import COCOKeyPointsMetric
6 | from .voc_detection import VOCMApMetric, VOC07MApMetric
7 | from .segmentation import SegmentationMetric
8 | from .heatmap_accuracy import HeatmapAccuracy
9 |
--------------------------------------------------------------------------------
/gluoncv/utils/random.py:
--------------------------------------------------------------------------------
1 | """Random wrapper."""
2 | from __future__ import absolute_import
3 | import random as pyrandom
4 | import numpy as np
5 | try:
6 | import mxnet as mx
7 | except ImportError:
8 | mx = None
9 |
10 |
11 | def seed(a=None):
12 | """Seed the generator for python builtin random, numpy.random, mxnet.random.
13 |
14 | This method is to control random state for mxnet related random functions.
15 |
16 | Note that this function cannot guarantee 100 percent reproducibility due to
17 | hardware settings.
18 |
19 | Parameters
20 | ----------
21 | a : int or 1-d array_like, optional
22 | Initialize internal state of the random number generator.
23 |         If `a` is neither None nor an int, then hash(a) is used instead.
24 | Note that the hash values for some types are nondeterministic.
25 |
26 | """
27 | pyrandom.seed(a)
28 | np.random.seed(a)
29 | if mx is not None:
30 | mx.random.seed(a)
31 |
--------------------------------------------------------------------------------
/gluoncv/utils/version.py:
--------------------------------------------------------------------------------
1 | """Utility functions for version checking."""
2 | import warnings
3 |
4 | __all__ = ['check_version']
5 |
6 | def check_version(min_version, warning_only=False):
7 |     """Check that the installed gluoncv version satisfies the provided minimum version.
8 | An exception is thrown if the check does not pass.
9 |
10 | Parameters
11 | ----------
12 | min_version : str
13 | Minimum version
14 | warning_only : bool
15 |         If True, print a warning instead of throwing an exception.
16 | """
17 | from .. import __version__
18 | from distutils.version import LooseVersion
19 | bad_version = LooseVersion(__version__) < LooseVersion(min_version)
20 | if bad_version:
21 | msg = 'Installed GluonCV version (%s) does not satisfy the ' \
22 | 'minimum required version (%s)'%(__version__, min_version)
23 | if warning_only:
24 | warnings.warn(msg)
25 | else:
26 | raise AssertionError(msg)
27 |
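A minimal usage sketch, assuming gluoncv is installed (`check_version` is re-exported through `gluoncv.utils`):

    import gluoncv as gcv

    gcv.utils.check_version('0.6.0')                       # raises AssertionError if older
    gcv.utils.check_version('99.0.0', warning_only=True)   # prints a warning instead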
--------------------------------------------------------------------------------
/gluoncv/utils/viz/__init__.py:
--------------------------------------------------------------------------------
1 | """Visualization tools"""
2 | from __future__ import absolute_import
3 |
4 | from .image import plot_image, cv_plot_image
5 | from .bbox import plot_bbox, cv_plot_bbox
6 | from .keypoints import plot_keypoints, cv_plot_keypoints
7 | from .mask import expand_mask, plot_mask, cv_merge_two_images
8 | from .segmentation import get_color_pallete, DeNormalize
9 | from .network import plot_network, plot_mxboard
10 |
--------------------------------------------------------------------------------
/scripts/README.md:
--------------------------------------------------------------------------------
1 | # Launch scripts
2 | This folder contains training, evaluation, and demo Python scripts.
3 | Please refer to the [GluonCV](http://gluon-cv.mxnet.io/index.html) website for further instructions if needed.
4 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/i3d_nl10_resnet101_v1_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # i3d_nl10_resnet101_v1_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 100
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | LR_POLICY: 'Step'
23 | USE_WARMUP: False
24 | LR_MILESTONE: [40, 80]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 2
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 32
38 | FRAME_RATE: 2
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'i3d_nl10_resnet101_v1_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/i3d_nl10_resnet101_v1_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/i3d_nl10_resnet101_v1_kinetics400/eval'
55 | SAVE_FREQ: 2
56 |
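The configuration files in this folder are YAML overrides merged onto the registered `action_recognition` defaults. A sketch of how one is typically consumed (the launch command is an assumption based on the repo layout; verify the entry-point name locally):

    # python train_ddp_pytorch.py --config-file configuration/i3d_nl10_resnet101_v1_kinetics400.yaml
    from gluoncv.torch.engine.config import get_cfg_defaults

    cfg = get_cfg_defaults('action_recognition')
    cfg.merge_from_file('configuration/i3d_nl10_resnet101_v1_kinetics400.yaml')  # yacs merge
    print(cfg.CONFIG.TRAIN.LR)   # 0.01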
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/i3d_nl10_resnet50_v1_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # i3d_nl10_resnet50_v1_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 100
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | LR_POLICY: 'Step'
23 | USE_WARMUP: False
24 | LR_MILESTONE: [40, 80]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 2
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 32
38 | FRAME_RATE: 2
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'i3d_nl10_resnet50_v1_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/i3d_nl10_resnet50_v1_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/i3d_nl10_resnet50_v1_kinetics400/eval'
55 | SAVE_FREQ: 2
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/i3d_nl5_resnet101_v1_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # i3d_nl5_resnet101_v1_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 100
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | LR_POLICY: 'Step'
23 | USE_WARMUP: False
24 | LR_MILESTONE: [40, 80]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 2
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 32
38 | FRAME_RATE: 2
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'i3d_nl5_resnet101_v1_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/i3d_nl5_resnet101_v1_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/i3d_nl5_resnet101_v1_kinetics400/eval'
55 | SAVE_FREQ: 2
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/i3d_nl5_resnet50_v1_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # i3d_nl5_resnet50_v1_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.69.134:23456'
9 | WOLRD_URLS: ['172.31.69.134']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 100
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | LR_POLICY: 'Step'
23 | USE_WARMUP: False
24 | LR_MILESTONE: [40, 80]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 5
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 32
38 | FRAME_RATE: 2
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'i3d_nl5_resnet50_v1_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/i3d_nl5_resnet50_v1_kinetics400/'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/i3d_nl5_resnet50_v1_kinetics400/eval'
55 | SAVE_FREQ: 5
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/i3d_resnet101_v1_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # i3d_resnet101_v1_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 100
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | LR_POLICY: 'Step'
23 | USE_WARMUP: False
24 | LR_MILESTONE: [40, 80]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 2
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 32
38 | FRAME_RATE: 2
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'i3d_resnet101_v1_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/i3d_resnet101_v1_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/i3d_resnet101_v1_kinetics400/eval'
55 | SAVE_FREQ: 2
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/i3d_resnet50_v1_custom.yaml:
--------------------------------------------------------------------------------
1 | # i3d_resnet50_v1_custom
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.252:23456'
9 | WOLRD_URLS: ['172.31.72.252']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 100
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | LR_POLICY: 'Step'
23 | USE_WARMUP: False
24 | LR_MILESTONE: [40, 80]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 2
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/'
36 | NUM_CLASSES: 174
37 | CLIP_LEN: 32
38 | FRAME_RATE: 2
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'i3d_resnet50_v1_custom'
48 | PRETRAINED: True # Default True, use a backbone pretrained on K400. If set to False, the model is just inflated from 2D ImageNet weights.
49 |
50 | LOG:
51 | BASE_PATH: './logs/i3d_resnet50_v1_custom'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/i3d_resnet50_v1_custom/eval'
55 | SAVE_FREQ: 2
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/i3d_resnet50_v1_feat.yaml:
--------------------------------------------------------------------------------
1 | # i3d_resnet50_v1_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 1
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.252:23456'
9 | WOLRD_URLS: ['172.31.72.252']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | INFERENCE:
17 | FEAT: True
18 |
19 | DATA:
20 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
21 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
22 | NUM_CLASSES: 400
23 | CLIP_LEN: 32
24 | FRAME_RATE: 2
25 | NUM_SEGMENT: 1
26 | NUM_CROP: 1
27 | MULTIGRID: False
28 | KEEP_ASPECT_RATIO: False
29 |
30 | MODEL:
31 | NAME: 'i3d_resnet50_v1_kinetics400'
32 | PRETRAINED: True
33 |
34 | LOG:
35 | BASE_PATH: './logs/i3d_resnet50_v1_kinetics400'
36 | SAVE_DIR: 'features'
37 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/i3d_resnet50_v1_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # i3d_resnet50_v1_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.69.242:23456'
9 | WOLRD_URLS: ['172.31.69.242']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 100
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | LR_POLICY: 'Step'
23 | USE_WARMUP: False
24 | LR_MILESTONE: [40, 80]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 5
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 32
38 | FRAME_RATE: 2
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'i3d_resnet50_v1_kinetics400'
48 | PRETRAINED: True
49 |
50 | LOG:
51 | BASE_PATH: './logs/i3d_resnet50_v1_kinetics400/'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/i3d_resnet50_v1_kinetics400/eval'
55 | SAVE_FREQ: 5
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/i3d_resnet50_v1_sthsthv2.yaml:
--------------------------------------------------------------------------------
1 | # i3d_resnet50_v1_sthsthv2
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 100
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | LR_POLICY: 'Step'
21 | MOMENTUM: 0.9
22 | W_DECAY: 1e-5
23 | USE_WARMUP: False
24 | LR_MILESTONE: [40, 80]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 2
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/'
36 | NUM_CLASSES: 174
37 | CLIP_LEN: 16
38 | FRAME_RATE: 2
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 2
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'i3d_resnet50_v1_sthsthv2'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/i3d_resnet50_v1_sthsthv2'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/i3d_resnet50_v1_sthsthv2/eval'
55 | SAVE_FREQ: 2
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/i3d_slow_resnet101_f16s4_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # i3d_slow_resnet101_f16s4_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 196
18 | BATCH_SIZE: 8
19 | LR: 0.1
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-4
22 | USE_WARMUP: True
23 | WARMUP_EPOCHS: 34
24 | LR_POLICY: 'Cosine'
25 | RESUME_EPOCH: -1
26 |
27 | VAL:
28 | FREQ: 5
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 16
38 | FRAME_RATE: 4
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'i3d_slow_resnet101_f16s4_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/i3d_slow_resnet101_f16s4_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/i3d_slow_resnet101_f16s4_kinetics400/eval'
55 | DISPLAY_FREQ: 50
56 | SAVE_FREQ: 5
57 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/i3d_slow_resnet101_f16s4_kinetics700.yaml:
--------------------------------------------------------------------------------
1 | # i3d_slow_resnet101_f16s4_kinetics700
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.69.242:23456'
9 | WOLRD_URLS: ['172.31.69.242']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 196
18 | BATCH_SIZE: 8
19 | LR: 0.1
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-4
22 | USE_WARMUP: True
23 | WARMUP_EPOCHS: 34
24 | LR_POLICY: 'Cosine'
25 | RESUME_EPOCH: -1
26 |
27 | VAL:
28 | FREQ: 5
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics700_v1/k700_v1_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics700_v1/k700_v1_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics700_v1/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics700_v1/val_256/'
36 | NUM_CLASSES: 700
37 | CLIP_LEN: 16
38 | FRAME_RATE: 4
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: True
45 |
46 | MODEL:
47 | NAME: 'i3d_slow_resnet101_f16s4_kinetics700'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/i3d_slow_resnet101_f16s4_kinetics700'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/i3d_slow_resnet101_f16s4_kinetics700/eval'
55 | DISPLAY_FREQ: 50
56 | SAVE_FREQ: 5
57 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/i3d_slow_resnet101_f32s2_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # i3d_slow_resnet101_f32s2_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 196
18 | BATCH_SIZE: 8
19 | LR: 0.1
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-4
22 | USE_WARMUP: True
23 | WARMUP_EPOCHS: 34
24 | LR_POLICY: 'Cosine'
25 | RESUME_EPOCH: -1
26 |
27 | VAL:
28 | FREQ: 5
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 32
38 | FRAME_RATE: 2
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'i3d_slow_resnet101_f32s2_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/i3d_slow_resnet101_f32s2_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/i3d_slow_resnet101_f32s2_kinetics400/eval'
55 | DISPLAY_FREQ: 50
56 | SAVE_FREQ: 5
57 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/i3d_slow_resnet101_f8s8_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # i3d_slow_resnet101_f8s8_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 196
18 | BATCH_SIZE: 8
19 | LR: 0.1
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-4
22 | USE_WARMUP: True
23 | WARMUP_EPOCHS: 34
24 | LR_POLICY: 'Cosine'
25 | RESUME_EPOCH: -1
26 |
27 | VAL:
28 | FREQ: 5
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 8
38 | FRAME_RATE: 8
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'i3d_slow_resnet101_f8s8_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/i3d_slow_resnet101_f8s8_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/i3d_slow_resnet101_f8s8_kinetics400/eval'
55 | DISPLAY_FREQ: 50
56 | SAVE_FREQ: 5
57 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/i3d_slow_resnet50_f16s4_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # i3d_slow_resnet50_f16s4_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 196
18 | BATCH_SIZE: 8
19 | LR: 0.1
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-4
22 | USE_WARMUP: True
23 | WARMUP_EPOCHS: 34
24 | LR_POLICY: 'Cosine'
25 | RESUME_EPOCH: -1
26 |
27 | VAL:
28 | FREQ: 5
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 16
38 | FRAME_RATE: 4
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'i3d_slow_resnet50_f16s4_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/i3d_slow_resnet50_f16s4_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/i3d_slow_resnet50_f16s4_kinetics400/eval'
55 | DISPLAY_FREQ: 50
56 | SAVE_FREQ: 5
57 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/i3d_slow_resnet50_f32s2_custom.yaml:
--------------------------------------------------------------------------------
1 | # i3d_slow_resnet50_f32s2_custom
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.252:23456'
9 | WOLRD_URLS: ['172.31.72.252']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 196
18 | BATCH_SIZE: 8
19 | LR: 0.1
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-4
22 | USE_WARMUP: True
23 | WARMUP_EPOCHS: 34
24 | LR_POLICY: 'Cosine'
25 | RESUME_EPOCH: -1
26 |
27 | VAL:
28 | FREQ: 5
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/'
36 | NUM_CLASSES: 174
37 | CLIP_LEN: 32
38 | FRAME_RATE: 2
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'i3d_slow_resnet50_f32s2_custom'
48 | PRETRAINED: True
49 |
50 | LOG:
51 | BASE_PATH: './logs/i3d_slow_resnet50_f32s2_custom'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/i3d_slow_resnet50_f32s2_custom/eval'
55 | DISPLAY_FREQ: 50
56 | SAVE_FREQ: 5
57 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/i3d_slow_resnet50_f32s2_feat.yaml:
--------------------------------------------------------------------------------
1 | # i3d_slow_resnet50_f32s2_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | INFERENCE:
17 | FEAT: True
18 |
19 | DATA:
20 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
21 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
22 | NUM_CLASSES: 400
23 | CLIP_LEN: 32
24 | FRAME_RATE: 2
25 | NUM_SEGMENT: 1
26 | NUM_CROP: 1
27 | TEST_NUM_SEGMENT: 10
28 | TEST_NUM_CROP: 3
29 | MULTIGRID: False
30 | KEEP_ASPECT_RATIO: False
31 |
32 | MODEL:
33 | NAME: 'i3d_slow_resnet50_f32s2_kinetics400'
34 | PRETRAINED: True
35 |
36 | LOG:
37 | BASE_PATH: './logs/i3d_slow_resnet50_f32s2_kinetics400'
38 |     SAVE_DIR: 'features'
39 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/i3d_slow_resnet50_f32s2_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # i3d_slow_resnet50_f32s2_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 196
18 | BATCH_SIZE: 8
19 | LR: 0.1
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-4
22 | USE_WARMUP: True
23 | WARMUP_EPOCHS: 34
24 | LR_POLICY: 'Cosine'
25 | RESUME_EPOCH: -1
26 |
27 | VAL:
28 | FREQ: 5
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 32
38 | FRAME_RATE: 2
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'i3d_slow_resnet50_f32s2_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/i3d_slow_resnet50_f32s2_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/i3d_slow_resnet50_f32s2_kinetics400/eval'
55 | DISPLAY_FREQ: 50
56 | SAVE_FREQ: 5
57 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/i3d_slow_resnet50_f8s8_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # i3d_slow_resnet50_f8s8_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.0.32:23456'
9 | WOLRD_URLS: ['172.31.0.32']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 196
18 | BATCH_SIZE: 8
19 | LR: 0.1
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-4
22 | USE_WARMUP: True
23 | WARMUP_EPOCHS: 34
24 | LR_POLICY: 'Cosine'
25 | RESUME_EPOCH: -1
26 |
27 | VAL:
28 | FREQ: 5
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 8
38 | FRAME_RATE: 8
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'i3d_slow_resnet50_f8s8_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/i3d_slow_resnet50_f8s8_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/i3d_slow_resnet50_f8s8_kinetics400/eval'
55 | DISPLAY_FREQ: 50
56 | SAVE_FREQ: 5
57 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/ircsn_v2_resnet152_f32s2_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # ircsn_v2_resnet152_f32s2_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.252:23456'
9 | WOLRD_URLS: ['172.31.72.252']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 58 # finetune from a pretrained model, hence small lr
18 | BATCH_SIZE: 8
19 | LR: 0.000125
20 | LR_POLICY: 'Step'
21 | MOMENTUM: 0.9
22 | W_DECAY: 1e-5
23 | USE_WARMUP: False
24 | LR_MILESTONE: [32, 48]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 2
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 32
38 | FRAME_RATE: 2
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: True
45 |
46 | MODEL:
47 | NAME: 'ircsn_v2_resnet152_f32s2_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/ircsn_v2_resnet152_f32s2_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/ircsn_v2_resnet152_f32s2_kinetics400/eval'
55 | SAVE_FREQ: 2
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/r2plus1d_v1_resnet18_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # r2plus1d_v1_resnet18_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 196
18 | BATCH_SIZE: 8
19 | LR: 0.001
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | USE_WARMUP: True
23 | WARMUP_EPOCHS: 34
24 | LR_POLICY: 'Cosine'
25 | WARMUP_END_LR: 0.1
26 | RESUME_EPOCH: -1
27 |
28 | VAL:
29 | FREQ: 2
30 | BATCH_SIZE: 8
31 |
32 | DATA:
33 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
34 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
35 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
36 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
37 | NUM_CLASSES: 400
38 | CLIP_LEN: 16
39 | FRAME_RATE: 2
40 | NUM_SEGMENT: 1
41 | NUM_CROP: 1
42 | TEST_NUM_SEGMENT: 10
43 | TEST_NUM_CROP: 3
44 | MULTIGRID: False
45 | KEEP_ASPECT_RATIO: False
46 | CROP_SIZE: 112
47 | SHORT_SIDE_SIZE: 128
48 | NEW_HEIGHT: 128
49 | NEW_WIDTH: 171
50 |
51 | MODEL:
52 | NAME: 'r2plus1d_v1_resnet18_kinetics400'
53 | PRETRAINED: False
54 |
55 | LOG:
56 | BASE_PATH: './logs/r2plus1d_v1_resnet18_kinetics400'
57 | LOG_DIR: 'tb_log'
58 | SAVE_DIR: 'checkpoints'
59 | EVAL_DIR: './logs/r2plus1d_v1_resnet18_kinetics400/eval'
60 | SAVE_FREQ: 2
61 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/r2plus1d_v1_resnet34_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # r2plus1d_v1_resnet34_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 196
18 | BATCH_SIZE: 8
19 | LR: 0.001
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | USE_WARMUP: True
23 | WARMUP_EPOCHS: 34
24 | LR_POLICY: 'Cosine'
25 | WARMUP_END_LR: 0.1
26 | RESUME_EPOCH: -1
27 |
28 | VAL:
29 | FREQ: 2
30 | BATCH_SIZE: 8
31 |
32 | DATA:
33 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
34 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
35 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
36 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
37 | NUM_CLASSES: 400
38 | CLIP_LEN: 16
39 | FRAME_RATE: 2
40 | NUM_SEGMENT: 1
41 | NUM_CROP: 1
42 | TEST_NUM_SEGMENT: 10
43 | TEST_NUM_CROP: 3
44 | MULTIGRID: False
45 | KEEP_ASPECT_RATIO: False
46 | CROP_SIZE: 112
47 | SHORT_SIDE_SIZE: 128
48 | NEW_HEIGHT: 128
49 | NEW_WIDTH: 171
50 |
51 | MODEL:
52 | NAME: 'r2plus1d_v1_resnet34_kinetics400'
53 | PRETRAINED: False
54 |
55 | LOG:
56 | BASE_PATH: './logs/r2plus1d_v1_resnet34_kinetics400'
57 | LOG_DIR: 'tb_log'
58 | SAVE_DIR: 'checkpoints'
59 | EVAL_DIR: './logs/r2plus1d_v1_resnet34_kinetics400/eval'
60 | SAVE_FREQ: 2
61 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/r2plus1d_v1_resnet50_feat.yaml:
--------------------------------------------------------------------------------
1 | # r2plus1d_v1_resnet50_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | INFERENCE:
17 | FEAT: True
18 |
19 | DATA:
20 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
21 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
22 | NUM_CLASSES: 400
23 | CLIP_LEN: 16
24 | FRAME_RATE: 2
25 | NUM_SEGMENT: 1
26 | NUM_CROP: 1
27 | MULTIGRID: False
28 | KEEP_ASPECT_RATIO: False
29 | CROP_SIZE: 112
30 | SHORT_SIDE_SIZE: 128
31 | NEW_HEIGHT: 128
32 | NEW_WIDTH: 171
33 |
34 | MODEL:
35 | NAME: 'r2plus1d_v1_resnet50_kinetics400'
36 | PRETRAINED: True
37 |
38 | LOG:
39 | BASE_PATH: './logs/r2plus1d_v1_resnet50_kinetics400'
40 | SAVE_DIR: 'features'
41 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/resnet101_v1b_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # resnet101_v1b_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 100
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | LR_POLICY: 'Step'
23 | USE_WARMUP: False
24 | LR_MILESTONE: [40, 80]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 2
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 1
38 | FRAME_RATE: 1
39 | NUM_SEGMENT: 7
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'resnet101_v1b_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/resnet101_v1b_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/resnet101_v1b_kinetics400/eval'
55 | SAVE_FREQ: 2
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/resnet152_v1b_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # resnet152_v1b_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 100
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | LR_POLICY: 'Step'
23 | USE_WARMUP: False
24 | LR_MILESTONE: [40, 80]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 2
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 1
38 | FRAME_RATE: 1
39 | NUM_SEGMENT: 7
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'resnet152_v1b_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/resnet152_v1b_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/resnet152_v1b_kinetics400/eval'
55 | SAVE_FREQ: 2
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/resnet18_v1b_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # resnet18_v1b_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 100
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | LR_POLICY: 'Step'
23 | USE_WARMUP: False
24 | LR_MILESTONE: [40, 80]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 2
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 1
38 | FRAME_RATE: 1
39 | NUM_SEGMENT: 7
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'resnet18_v1b_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/resnet18_v1b_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/resnet18_v1b_kinetics400/eval'
55 | SAVE_FREQ: 2
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/resnet34_v1b_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # resnet34_v1b_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 100
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | LR_POLICY: 'Step'
23 | USE_WARMUP: False
24 | LR_MILESTONE: [40, 80]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 2
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 1
38 | FRAME_RATE: 1
39 | NUM_SEGMENT: 7
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'resnet34_v1b_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/resnet34_v1b_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/resnet34_v1b_kinetics400/eval'
55 | SAVE_FREQ: 2
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/resnet50_v1b_custom.yaml:
--------------------------------------------------------------------------------
1 | # resnet50_v1b_custom
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.252:23456'
9 | WOLRD_URLS: ['172.31.72.252']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 100
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | LR_POLICY: 'Step'
23 | USE_WARMUP: False
24 | LR_MILESTONE: [40, 80]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 2
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/'
36 | NUM_CLASSES: 174
37 | CLIP_LEN: 1
38 | FRAME_RATE: 1
39 | NUM_SEGMENT: 7
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'resnet50_v1b_custom'
48 | PRETRAINED: True
49 |
50 | LOG:
51 | BASE_PATH: './logs/resnet50_v1b_custom'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/resnet50_v1b_custom/eval'
55 | SAVE_FREQ: 2
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/resnet50_v1b_feat.yaml:
--------------------------------------------------------------------------------
1 | # resnet50_v1b_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | INFERENCE:
17 | FEAT: True
18 |
19 | DATA:
20 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
21 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
22 | NUM_CLASSES: 400
23 | CLIP_LEN: 1
24 | FRAME_RATE: 1
25 | NUM_SEGMENT: 1
26 | NUM_CROP: 1
27 | MULTIGRID: False
28 | KEEP_ASPECT_RATIO: False
29 |
30 | MODEL:
31 | NAME: 'resnet50_v1b_kinetics400'
32 | PRETRAINED: True
33 |
34 | LOG:
35 | BASE_PATH: './logs/resnet50_v1b_kinetics400'
36 | SAVE_DIR: 'features'
37 |
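Unlike the training configs, the `*_feat.yaml` variants set `CONFIG.INFERENCE.FEAT: True`, load pretrained weights, and write extracted features into `LOG.SAVE_DIR` rather than checkpoints. A hedged usage sketch, assuming the `feat_extract_pytorch.py` helper in this folder:

```bash
# Extract per-video features with a pretrained backbone; feature-extraction
# configs need no TRAIN/VAL sections, only DATA, MODEL, and LOG.
python feat_extract_pytorch.py --config-file ./configuration/resnet50_v1b_feat.yaml
```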
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/resnet50_v1b_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # resnet50_v1b_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 100
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | LR_POLICY: 'Step'
23 | USE_WARMUP: False
24 | LR_MILESTONE: [40, 80]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 2
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 1
38 | FRAME_RATE: 1
39 | NUM_SEGMENT: 7
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'resnet50_v1b_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/resnet50_v1b_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/resnet50_v1b_kinetics400/eval'
55 | SAVE_FREQ: 2
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/resnet50_v1b_sthsthv2.yaml:
--------------------------------------------------------------------------------
1 | # resnet50_v1b_sthsthv2
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 30
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | LR_POLICY: 'Step'
23 | USE_WARMUP: False
24 | LR_MILESTONE: [10, 20]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 2
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/'
36 | NUM_CLASSES: 174
37 | CLIP_LEN: 1
38 | FRAME_RATE: 1
39 | NUM_SEGMENT: 8
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 8
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'resnet50_v1b_sthsthv2'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/resnet50_v1b_sthsthv2'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/resnet50_v1b_sthsthv2/eval'
55 | SAVE_FREQ: 2
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/slowfast_16x8_resnet50_sthsthv2.yaml:
--------------------------------------------------------------------------------
1 | # slowfast_16x8_resnet50_sthsthv2
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 30
18 | BATCH_SIZE: 8
19 | LR: 0.0001
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-6
22 | USE_WARMUP: True
23 | WARMUP_EPOCHS: 5
24 | LR_POLICY: 'Cosine'
25 | WARMUP_END_LR: 0.1
26 | RESUME_EPOCH: -1
27 |
28 | VAL:
29 | FREQ: 2
30 | BATCH_SIZE: 8
31 |
32 | DATA:
33 | TRAIN_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_train.txt'
34 | VAL_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_val.txt'
35 | TRAIN_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/'
36 | VAL_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/'
37 | NUM_CLASSES: 174
38 | CLIP_LEN: 64
39 | FRAME_RATE: 2
40 | NUM_SEGMENT: 1
41 | NUM_CROP: 1
42 | TEST_NUM_SEGMENT: 2
43 | TEST_NUM_CROP: 3
44 | MULTIGRID: False
45 | KEEP_ASPECT_RATIO: False
46 |
47 | MODEL:
48 | NAME: 'slowfast_16x8_resnet50_sthsthv2'
49 | PRETRAINED: False
50 |
51 | LOG:
52 | BASE_PATH: './logs/slowfast_16x8_resnet50_sthsthv2'
53 | LOG_DIR: 'tb_log'
54 | SAVE_DIR: 'checkpoints'
55 | EVAL_DIR: './logs/slowfast_16x8_resnet50_sthsthv2/eval'
56 | SAVE_FREQ: 2
57 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/slowfast_4x16_resnet50_custom.yaml:
--------------------------------------------------------------------------------
1 | # slowfast_4x16_resnet50_custom
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.252:23456'
9 | WOLRD_URLS: ['172.31.72.252']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 196
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | USE_WARMUP: True
23 | WARMUP_EPOCHS: 34
24 | LR_POLICY: 'Cosine'
25 | WARMUP_END_LR: 0.1
26 | RESUME_EPOCH: -1
27 |
28 | VAL:
29 | FREQ: 2
30 | BATCH_SIZE: 8
31 |
32 | DATA:
33 | TRAIN_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_train.txt'
34 | VAL_ANNO_PATH: '/home/ubuntu/data/sthsthv2/sthsthv2_val.txt'
35 | TRAIN_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/'
36 | VAL_DATA_PATH: '/home/ubuntu/data/sthsthv2/20bn-something-something-v2/'
37 | NUM_CLASSES: 174
38 | CLIP_LEN: 32
39 | FRAME_RATE: 2
40 | NUM_SEGMENT: 1
41 | NUM_CROP: 1
42 | TEST_NUM_SEGMENT: 10
43 | TEST_NUM_CROP: 3
44 | MULTIGRID: False
45 | KEEP_ASPECT_RATIO: False
46 |
47 | MODEL:
48 | NAME: 'slowfast_4x16_resnet50_custom'
49 | PRETRAINED: True
50 |
51 | LOG:
52 | BASE_PATH: './logs/slowfast_4x16_resnet50_custom'
53 | LOG_DIR: 'tb_log'
54 | SAVE_DIR: 'checkpoints'
55 | EVAL_DIR: './logs/slowfast_4x16_resnet50_custom/eval'
56 | SAVE_FREQ: 2
57 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/slowfast_4x16_resnet50_feat.yaml:
--------------------------------------------------------------------------------
1 | # slowfast_4x16_resnet50_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | INFERENCE:
17 | FEAT: True
18 |
19 | DATA:
20 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
21 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
22 | NUM_CLASSES: 400
23 | CLIP_LEN: 32
24 | FRAME_RATE: 2
25 | NUM_SEGMENT: 1
26 | NUM_CROP: 1
27 | MULTIGRID: False
28 | KEEP_ASPECT_RATIO: False
29 |
30 | MODEL:
31 | NAME: 'slowfast_4x16_resnet50_kinetics400'
32 | PRETRAINED: False
33 |
34 | LOG:
35 | BASE_PATH: './logs/slowfast_4x16_resnet50_kinetics400'
36 | SAVE_DIR: 'features'
37 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/slowfast_4x16_resnet50_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # slowfast_4x16_resnet50_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 196
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | USE_WARMUP: True
23 | WARMUP_EPOCHS: 34
24 | LR_POLICY: 'Cosine'
25 | RESUME_EPOCH: -1
26 |
27 | VAL:
28 | FREQ: 5
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 32
38 | FRAME_RATE: 2
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'slowfast_4x16_resnet50_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/slowfast_4x16_resnet50_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/slowfast_4x16_resnet50_kinetics400/eval'
55 | SAVE_FREQ: 5
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/slowfast_8x8_resnet101_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # slowfast_8x8_resnet101_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 196
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | USE_WARMUP: True
23 | WARMUP_EPOCHS: 34
24 | LR_POLICY: 'Cosine'
25 | WARMUP_END_LR: 0.1
26 | RESUME_EPOCH: -1
27 |
28 | VAL:
29 | FREQ: 2
30 | BATCH_SIZE: 8
31 |
32 | DATA:
33 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
34 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
35 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
36 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
37 | NUM_CLASSES: 400
38 | CLIP_LEN: 32
39 | FRAME_RATE: 2
40 | NUM_SEGMENT: 1
41 | NUM_CROP: 1
42 | TEST_NUM_SEGMENT: 10
43 | TEST_NUM_CROP: 3
44 | MULTIGRID: False
45 | KEEP_ASPECT_RATIO: False
46 |
47 | MODEL:
48 | NAME: 'slowfast_8x8_resnet101_kinetics400'
49 | PRETRAINED: False
50 |
51 | LOG:
52 | BASE_PATH: './logs/slowfast_8x8_resnet101_kinetics400'
53 | LOG_DIR: 'tb_log'
54 | SAVE_DIR: 'checkpoints'
55 | EVAL_DIR: './logs/slowfast_8x8_resnet101_kinetics400/eval'
56 | SAVE_FREQ: 2
57 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/slowfast_8x8_resnet50_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # slowfast_8x8_resnet50_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 196
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | USE_WARMUP: True
23 | WARMUP_EPOCHS: 34
24 | LR_POLICY: 'Cosine'
25 | WARMUP_END_LR: 0.1
26 | RESUME_EPOCH: -1
27 |
28 | VAL:
29 | FREQ: 2
30 | BATCH_SIZE: 8
31 |
32 | DATA:
33 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
34 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
35 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
36 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
37 | NUM_CLASSES: 400
38 | CLIP_LEN: 32
39 | FRAME_RATE: 2
40 | NUM_SEGMENT: 1
41 | NUM_CROP: 1
42 | TEST_NUM_SEGMENT: 10
43 | TEST_NUM_CROP: 3
44 | MULTIGRID: False
45 | KEEP_ASPECT_RATIO: False
46 |
47 | MODEL:
48 | NAME: 'slowfast_8x8_resnet50_kinetics400'
49 | PRETRAINED: False
50 |
51 | LOG:
52 | BASE_PATH: './logs/slowfast_8x8_resnet50_kinetics400'
53 | LOG_DIR: 'tb_log'
54 | SAVE_DIR: 'checkpoints'
55 | EVAL_DIR: './logs/slowfast_8x8_resnet50_kinetics400/eval'
56 | SAVE_FREQ: 2
57 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/tpn_resnet101_f16s4_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # tpn_resnet101_f16s4_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 150
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | LR_POLICY: 'Step'
23 | USE_WARMUP: False
24 | LR_MILESTONE: [75, 125]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 2
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 16
38 | FRAME_RATE: 4
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'tpn_resnet101_f16s4_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/tpn_resnet101_f16s4_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/tpn_resnet101_f16s4_kinetics400/eval'
55 | SAVE_FREQ: 2
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/tpn_resnet101_f32s2_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # tpn_resnet101_f32s2_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.252:23456'
9 | WOLRD_URLS: ['172.31.72.252']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 150
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | LR_POLICY: 'Step'
23 | USE_WARMUP: False
24 | LR_MILESTONE: [75, 125]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 2
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 32
38 | FRAME_RATE: 2
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'tpn_resnet101_f32s2_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/tpn_resnet101_f32s2_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/tpn_resnet101_f32s2_kinetics400/eval'
55 | SAVE_FREQ: 2
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/tpn_resnet101_f8s8_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # tpn_resnet101_f8s8_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 150
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | LR_POLICY: 'Step'
23 | USE_WARMUP: False
24 | LR_MILESTONE: [75, 125]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 2
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 8
38 | FRAME_RATE: 8
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'tpn_resnet101_f8s8_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/tpn_resnet101_f8s8_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/tpn_resnet101_f8s8_kinetics400/eval'
55 | SAVE_FREQ: 2
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/tpn_resnet50_f16s4_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # tpn_resnet50_f16s4_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 150
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | LR_POLICY: 'Step'
23 | USE_WARMUP: False
24 | LR_MILESTONE: [75, 125]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 2
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 16
38 | FRAME_RATE: 4
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'tpn_resnet50_f16s4_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/tpn_resnet50_f16s4_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/tpn_resnet50_f16s4_kinetics400/eval'
55 | SAVE_FREQ: 2
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/tpn_resnet50_f32s2_custom.yaml:
--------------------------------------------------------------------------------
1 | # tpn_resnet50_f32s2_custom
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.252:23456'
9 | WOLRD_URLS: ['172.31.72.252']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 150
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | LR_POLICY: 'Step'
21 | MOMENTUM: 0.9
22 | W_DECAY: 1e-5
23 | USE_WARMUP: False
24 | LR_MILESTONE: [75, 125]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 2
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 32
38 | FRAME_RATE: 2
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'tpn_resnet50_f32s2_custom'
48 | PRETRAINED: True
49 |
50 | LOG:
51 | BASE_PATH: './logs/tpn_resnet50_f32s2_custom'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/tpn_resnet50_f32s2_custom/eval'
55 | SAVE_FREQ: 2
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/tpn_resnet50_f32s2_feat.yaml:
--------------------------------------------------------------------------------
1 | # tpn_resnet50_f32s2_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 |
17 | INFERENCE:
18 | FEAT: True
19 |
20 | DATA:
21 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
22 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
23 | NUM_CLASSES: 400
24 | CLIP_LEN: 32
25 | FRAME_RATE: 2
26 | NUM_SEGMENT: 1
27 | NUM_CROP: 1
28 | MULTIGRID: False
29 | KEEP_ASPECT_RATIO: False
30 |
31 | MODEL:
32 | NAME: 'tpn_resnet50_f32s2_kinetics400'
33 | PRETRAINED: True
34 |
35 | LOG:
36 | BASE_PATH: './logs/tpn_resnet50_f32s2_kinetics400'
37 | SAVE_DIR: 'features'
38 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/tpn_resnet50_f32s2_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # tpn_resnet50_f32s2_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.72.195:23456'
9 | WOLRD_URLS: ['172.31.72.195']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 150
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-5
22 | LR_POLICY: 'Step'
23 | USE_WARMUP: False
24 | LR_MILESTONE: [75, 125]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 2
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 32
38 | FRAME_RATE: 2
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'tpn_resnet50_f32s2_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/tpn_resnet50_f32s2_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/tpn_resnet50_f32s2_kinetics400/eval'
55 | SAVE_FREQ: 2
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/configuration/tpn_resnet50_f8s8_kinetics400.yaml:
--------------------------------------------------------------------------------
1 | # tpn_resnet50_f8s8_kinetics400
2 |
3 | DDP_CONFIG:
4 | WORLD_SIZE: 1
5 | WORLD_RANK: 0
6 | GPU_WORLD_SIZE: 8
7 | GPU_WORLD_RANK: 0
8 | DIST_URL: 'tcp://172.31.0.32:23456'
9 | WOLRD_URLS: ['172.31.0.32']
10 | AUTO_RANK_MATCH: True
11 | DIST_BACKEND: 'nccl'
12 | GPU: 0
13 | DISTRIBUTED: True
14 |
15 | CONFIG:
16 | TRAIN:
17 | EPOCH_NUM: 150
18 | BATCH_SIZE: 8
19 | LR: 0.01
20 | MOMENTUM: 0.9
21 | W_DECAY: 1e-4
22 | LR_POLICY: 'Step'
23 | USE_WARMUP: False
24 | LR_MILESTONE: [75, 125]
25 | STEP: 0.1
26 |
27 | VAL:
28 | FREQ: 10
29 | BATCH_SIZE: 8
30 |
31 | DATA:
32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt'
33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt'
34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/'
35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/'
36 | NUM_CLASSES: 400
37 | CLIP_LEN: 8
38 | FRAME_RATE: 8
39 | NUM_SEGMENT: 1
40 | NUM_CROP: 1
41 | TEST_NUM_SEGMENT: 10
42 | TEST_NUM_CROP: 3
43 | MULTIGRID: False
44 | KEEP_ASPECT_RATIO: False
45 |
46 | MODEL:
47 | NAME: 'tpn_resnet50_f8s8_kinetics400'
48 | PRETRAINED: False
49 |
50 | LOG:
51 | BASE_PATH: './logs/tpn_resnet50_f8s8_kinetics400'
52 | LOG_DIR: 'tb_log'
53 | SAVE_DIR: 'checkpoints'
54 | EVAL_DIR: './logs/tpn_resnet50_f8s8_kinetics400/eval'
55 | SAVE_FREQ: 10
56 |
--------------------------------------------------------------------------------
/scripts/action-recognition/get_flops.py:
--------------------------------------------------------------------------------
1 | """
2 | Script to compute FLOPs of a model
3 | """
4 | import os
5 | import argparse
6 |
7 | import torch
8 | from gluoncv.torch.model_zoo import get_model
9 | from gluoncv.torch.engine.config import get_cfg_defaults
10 |
11 | from thop import profile, clever_format
12 |
13 |
14 | if __name__ == '__main__':
15 | parser = argparse.ArgumentParser(description='Compute FLOPs of a model.')
16 | parser.add_argument('--config-file', type=str, help='path to config file.')
17 | parser.add_argument('--num-frames', type=int, default=32, help='temporal clip length.')
18 | parser.add_argument('--input-size', type=int, default=224,
19 |                         help='size of the input image. default is 224')
20 |
21 | args = parser.parse_args()
22 | cfg = get_cfg_defaults()
23 | cfg.merge_from_file(args.config_file)
24 |
25 | model = get_model(cfg)
26 |     input_tensor = torch.rand(1, 3, args.num_frames, args.input_size, args.input_size)  # Variable is deprecated since PyTorch 0.4; a plain tensor works
27 |
28 | macs, params = profile(model, inputs=(input_tensor,))
29 | macs, params = clever_format([macs, params], "%.3f")
30 | print("FLOPs: ", macs, "; #params: ", params)
31 |
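A typical invocation, matching the arguments defined above (the config path is illustrative):

```bash
# Profile a SlowFast config: thop hooks every module during one forward
# pass and reports multiply-accumulates plus the parameter count.
python get_flops.py \
    --config-file ./configuration/slowfast_4x16_resnet50_kinetics400.yaml \
    --num-frames 32 --input-size 224
```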
--------------------------------------------------------------------------------
/scripts/classification/cifar/README.md:
--------------------------------------------------------------------------------
1 | # Image Classification on CIFAR10
2 |
3 | Please refer to [GluonCV Model Zoo](http://gluon-cv.mxnet.io/model_zoo/index.html#image-classification)
4 | for available pretrained models, training hyper-parameters, etc.
5 |
--------------------------------------------------------------------------------
/scripts/classification/fit_classification.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import gluoncv as gcv
4 | gcv.utils.check_version('0.8.0')
5 |
6 | from gluoncv.auto.estimators import ImageClassificationEstimator
7 | from gluoncv.auto.tasks.utils import config_to_nested
8 | from d8.image_classification import Dataset
9 |
10 |
11 | if __name__ == '__main__':
12 | # specify hyperparameters
13 | config = {
14 | 'dataset': 'boat',
15 | 'gpus': [0, 1, 2, 3, 4, 5, 6, 7],
16 | 'estimator': 'img_cls',
17 | 'model': 'resnet50_v1b',
18 | 'batch_size': 128, # range [16, 32, 64, 128]
19 | 'epochs': 3
20 | }
21 | config = config_to_nested(config)
22 | config.pop('estimator')
23 |
24 | # specify dataset
25 | dataset = Dataset.get('boat')
26 | train_data, valid_data = dataset.split(0.8)
27 |
28 | # specify estimator
29 | estimator = ImageClassificationEstimator(config)
30 |
31 | # fit estimator
32 | estimator.fit(train_data, valid_data)
33 |
34 | # evaluate auto estimator
35 | top1, top5 = estimator.evaluate(valid_data)
36 | logging.info('evaluation: top1={}, top5={}'.format(top1, top5))
37 |
--------------------------------------------------------------------------------
/scripts/classification/imagenet/demo_imagenet.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | from mxnet import nd, image
4 |
5 | import gluoncv as gcv
6 | gcv.utils.check_version('0.6.0')
7 | from gluoncv.data import ImageNet1kAttr
8 | from gluoncv.data.transforms.presets.imagenet import transform_eval
9 | from gluoncv.model_zoo import get_model
10 |
11 | parser = argparse.ArgumentParser(description='Predict ImageNet classes from a given image')
12 | parser.add_argument('--model', type=str, required=True,
13 | help='name of the model to use')
14 | parser.add_argument('--saved-params', type=str, default='',
15 | help='path to the saved model parameters')
16 | parser.add_argument('--input-pic', type=str, required=True,
17 | help='path to the input picture')
18 | opt = parser.parse_args()
19 |
20 | # Load Model
21 | model_name = opt.model
22 | pretrained = opt.saved_params == ''
23 | net = get_model(model_name, pretrained=pretrained)
24 |
25 | if not pretrained:
26 | net.load_parameters(opt.saved_params)
27 | attrib = ImageNet1kAttr()
28 | classes = attrib.classes
29 | else:
30 | classes = net.classes
31 |
32 | # Load Images
33 | img = image.imread(opt.input_pic)
34 |
35 | # Transform
36 | img = transform_eval(img)
37 | pred = net(img)
38 |
39 | topK = 5
40 | ind = nd.topk(pred, k=topK)[0].astype('int')
42 | print('The input picture is classified as')
42 | for i in range(topK):
43 | print('\t[%s], with probability %.3f.'%
44 | (classes[ind[i].asscalar()], nd.softmax(pred)[0][ind[i]].asscalar()))
45 |
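A typical invocation (the image path is a placeholder):

```bash
# Classify a local image with pretrained ImageNet weights and
# print the top-5 classes with probabilities.
python demo_imagenet.py --model resnet50_v1b --input-pic ./example.jpg
```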
--------------------------------------------------------------------------------
/scripts/classification/imagenet/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [ -z "$MODEL" ]; then
4 | export MODEL=resnet18_v1
5 | fi
6 |
7 | if [ -z "$NUM_TRAINING_SAMPLES" ]; then
8 | export NUM_TRAINING_SAMPLES=1281167
9 | fi
10 |
11 | if [ -z "$NUM_EPOCHS" ]; then
12 | export NUM_EPOCHS=3
13 | fi
14 |
15 | if [ -z "$NUM_GPUS" ] || [ $NUM_GPUS '-lt' 0 ]; then
16 | export NUM_GPUS=0
17 | fi
18 |
19 | if [ -z "$DATA_BACKEND" ]; then
20 | export DATA_BACKEND='mxnet' # Options are: dali-gpu, dali-cpu, mxnet
21 | fi
22 |
23 | if [ -z "$TRAIN_DATA_DIR" ]; then
24 | export TRAIN_DATA_DIR=~/.mxnet/datasets/imagenet
25 | fi
26 |
27 | if [ -z "$DALI_VER" ]; then
28 | export DALI_VER=nvidia-dali-cuda100
29 | fi
30 |
31 | python train_imagenet.py --model $MODEL --data-backend $DATA_BACKEND --num-gpus $NUM_GPUS \
32 | --num-epochs $NUM_EPOCHS --num-training-samples $NUM_TRAINING_SAMPLES --use-rec \
33 | --rec-train $TRAIN_DATA_DIR/train.rec --rec-train-idx $TRAIN_DATA_DIR/train.idx \
34 |     --rec-val $TRAIN_DATA_DIR/val.rec --rec-val-idx $TRAIN_DATA_DIR/val.idx --data-dir $TRAIN_DATA_DIR
35 |
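Every variable above can be overridden from the environment; for example (values are illustrative):

```bash
# Train ResNet-50 on 4 GPUs with the DALI GPU pipeline for 90 epochs.
MODEL=resnet50_v1 NUM_GPUS=4 NUM_EPOCHS=90 DATA_BACKEND=dali-gpu bash test.sh
```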
--------------------------------------------------------------------------------
/scripts/datasets/README.md:
--------------------------------------------------------------------------------
1 | # Prepare large datasets for vision
2 | [Gluon](https://mxnet.incubator.apache.org/gluon/) itself provides self-managed
3 | tiny datasets such as MNIST, CIFAR-10/100, Fashion-MNIST.
4 | However, downloading and unzipping large-scale datasets is very time consuming,
5 | so it should not happen during class instantiation.
6 | Therefore we provide convenient setup scripts, whether you already have the raw data or still need to download it.
7 |
8 | Each dataset requires a one-time setup, after which it will be automatically
9 | recognized by the `gluoncv` package.
10 |
11 | ## Instructions
12 | Please refer to our official [tutorials](http://gluon-cv.mxnet.io/build/examples_datasets/index.html)
13 |
--------------------------------------------------------------------------------
/scripts/datasets/imagenet_val_maps.pklz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/scripts/datasets/imagenet_val_maps.pklz
--------------------------------------------------------------------------------
/scripts/datasets/tiny_motorbike.py:
--------------------------------------------------------------------------------
1 | """Prepare PASCAL VOC tiny motorbike datasets"""
2 | import os
3 | import autogluon as ag
4 |
5 |
6 | if __name__ == '__main__':
7 | root = os.path.expanduser('~/.mxnet/datasets/')
8 | if not os.path.exists(root):
9 | os.makedirs(root)
10 |
11 | filename_zip = ag.download('https://autogluon.s3.amazonaws.com/datasets/tiny_motorbike.zip', path=root)
12 | filename = ag.unzip(filename_zip, root=root)
13 | data_root = os.path.join(root, filename)
14 | os.remove(filename_zip)
15 |
16 | print("dataset saved to: {}".format(data_root))
--------------------------------------------------------------------------------
/scripts/deployment/README.md:
--------------------------------------------------------------------------------
1 | # Deploy GluonCV models
2 |
3 | This folder includes deployment scripts and examples for pre-trained models.
4 |
5 | Please refer to [GluonCV Deployment Tutorials](https://gluon-cv.mxnet.io/build/examples_deployment/index.html) for detailed instructions.
--------------------------------------------------------------------------------
/scripts/deployment/cpp-inference/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | bin/
3 | install/
4 | *.json
5 | *.param
6 | *.jpg
7 | *.names
8 |
--------------------------------------------------------------------------------
/scripts/deployment/export/.gitignore:
--------------------------------------------------------------------------------
1 | *.json
2 | *.params
3 |
--------------------------------------------------------------------------------
/scripts/deployment/export/README.md:
--------------------------------------------------------------------------------
1 | # Export Pre-trained models in GluonCV [model zoo](https://gluon-cv.mxnet.io/model_zoo/index.html)
2 |
3 | ### Usage
4 |
5 | ```bash
6 | python export_pretrained.py -m resnet18_v1
7 | ```
8 |
9 | ### Check out supported arguments
10 |
11 | ```bash
12 | python export_pretrained.py -h
13 | ```
14 |
--------------------------------------------------------------------------------
/scripts/deployment/export/export_pretrained.py:
--------------------------------------------------------------------------------
1 | """Script for export pre-trained models in GluonCV model zoo."""
2 | from __future__ import print_function
3 | import argparse
4 | import gluoncv as gcv
5 | gcv.utils.check_version('0.6.0')
6 |
7 | def parse_args():
8 | parser = argparse.ArgumentParser("Export model helper.")
9 | parser.add_argument('--model', '-m', required=True, type=str, help='Name of the model')
10 | parser.add_argument('--no-preprocess', action='store_true', help='Do not include standard preprocess.')
11 | args = parser.parse_args()
12 | return args
13 |
14 | args = parse_args()
15 | net = gcv.model_zoo.get_model(args.model, pretrained=True)
16 | gcv.utils.export_block(args.model, net, preprocess=(not args.no_preprocess), layout='HWC')
17 | print('Done...')
18 |
--------------------------------------------------------------------------------
/scripts/depth/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/scripts/depth/README.md
--------------------------------------------------------------------------------
/scripts/depth/train.py:
--------------------------------------------------------------------------------
1 | # Copyright Niantic 2019. Patent Pending. All rights reserved.
2 | #
3 | # This software is licensed under the terms of the Monodepth2 licence
4 | # which allows for non-commercial use only, the full terms of which are made
5 | # available in the LICENSE file.
6 |
7 | from __future__ import absolute_import, division, print_function
8 |
9 | import os
10 | import time
11 | import logging
12 |
13 | from trainer import Trainer
14 | from options import MonodepthOptions
15 |
16 | options = MonodepthOptions()
17 | opts = options.parse()
18 |
19 | if __name__ == "__main__":
20 | # build logger
21 | # logging and checkpoint saving
22 | log_path = os.path.join(opts.log_dir, opts.model_zoo)
23 | if not os.path.exists(log_path):
24 | os.makedirs(log_path)
25 | file_handler = logging.FileHandler(os.path.join(log_path, "train.log"))
26 | stream_handler = logging.StreamHandler()
27 | logger = logging.getLogger('')
28 | logger.setLevel(logging.INFO)
29 | logger.addHandler(file_handler)
30 | logger.addHandler(stream_handler)
31 | logger.info(opts)
32 |
33 | trainer = Trainer(opts, logger)
34 |
35 | tic = time.time()
36 | trainer.train()
37 | logger.info("Training Finished! Total training time is %dh %dm" %
38 | (int((time.time() - tic) / 3600), int((time.time() - tic) % 3600 / 60)))
39 |
--------------------------------------------------------------------------------
/scripts/detection/README.md:
--------------------------------------------------------------------------------
1 | # Object Detection Models
2 |
3 | Please refer to [GluonCV Model Zoo](http://gluon-cv.mxnet.io/model_zoo/index.html#object-detection)
4 | for available pretrained models, training hyper-parameters, etc.
5 |
--------------------------------------------------------------------------------
/scripts/detection/center_net/fit_center_net.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import gluoncv as gcv
4 | gcv.utils.check_version('0.8.0')
5 |
6 | from gluoncv.auto.estimators import CenterNetEstimator
7 | from gluoncv.auto.tasks.utils import config_to_nested
8 | from d8.object_detection import Dataset
9 |
10 |
11 | if __name__ == '__main__':
12 | # specify hyperparameters
13 | config = {
14 | 'dataset': 'sheep',
15 | 'gpus': [0, 1, 2, 3, 4, 5, 6, 7],
16 | 'estimator': 'center_net',
17 | 'base_network': 'resnet50_v1b',
18 | 'batch_size': 64, # range [8, 16, 32, 64]
19 | 'epochs': 3
20 | }
21 | config = config_to_nested(config)
22 | config.pop('estimator')
23 |
24 | # specify dataset
25 | dataset = Dataset.get('sheep')
26 | train_data, valid_data = dataset.split(0.8)
27 |
28 | # specify estimator
29 | estimator = CenterNetEstimator(config)
30 |
31 | # fit estimator
32 | estimator.fit(train_data, valid_data)
33 |
34 | # evaluate auto estimator
35 | eval_map = estimator.evaluate(valid_data)
36 | logging.info('evaluation: mAP={}'.format(eval_map[-1][-1]))
37 |
--------------------------------------------------------------------------------
/scripts/detection/faster_rcnn/README.md:
--------------------------------------------------------------------------------
1 | # Faster R-CNN: Towards real-time object detection with region proposal networks. [1]
2 |
3 | [GluonCV Model Zoo](http://gluon-cv.mxnet.io/model_zoo/index.html#object-detection)
4 |
5 | - `--amp` Use [Automatic Mixed Precision training](https://mxnet.incubator.apache.org/versions/master/tutorials/amp/amp_tutorial.html), automatically casting to FP16 where safe.
6 | - `--horovod` Use [Horovod](https://github.com/horovod/horovod) for distributed training, with a network-agnostic wrapper for the optimizer, allowing efficient allreduce using OpenMPI and NCCL.
7 |
8 | ## References
9 | 1. Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. "Faster R-CNN: Towards real-time object detection with region proposal networks." In IEEE Transactions on Pattern Analysis and Machine Intelligence, 2016.
10 | 2. Ross Girshick. "Fast R-CNN." In Proceedings of the IEEE International Conference on Computer Vision, 2015.
11 |
--------------------------------------------------------------------------------
/scripts/detection/faster_rcnn/fit_faster_rcnn.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import gluoncv as gcv
4 | gcv.utils.check_version('0.8.0')
5 |
6 | from gluoncv.auto.estimators import FasterRCNNEstimator
7 | from gluoncv.auto.tasks.utils import config_to_nested
8 | from d8.object_detection import Dataset
9 |
10 |
11 | if __name__ == '__main__':
12 | # specify hyperparameters
13 | config = {
14 | 'dataset': 'sheep',
15 | 'gpus': [0, 1, 2, 3, 4, 5, 6, 7],
16 | 'estimator': 'faster_rcnn',
17 | 'base_network': 'resnet50_v1b',
18 | 'batch_size': 8, # range [8, 16, 32, 64]
19 | 'epochs': 3
20 | }
21 | config = config_to_nested(config)
22 | config.pop('estimator')
23 |
24 | # specify dataset
25 | dataset = Dataset.get('sheep')
26 | train_data, valid_data = dataset.split(0.8)
27 |
28 | # specify estimator
29 | estimator = FasterRCNNEstimator(config)
30 |
31 | # fit estimator
32 | estimator.fit(train_data, valid_data)
33 |
34 | # evaluate auto estimator
35 | eval_map = estimator.evaluate(valid_data)
36 | logging.info('evaluation: mAP={}'.format(eval_map[-1][-1]))
37 |
--------------------------------------------------------------------------------
/scripts/detection/ssd/fit_ssd.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import gluoncv as gcv
4 | gcv.utils.check_version('0.8.0')
5 |
6 | from gluoncv.auto.estimators import SSDEstimator
7 | from gluoncv.auto.tasks.utils import config_to_nested
8 | from d8.object_detection import Dataset
9 |
10 |
11 | if __name__ == '__main__':
12 | # specify hyperparameters
13 | config = {
14 | 'dataset': 'sheep',
15 | 'gpus': [0, 1, 2, 3, 4, 5, 6, 7],
16 | 'estimator': 'ssd',
17 | 'base_network': 'resnet50_v1',
18 | 'data_shape': 512,
19 | 'batch_size': 64, # range [8, 16, 32, 64]
20 | 'epochs': 3
21 | }
22 | config = config_to_nested(config)
23 | config.pop('estimator')
24 |
25 | # specify dataset
26 | dataset = Dataset.get('sheep')
27 | train_data, valid_data = dataset.split(0.8)
28 |
29 | # specify estimator
30 | estimator = SSDEstimator(config)
31 |
32 | # fit estimator
33 | estimator.fit(train_data, valid_data)
34 |
35 | # evaluate auto estimator
36 | eval_map = estimator.evaluate(valid_data)
37 | logging.info('evaluation: mAP={}'.format(eval_map[-1][-1]))
38 |
--------------------------------------------------------------------------------
/scripts/detection/yolo/fit_yolo.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import gluoncv as gcv
4 | gcv.utils.check_version('0.8.0')
5 |
6 | from gluoncv.auto.estimators import YOLOv3Estimator
7 | from gluoncv.auto.tasks.utils import config_to_nested
8 | from d8.object_detection import Dataset
9 |
10 |
11 | if __name__ == '__main__':
12 | # specify hyperparameters
13 | config = {
14 | 'dataset': 'sheep',
15 | 'gpus': [0, 1, 2, 3, 4, 5, 6, 7],
16 | 'estimator': 'yolo3',
17 | 'base_network': 'darknet53',
18 | 'batch_size': 64, # range [8, 16, 32, 64]
19 | 'epochs': 3
20 | }
21 | config = config_to_nested(config)
22 | config.pop('estimator')
23 |
24 | # specify dataset
25 | dataset = Dataset.get('sheep')
26 | train_data, valid_data = dataset.split(0.8)
27 |
28 | # specify estimator
29 | estimator = YOLOv3Estimator(config)
30 |
31 | # fit estimator
32 | estimator.fit(train_data, valid_data)
33 |
34 | # evaluate auto estimator
35 | eval_map = estimator.evaluate(valid_data)
36 | logging.info('evaluation: mAP={}'.format(eval_map[-1][-1]))
37 |
--------------------------------------------------------------------------------
/scripts/gan/cycle_gan/README.md:
--------------------------------------------------------------------------------
1 | ## Reproducing Cycle GAN experiments
2 |
3 |
4 | **Download horse2zebra dataset**
5 | ```bash
6 | python ./download_dataset.py --download-dir . --file horse2zebra
7 | ```
8 |
9 | **Monitoring loss values and images during training**
10 | ```bash
11 | pip install mxboard
12 | tensorboard --logdir=./logs --host=127.0.0.1 --port=8888
13 | ```
14 | Details about mxboard are available at [mxboard](https://github.com/awslabs/mxboard)
15 |
16 | **Train Cycle GAN**
17 | ```bash
18 | python train_cgan.py --dataroot ./horse2zebra
19 | ```
20 |
21 | **Test Cycle GAN**
22 |
23 | for the horse-to-zebra model:
24 | ```bash
25 | python demo_cycle_gan.py --images ./horse2zebra/testA/n02391049_10160.jpg --pretrained ./samples/netG_A_epoch_200.params --gpu_id -1
26 | ```
27 | for the zebra-to-horse model (using the `netG_B` generator):
28 | ```bash
29 | python demo_cycle_gan.py --images ./horse2zebra/testB/n02391049_10160.jpg --pretrained ./samples/netG_B_epoch_200.params --gpu_id -1
30 | ```
31 | 
32 |
33 | 
34 | The meanings of these images are:
35 |
36 | | | | | |
37 | |-|-|-|-|
38 | | real_A (from dataset) | fake_B (generated from real_A) | rec_A (reconstructed from fake_B) | idt_A (generated from real_B) |
39 | | real_B (from dataset) | fake_A (generated from real_B) | rec_B (reconstructed from fake_A) | idt_B (generated from real_A) |
40 |
41 | ## References
42 | ["Cycle GAN"](https://arxiv.org/abs/1703.10593)
43 |
--------------------------------------------------------------------------------
/scripts/gan/cycle_gan/images.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/scripts/gan/cycle_gan/images.png
--------------------------------------------------------------------------------
/scripts/gan/srgan/README.md:
--------------------------------------------------------------------------------
1 | ## Reproducing SRGAN experiments
2 |
3 | 
4 |
5 | **Download DIV2K dataset**
6 | ```bash
7 | python download_dataset.py --file DIV2K_train_HR
8 | ```
9 |
10 | **Train SRGAN**
11 | ```bash
12 | python train_srgan.py --dataroot ./DIV2K_train_HR
13 | ```
14 |
15 | **Monitoring loss values and images during training**
16 | ```bash
17 | pip install mxboard
18 | tensorboard --logdir=./logs --host=127.0.0.1 --port=8888
19 | ```
20 | Details about mxboard are available at [mxboard](https://github.com/awslabs/mxboard)
21 |
22 | 
23 |
24 | **Test SRGAN**
25 |
26 | ```bash
27 | python demo_sr_gan.py --images ./ --pretrained ./samples/netG_epoch_20000.params --gpu_id -1
28 | ```
29 |
30 | ## References
31 | ["Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network
32 | "](https://arxiv.org/abs/1609.04802)
--------------------------------------------------------------------------------
/scripts/gan/srgan/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/scripts/gan/srgan/__init__.py
--------------------------------------------------------------------------------
/scripts/gan/srgan/images.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/scripts/gan/srgan/images.png
--------------------------------------------------------------------------------
/scripts/gan/srgan/pred.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/scripts/gan/srgan/pred.png
--------------------------------------------------------------------------------
/scripts/gan/stylegan/sample.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/scripts/gan/stylegan/sample.jpg
--------------------------------------------------------------------------------
/scripts/gan/stylegan/sample_train.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/scripts/gan/stylegan/sample_train.png
--------------------------------------------------------------------------------
/scripts/gan/wgan/README.md:
--------------------------------------------------------------------------------
1 | ## Reproducing LSUN experiments
2 |
3 |
4 | **Download LSUN dataset**
5 | ```bash
6 | cd ../../../scripts/datasets/
7 | python lsun.py -c bedroom
8 | ```
9 |
10 | **Monitoring `-Loss_D` values during training**
11 | ```bash
12 | pip install mxboard
13 | tensorboard --logdir=./logs --host=127.0.0.1 --port=8888
14 | ```
15 | Details about mxboard are available at [mxboard](https://github.com/awslabs/mxboard)
16 |
17 |
18 | **With DCGAN:**
19 |
20 | ```bash
21 | python train_wgan.py --dataset lsun --dataroot [lsun-train-folder] --cuda
22 | ```
23 |
24 | **With MLP:**
25 |
26 | ```bash
27 | python train_wgan.py --mlp_G --ngf 512
28 | ```
29 |
30 | **Fake samples generated after 400,000 generator iterations**
31 |
32 | 
33 |
34 | **Plot the value `-Loss_D`**
35 |
36 | 
37 |
38 | ## References
39 | ["Wasserstein GAN"](https://arxiv.org/abs/1701.07875)
40 |
--------------------------------------------------------------------------------
/scripts/gan/wgan/fake_samples_400000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/scripts/gan/wgan/fake_samples_400000.png
--------------------------------------------------------------------------------
/scripts/gan/wgan/lossd.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/scripts/gan/wgan/lossd.png
--------------------------------------------------------------------------------
/scripts/instance/README.md:
--------------------------------------------------------------------------------
1 | # Instance Segmentation Models
2 |
3 | Please refer to [GluonCV Model Zoo](https://gluon-cv.mxnet.io/model_zoo/index.html#instance-segmentation)
4 | for available pretrained models, training hyper-parameters, etc.
5 |
--------------------------------------------------------------------------------
/scripts/instance/mask_rcnn/README.md:
--------------------------------------------------------------------------------
1 | # Mask R-CNN [1]
2 |
3 | [GluonCV Model Zoo](http://gluon-cv.mxnet.io/model_zoo/index.html#object-detection)
4 |
5 | ## References
6 | 1. Kaiming He, Georgia Gkioxari, Piotr Dollár, and Ross Girshick. "Mask R-CNN." IEEE International Conference on Computer Vision (ICCV), 2017.
7 | 2. Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. "Deep Residual Learning for Image Recognition." IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2016.
8 | 3. Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. "Faster R-CNN: Towards real-time object detection with region proposal networks." In IEEE Transactions on Pattern Analysis and Machine Intelligence, 2016.
9 | 4. Ross Girshick. "Fast R-CNN." In IEEE International Conference on Computer Vision (ICCV), 2015.
10 |
--------------------------------------------------------------------------------
/scripts/instance/mask_rcnn/benchmark/ompi_bind_DGX1.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | case "${OMPI_COMM_WORLD_LOCAL_RANK}" in
4 | 0)
5 | exec numactl --physcpubind=0-5,48-53 --membind=0 "${@}"
6 | ;;
7 | 1)
8 | exec numactl --physcpubind=6-11,54-59 --membind=0 "${@}"
9 | ;;
10 | 2)
11 | exec numactl --physcpubind=12-17,60-65 --membind=0 "${@}"
12 | ;;
13 | 3)
14 | exec numactl --physcpubind=18-23,66-71 --membind=0 "${@}"
15 | ;;
16 | 4)
17 | exec numactl --physcpubind=24-29,72-77 --membind=1 "${@}"
18 | ;;
19 | 5)
20 | exec numactl --physcpubind=30-35,78-83 --membind=1 "${@}"
21 | ;;
22 | 6)
23 | exec numactl --physcpubind=36-41,84-89 --membind=1 "${@}"
24 | ;;
25 | 7)
26 | exec numactl --physcpubind=42-47,90-95 --membind=1 "${@}"
27 | ;;
28 | *)
29 | echo ==============================================================
30 | echo "ERROR: Unknown local rank ${OMPI_COMM_WORLD_LOCAL_RANK}"
31 | echo ==============================================================
32 | exit 1
33 | ;;
34 | esac
35 |
36 |
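This wrapper pins each Open MPI local rank to the CPU cores and memory node closest to its GPU on a DGX-1. A hedged launch sketch (the training command after the wrapper is illustrative):

```bash
# Open MPI sets OMPI_COMM_WORLD_LOCAL_RANK per process; the wrapper
# then execs the real training command under a matching numactl binding.
mpirun -np 8 ./benchmark/ompi_bind_DGX1.sh python fit_mask_rcnn.py
```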
--------------------------------------------------------------------------------
/scripts/instance/mask_rcnn/fit_mask_rcnn.py:
--------------------------------------------------------------------------------
1 | """Train Mask-RCNN end to end."""
2 | import os
3 |
4 | # disable autotune
5 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0'
6 | os.environ['MXNET_GPU_MEM_POOL_TYPE'] = 'Round'
7 | os.environ['MXNET_GPU_MEM_POOL_ROUND_LINEAR_CUTOFF'] = '26'
8 | os.environ['MXNET_EXEC_BULK_EXEC_MAX_NODE_TRAIN_FWD'] = '999'
9 | os.environ['MXNET_EXEC_BULK_EXEC_MAX_NODE_TRAIN_BWD'] = '25'
10 | os.environ['MXNET_GPU_COPY_NTHREADS'] = '1'
11 | os.environ['MXNET_OPTIMIZER_AGGREGATION_SIZE'] = '54'
12 |
13 | import gluoncv as gcv
14 |
15 | gcv.utils.check_version('0.7.0')
16 | from gluoncv.auto.estimators.mask_rcnn import MaskRCNNEstimator
17 | from gluoncv.auto.estimators.mask_rcnn import ex
18 |
19 |
20 | @ex.automain
21 | def main(_config, _log):
22 |     # main is the command-line entry point for users who do not want to write code
23 | c = MaskRCNNEstimator(_config, _log)
24 | c.fit()
25 |
--------------------------------------------------------------------------------
/scripts/pose/alpha_pose/coco.sh:
--------------------------------------------------------------------------------
1 | python train_alpha_pose.py --dataset coco \
2 | --model alpha_pose_resnet101_v1b --mode hybrid --num-joints 17 \
3 | --lr 0.001 --wd 0.0 --lr-mode step --lr-decay-epoch 90,120 \
4 | --num-epochs 140 --batch-size 32 --num-gpus 4 -j 60 \
5 | --dtype float32 --warmup-epochs 0 --use-pretrained-base \
6 | --save-dir params_alpha_pose_resnet101_v1b_coco \
7 | --logging-file alpha_pose_resnet101_v1b_coco.log --log-interval 100 --flip-test
8 |
--------------------------------------------------------------------------------
/scripts/pose/alpha_pose/coco_dpg.sh:
--------------------------------------------------------------------------------
1 | python train_alpha_pose.py --dataset coco \
2 | --model alpha_pose_resnet101_v1b --mode hybrid --num-joints 17 \
3 | --lr 0.001 --wd 0.0 --lr-mode step --lr-decay-epoch 30,60 \
4 | --num-epochs 90 --batch-size 32 --num-gpus 4 -j 60 \
5 | --dtype float32 --warmup-epochs 0 --use-pretrained-base \
6 | --save-dir params_alpha_pose_resnet101_v1b_coco_dpg \
7 | --logging-file alpha_pose_resnet101_v1b_coco.log --log-interval 100 --flip-test \
8 | --addDPG --load-model final.params
9 |
--------------------------------------------------------------------------------
/scripts/pose/alpha_pose/validate.sh:
--------------------------------------------------------------------------------
1 | python validate.py \
2 | --model alpha_pose_resnet101_v1b --dataset coco --num-joints 17 \
3 | --batch-size 128 --num-gpus 4 -j 60 \
4 | --params-file duc_se_coco.params \
5 | --input-size 320,256 --flip-test
6 |
--------------------------------------------------------------------------------
/scripts/pose/directpose/.gitignore:
--------------------------------------------------------------------------------
1 | *.json
2 | *.pth
3 | *.so
4 |
--------------------------------------------------------------------------------
/scripts/pose/simple_pose/coco.sh:
--------------------------------------------------------------------------------
1 | python train_simple_pose.py \
2 | --model simple_pose_resnet50_v1b --mode hybrid --num-joints 17 \
3 | --lr 0.001 --wd 0.0 --lr-mode step --lr-decay-epoch 90,120 \
4 | --num-epochs 140 --batch-size 32 --num-gpus 8 -j 60 \
5 | --dtype float32 --warmup-epochs 0 --use-pretrained-base \
6 | --save-dir params_simple_pose_resnet50_v1b \
7 | --logging-file simple_pose_resnet50_v1b.log --log-interval 100
8 |
--------------------------------------------------------------------------------
/scripts/pose/simple_pose/validate.sh:
--------------------------------------------------------------------------------
1 | python validate.py \
2 | --model simple_pose_resnet18_v1b --num-joints 17 \
3 | --batch-size 128 --num-gpus 8 -j 60
4 |
--------------------------------------------------------------------------------
/scripts/re-id/baseline/networks/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | from .resnet import resnet18, resnet34, resnet50
4 |
--------------------------------------------------------------------------------
/scripts/tracking/smot/eval.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import sys
4 | from multiprocessing import Pool
5 | from terminaltables import AsciiTable
6 |
7 | from helper import *
8 |
9 |
10 | parser = argparse.ArgumentParser()
11 | parser.add_argument('--model-name', type=str, default='smot')
12 | parser.add_argument('--gt-dir', type=str, required=True)
13 | parser.add_argument('--pred-dir', type=str, required=True)
14 | parser.add_argument('--min-iou', type=float, default=0.5)
15 | parser.add_argument('--num-worker', type=int, default=32)
16 |
17 |
18 | if __name__ == '__main__':
19 | args = parser.parse_args()
20 |
21 | gt_pred_pairs = get_gt_pred_pairs(args.gt_dir, args.pred_dir, iou_thresh=args.min_iou)
22 |
23 | pool = Pool(args.num_worker)
24 | results = dict(pool.starmap(run_video, gt_pred_pairs))
25 | pool.close()
26 | pool.join()
27 | print("run on {} videos".format(len(gt_pred_pairs)))
28 |
29 | headers = ['MOTA', 'IDF1', 'IDR', 'IDP', 'N. Trans', 'FP','FN', 'IDsw.', 'MT/GT', 'Prec.', 'Rec.', 'F1']
30 | data = [mota(results), idf1(results), idr(results), idp(results), num_transfer(results), num_fp(results), num_misses(results), num_sw(results),
31 | '{}/{}'.format(mt(results), num_tracks(results)), precision(results), recall(results), f1(results)]
32 |
33 | table = AsciiTable([headers, data], title='Tracking Results: {}'.format(args.model_name))
34 | print(table.table)
35 |
36 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/tests/__init__.py
--------------------------------------------------------------------------------
/tests/auto/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/tests/auto/__init__.py
--------------------------------------------------------------------------------
/tests/auto/test_hybrid_auto_tasks.py:
--------------------------------------------------------------------------------
1 | from gluoncv.auto.tasks import ImageClassification
2 | import autogluon.core as ag
3 | from nose.tools import nottest
4 |
5 | IMAGE_CLASS_DATASET, _, IMAGE_CLASS_TEST = ImageClassification.Dataset.from_folders(
6 | 'https://autogluon.s3.amazonaws.com/datasets/shopee-iet.zip')
7 |
8 | def test_hybrid_image_classification():
9 | from gluoncv.auto.tasks import ImageClassification
10 | model = ag.Categorical('resnet18_v1b', 'resnet18')
11 | task = ImageClassification({'model': model, 'num_trials': 4, 'epochs': 1, 'batch_size': 8})
12 | classifier = task.fit(IMAGE_CLASS_DATASET)
13 | assert task.fit_summary().get('valid_acc', 0) > 0
14 | test_result = classifier.predict(IMAGE_CLASS_TEST)
15 |
16 | if __name__ == '__main__':
17 | import nose
18 | nose.runmodule()
19 |
--------------------------------------------------------------------------------
/tests/auto/test_torch_auto_tasks.py:
--------------------------------------------------------------------------------
1 | from gluoncv.auto.tasks import ImageClassification
2 | import autogluon.core as ag
3 | from nose.tools import nottest
4 |
5 | IMAGE_CLASS_DATASET, _, IMAGE_CLASS_TEST = ImageClassification.Dataset.from_folders(
6 | 'https://autogluon.s3.amazonaws.com/datasets/shopee-iet.zip')
7 |
8 | def test_torch_image_classification():
9 | from gluoncv.auto.tasks import ImageClassification
10 | task = ImageClassification({'model': 'resnet18', 'num_trials': 1, 'epochs': 1, 'batch_size': 8})
11 | classifier = task.fit(IMAGE_CLASS_DATASET)
12 | assert task.fit_summary().get('valid_acc', 0) > 0
13 | test_result = classifier.predict(IMAGE_CLASS_TEST)
14 |
15 | def test_torch_image_classification_custom_net():
16 | from gluoncv.auto.tasks import ImageClassification
17 | from timm import create_model
18 | import torch.nn as nn
19 | net = create_model('resnet18')
20 | net.fc = nn.Linear(512, 4)
21 | task = ImageClassification({'num_trials': 1, 'epochs': 1, 'custom_net': net, 'batch_size': 8})
22 | classifier = task.fit(IMAGE_CLASS_DATASET)
23 | assert task.fit_summary().get('valid_acc', 0) > 0
24 | test_result = classifier.predict(IMAGE_CLASS_TEST)
25 |
26 | if __name__ == '__main__':
27 | import nose
28 | nose.runmodule()
29 |
--------------------------------------------------------------------------------
/tests/model_zoo/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/tests/model_zoo/__init__.py
--------------------------------------------------------------------------------
/tests/model_zoo_torch/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/tests/model_zoo_torch/__init__.py
--------------------------------------------------------------------------------
/tests/py3_auto.yml:
--------------------------------------------------------------------------------
1 | name: gluon_cv_py3_mxnet
2 | channels:
3 | - conda-forge
4 | - defaults
5 | - pytorch
6 | dependencies:
7 | - python=3.6
8 | - perl
9 | - sphinx=1.7.2
10 | - nose
11 | - coverage=4.5.4
12 | - scipy
13 | - cython
14 | - pip=20.2.4
15 | - requests
16 | - matplotlib
17 | - tqdm
18 | - pillow
19 | - pandas==1.3
20 | - pytorch==1.6.0
21 | - torchvision==0.7.0
22 | - pip:
23 | - https://repo.mxnet.io/dist/python/cu100mkl/mxnet_cu100mkl-1.6.0b20191010-py2.py3-none-manylinux1_x86_64.whl
24 | - coverage-badge
25 | - awscli
26 | - nose-timer
27 | - opencv-python
28 | - git+https://github.com/zhanghang1989/detail-api.git#subdirectory=PythonAPI
29 | - portalocker
30 | - autocfg>=0.0.6
31 | - autogluon.core==0.2.0
32 | - timm==0.5.4
33 |
--------------------------------------------------------------------------------
/tests/py3_mxnet.yml:
--------------------------------------------------------------------------------
1 | name: gluon_cv_py3_mxnet
2 | channels:
3 | - conda-forge
4 | - defaults
5 | dependencies:
6 | - python=3.6
7 | - perl
8 | - sphinx=1.7.2
9 | - nose
10 | - coverage=4.5.4
11 | - scipy
12 | - cython
13 | - pip=20.2.4
14 | - requests
15 | - matplotlib
16 | - tqdm
17 | - pillow
18 | - pip:
19 | - https://repo.mxnet.io/dist/python/cu100mkl/mxnet_cu100mkl-1.6.0b20191010-py2.py3-none-manylinux1_x86_64.whl
20 | - coverage-badge
21 | - awscli
22 | - nose-timer
23 | - opencv-python
24 | - git+https://github.com/zhanghang1989/detail-api.git#subdirectory=PythonAPI
25 | - portalocker
26 | - autocfg
--------------------------------------------------------------------------------
/tests/py3_mxnet_ci.yml:
--------------------------------------------------------------------------------
1 | name: gluon_cv_py3_mxnet
2 | channels:
3 | - conda-forge
4 | - defaults
5 | dependencies:
6 | - python=3.7
7 | - nose
8 | - coverage=4.5.4
9 | - pip:
10 | - mxnet
11 | - coverage-badge
12 | - nose-timer
13 |
--------------------------------------------------------------------------------
/tests/py3_torch.yml:
--------------------------------------------------------------------------------
1 | name: gluon_cv_py3_pytorch
2 | channels:
3 | - pytorch
4 | - conda-forge
5 | - defaults
6 | dependencies:
7 | - python=3.6
8 | - perl
9 | - sphinx=1.7.2
10 | - nose
11 | - coverage=4.5.4
12 | - scipy
13 | - cython
14 | - pip=20.2.4
15 | - requests
16 | - matplotlib
17 | - tqdm
18 | - pillow
19 | - pytorch=1.6.0
20 | - torchvision=0.7.0
21 | - pip:
22 | - coverage-badge
23 | - awscli
24 | - nose-timer
25 | - opencv-python
26 | - git+https://github.com/zhanghang1989/detail-api.git#subdirectory=PythonAPI
27 | - portalocker
28 | - tensorboardx
29 | - decord
30 | - opencv-python-headless
31 | - yacs
32 |
--------------------------------------------------------------------------------
/tests/pylint.yml:
--------------------------------------------------------------------------------
1 | name: gluon_cv_pylint
2 | dependencies:
3 | - python=3.7
4 | - pip=20.2.4
5 | - pip:
6 | - pylint==2.4.4
7 |
--------------------------------------------------------------------------------
/tests/unittests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/gluon-cv/567775619f3b97d47e7c360748912a4fd883ff52/tests/unittests/__init__.py
--------------------------------------------------------------------------------
/tests/unittests/test_nn.py:
--------------------------------------------------------------------------------
1 | import mxnet as mx
2 | from gluoncv.nn import GroupNorm
3 |
4 | def test_groupnorm():
5 | ctx=mx.context.current_context()
6 | x = mx.nd.random.uniform(1, 2, (4, 16, 8, 8), ctx=ctx)
7 | gn = GroupNorm(4, 16)
8 | gn.initialize(ctx=ctx)
9 | y = gn(x)
10 | y = y.reshape(0, 4, -1)
11 | print('y.mean(2) =', y.mean(2))
12 | mx.test_utils.assert_almost_equal(y.mean(2).asnumpy(),
13 | mx.nd.zeros_like(y.mean(2)).asnumpy(),
14 | rtol=1e-3, atol=1e-3)
15 |
16 | if __name__ == '__main__':
17 | import nose
18 | nose.runmodule()
19 |
--------------------------------------------------------------------------------
/tests/unittests/test_utils_bbox.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import numpy as np
4 | import gluoncv as gcv
5 |
6 | def test_bbox_xywh_to_xyxy():
7 | # test list
8 | a = [20, 30, 100.2, 300.4]
9 | expected = [20, 30, 119.2, 329.4]
10 | np.testing.assert_allclose(gcv.utils.bbox.bbox_xywh_to_xyxy(a), expected)
11 | aa = np.array([a, a])
12 | bb = np.array([expected, expected])
13 | np.testing.assert_allclose(gcv.utils.bbox.bbox_xywh_to_xyxy(aa), bb)
14 |
15 | if __name__ == '__main__':
16 | import nose
17 | nose.runmodule()
18 |
--------------------------------------------------------------------------------
/tests/unittests/test_utils_block.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import numpy as np
4 | import gluoncv as gcv
5 | from mxnet.gluon.nn import BatchNorm
6 |
7 | def check_bn_frozen_callback(net, value):
8 | if isinstance(net, BatchNorm):
9 | assert value == net._kwargs['use_global_stats']
10 |
11 | def test_block_freeze_bn():
12 | net = gcv.model_zoo.get_model('resnet18_v1')
13 | gcv.utils.recursive_visit(net, check_bn_frozen_callback, value=False)
14 | gcv.utils.freeze_bn(net, True)
15 | gcv.utils.recursive_visit(net, check_bn_frozen_callback, value=True)
16 | gcv.utils.freeze_bn(net, False)
17 | gcv.utils.recursive_visit(net, check_bn_frozen_callback, value=False)
18 |
19 | if __name__ == '__main__':
20 | import nose
21 | nose.runmodule()
22 |
--------------------------------------------------------------------------------
/tests/unittests/tiny_datasets.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gluoncv import data
3 |
4 | class COCODetectionTiny(data.COCODetection):
5 | CLASSES = ['bicycle', 'motorcycle']
6 |
7 | def __init__(self, root=os.path.join('~', '.mxnet', 'datasets', 'tiny_coco'),
8 | splits=('instances_val2017_tiny',), **kwargs):
9 | super().__init__(root=root, splits=splits, **kwargs)
10 |
11 | class COCOInstanceTiny(data.COCOInstance):
12 | CLASSES = ['bicycle', 'motorcycle']
13 |
14 | def __init__(self, root=os.path.join('~', '.mxnet', 'datasets', 'tiny_coco'),
15 | splits=('instances_val2017_tiny',), **kwargs):
16 | super().__init__(root=root, splits=splits, **kwargs)
17 |
18 | class VOCDetectionTiny(data.VOCDetection):
19 | CLASSES = ['motorbike', 'person']
20 |
21 | def __init__(self, root=os.path.join('~', '.mxnet', 'datasets', 'tiny_motorbike'),
22 | splits=(('tiny_motorbike', 'trainval'),), **kwargs):
23 | super().__init__(root=root, splits=splits, **kwargs)
24 |
25 | class VOCSegmentationTiny(data.VOCSegmentation):
26 | CLASSES = ['motorbike', 'person']
27 | BASE_DIR = 'tiny_motorbike'
28 |
29 | def __init__(self, root=os.path.expanduser(os.path.join('~', '.mxnet', 'datasets', 'tiny_motorbike')),
30 | split='train', **kwargs):
31 | super().__init__(root=root, split=split, **kwargs)
32 |
--------------------------------------------------------------------------------
/tools/batch/README.md:
--------------------------------------------------------------------------------
1 | # Launch AWS Batch Jobs
2 |
3 | Once you've correctly configured the AWS CLI, you may use submit-job.py to deploy your job.
4 |
5 | #### Requirements
6 |
7 | **boto3** is required. To install it:
8 |
9 | ```shell
10 | pip install boto3
11 | ```
12 |
13 | You'll also need to configure it so that the script can authenticate you successfully:
14 |
15 | https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html#configuration
16 |
17 | #### Some arguments
18 |
19 | * --job-type: the instance type you want your job to run on
20 | * --source-ref: the branch name
21 | * --remote: the repository URL
22 | * --command: the command you want to execute
23 | * --wait: keep the script running and report the status of the submitted job
24 |
25 | Example:
26 |
27 | ```shell
28 | python3 submit-job.py \
29 | --job-type c4.2x \
30 | --source-ref master \
31 | --work-dir docs/tutorials/classification \
32 | --remote https://github.com/dmlc/gluon-cv \
33 | --command "python3 demo_cifar10.py" \
34 | --wait
35 | ```
36 |
37 | For a full list of arguments and their default values:
38 |
39 | ```shell
40 | python3 submit-job.py -h
41 | ```
42 |
43 |
--------------------------------------------------------------------------------
/tools/batch/batch-test.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | import boto3
3 |
4 | batch = boto3.client('batch', region_name='us-east-1')
5 | response = batch.describe_job_definitions(status='ACTIVE')['jobDefinitions']
6 | instance_type_info = {}
7 | for res in response:
8 | jobDefinition = res['jobDefinitionName'] # example: gluon-cv-p2_8xlarge:1
9 | instance = jobDefinition.split('-')[-1].split(':')[0].replace('large', '') # example: p2_8x
10 | job_queue = jobDefinition.split('-')[-1].split('_')[0] # example: p2
11 | instance_type_info[instance] = {'job_definition': jobDefinition, 'job_queue': job_queue}
12 |
13 | for instance in instance_type_info:
14 | command = ['python3', \
15 | 'submit-job.py', \
16 | '--name', instance+'-test', \
17 | '--job-type', instance.replace('large', ''), \
18 | '--source-ref', 'master', \
19 | '--work-dir', 'docs/tutorials/classification', \
20 | '--remote', 'https://github.com/dmlc/gluon-cv', \
21 | '--command', 'python3 demo_cifar10.py'
22 | ]
23 | subprocess.run(command)
24 |
--------------------------------------------------------------------------------
/tools/batch/docker/Dockerfile.cpu:
--------------------------------------------------------------------------------
1 | FROM ubuntu:18.04
2 |
3 | RUN apt-get update && apt-get install -y --no-install-recommends \
4 | build-essential \
5 | locales \
6 | cmake \
7 | wget \
8 | subversion \
9 | git \
10 | curl \
11 | vim \
12 | unzip \
13 | sudo \
14 | ca-certificates \
15 | libjpeg-dev \
16 | libpng-dev \
17 | libfreetype6-dev \
18 | libopenblas-dev \
19 | python3-dev \
20 | python3-pip \
21 | python3-setuptools \
22 | pandoc \
23 | libxft-dev &&\
24 | rm -rf /var/lib/apt/lists/*
25 |
26 | RUN pip3 install --upgrade pip
27 | RUN pip3 install --no-cache --upgrade \
28 | wheel \
29 | cmake \
30 | awscli \
31 | pypandoc
32 | RUN git clone https://github.com/dmlc/gluon-cv
33 | WORKDIR gluon-cv
34 | ADD gluon_cv_job.sh .
35 | RUN chmod +x gluon_cv_job.sh
36 |
--------------------------------------------------------------------------------
/tools/batch/docker/README.md:
--------------------------------------------------------------------------------
1 | # Updating the Docker Image for AWS Batch
2 |
3 | To update the Docker image:
4 | 
5 | - Update the Dockerfile
6 | - Make sure docker and docker-compose, as well as the docker Python package, are installed
7 | - Export the AWS account credentials as environment variables
8 | - cd to the folder containing the Dockerfile and execute the following:
9 |
10 | ```shell
11 | # First, export your ECR repo address as an environment variable
12 | export AWS_ECR_REPO=${your_repo}
13 |
14 | # This executes a command that logs into ECR.
15 | aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $AWS_ECR_REPO
16 |
17 | # The following script builds, tags, and pushes the image
18 | # For cpu
19 | ./docker_deploy.sh cpu
20 | # For gpu
21 | ./docker_deploy.sh gpu
22 |
23 | ```
24 |
25 |
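26 | To verify the push succeeded, you can pull the freshly pushed image back from ECR; the tags below follow docker_deploy.sh:
27 | 
28 | ```shell
29 | # cpu image
30 | docker pull $AWS_ECR_REPO:cpu-latest
31 | # gpu image
32 | docker pull $AWS_ECR_REPO:latest
33 | ```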
--------------------------------------------------------------------------------
/tools/batch/docker/docker_deploy.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | TYPE=$1
4 |
5 | if [ -z "$TYPE" ]; then
6 | echo "No type detected. Choices: cpu, gpu"
7 | exit 1
8 | fi;
9 |
10 | if [ "$TYPE" == cpu ] || [ "$TYPE" == CPU ]; then
11 | docker build --no-cache -f Dockerfile.cpu -t gluon-cv-1:cpu-latest .
12 | docker tag gluon-cv-1:cpu-latest $AWS_ECR_REPO:cpu-latest
13 | docker push $AWS_ECR_REPO:cpu-latest
14 | elif [ "$TYPE" == gpu ] || [ "$TYPE" == GPU ]; then
15 | docker build --no-cache -f Dockerfile.gpu -t gluon-cv-1:latest .
16 | docker tag gluon-cv-1:latest $AWS_ECR_REPO:latest
17 | docker push $AWS_ECR_REPO:latest
18 | else
19 | echo "Invalid type detected. Choices: cpu, gpu"
20 | exit 1
21 | fi;
22 |
--------------------------------------------------------------------------------
/tools/batch/docker/gluon_cv_job.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | date
3 | echo "Args: $@"
4 | env
5 | echo "jobId: $AWS_BATCH_JOB_ID"
6 | echo "jobQueue: $AWS_BATCH_JQ_NAME"
7 | echo "computeEnvironment: $AWS_BATCH_CE_NAME"
8 |
9 | SOURCE_REF=$1
10 | WORK_DIR=$2
11 | COMMAND=$3
12 | SAVED_OUTPUT=$4
13 | SAVE_PATH=$5
14 | REMOTE=$6
15 | DEVICE=${7:-gpu}
16 |
17 | if [ -n "$REMOTE" ]; then
18 | git remote set-url origin $REMOTE
19 | fi;
20 |
21 | git fetch origin $SOURCE_REF:working
22 | git checkout working
23 | if [ "$DEVICE" == "cpu" ]; then
24 | python3 -m pip install -U --quiet "mxnet==1.7.0.post1"
25 | python3 -m pip install -U --quiet torch==1.6.0+cpu torchvision==0.7.0+cpu
26 | else
27 | python3 -m pip install -U --quiet "mxnet-cu102==1.7.0"
28 | python3 -m pip install -U --quiet torch==1.6.0 torchvision==0.7.0
29 | fi;
30 |
31 | python3 -m pip install --quiet -e .
32 | python3 -m pip install --quiet timm==0.5.4
33 |
34 | cd $WORK_DIR
35 | /bin/bash -o pipefail -c "$COMMAND"
36 | COMMAND_EXIT_CODE=$?
37 | if [[ -f $SAVED_OUTPUT ]]; then
38 | aws s3 cp $SAVED_OUTPUT s3://gluon-cv-dev/batch/$AWS_BATCH_JOB_ID/$SAVE_PATH;
39 | elif [[ -d $SAVED_OUTPUT ]]; then
40 | aws s3 cp --recursive $SAVED_OUTPUT s3://gluon-cv-dev/batch/$AWS_BATCH_JOB_ID/$SAVE_PATH;
41 | fi;
42 | exit $COMMAND_EXIT_CODE
43 |
--------------------------------------------------------------------------------
/tools/batch/template/launch-template-data-cpu.json:
--------------------------------------------------------------------------------
1 | {
2 | "LaunchTemplateName":"increase-volume-batch-linux2",
3 | "LaunchTemplateData": {
4 | "BlockDeviceMappings": [
5 | {
6 | "Ebs": {
7 | "DeleteOnTermination": true,
8 | "VolumeSize": 256,
9 | "VolumeType": "gp2"
10 | },
11 | "DeviceName": "/dev/xvdcz"
12 | }
13 | ]
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/tools/batch/template/launch-template-data-gpu.json:
--------------------------------------------------------------------------------
1 | {
2 | "LaunchTemplateName":"increase-volume-batch-linux1",
3 | "LaunchTemplateData": {
4 | "BlockDeviceMappings": [
5 | {
6 | "Ebs": {
7 | "DeleteOnTermination": true,
8 | "VolumeSize": 100,
9 | "VolumeType": "gp2"
10 | },
11 | "DeviceName": "/dev/xvda"
12 | },
13 | {
14 | "Ebs": {
15 | "DeleteOnTermination": true,
16 | "VolumeSize": 100,
17 | "VolumeType": "gp2"
18 | },
19 | "DeviceName": "/dev/xvdcz"
20 | }
21 | ]
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/tools/docker/README.md:
--------------------------------------------------------------------------------
1 | # Docker Support in GluonCV
2 |
3 | We provide a [Docker](https://www.docker.com/) container with everything set up to run GluonCV. With the prebuilt Docker image, there is no need to worry about operating systems or system dependencies. You can launch a [JupyterLab](https://jupyterlab.readthedocs.io/en/stable/) development environment and try out GluonCV on your own problems.
4 |
5 | ## Run Docker
6 |
7 | You can run the Docker container with the following commands.
8 |
9 | ```
10 | docker pull gluonai/gluon-cv:gpu-latest
11 | docker run --gpus all --rm -it -p 8888:8888 -p 8787:8787 -p 8786:8786 --shm-size=2g gluonai/gluon-cv:gpu-latest
12 | ```
13 |
14 | Here we open ports 8888, 8787, and 8786, which are used for connecting to JupyterLab. We also set `--shm-size` to `2g`, which sets the shared-memory allocation to 2GB. Since NCCL creates shared-memory segments, this argument is essential for the Jupyter notebook to work with NCCL. (See also https://github.com/NVIDIA/nccl/issues/290.)
15 |
16 | ## Build your own Docker Image
17 |
18 | To build a Docker image from the Dockerfile, you can use the following command:
19 |
20 | ```
21 | docker build -t gluonai/gluon-cv:gpu-latest .
22 | ```
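23 | 
24 | To work on local files inside JupyterLab, you can additionally volume-mount a host folder to `/workspace/notebook`, the mount point suggested by the message in `start_jupyter.sh` (a sketch; adjust the host path as needed):
25 | 
26 | ```
27 | docker run --gpus all --rm -it -p 8888:8888 -p 8787:8787 -p 8786:8786 \
28 |     --shm-size=2g -v "$(pwd)":/workspace/notebook gluonai/gluon-cv:gpu-latest
29 | ```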
--------------------------------------------------------------------------------
/tools/docker/devel_entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source /start_jupyter.sh
4 |
5 | exec "$@"
6 |
--------------------------------------------------------------------------------
/tools/docker/start_jupyter.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Run Jupyter in foreground if $JUPYTER_FG is set
4 | if [[ "${JUPYTER_FG}" == "true" ]]; then
5 | jupyter-lab --allow-root --ip=0.0.0.0 --no-browser --NotebookApp.token=''
6 | exit 0
7 | else
8 | nohup jupyter-lab --allow-root --ip=0.0.0.0 --no-browser --NotebookApp.token='' > /dev/null 2>&1 &
9 |
10 | echo "Notebook server successfully started; JupyterLab is now running in the background."
11 | echo "Make local folders visible by volume mounting them to /workspace/notebook."
12 | echo "To access it, visit http://localhost:8888 on your host machine."
13 | echo 'Ensure the following arguments to "docker run" are added to expose the server ports to your host machine:
14 | -p 8888:8888 -p 8787:8787 -p 8786:8786'
15 | fi
16 |
--------------------------------------------------------------------------------