├── .gitignore
├── .gitmodules
├── .travis.yml
├── LICENSE
├── README.md
├── __init__.py
├── chainer_
│   ├── LICENSE.txt
│   ├── README.md
│   ├── __init__.py
│   ├── chainercv2
│   │   ├── __init__.py
│   │   ├── model_provider.py
│   │   └── models
│   │       ├── __init__.py
│   │       ├── airnet.py
│   │       ├── airnext.py
│   │       ├── alexnet.py
│   │       ├── alphapose_coco.py
│   │       ├── bagnet.py
│   │       ├── bamresnet.py
│   │       ├── bisenet.py
│   │       ├── bninception.py
│   │       ├── cbamresnet.py
│   │       ├── centernet.py
│   │       ├── cgnet.py
│   │       ├── channelnet.py
│   │       ├── common.py
│   │       ├── condensenet.py
│   │       ├── dabnet.py
│   │       ├── danet.py
│   │       ├── darknet.py
│   │       ├── darknet53.py
│   │       ├── darts.py
│   │       ├── deeplabv3.py
│   │       ├── densenet.py
│   │       ├── densenet_cifar.py
│   │       ├── diapreresnet.py
│   │       ├── diapreresnet_cifar.py
│   │       ├── diaresnet.py
│   │       ├── diaresnet_cifar.py
│   │       ├── dicenet.py
│   │       ├── diracnetv2.py
│   │       ├── dla.py
│   │       ├── dpn.py
│   │       ├── drn.py
│   │       ├── efficientnet.py
│   │       ├── efficientnetedge.py
│   │       ├── espnetv2.py
│   │       ├── fastscnn.py
│   │       ├── fastseresnet.py
│   │       ├── fbnet.py
│   │       ├── fcn8sd.py
│   │       ├── fdmobilenet.py
│   │       ├── fishnet.py
│   │       ├── fpenet.py
│   │       ├── ghostnet.py
│   │       ├── hardnet.py
│   │       ├── hrnet.py
│   │       ├── ibppose_coco.py
│   │       ├── icnet.py
│   │       ├── igcv3.py
│   │       ├── inceptionresnetv1.py
│   │       ├── inceptionresnetv2.py
│   │       ├── inceptionv3.py
│   │       ├── inceptionv4.py
│   │       ├── irevnet.py
│   │       ├── jasper.py
│   │       ├── jasperdr.py
│   │       ├── lednet.py
│   │       ├── lffd.py
│   │       ├── lwopenpose_cmupan.py
│   │       ├── menet.py
│   │       ├── mixnet.py
│   │       ├── mnasnet.py
│   │       ├── mobilenet.py
│   │       ├── mobilenet_cub.py
│   │       ├── mobilenetb.py
│   │       ├── mobilenetv2.py
│   │       ├── mobilenetv3.py
│   │       ├── model_store.py
│   │       ├── nasnet.py
│   │       ├── nin_cifar.py
│   │       ├── ntsnet_cub.py
│   │       ├── nvpattexp.py
│   │       ├── octresnet.py
│   │       ├── others
│   │       │   └── __init__.py
│   │       ├── peleenet.py
│   │       ├── pnasnet.py
│   │       ├── polynet.py
│   │       ├── preresnet.py
│   │       ├── preresnet_cifar.py
│   │       ├── proxylessnas.py
│   │       ├── proxylessnas_cub.py
│   │       ├── pspnet.py
│   │       ├── pyramidnet.py
│   │       ├── pyramidnet_cifar.py
│   │       ├── quartznet.py
│   │       ├── regnet.py
│   │       ├── resattnet.py
│   │       ├── resdropresnet_cifar.py
│   │       ├── resnesta.py
│   │       ├── resnet.py
│   │       ├── resnet_cifar.py
│   │       ├── resnet_cub.py
│   │       ├── resneta.py
│   │       ├── resnetd.py
│   │       ├── resnext.py
│   │       ├── resnext_cifar.py
│   │       ├── rir_cifar.py
│   │       ├── ror_cifar.py
│   │       ├── scnet.py
│   │       ├── selecsls.py
│   │       ├── senet.py
│   │       ├── sepreresnet.py
│   │       ├── sepreresnet_cifar.py
│   │       ├── seresnet.py
│   │       ├── seresnet_cifar.py
│   │       ├── seresnet_cub.py
│   │       ├── seresnext.py
│   │       ├── shakedropresnet_cifar.py
│   │       ├── shakeshakeresnet_cifar.py
│   │       ├── sharesnet.py
│   │       ├── shufflenet.py
│   │       ├── shufflenetv2.py
│   │       ├── shufflenetv2b.py
│   │       ├── simplepose_coco.py
│   │       ├── simpleposemobile_coco.py
│   │       ├── sinet.py
│   │       ├── sknet.py
│   │       ├── sparsenet.py
│   │       ├── spnasnet.py
│   │       ├── squeezenet.py
│   │       ├── squeezenext.py
│   │       ├── vgg.py
│   │       ├── voca.py
│   │       ├── vovnet.py
│   │       ├── wrn.py
│   │       ├── wrn1bit_cifar.py
│   │       ├── wrn_cifar.py
│   │       ├── xception.py
│   │       ├── xdensenet.py
│   │       ├── xdensenet_cifar.py
│   │       └── zfnet.py
│   ├── dataset_utils.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── ade20k_seg_dataset.py
│   │   ├── cifar100_cls_dataset.py
│   │   ├── cifar10_cls_dataset.py
│   │   ├── cityscapes_seg_dataset.py
│   │   ├── coco_hpe1_dataset.py
│   │   ├── coco_hpe2_dataset.py
│   │   ├── coco_hpe3_dataset.py
│   │   ├── coco_seg_dataset.py
│   │   ├── cub200_2011_cls_dataset.py
│   │   ├── dataset_metainfo.py
│   │   ├── imagenet1k_cls_dataset.py
│   │   ├── seg_dataset.py
│   │   ├── svhn_cls_dataset.py
│   │   └── voc_seg_dataset.py
│   ├── metrics
│   │   ├── __init__.py
│   │   ├── cls_metrics.py
│   │   ├── det_metrics.py
│   │   ├── hpe_metrics.py
│   │   ├── metric.py
│   │   ├── seg_metrics.py
│   │   └── seg_metrics_np.py
│   ├── setup.cfg
│   ├── setup.py
│   └── utils.py
├── convert_models.py
├── deploy
│   ├── Dockerfile
│   ├── bootstrap_eval.sh
│   └── bootstrap_train_gl.sh
├── eval_ch.py
├── eval_gl.py
├── eval_gl_det.py
├── eval_ke.py
├── eval_pt.py
├── eval_tf.py
├── eval_tf2.py
├── examples
│   ├── convert_tf2_to_tfl.py
│   ├── demo_gl.py
│   ├── demo_pt.py
│   └── demo_tf2.py
├── gluon
│   ├── LICENSE.txt
│   ├── README.md
│   ├── __init__.py
│   ├── dataset_utils.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── ade20k_seg_dataset.py
│   │   ├── asr_dataset.py
│   │   ├── cifar100_cls_dataset.py
│   │   ├── cifar10_cls_dataset.py
│   │   ├── cityscapes_seg_dataset.py
│   │   ├── coco_det_dataset.py
│   │   ├── coco_hpe1_dataset.py
│   │   ├── coco_hpe2_dataset.py
│   │   ├── coco_hpe3_dataset.py
│   │   ├── coco_seg_dataset.py
│   │   ├── cub200_2011_cls_dataset.py
│   │   ├── dataset_metainfo.py
│   │   ├── hpatches_mch_dataset.py
│   │   ├── imagenet1k_cls_dataset.py
│   │   ├── imagenet1k_rec_cls_dataset.py
│   │   ├── librispeech_asr_dataset.py
│   │   ├── mcv_asr_dataset.py
│   │   ├── seg_dataset.py
│   │   ├── svhn_cls_dataset.py
│   │   ├── voc_seg_dataset.py
│   │   └── widerface_det_dataset.py
│   ├── distillation.py
│   ├── gluoncv2
│   │   ├── __init__.py
│   │   ├── model_provider.py
│   │   └── models
│   │       ├── __init__.py
│   │       ├── airnet.py
│   │       ├── airnext.py
│   │       ├── alexnet.py
│   │       ├── alphapose_coco.py
│   │       ├── bagnet.py
│   │       ├── bamresnet.py
│   │       ├── bisenet.py
│   │       ├── bninception.py
│   │       ├── cbamresnet.py
│   │       ├── centernet.py
│   │       ├── cgnet.py
│   │       ├── channelnet.py
│   │       ├── common.py
│   │       ├── condensenet.py
│   │       ├── crunet.py
│   │       ├── crunetb.py
│   │       ├── dabnet.py
│   │       ├── danet.py
│   │       ├── darknet.py
│   │       ├── darknet53.py
│   │       ├── darts.py
│   │       ├── deeplabv3.py
│   │       ├── densenet.py
│   │       ├── densenet_cifar.py
│   │       ├── diapreresnet.py
│   │       ├── diapreresnet_cifar.py
│   │       ├── diaresnet.py
│   │       ├── diaresnet_cifar.py
│   │       ├── dicenet.py
│   │       ├── diracnetv2.py
│   │       ├── dla.py
│   │       ├── dpn.py
│   │       ├── drn.py
│   │       ├── efficientnet.py
│   │       ├── efficientnetedge.py
│   │       ├── espnetv2.py
│   │       ├── fastscnn.py
│   │       ├── fastseresnet.py
│   │       ├── fbnet.py
│   │       ├── fcn8sd.py
│   │       ├── fdmobilenet.py
│   │       ├── fishnet.py
│   │       ├── fpenet.py
│   │       ├── fractalnet_cifar.py
│   │       ├── ghostnet.py
│   │       ├── hardnet.py
│   │       ├── hrnet.py
│   │       ├── ibnbresnet.py
│   │       ├── ibndensenet.py
│   │       ├── ibnresnet.py
│   │       ├── ibnresnext.py
│   │       ├── ibppose_coco.py
│   │       ├── icnet.py
│   │       ├── igcv3.py
│   │       ├── inceptionresnetv1.py
│   │       ├── inceptionresnetv2.py
│   │       ├── inceptionv3.py
│   │       ├── inceptionv4.py
│   │       ├── irevnet.py
│   │       ├── isqrtcovresnet.py
│   │       ├── jasper.py
│   │       ├── jasperdr.py
│   │       ├── lednet.py
│   │       ├── lffd.py
│   │       ├── lwopenpose_cmupan.py
│   │       ├── menet.py
│   │       ├── mixnet.py
│   │       ├── mnasnet.py
│   │       ├── mobilenet.py
│   │       ├── mobilenet_cub.py
│   │       ├── mobilenetb.py
│   │       ├── mobilenetv2.py
│   │       ├── mobilenetv3.py
│   │       ├── model_store.py
│   │       ├── msdnet.py
│   │       ├── nasnet.py
│   │       ├── nin_cifar.py
│   │       ├── ntsnet_cub.py
│   │       ├── nvpattexp.py
│   │       ├── octresnet.py
│   │       ├── octresnet_cifar.py
│   │       ├── others
│   │       │   └── __init__.py
│   │       ├── peleenet.py
│   │       ├── pnasnet.py
│   │       ├── polynet.py
│   │       ├── preresnet.py
│   │       ├── preresnet_cifar.py
│   │       ├── proxylessnas.py
│   │       ├── proxylessnas_cub.py
│   │       ├── pspnet.py
│   │       ├── pyramidnet.py
│   │       ├── pyramidnet_cifar.py
│   │       ├── quartznet.py
│   │       ├── regnet.py
│   │       ├── regnetv.py
│   │       ├── res2net.py
│   │       ├── resattnet.py
│   │       ├── resdropresnet_cifar.py
│   │       ├── resnesta.py
│   │       ├── resnet.py
│   │       ├── resnet_cifar.py
│   │       ├── resnet_cub.py
│   │       ├── resneta.py
│   │       ├── resnetd.py
│   │       ├── resnext.py
│   │       ├── resnext_cifar.py
│   │       ├── rir_cifar.py
│   │       ├── ror_cifar.py
│   │       ├── scnet.py
│   │       ├── selecsls.py
│   │       ├── senet.py
│   │       ├── sepreresnet.py
│   │       ├── sepreresnet_cifar.py
│   │       ├── seresnet.py
│   │       ├── seresnet_cifar.py
│   │       ├── seresnet_cub.py
│   │       ├── seresnext.py
│   │       ├── shakedropresnet_cifar.py
│   │       ├── shakeshakeresnet_cifar.py
│   │       ├── sharesnet.py
│   │       ├── shufflenet.py
│   │       ├── shufflenetv2.py
│   │       ├── shufflenetv2b.py
│   │       ├── simplepose_coco.py
│   │       ├── simpleposemobile_coco.py
│   │       ├── sinet.py
│   │       ├── sknet.py
│   │       ├── sparsenet.py
│   │       ├── spnasnet.py
│   │       ├── squeezenet.py
│   │       ├── squeezenext.py
│   │       ├── superpointnet.py
│   │       ├── vgg.py
│   │       ├── visemenet.py
│   │       ├── voca.py
│   │       ├── vovnet.py
│   │       ├── wrn.py
│   │       ├── wrn1bit_cifar.py
│   │       ├── wrn_cifar.py
│   │       ├── xception.py
│   │       ├── xdensenet.py
│   │       ├── xdensenet_cifar.py
│   │       └── zfnet.py
│   ├── losses.py
│   ├── lr_scheduler.py
│   ├── metrics
│   │   ├── __init__.py
│   │   ├── asr_metrics.py
│   │   ├── cls_metrics.py
│   │   ├── det_metrics.py
│   │   ├── hpe_metrics.py
│   │   ├── metrics.py
│   │   ├── seg_metrics.py
│   │   ├── seg_metrics_nd.py
│   │   └── seg_metrics_np.py
│   ├── model_stats.py
│   ├── setup.cfg
│   ├── setup.py
│   ├── utils.py
│   └── weighted_random_sampler.py
├── keras_
│   ├── LICENSE.txt
│   ├── README.md
│   ├── __init__.py
│   ├── kerascv
│   │   ├── __init__.py
│   │   ├── model_provider.py
│   │   └── models
│   │       ├── __init__.py
│   │       ├── alexnet.py
│   │       ├── common.py
│   │       ├── darknet.py
│   │       ├── darknet53.py
│   │       ├── densenet.py
│   │       ├── efficientnet.py
│   │       ├── igcv3.py
│   │       ├── menet.py
│   │       ├── mnasnet.py
│   │       ├── mobilenet.py
│   │       ├── mobilenetv2.py
│   │       ├── mobilenetv3.py
│   │       ├── model_store.py
│   │       ├── others
│   │       │   └── __init__.py
│   │       ├── preresnet.py
│   │       ├── resnet.py
│   │       ├── resnext.py
│   │       ├── senet.py
│   │       ├── sepreresnet.py
│   │       ├── seresnet.py
│   │       ├── seresnext.py
│   │       ├── shufflenet.py
│   │       ├── shufflenetv2.py
│   │       ├── shufflenetv2b.py
│   │       ├── squeezenet.py
│   │       ├── squeezenext.py
│   │       ├── vgg.py
│   │       └── zfnet.py
│   ├── setup.cfg
│   ├── setup.py
│   └── utils.py
├── load_model.py
├── other
│   ├── __init__.py
│   ├── chainer_
│   │   ├── __init__.py
│   │   ├── cifar1.py
│   │   ├── imagenet1k1.py
│   │   ├── seg_utils1.py
│   │   ├── top_k_accuracy1.py
│   │   ├── train_ch_cifar.py
│   │   └── train_ch_in1k.py
│   ├── datasets
│   │   └── __init__.py
│   ├── eval_ch_cifar-.py
│   ├── eval_ch_in1k-.py
│   ├── eval_ch_seg-.py
│   ├── eval_gl_mch.py
│   ├── eval_gl_seg-.py
│   ├── eval_pt_cifar-.py
│   ├── eval_pt_cub-.py
│   ├── eval_pt_mch.py
│   ├── eval_pt_seg-.py
│   ├── gluon
│   │   ├── __init__.py
│   │   ├── khpa
│   │   │   ├── __init__.py
│   │   │   ├── eval_gl_khpa.py
│   │   │   ├── khpa_cls_dataset.py
│   │   │   ├── khpa_utils.py
│   │   │   └── train_gl_khpa.py
│   │   └── seg_utils1.py
│   ├── pytorch
│   │   ├── __init__.py
│   │   ├── cifar1.py
│   │   ├── cub200_2011_utils1.py
│   │   ├── imagenet1k1.py
│   │   └── seg_utils.py
│   ├── train_gl_cifar-.py
│   ├── train_gl_seg.py
│   └── train_pt_cifar-.py
├── prep_model.py
├── pytorch
│   ├── __init__.py
│   ├── dataset_utils.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── ade20k_seg_dataset.py
│   │   ├── asr_dataset.py
│   │   ├── cifar100_cls_dataset.py
│   │   ├── cifar10_cls_dataset.py
│   │   ├── cityscapes_seg_dataset.py
│   │   ├── coco_det_dataset.py
│   │   ├── coco_hpe1_dataset.py
│   │   ├── coco_hpe2_dataset.py
│   │   ├── coco_hpe3_dataset.py
│   │   ├── coco_seg_dataset.py
│   │   ├── cub200_2011_cls_dataset.py
│   │   ├── dataset_metainfo.py
│   │   ├── hpatches_mch_dataset.py
│   │   ├── hpe_dataset.py
│   │   ├── imagenet1k_cls_dataset.py
│   │   ├── librispeech_asr_dataset.py
│   │   ├── mcv_asr_dataset.py
│   │   ├── mpii_hpe_dataset.py
│   │   ├── seg_dataset.py
│   │   ├── svhn_cls_dataset.py
│   │   └── voc_seg_dataset.py
│   ├── metrics
│   │   ├── __init__.py
│   │   ├── asr_metrics.py
│   │   ├── cls_metrics.py
│   │   ├── det_metrics.py
│   │   ├── hpe_metrics.py
│   │   ├── metric.py
│   │   ├── ret_metrics.py
│   │   ├── seg_metrics.py
│   │   └── seg_metrics_np.py
│   ├── model_stats.py
│   └── utils.py
├── requirements.txt
├── sotabench.py
├── tensorflow2
│   ├── LICENSE.txt
│   ├── README.md
│   ├── __init__.py
│   ├── dataset_utils.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── ade20k_seg_dataset.py
│   │   ├── cifar100_cls_dataset.py
│   │   ├── cifar10_cls_dataset.py
│   │   ├── cityscapes_seg_dataset.py
│   │   ├── cls_dataset.py
│   │   ├── coco_hpe1_dataset.py
│   │   ├── coco_hpe2_dataset.py
│   │   ├── coco_hpe3_dataset.py
│   │   ├── coco_seg_dataset.py
│   │   ├── cub200_2011_cls_dataset.py
│   │   ├── dataset_metainfo.py
│   │   ├── imagenet1k_cls_dataset.py
│   │   ├── seg_dataset.py
│   │   ├── svhn_cls_dataset.py
│   │   └── voc_seg_dataset.py
│   ├── metrics
│   │   ├── __init__.py
│   │   ├── cls_metrics.py
│   │   ├── det_metrics.py
│   │   ├── hpe_metrics.py
│   │   ├── metric.py
│   │   ├── seg_metrics.py
│   │   └── seg_metrics_np.py
│   ├── setup.cfg
│   ├── setup.py
│   ├── tf2cv
│   │   ├── __init__.py
│   │   ├── model_provider.py
│   │   └── models
│   │       ├── __init__.py
│   │       ├── airnet.py
│   │       ├── airnext.py
│   │       ├── alexnet.py
│   │       ├── alphapose_coco.py
│   │       ├── bagnet.py
│   │       ├── bamresnet.py
│   │       ├── bisenet.py
│   │       ├── bninception.py
│   │       ├── cbamresnet.py
│   │       ├── centernet.py
│   │       ├── cgnet.py
│   │       ├── common.py
│   │       ├── dabnet.py
│   │       ├── danet.py
│   │       ├── darknet.py
│   │       ├── darknet53.py
│   │       ├── deeplabv3.py
│   │       ├── densenet.py
│   │       ├── densenet_cifar.py
│   │       ├── dicenet.py
│   │       ├── diracnetv2.py
│   │       ├── dla.py
│   │       ├── dpn.py
│   │       ├── drn.py
│   │       ├── efficientnet.py
│   │       ├── efficientnetedge.py
│   │       ├── espnetv2.py
│   │       ├── fastscnn.py
│   │       ├── fastseresnet.py
│   │       ├── fbnet.py
│   │       ├── fcn8sd.py
│   │       ├── fdmobilenet.py
│   │       ├── fpenet.py
│   │       ├── ghostnet.py
│   │       ├── grmiposelite_coco.py
│   │       ├── hardnet.py
│   │       ├── hrnet.py
│   │       ├── ibnbresnet.py
│   │       ├── ibndensenet.py
│   │       ├── ibnresnet.py
│   │       ├── ibnresnext.py
│   │       ├── ibppose_coco.py
│   │       ├── icnet.py
│   │       ├── igcv3.py
│   │       ├── inceptionresnetv1.py
│   │       ├── inceptionresnetv2.py
│   │       ├── inceptionv3.py
│   │       ├── inceptionv4.py
│   │       ├── jasper.py
│   │       ├── jasperdr.py
│   │       ├── lednet.py
│   │       ├── lffd.py
│   │       ├── lwopenpose_cmupan.py
│   │       ├── menet.py
│   │       ├── mixnet.py
│   │       ├── mnasnet.py
│   │       ├── mobilenet.py
│   │       ├── mobilenet_cub.py
│   │       ├── mobilenetb.py
│   │       ├── mobilenetv2.py
│   │       ├── mobilenetv3.py
│   │       ├── model_store.py
│   │       ├── nasnet.py
│   │       ├── nvpattexp.py
│   │       ├── peleenet.py
│   │       ├── pnasnet.py
│   │       ├── polynet.py
│   │       ├── preresnet.py
│   │       ├── preresnet_cifar.py
│   │       ├── proxylessnas.py
│   │       ├── proxylessnas_cub.py
│   │       ├── pspnet.py
│   │       ├── pyramidnet.py
│   │       ├── pyramidnet_cifar.py
│   │       ├── quartznet.py
│   │       ├── regnet.py
│   │       ├── resnesta.py
│   │       ├── resnet.py
│   │       ├── resnet_cifar.py
│   │       ├── resnet_cub.py
│   │       ├── resneta.py
│   │       ├── resnetd.py
│   │       ├── resnext.py
│   │       ├── resnext_cifar.py
│   │       ├── scnet.py
│   │       ├── selecsls.py
│   │       ├── senet.py
│   │       ├── sepreresnet.py
│   │       ├── sepreresnet_cifar.py
│   │       ├── seresnet.py
│   │       ├── seresnet_cifar.py
│   │       ├── seresnet_cub.py
│   │       ├── seresnext.py
│   │       ├── shufflenet.py
│   │       ├── shufflenetv2.py
│   │       ├── shufflenetv2b.py
│   │       ├── simplepose_coco.py
│   │       ├── simpleposemobile_coco.py
│   │       ├── sinet.py
│   │       ├── sknet.py
│   │       ├── spnasnet.py
│   │       ├── squeezenet.py
│   │       ├── squeezenext.py
│   │       ├── vgg.py
│   │       ├── visemenet.py
│   │       ├── voca.py
│   │       ├── vovnet.py
│   │       ├── wrn.py
│   │       ├── wrn_cifar.py
│   │       ├── xception.py
│   │       └── zfnet.py
│   └── utils.py
├── tensorflow_
│   ├── LICENSE.txt
│   ├── README.md
│   ├── __init__.py
│   ├── setup.cfg
│   ├── setup.py
│   ├── tensorflowcv
│   │   ├── __init__.py
│   │   ├── model_provider.py
│   │   └── models
│   │       ├── __init__.py
│   │       ├── alexnet.py
│   │       ├── channelnet.py
│   │       ├── common.py
│   │       ├── darknet.py
│   │       ├── darknet53.py
│   │       ├── densenet.py
│   │       ├── igcv3.py
│   │       ├── menet.py
│   │       ├── mnasnet.py
│   │       ├── mobilenet.py
│   │       ├── mobilenetv2.py
│   │       ├── mobilenetv3.py
│   │       ├── model_store.py
│   │       ├── others
│   │       │   └── __init__.py
│   │       ├── preresnet.py
│   │       ├── resnet.py
│   │       ├── resnext.py
│   │       ├── senet.py
│   │       ├── sepreresnet.py
│   │       ├── seresnet.py
│   │       ├── seresnext.py
│   │       ├── shufflenet.py
│   │       ├── shufflenetv2.py
│   │       ├── shufflenetv2b.py
│   │       ├── squeezenet.py
│   │       ├── squeezenext.py
│   │       ├── vgg.py
│   │       └── zfnet.py
│   ├── utils.py
│   └── utils_tp.py
├── tests
│   ├── __init__.py
│   ├── convert_gl2pt_batchnorm.py
│   ├── convert_gl2pt_conv2d.py
│   ├── convert_gl2pt_dense.py
│   ├── convert_gl2tf2_avgpool2d.py
│   ├── convert_gl2tf2_batchnorm.py
│   ├── convert_gl2tf2_conv2d.py
│   ├── convert_gl2tf2_conv2d_b.py
│   ├── convert_gl2tf2_dwconv2d.py
│   ├── convert_gl2tf2_lstm.py
│   ├── convert_gl2tf_avgpool2d.py
│   ├── convert_gl2tf_batchnorm.py
│   ├── convert_gl2tf_conv1x1.py
│   ├── convert_gl2tf_conv2d.py
│   ├── convert_gl2tf_dense.py
│   ├── convert_gl2tf_dwconv2d.py
│   ├── convert_gl2tf_gconv2d.py
│   └── convert_gl2tf_maxpool2d.py
├── train_ch.py
├── train_gl.py
├── train_gl_mealv2.py
├── train_ke.py
├── train_pt.py
├── train_tf.py
└── train_tf2.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# PyCharm ###
.idea/

# Visual Studio ###
Release/
Debug/
.vs/
*.VC.db
*.sdf
*.suo
*.opendb
*.psess
*.vsp
*.vspx
*.sln
*.pyproj
x64

# R ###
.Rhistory

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# Virtual Envs
venv*
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
[submodule "pytorchcv"]
	path = pytorchcv
	url = https://github.com/osmr/pytorchcv
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
group: travis_latest
dist: jammy
language: python
cache: pip
python:
  - "3.10"
  #- nightly
  #- pypy
  #- pypy3
matrix:
  allow_failures:
    - python: nightly
    - python: pypy
    - python: pypy3
install:
  #- pip install -r requirements.txt
  - pip install flake8  # pytest  # add other testing frameworks later
before_script:
  # stop the build if there are Python syntax errors or undefined names
  - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics --exclude=./venv
  # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
  - flake8 . --count --max-complexity=49 --max-line-length=127 --statistics --exclude=./gluon/gluoncv2/models/others,./pytorchcv/pytorchcv/models/others,./chainer_/chainercv2/models/others,./keras_/kerascv/models/others,./tensorflow_/tensorflowcv/models/others,./other,./venv
script:
  - true  # pytest --capture=sys  # add other tests here
notifications:
  on_success: change
  on_failure: change  # `always` will be the setting once code changes slow down
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018-2024 Oleg Sémery

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/__init__.py
--------------------------------------------------------------------------------
/chainer_/LICENSE.txt:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018-2021 Oleg Sémery

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/chainer_/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/chainer_/__init__.py
--------------------------------------------------------------------------------
/chainer_/chainercv2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/chainer_/chainercv2/__init__.py
--------------------------------------------------------------------------------
/chainer_/chainercv2/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/chainer_/chainercv2/models/__init__.py
--------------------------------------------------------------------------------
/chainer_/chainercv2/models/jasperdr.py:
--------------------------------------------------------------------------------
"""
    Jasper DR (Dense Residual) for ASR, implemented in Chainer.
    Original paper: 'Jasper: An End-to-End Convolutional Neural Acoustic Model,' https://arxiv.org/abs/1904.03288.
"""

__all__ = ['jasperdr10x5_en', 'jasperdr10x5_en_nr']

from .jasper import get_jasper


def jasperdr10x5_en(classes=29, **kwargs):
    """
    Jasper DR 10x5 model for English language from 'Jasper: An End-to-End Convolutional Neural Acoustic Model,'
    https://arxiv.org/abs/1904.03288.

    Parameters
    ----------
    classes : int, default 29
        Number of classification classes (number of graphemes).
    pretrained : bool, default False
        Whether to load the pretrained weights for model.
    root : str, default '~/.chainer/models'
        Location for keeping the model parameters.
    """
    return get_jasper(classes=classes, version=("jasper", "10x5"), use_dr=True, model_name="jasperdr10x5_en",
                      **kwargs)


def jasperdr10x5_en_nr(classes=29, **kwargs):
    """
    Jasper DR 10x5 model for English language (with presence of noise) from 'Jasper: An End-to-End Convolutional Neural
    Acoustic Model,' https://arxiv.org/abs/1904.03288.

    Parameters
    ----------
    classes : int, default 29
        Number of classification classes (number of graphemes).
    pretrained : bool, default False
        Whether to load the pretrained weights for model.
    root : str, default '~/.chainer/models'
        Location for keeping the model parameters.
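
    Examples
    --------
    A minimal usage sketch, mirroring the `_test` routine at the bottom of
    this file (the 64 audio features and the sequence length here are
    illustrative assumptions, not fixed requirements):

    >>> import numpy as np
    >>> net = jasperdr10x5_en_nr(in_channels=64)
    >>> x = np.random.rand(1, 64, 100).astype(np.float32)
    >>> x_len = np.array([100], np.int64)
    >>> y, y_len = net(x, x_len)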
    """
    return get_jasper(classes=classes, version=("jasper", "10x5"), use_dr=True, model_name="jasperdr10x5_en_nr",
                      **kwargs)


def _test():
    import numpy as np
    import chainer

    chainer.global_config.train = False

    pretrained = False
    audio_features = 64

    models = [
        jasperdr10x5_en,
        jasperdr10x5_en_nr,
    ]

    for model in models:
        net = model(
            in_channels=audio_features,
            pretrained=pretrained)

        weight_count = net.count_params()
        print("m={}, {}".format(model.__name__, weight_count))
        assert (model != jasperdr10x5_en or weight_count == 332632349)
        assert (model != jasperdr10x5_en_nr or weight_count == 332632349)

        batch = 3
        seq_len = np.random.randint(60, 150, batch)
        seq_len_max = seq_len.max() + 2
        x = np.random.rand(batch, audio_features, seq_len_max).astype(np.float32)
        x_len = seq_len.astype(np.int64)  # np.long was removed in NumPy >= 1.24

        y, y_len = net(x, x_len)
        assert (y.shape[:2] == (batch, net.classes))
        assert (y.shape[2] in [seq_len_max // 2, seq_len_max // 2 + 1])


if __name__ == "__main__":
    _test()
--------------------------------------------------------------------------------
/chainer_/chainercv2/models/mobilenetb.py:
--------------------------------------------------------------------------------
"""
    MobileNet(B) with simplified depthwise separable convolution block for ImageNet-1K, implemented in Chainer.
    Original paper: 'MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications,'
    https://arxiv.org/abs/1704.04861.
"""

__all__ = ['mobilenetb_w1', 'mobilenetb_w3d4', 'mobilenetb_wd2', 'mobilenetb_wd4']

from .mobilenet import get_mobilenet


def mobilenetb_w1(**kwargs):
    """
    1.0 MobileNet(B)-224 model with simplified depthwise separable convolution block from 'MobileNets: Efficient
    Convolutional Neural Networks for Mobile Vision Applications,' https://arxiv.org/abs/1704.04861.

    Parameters
    ----------
    pretrained : bool, default False
        Whether to load the pretrained weights for model.
    root : str, default '~/.chainer/models'
        Location for keeping the model parameters.
    """
    return get_mobilenet(width_scale=1.0, dws_simplified=True, model_name="mobilenetb_w1", **kwargs)


def mobilenetb_w3d4(**kwargs):
    """
    0.75 MobileNet(B)-224 model with simplified depthwise separable convolution block from 'MobileNets: Efficient
    Convolutional Neural Networks for Mobile Vision Applications,' https://arxiv.org/abs/1704.04861.

    Parameters
    ----------
    pretrained : bool, default False
        Whether to load the pretrained weights for model.
    root : str, default '~/.chainer/models'
        Location for keeping the model parameters.
    """
    return get_mobilenet(width_scale=0.75, dws_simplified=True, model_name="mobilenetb_w3d4", **kwargs)


def mobilenetb_wd2(**kwargs):
    """
    0.5 MobileNet(B)-224 model with simplified depthwise separable convolution block from 'MobileNets: Efficient
    Convolutional Neural Networks for Mobile Vision Applications,' https://arxiv.org/abs/1704.04861.

    Parameters
    ----------
    pretrained : bool, default False
        Whether to load the pretrained weights for model.
    root : str, default '~/.chainer/models'
        Location for keeping the model parameters.
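
    Examples
    --------
    A minimal sketch, mirroring the `_test` routine at the bottom of this file:

    >>> import numpy as np
    >>> net = mobilenetb_wd2()
    >>> y = net(np.zeros((1, 3, 224, 224), np.float32))
    >>> y.shape
    (1, 1000)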
    """
    return get_mobilenet(width_scale=0.5, dws_simplified=True, model_name="mobilenetb_wd2", **kwargs)


def mobilenetb_wd4(**kwargs):
    """
    0.25 MobileNet(B)-224 model with simplified depthwise separable convolution block from 'MobileNets: Efficient
    Convolutional Neural Networks for Mobile Vision Applications,' https://arxiv.org/abs/1704.04861.

    Parameters
    ----------
    pretrained : bool, default False
        Whether to load the pretrained weights for model.
    root : str, default '~/.chainer/models'
        Location for keeping the model parameters.
    """
    return get_mobilenet(width_scale=0.25, dws_simplified=True, model_name="mobilenetb_wd4", **kwargs)


def _test():
    import numpy as np
    import chainer

    chainer.global_config.train = False

    pretrained = False

    models = [
        mobilenetb_w1,
        mobilenetb_w3d4,
        mobilenetb_wd2,
        mobilenetb_wd4,
    ]

    for model in models:

        net = model(pretrained=pretrained)

        weight_count = net.count_params()
        print("m={}, {}".format(model.__name__, weight_count))
        assert (model != mobilenetb_w1 or weight_count == 4222056)
        assert (model != mobilenetb_w3d4 or weight_count == 2578120)
        assert (model != mobilenetb_wd2 or weight_count == 1326632)
        assert (model != mobilenetb_wd4 or weight_count == 467592)

        x = np.zeros((1, 3, 224, 224), np.float32)
        y = net(x)
        assert (y.shape == (1, 1000))


if __name__ == "__main__":
    _test()
--------------------------------------------------------------------------------
/chainer_/chainercv2/models/others/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/chainer_/chainercv2/models/others/__init__.py
--------------------------------------------------------------------------------
/chainer_/chainercv2/models/proxylessnas_cub.py:
--------------------------------------------------------------------------------
"""
    ProxylessNAS for CUB-200-2011, implemented in Chainer.
    Original paper: 'ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware,'
    https://arxiv.org/abs/1812.00332.
"""

__all__ = ['proxylessnas_cpu_cub', 'proxylessnas_gpu_cub', 'proxylessnas_mobile_cub', 'proxylessnas_mobile14_cub']

from .proxylessnas import get_proxylessnas


def proxylessnas_cpu_cub(classes=200, **kwargs):
    """
    ProxylessNAS (CPU) model for CUB-200-2011 from 'ProxylessNAS: Direct Neural Architecture Search on Target Task and
    Hardware,' https://arxiv.org/abs/1812.00332.

    Parameters
    ----------
    classes : int, default 200
        Number of classification classes.
    pretrained : bool, default False
        Whether to load the pretrained weights for model.
    root : str, default '~/.chainer/models'
        Location for keeping the model parameters.
    """
    return get_proxylessnas(classes=classes, version="cpu", model_name="proxylessnas_cpu_cub", **kwargs)


def proxylessnas_gpu_cub(classes=200, **kwargs):
    """
    ProxylessNAS (GPU) model for CUB-200-2011 from 'ProxylessNAS: Direct Neural Architecture Search on Target Task and
    Hardware,' https://arxiv.org/abs/1812.00332.

    Parameters
    ----------
    classes : int, default 200
        Number of classification classes.
    pretrained : bool, default False
        Whether to load the pretrained weights for model.
    root : str, default '~/.chainer/models'
        Location for keeping the model parameters.
    """
    return get_proxylessnas(classes=classes, version="gpu", model_name="proxylessnas_gpu_cub", **kwargs)


def proxylessnas_mobile_cub(classes=200, **kwargs):
    """
    ProxylessNAS (Mobile) model for CUB-200-2011 from 'ProxylessNAS: Direct Neural Architecture Search on Target Task
    and Hardware,' https://arxiv.org/abs/1812.00332.

    Parameters
    ----------
    classes : int, default 200
        Number of classification classes.
    pretrained : bool, default False
        Whether to load the pretrained weights for model.
    root : str, default '~/.chainer/models'
        Location for keeping the model parameters.
    """
    return get_proxylessnas(classes=classes, version="mobile", model_name="proxylessnas_mobile_cub", **kwargs)


def proxylessnas_mobile14_cub(classes=200, **kwargs):
    """
    ProxylessNAS (Mobile-14) model for CUB-200-2011 from 'ProxylessNAS: Direct Neural Architecture Search on Target Task
    and Hardware,' https://arxiv.org/abs/1812.00332.

    Parameters
    ----------
    classes : int, default 200
        Number of classification classes.
    pretrained : bool, default False
        Whether to load the pretrained weights for model.
    root : str, default '~/.chainer/models'
        Location for keeping the model parameters.
    """
    return get_proxylessnas(classes=classes, version="mobile14", model_name="proxylessnas_mobile14_cub", **kwargs)


def _test():
    import numpy as np
    import chainer

    chainer.global_config.train = False

    pretrained = False

    models = [
        proxylessnas_cpu_cub,
        proxylessnas_gpu_cub,
        proxylessnas_mobile_cub,
        proxylessnas_mobile14_cub,
    ]

    for model in models:

        net = model(pretrained=pretrained)
        weight_count = net.count_params()
        print("m={}, {}".format(model.__name__, weight_count))
        assert (model != proxylessnas_cpu_cub or weight_count == 3215248)
        assert (model != proxylessnas_gpu_cub or weight_count == 5736648)
        assert (model != proxylessnas_mobile_cub or weight_count == 3055712)
        assert (model != proxylessnas_mobile14_cub or weight_count == 5423168)

        x = np.zeros((14, 3, 224, 224), np.float32)
        y = net(x)
        assert (y.shape == (14, 200))


if __name__ == "__main__":
    _test()
--------------------------------------------------------------------------------
/chainer_/chainercv2/models/zfnet.py:
--------------------------------------------------------------------------------
"""
    ZFNet for ImageNet-1K, implemented in Chainer.
    Original paper: 'Visualizing and Understanding Convolutional Networks,' https://arxiv.org/abs/1311.2901.
"""

__all__ = ['zfnet', 'zfnetb']

import os
from chainer.serializers import load_npz
from .alexnet import AlexNet


def get_zfnet(version="a",
              model_name=None,
              pretrained=False,
              root=os.path.join("~", ".chainer", "models"),
              **kwargs):
    """
    Create ZFNet model with specific parameters.

    Parameters
    ----------
    version : str, default 'a'
        Version of ZFNet ('a' or 'b').
    model_name : str or None, default None
        Model name for loading pretrained model.
    pretrained : bool, default False
        Whether to load the pretrained weights for model.
    root : str, default '~/.chainer/models'
        Location for keeping the model parameters.
    """
    if version == "a":
        channels = [[96], [256], [384, 384, 256]]
        ksizes = [[7], [5], [3, 3, 3]]
        strides = [[2], [2], [1, 1, 1]]
        pads = [[1], [0], [1, 1, 1]]
        use_lrn = True
    elif version == "b":
        channels = [[96], [256], [512, 1024, 512]]
        ksizes = [[7], [5], [3, 3, 3]]
        strides = [[2], [2], [1, 1, 1]]
        pads = [[1], [0], [1, 1, 1]]
        use_lrn = True
    else:
        raise ValueError("Unsupported ZFNet version {}".format(version))

    net = AlexNet(
        channels=channels,
        ksizes=ksizes,
        strides=strides,
        pads=pads,
        use_lrn=use_lrn,
        **kwargs)

    if pretrained:
        if (model_name is None) or (not model_name):
            raise ValueError("Parameter `model_name` should be properly initialized for loading pretrained model.")
        from .model_store import get_model_file
        load_npz(
            file=get_model_file(
                model_name=model_name,
                local_model_store_dir_path=root),
            obj=net)

    return net


def zfnet(**kwargs):
    """
    ZFNet model from 'Visualizing and Understanding Convolutional Networks,' https://arxiv.org/abs/1311.2901.

    Parameters
    ----------
    pretrained : bool, default False
        Whether to load the pretrained weights for model.
    root : str, default '~/.chainer/models'
        Location for keeping the model parameters.
    """
    return get_zfnet(model_name="zfnet", **kwargs)


def zfnetb(**kwargs):
    """
    ZFNet-b model from 'Visualizing and Understanding Convolutional Networks,' https://arxiv.org/abs/1311.2901.

    Parameters
    ----------
    pretrained : bool, default False
        Whether to load the pretrained weights for model.
    root : str, default '~/.chainer/models'
        Location for keeping the model parameters.
    """
    return get_zfnet(version="b", model_name="zfnetb", **kwargs)


def _test():
    import numpy as np
    import chainer

    chainer.global_config.train = False

    pretrained = False

    models = [
        zfnet,
        zfnetb,
    ]

    for model in models:
        net = model(pretrained=pretrained)
        weight_count = net.count_params()
        print("m={}, {}".format(model.__name__, weight_count))
        assert (model != zfnet or weight_count == 62357608)
        assert (model != zfnetb or weight_count == 107627624)

        x = np.zeros((1, 3, 224, 224), np.float32)
        y = net(x)
        assert (y.shape == (1, 1000))


if __name__ == "__main__":
    _test()
--------------------------------------------------------------------------------
/chainer_/dataset_utils.py:
--------------------------------------------------------------------------------
"""
    Dataset routines.
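
    A typical flow, as a sketch (it assumes `args` comes from an argparse
    parser populated via the metainfo's `add_dataset_parser_arguments`):

        ds_metainfo = get_dataset_metainfo("CIFAR10")
        ds_metainfo.update(args)
        val_data = get_val_data_source(ds_metainfo, batch_size=128, num_workers=4)
        for batch in val_data["iterator"]:
            ...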
"""

__all__ = ['get_dataset_metainfo', 'get_train_data_source', 'get_val_data_source', 'get_test_data_source']

from chainer.iterators import MultiprocessIterator
from .datasets.imagenet1k_cls_dataset import ImageNet1KMetaInfo
from .datasets.cub200_2011_cls_dataset import CUB200MetaInfo
from .datasets.cifar10_cls_dataset import CIFAR10MetaInfo
from .datasets.cifar100_cls_dataset import CIFAR100MetaInfo
from .datasets.svhn_cls_dataset import SVHNMetaInfo
from .datasets.voc_seg_dataset import VOCMetaInfo
from .datasets.ade20k_seg_dataset import ADE20KMetaInfo
from .datasets.cityscapes_seg_dataset import CityscapesMetaInfo
from .datasets.coco_seg_dataset import CocoSegMetaInfo
from .datasets.coco_hpe1_dataset import CocoHpe1MetaInfo
from .datasets.coco_hpe2_dataset import CocoHpe2MetaInfo
from .datasets.coco_hpe3_dataset import CocoHpe3MetaInfo


def get_dataset_metainfo(dataset_name):
    """
    Get dataset metainfo by name of dataset.

    Parameters
    ----------
    dataset_name : str
        Dataset name.

    Returns
    -------
    DatasetMetaInfo
        Dataset metainfo.
    """
    dataset_metainfo_map = {
        "ImageNet1K": ImageNet1KMetaInfo,
        "CUB200_2011": CUB200MetaInfo,
        "CIFAR10": CIFAR10MetaInfo,
        "CIFAR100": CIFAR100MetaInfo,
        "SVHN": SVHNMetaInfo,
        "VOC": VOCMetaInfo,
        "ADE20K": ADE20KMetaInfo,
        "Cityscapes": CityscapesMetaInfo,
        "CocoSeg": CocoSegMetaInfo,
        "CocoHpe1": CocoHpe1MetaInfo,
        "CocoHpe2": CocoHpe2MetaInfo,
        "CocoHpe3": CocoHpe3MetaInfo,
    }
    if dataset_name in dataset_metainfo_map.keys():
        return dataset_metainfo_map[dataset_name]()
    else:
        raise Exception("Unrecognized dataset: {}".format(dataset_name))


def get_train_data_source(ds_metainfo,
                          batch_size,
                          num_workers):
    transform = ds_metainfo.train_transform(ds_metainfo=ds_metainfo)
    dataset = ds_metainfo.dataset_class(
        root=ds_metainfo.root_dir_path,
        mode="train",
        transform=transform)
    ds_metainfo.update_from_dataset(dataset)
    iterator = MultiprocessIterator(
        dataset=dataset,
        batch_size=batch_size,
        repeat=False,
        shuffle=True,
        n_processes=num_workers,
        shared_mem=300000000)
    return {
        # "transform": transform,
        "iterator": iterator,
        "ds_len": len(dataset)
    }


def get_val_data_source(ds_metainfo,
                        batch_size,
                        num_workers):
    transform = ds_metainfo.val_transform(ds_metainfo=ds_metainfo)
    dataset = ds_metainfo.dataset_class(
        root=ds_metainfo.root_dir_path,
        mode="val",
        transform=transform)
    ds_metainfo.update_from_dataset(dataset)
    iterator = MultiprocessIterator(
        dataset=dataset,
        batch_size=batch_size,
        repeat=False,
        shuffle=False,
        n_processes=num_workers,
        shared_mem=100000000)
    return {
        # "transform": transform,
        "iterator": iterator,
        "ds_len": len(dataset)
    }


def get_test_data_source(ds_metainfo,
                         batch_size,
                         num_workers):
    transform = ds_metainfo.test_transform(ds_metainfo=ds_metainfo)
    dataset = ds_metainfo.dataset_class(
        root=ds_metainfo.root_dir_path,
        mode="test",
        transform=transform)
    ds_metainfo.update_from_dataset(dataset)
    iterator = MultiprocessIterator(
        dataset=dataset,
        batch_size=batch_size,
        repeat=False,
        shuffle=False,
        n_processes=num_workers,
        shared_mem=300000000)
    return {
        # "transform": transform,
        "iterator": iterator,
        "ds_len": len(dataset)
    }
--------------------------------------------------------------------------------
/chainer_/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/chainer_/datasets/__init__.py
--------------------------------------------------------------------------------
/chainer_/datasets/ade20k_seg_dataset.py:
--------------------------------------------------------------------------------
import os
import numpy as np
from PIL import Image
from .seg_dataset import SegDataset
from .voc_seg_dataset import VOCMetaInfo


class ADE20KSegDataset(SegDataset):
    """
    ADE20K semantic segmentation dataset.

    Parameters
    ----------
    root : str
        Path to a folder with `ADEChallengeData2016` subfolder.
    mode : str, default 'train'
        'train', 'val', 'test', or 'demo'.
    transform : callable, optional
        A function that transforms the image.
    """
    def __init__(self,
                 root,
                 mode="train",
                 transform=None,
                 **kwargs):
        super(ADE20KSegDataset, self).__init__(
            root=root,
            mode=mode,
            transform=transform,
            **kwargs)

        base_dir_path = os.path.join(root, "ADEChallengeData2016")
        assert os.path.exists(base_dir_path), "Please prepare dataset"

        image_dir_path = os.path.join(base_dir_path, "images")
        mask_dir_path = os.path.join(base_dir_path, "annotations")

        mode_dir_name = "training" if mode == "train" else "validation"
        image_dir_path = os.path.join(image_dir_path, mode_dir_name)
        mask_dir_path = os.path.join(mask_dir_path, mode_dir_name)

        self.images = []
        self.masks = []
        for image_file_name in os.listdir(image_dir_path):
            image_file_stem, _ = os.path.splitext(image_file_name)
            if image_file_name.endswith(".jpg"):
                image_file_path = os.path.join(image_dir_path, image_file_name)
                mask_file_name = image_file_stem + ".png"
                mask_file_path = os.path.join(mask_dir_path, mask_file_name)
                if os.path.isfile(mask_file_path):
                    self.images.append(image_file_path)
                    self.masks.append(mask_file_path)
                else:
                    print("Cannot find the mask: {}".format(mask_file_path))

        assert (len(self.images) == len(self.masks))
        if len(self.images) == 0:
            raise RuntimeError("Found 0 images in subfolders of: {}\n".format(base_dir_path))

        self.add_getter('img', self._get_image)
        self.add_getter('label', self._get_label)

    def _get_image(self, i):
        image = Image.open(self.images[i]).convert("RGB")
        assert (self.mode in ("test", "demo"))
        image = self._img_transform(image)
        if self.transform is not None:
            image = self.transform(image)
        return image

    def _get_label(self, i):
        if self.mode == "demo":
            return os.path.basename(self.images[i])
        assert (self.mode == "test")
        mask = Image.open(self.masks[i])
        mask = self._mask_transform(mask)
        return mask

    classes = 150
    vague_idx = 150
    use_vague = True
    background_idx = -1
    ignore_bg = False

    @staticmethod
    def _mask_transform(mask):
        np_mask = np.array(mask).astype(np.int32)
        # ADE20K stores 0 as 'unlabeled'; remap it to the vague index and
        # shift the remaining labels to a 0-based range.
        np_mask[np_mask == 0] = ADE20KSegDataset.vague_idx + 1
        np_mask -= 1
        return np_mask

    def __len__(self):
        return len(self.images)


class ADE20KMetaInfo(VOCMetaInfo):
    def __init__(self):
        super(ADE20KMetaInfo, self).__init__()
        self.label = "ADE20K"
        self.short_label = "voc"
        self.root_dir_name = "ade20k"
        self.dataset_class = ADE20KSegDataset
        self.num_classes = ADE20KSegDataset.classes
        self.test_metric_extra_kwargs = [
            {"vague_idx": ADE20KSegDataset.vague_idx,
             "use_vague": ADE20KSegDataset.use_vague,
             "macro_average": False},
            {"num_classes": ADE20KSegDataset.classes,
             "vague_idx": ADE20KSegDataset.vague_idx,
             "use_vague": ADE20KSegDataset.use_vague,
             "bg_idx": ADE20KSegDataset.background_idx,
             "ignore_bg": ADE20KSegDataset.ignore_bg,
             "macro_average": False}]
--------------------------------------------------------------------------------
/chainer_/datasets/cifar100_cls_dataset.py:
--------------------------------------------------------------------------------
"""
    CIFAR-100 classification dataset.
"""

import os
from chainer.dataset import DatasetMixin
from chainer.datasets.cifar import get_cifar100
from .cifar10_cls_dataset import CIFAR10MetaInfo


class CIFAR100(DatasetMixin):
    """
    CIFAR-100 image classification dataset.

    Parameters
    ----------
    root : str, default '~/.chainer/datasets/cifar100'
        Path to temp folder for storing data.
    mode : str, default 'train'
        'train', 'val', or 'test'.
    transform : function, default None
        A function that takes data and label and transforms them.
    """
    def __init__(self,
                 root=os.path.join("~", ".chainer", "datasets", "cifar100"),
                 mode="train",
                 transform=None):
        assert (root is not None)
        self.transform = transform
        train_ds, test_ds = get_cifar100()
        self.base = train_ds if mode == "train" else test_ds

    def __len__(self):
        return len(self.base)

    def get_example(self, i):
        image, label = self.base[i]
        image = self.transform(image)
        return image, label


class CIFAR100MetaInfo(CIFAR10MetaInfo):
    def __init__(self):
        super(CIFAR100MetaInfo, self).__init__()
        self.label = "CIFAR100"
        self.root_dir_name = "cifar100"
        self.dataset_class = CIFAR100
        self.num_classes = 100
--------------------------------------------------------------------------------
/chainer_/datasets/cifar10_cls_dataset.py:
--------------------------------------------------------------------------------
"""
    CIFAR-10 classification dataset.
"""

import os
import numpy as np
from chainer.dataset import DatasetMixin
from chainer.datasets.cifar import get_cifar10
from chainercv.transforms import random_crop
from chainercv.transforms import random_flip
from .dataset_metainfo import DatasetMetaInfo


class CIFAR10(DatasetMixin):
    """
    CIFAR-10 image classification dataset.

    Parameters
    ----------
    root : str, default '~/.chainer/datasets/cifar10'
        Path to temp folder for storing data.
    mode : str, default 'train'
        'train', 'val', or 'test'.
    transform : function, default None
        A function that takes data and label and transforms them.
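
    Examples
    --------
    A minimal sketch (the identity transform is a placeholder assumption;
    training code passes `CIFARTrainTransform` instead):

    >>> dataset = CIFAR10(mode="train", transform=lambda img: img)
    >>> image, label = dataset.get_example(0)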
    """
    def __init__(self,
                 root=os.path.join("~", ".chainer", "datasets", "cifar10"),
                 mode="train",
                 transform=None):
        assert (root is not None)
        self.transform = transform
        train_ds, test_ds = get_cifar10()
        self.base = train_ds if mode == "train" else test_ds

    def __len__(self):
        return len(self.base)

    def get_example(self, i):
        image, label = self.base[i]
        image = self.transform(image)
        return image, label


class CIFAR10MetaInfo(DatasetMetaInfo):
    def __init__(self):
        super(CIFAR10MetaInfo, self).__init__()
        self.label = "CIFAR10"
        self.short_label = "cifar"
        self.root_dir_name = "cifar10"
        self.dataset_class = CIFAR10
        self.num_training_samples = 50000
        self.in_channels = 3
        self.num_classes = 10
        self.input_image_size = (32, 32)
        self.train_metric_capts = ["Train.Err"]
        self.train_metric_names = ["Top1Error"]
        self.train_metric_extra_kwargs = [{"name": "err"}]
        self.val_metric_capts = ["Val.Err"]
        self.val_metric_names = ["Top1Error"]
        self.val_metric_extra_kwargs = [{"name": "err"}]
        self.saver_acc_ind = 0
        self.train_transform = CIFARTrainTransform
        self.val_transform = CIFARValTransform
        self.test_transform = CIFARValTransform
        self.ml_type = "imgcls"


class CIFARTrainTransform(object):
    """
    CIFAR-10 training transform.
    """
    def __init__(self,
                 ds_metainfo,
                 mean_rgb=(0.4914, 0.4822, 0.4465),
                 std_rgb=(0.2023, 0.1994, 0.2010)):
        assert (ds_metainfo is not None)
        self.mean = np.array(mean_rgb, np.float32)[:, np.newaxis, np.newaxis]
        self.std = np.array(std_rgb, np.float32)[:, np.newaxis, np.newaxis]
        # crop size for random_crop in __call__; assumed to be the dataset
        # input size, e.g. (32, 32) for CIFAR-10
        self.resize_value = ds_metainfo.input_image_size

    def __call__(self, img):
        img = random_crop(img=img, size=self.resize_value)
        img = random_flip(img=img, x_random=True)
        img -= self.mean
        img /= self.std
        return img


class CIFARValTransform(object):
    """
    CIFAR-10 validation transform.
    """
    def __init__(self,
                 ds_metainfo,
                 mean_rgb=(0.4914, 0.4822, 0.4465),
                 std_rgb=(0.2023, 0.1994, 0.2010)):
        assert (ds_metainfo is not None)
        self.mean = np.array(mean_rgb, np.float32)[:, np.newaxis, np.newaxis]
        self.std = np.array(std_rgb, np.float32)[:, np.newaxis, np.newaxis]

    def __call__(self, img):
        img -= self.mean
        img /= self.std
        return img
--------------------------------------------------------------------------------
/chainer_/datasets/dataset_metainfo.py:
--------------------------------------------------------------------------------
"""
    Base dataset metainfo class.
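
    `DatasetMetaInfo` is a plain container of dataset attributes plus argparse
    helpers; concrete datasets subclass it. A sketch of the intended flow,
    using the CIFAR-10 subclass from this package as an assumed example:

        import argparse
        from .cifar10_cls_dataset import CIFAR10MetaInfo

        parser = argparse.ArgumentParser()
        ds_metainfo = CIFAR10MetaInfo()
        ds_metainfo.add_dataset_parser_arguments(parser, work_dir_path="data")
        ds_metainfo.update(parser.parse_args())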
"""

import os


class DatasetMetaInfo(object):
    def __init__(self):
        self.use_imgrec = False
        self.label = None
        self.root_dir_name = None
        self.root_dir_path = None
        self.dataset_class = None
        self.num_training_samples = None
        self.in_channels = None
        self.num_classes = None
        self.input_image_size = None
        self.train_metric_capts = None
        self.train_metric_names = None
        self.train_metric_extra_kwargs = None
        self.val_metric_capts = None
        self.val_metric_names = None
        self.val_metric_extra_kwargs = None
        self.test_metric_capts = None
        self.test_metric_names = None
        self.test_metric_extra_kwargs = None
        self.saver_acc_ind = None
        self.ml_type = None
        self.allow_hybridize = True
        self.train_net_extra_kwargs = None
        self.test_net_extra_kwargs = None
        self.load_ignore_extra = False

    def add_dataset_parser_arguments(self,
                                     parser,
                                     work_dir_path):
        parser.add_argument(
            "--data-dir",
            type=str,
            default=os.path.join(work_dir_path, self.root_dir_name),
            help="path to directory with {} dataset".format(self.label))
        parser.add_argument(
            "--num-classes",
            type=int,
            default=self.num_classes,
            help="number of classes")
        parser.add_argument(
            "--in-channels",
            type=int,
            default=self.in_channels,
            help="number of input channels")

    def update(self,
               args):
        self.root_dir_path = args.data_dir
        self.num_classes = args.num_classes
        self.in_channels = args.in_channels

    def update_from_dataset(self,
                            dataset):
        """
        Update dataset metainfo after a dataset class instance creation.

        Parameters
        ----------
        dataset : obj
            A dataset class instance.
        """
        pass
--------------------------------------------------------------------------------
/chainer_/datasets/seg_dataset.py:
--------------------------------------------------------------------------------
import random
import numpy as np
from PIL import Image, ImageOps, ImageFilter
from chainercv.chainer_experimental.datasets.sliceable import GetterDataset


class SegDataset(GetterDataset):
    """
    Segmentation base dataset.

    Parameters
    ----------
    root : str
        Path to data folder.
    mode : str
        'train', 'val', 'test', or 'demo'.
    transform : callable
        A function that transforms the image.
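
    Notes
    -----
    `_sync_transform` (training) applies random mirror, random short-edge
    scaling, pad-and-crop to `crop_size`, and PSP-style Gaussian blur;
    `_val_sync_transform` resizes and center-crops. A hypothetical subclass
    sketch:

        class MySegDataset(SegDataset):
            def __init__(self, root, mode="test", transform=None):
                super(MySegDataset, self).__init__(root, mode, transform)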
    """
    def __init__(self,
                 root,
                 mode,
                 transform,
                 base_size=520,
                 crop_size=480):
        super(SegDataset, self).__init__()
        # only 'test' and 'demo' modes are supported by this Chainer implementation
        assert (mode in ("test", "demo"))
        self.root = root
        self.mode = mode
        self.transform = transform
        self.base_size = base_size
        self.crop_size = crop_size

    def _val_sync_transform(self, image, mask):
        outsize = self.crop_size
        short_size = outsize
        w, h = image.size
        if w > h:
            oh = short_size
            ow = int(1.0 * w * oh / h)
        else:
            ow = short_size
            oh = int(1.0 * h * ow / w)
        image = image.resize((ow, oh), Image.BILINEAR)
        mask = mask.resize((ow, oh), Image.NEAREST)
        # center crop
        w, h = image.size
        x1 = int(round(0.5 * (w - outsize)))
        y1 = int(round(0.5 * (h - outsize)))
        image = image.crop((x1, y1, x1 + outsize, y1 + outsize))
        mask = mask.crop((x1, y1, x1 + outsize, y1 + outsize))
        # final transform
        image, mask = self._img_transform(image), self._mask_transform(mask)
        return image, mask

    def _sync_transform(self, image, mask):
        # random mirror
        if random.random() < 0.5:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)
            mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
        crop_size = self.crop_size
        # random scale (short edge)
        short_size = random.randint(int(self.base_size * 0.5), int(self.base_size * 2.0))
        w, h = image.size
        if h > w:
            ow = short_size
            oh = int(1.0 * h * ow / w)
        else:
            oh = short_size
            ow = int(1.0 * w * oh / h)
        image = image.resize((ow, oh), Image.BILINEAR)
        mask = mask.resize((ow, oh), Image.NEAREST)
        # pad crop
        if short_size < crop_size:
            padh = crop_size - oh if oh < crop_size else 0
            padw = crop_size - ow if ow < crop_size else 0
            image = ImageOps.expand(image, border=(0, 0, padw, padh), fill=0)
            mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0)
        # random crop crop_size
        w, h = image.size
        x1 = random.randint(0, w - crop_size)
        y1 = random.randint(0, h - crop_size)
        image = image.crop((x1, y1, x1 + crop_size, y1 + crop_size))
        mask = mask.crop((x1, y1, x1 + crop_size, y1 + crop_size))
        # gaussian blur as in PSP
        if random.random() < 0.5:
            image = image.filter(ImageFilter.GaussianBlur(
                radius=random.random()))
        # final transform
        image, mask = self._img_transform(image), self._mask_transform(mask)
        return image, mask

    @staticmethod
    def _img_transform(image):
        return np.array(image)

    @staticmethod
    def _mask_transform(mask):
        return np.array(mask).astype(np.int32)
--------------------------------------------------------------------------------
/chainer_/datasets/svhn_cls_dataset.py:
--------------------------------------------------------------------------------
"""
    SVHN classification dataset.
"""

import os
from chainer.dataset import DatasetMixin
from chainer.datasets.svhn import get_svhn
from .cifar10_cls_dataset import CIFAR10MetaInfo


class SVHN(DatasetMixin):
    """
    SVHN image classification dataset from http://ufldl.stanford.edu/housenumbers/.
    Each sample is an image (as a 3D numpy array) with shape (3, 32, 32).
    Note: The SVHN dataset assigns the label `10` to the digit `0`. However, in this Dataset,
    we assign the label `0` to the digit `0`.
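
    A minimal usage sketch (same pattern as the CIFAR datasets in this
    package; the identity transform is a placeholder assumption):

        dataset = SVHN(mode="train", transform=lambda img: img)
        image, label = dataset.get_example(0)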
17 | 18 | Parameters 19 | ---------- 20 | root : str, default '~/.chainer/datasets/svhn' 21 | Path to temp folder for storing data. 22 | mode : str, default 'train' 23 | 'train', 'val', or 'test'. 24 | transform : function, default None 25 | A function that takes data and label and transforms them. 26 | """ 27 | def __init__(self, 28 | root=os.path.join("~", ".chainer", "datasets", "svhn"), 29 | mode="train", 30 | transform=None): 31 | assert (root is not None) 32 | self.transform = transform 33 | train_ds, test_ds = get_svhn() 34 | self.base = train_ds if mode == "train" else test_ds 35 | 36 | def __len__(self): 37 | return len(self.base) 38 | 39 | def get_example(self, i): 40 | image, label = self.base[i] 41 | if self.transform is not None: image = self.transform(image) 42 | return image, label 43 | 44 | 45 | class SVHNMetaInfo(CIFAR10MetaInfo): 46 | def __init__(self): 47 | super(SVHNMetaInfo, self).__init__() 48 | self.label = "SVHN" 49 | self.root_dir_name = "svhn" 50 | self.dataset_class = SVHN 51 | self.num_training_samples = 73257 52 | -------------------------------------------------------------------------------- /chainer_/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/chainer_/metrics/__init__.py -------------------------------------------------------------------------------- /chainer_/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | license_file = LICENSE.txt 3 | 4 | [bdist_wheel] 5 | universal=1 -------------------------------------------------------------------------------- /chainer_/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from os import path 3 | from io import open 4 | 5 | here = path.abspath(path.dirname(__file__)) 6 | with open(path.join(here, 'README.md'), encoding='utf-8') as f: 7 | long_description = f.read() 8 | 9 | setup( 10 | name='chainercv2', 11 | version='0.0.62', 12 | description='Image classification and segmentation models for Chainer', 13 | license='MIT', 14 | long_description=long_description, 15 | long_description_content_type='text/markdown', 16 | url='https://github.com/osmr/imgclsmob', 17 | author='Oleg Sémery', 18 | author_email='osemery@gmail.com', 19 | classifiers=[ 20 | 'Development Status :: 3 - Alpha', 21 | 'Intended Audience :: Science/Research', 22 | 'License :: OSI Approved :: MIT License', 23 | 'Operating System :: OS Independent', 24 | 'Programming Language :: Python', 25 | 'Topic :: Scientific/Engineering :: Image Recognition', 26 | ], 27 | keywords='machine-learning deep-learning neuralnetwork image-classification chainer imagenet cifar svhn vgg resnet ' 28 | 'pyramidnet diracnet densenet condensenet wrn drn dpn darknet fishnet espnetv2 xdensenet squeezenet ' 29 | 'squeezenext shufflenet menet mobilenet igcv3 mnasnet darts xception inception polynet nasnet pnasnet ror ' 30 | 'proxylessnas dianet efficientnet mixnet image-segmentation voc ade20k cityscapes coco pspnet deeplabv3 ' 31 | 'fcn', 32 | packages=find_packages(exclude=['datasets', 'metrics', 'others', '*.others', 'others.*', '*.others.*']), 33 | include_package_data=True, 34 | install_requires=['requests', 'chainer>=5.0.0'], 35 | ) 36 | -------------------------------------------------------------------------------- /deploy/Dockerfile: 
-------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04 2 | LABEL maintainer="osemery@gmail.com" 3 | 4 | RUN apt update 5 | RUN apt install -y python3-pip 6 | RUN apt install -y ipython3 git htop mc wget 7 | RUN apt install -y libsm6 libxext6 libxrender-dev 8 | 9 | RUN pip3 install --upgrade mxnet-cu100 10 | RUN pip3 install --upgrade torch torchvision 11 | RUN pip3 install --upgrade chainer cupy-cuda100 chainercv 12 | #RUN pip3 install --upgrade keras-mxnet 13 | RUN pip3 install --upgrade tensorflow-gpu tensorpack 14 | RUN pip3 install --upgrade keras 15 | RUN pip3 install --upgrade pandas Pillow tqdm opencv-python 16 | #RUN pip3 install --upgrade gluoncv2 pytorchcv 17 | 18 | ADD bootstrap_eval.sh /root/ 19 | RUN chmod ugo+x /root/bootstrap_eval.sh 20 | CMD /root/bootstrap_eval.sh -------------------------------------------------------------------------------- /deploy/bootstrap_eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | git clone https://github.com/osmr/imgclsmob.git 4 | 5 | mkdir imgclsmob_data 6 | cd imgclsmob_data 7 | 8 | #mkdir imagenet_rec 9 | #cd imagenet_rec 10 | #wget http://soleka.sadmin.ru/SOLEKA/val.idx 11 | #wget http://soleka.sadmin.ru/SOLEKA/val.rec 12 | 13 | mkdir imagenet 14 | cd imagenet 15 | wget http://soleka.sadmin.ru/SOLEKA/imagenet_val.zip 16 | unzip imagenet_val.zip 17 | rm imagenet_val.zip 18 | 19 | cd ../../imgclsmob 20 | 21 | #python3 eval_gl.py --num-gpus=1 --model=resnet18 --batch-size=100 -j=4 --save-dir=../imgclsmob_data/resnet18/ --use-pretrained --calc-flops 22 | python3 eval_gl.py --dataset=ImageNet1K --num-gpus=1 --model=resnet18 --batch-size=100 -j=4 --save-dir=../imgclsmob_data/resnet18/ --use-pretrained --calc-flops 23 | python3 eval_pt.py --num-gpus=1 --model=resnet18 --batch-size=100 -j=4 --save-dir=../imgclsmob_data/resnet18/ --use-pretrained --calc-flops 24 | python3 eval_ch.py --num-gpus=1 --model=resnet18 --batch-size=100 -j=4 --save-dir=../imgclsmob_data/resnet18/ --use-pretrained 25 | #python3 eval_ke.py --num-gpus=1 --model=resnet18 --batch-size=100 -j=4 --save-dir=../imgclsmob_data/resnet18/ --use-pretrained 26 | python3 eval_tf.py --num-gpus=1 --model=resnet18 --batch-size=100 -j=4 --save-dir=../imgclsmob_data/resnet18/ --use-pretrained 27 | 28 | cp ~/.mxnet/models/resnet18-0951-98a2545b.params ~/imgclsmob_data/resnet18/ 29 | python3 convert_models.py --src-fwk=gluon --dst-fwk=pytorch --src-model=resnet18 --dst-model=resnet18 --src-params=../imgclsmob_data/resnet18/resnet18-0951-98a2545b.params --dst-params=../imgclsmob_data/resnet18/resnet18.pth --save-dir=../imgclsmob_data/resnet18/ 30 | python3 eval_pt.py --num-gpus=1 --model=resnet18 --batch-size=100 -j=4 --save-dir=../imgclsmob_data/resnet18/ --resume=../imgclsmob_data/resnet18/resnet18.pth --calc-flops 31 | python3 convert_models.py --src-fwk=gluon --dst-fwk=chainer --src-model=resnet18 --dst-model=resnet18 --src-params=../imgclsmob_data/resnet18/resnet18-0951-98a2545b.params --dst-params=../imgclsmob_data/resnet18/resnet18.npz --save-dir=../imgclsmob_data/resnet18/ 32 | python3 eval_ch.py --num-gpus=1 --model=resnet18 --batch-size=100 -j=4 --save-dir=../imgclsmob_data/resnet18/ --resume=../imgclsmob_data/resnet18/resnet18.npz 33 | #python3 convert_models.py --src-fwk=gluon --dst-fwk=keras --src-model=resnet18 --dst-model=resnet18 --src-params=../imgclsmob_data/resnet18/resnet18-0951-98a2545b.params 
--dst-params=../imgclsmob_data/resnet18/resnet18.h5 --save-dir=../imgclsmob_data/resnet18/ 34 | #python3 eval_ke.py --num-gpus=1 --model=resnet18 --batch-size=100 -j=4 --save-dir=../imgclsmob_data/resnet18/ --resume=../imgclsmob_data/resnet18/resnet18.h5 35 | python3 convert_models.py --src-fwk=gluon --dst-fwk=tensorflow --src-model=resnet18 --dst-model=resnet18 --src-params=../imgclsmob_data/resnet18/resnet18-0951-98a2545b.params --dst-params=../imgclsmob_data/resnet18/resnet18.tf.npz --save-dir=../imgclsmob_data/resnet18/ 36 | python3 eval_tf.py --num-gpus=1 --model=resnet18 --batch-size=100 -j=4 --save-dir=../imgclsmob_data/resnet18/ --resume=../imgclsmob_data/resnet18/resnet18.tf.npz -------------------------------------------------------------------------------- /deploy/bootstrap_train_gl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | git clone https://github.com/osmr/imgclsmob.git 4 | 5 | mkdir imgclsmob_data 6 | cd imgclsmob_data 7 | mkdir imagenet_rec 8 | cd imagenet_rec 9 | wget http://soleka.sadmin.ru/SOLEKA/val.idx 10 | wget http://soleka.sadmin.ru/SOLEKA/val.rec 11 | wget http://soleka.sadmin.ru/SOLEKA/train.idx 12 | wget http://soleka.sadmin.ru/SOLEKA/train.rec 13 | 14 | 15 | cd ../../imgclsmob 16 | #python3 train_gl.py --num-gpus=1 --model=resnet18 --save-dir=../imgclsmob_data/resnet18/ --batch-size=320 --batch-size-scale=4 -j=12 --num-epochs=200 --lr=0.5 --lr-mode=cosine --wd=0.0001 --warmup-epochs=5 --warmup-mode=cosine --mixup --label-smoothing --gamma-wd-mult=0.0001 --beta-wd-mult=0.0001 --bias-wd-mult=0.01 --attempt=1 --start-epoch=1 17 | python3 train_gl.py --dataset=ImageNet1K --num-gpus=1 --model=resnet18 --save-dir=../imgclsmob_data/resnet18/ --batch-size=320 --batch-size-scale=4 -j=12 --num-epochs=200 --lr=0.5 --lr-mode=cosine --wd=0.0001 --warmup-epochs=5 --warmup-mode=cosine --mixup --label-smoothing --gamma-wd-mult=0.0001 --beta-wd-mult=0.0001 --bias-wd-mult=0.01 --attempt=1 --start-epoch=1 -------------------------------------------------------------------------------- /examples/convert_tf2_to_tfl.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script for converting trained model from TensorFlow 2.0 to TensorFlow Lite. 3 | """ 4 | 5 | import argparse 6 | import numpy as np 7 | import tensorflow as tf 8 | from tf2cv.model_provider import get_model as tf2cv_get_model 9 | from tensorflow2.utils import prepare_model 10 | 11 | 12 | def parse_args(): 13 | """ 14 | Create python script parameters. 15 | 16 | Returns 17 | ------- 18 | ArgumentParser 19 | Resulted args. 20 | """ 21 | parser = argparse.ArgumentParser( 22 | description="Converting a model from TensorFlow 2.0 to TensorFlow Lite", 23 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 24 | parser.add_argument( 25 | "--model", 26 | type=str, 27 | required=True, 28 | help="type of model to use. see model_provider for options") 29 | parser.add_argument( 30 | "--input", 31 | type=str, 32 | help="path to model weights") 33 | parser.add_argument( 34 | "--input-shape", 35 | type=int, nargs=4, 36 | default=(1, 640, 480, 3), 37 | help="input tensor shape") 38 | parser.add_argument( 39 | "--output-dir", 40 | type=str, 41 | help="path to dir for output TFL file") 42 | 43 | args = parser.parse_args() 44 | return args 45 | 46 | 47 | def main(): 48 | """ 49 | Main body of script.
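Builds the requested tf2cv model (from a weights file via prepare_model if --input is given, otherwise with pretrained weights), converts it with tf.lite.TFLiteConverter, optionally writes the resulting .tflite file to --output-dir, and verifies the TFLite output against the original model on random input.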
50 | """ 51 | gpus = tf.config.experimental.list_physical_devices("GPU") 52 | if gpus: 53 | for gpu in gpus: 54 | tf.config.experimental.set_memory_growth(gpu, True) 55 | 56 | args = parse_args() 57 | 58 | if args.input: 59 | net_extra_kwargs = {"in_size": args.input_shape[1:3]} 60 | model = prepare_model( 61 | model_name=args.model, 62 | use_pretrained=False, 63 | pretrained_model_file_path=args.input, 64 | net_extra_kwargs=net_extra_kwargs) 65 | else: 66 | model = tf2cv_get_model( 67 | args.model, 68 | pretrained=True) 69 | 70 | x = tf.zeros(shape=args.input_shape) 71 | _ = model.predict(x) 72 | 73 | # Convert the model. 74 | converter = tf.lite.TFLiteConverter.from_keras_model(model) 75 | 76 | # converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_LATENCY] 77 | # converter.optimizations = [tf.lite.Optimize.DEFAULT] 78 | 79 | # dataset = np.load(args.dataset) 80 | # def representative_dataset_gen(): 81 | # for i in range(len(dataset)): 82 | # yield [dataset[i:i + 1]] 83 | 84 | # converter.representative_dataset = representative_dataset_gen 85 | # converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] 86 | # converter.inference_input_type = tf.int8 87 | # converter.inference_output_type = tf.int8 88 | 89 | # converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS] 90 | 91 | tflite_model = converter.convert() 92 | 93 | if args.output_dir is not None: 94 | open("{}/{}.tflite".format(args.output_dir, args.model), "wb").write(tflite_model) 95 | 96 | # Load TFLite model and allocate tensors. 97 | interpreter = tf.lite.Interpreter(model_content=tflite_model) 98 | interpreter.allocate_tensors() 99 | 100 | # Get input and output tensors. 101 | input_details = interpreter.get_input_details() 102 | output_details = interpreter.get_output_details() 103 | 104 | # Test the TensorFlow Lite model on random input data. 105 | input_shape = input_details[0]["shape"] 106 | input_data = np.array(np.random.random_sample(input_shape), dtype=np.float32) 107 | interpreter.set_tensor(input_details[0]["index"], input_data) 108 | 109 | interpreter.invoke() 110 | 111 | # The function `get_tensor()` returns a copy of the tensor data. 112 | # Use `tensor()` in order to get a pointer to the tensor. 113 | tflite_results = interpreter.get_tensor(output_details[0]["index"]) 114 | 115 | # Test the TensorFlow model on random input data. 116 | tf_results = model(tf.constant(input_data)) 117 | 118 | # Compare the result. 119 | for tf_result, tflite_result in zip(tf_results, tflite_results): 120 | np.testing.assert_almost_equal(tf_result[0], tflite_result, decimal=5) 121 | 122 | print("All OK.") 123 | 124 | 125 | if __name__ == "__main__": 126 | main() 127 | -------------------------------------------------------------------------------- /examples/demo_gl.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script for evaluating trained model on MXNet/Gluon / ImageNet-1K (demo mode). 3 | """ 4 | 5 | import math 6 | import argparse 7 | import numpy as np 8 | import cv2 9 | import mxnet as mx 10 | from gluoncv.data import ImageNet1kAttr 11 | from gluoncv2.model_provider import get_model as glcv2_get_model 12 | 13 | 14 | def parse_args(): 15 | """ 16 | Create python script parameters. 17 | 18 | Returns 19 | ------- 20 | ArgumentParser 21 | Resulted args. 
22 | """ 23 | parser = argparse.ArgumentParser( 24 | description="Evaluate an ImageNet-1K model on Gluon (demo mode)", 25 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 26 | parser.add_argument( 27 | "--model", 28 | type=str, 29 | required=True, 30 | help="type of model to use. see model_provider for options") 31 | parser.add_argument( 32 | "--image", 33 | type=str, 34 | required=True, 35 | help="path to testing image") 36 | parser.add_argument( 37 | "--num-gpus", 38 | type=int, 39 | default=0, 40 | help="number of gpus to use") 41 | parser.add_argument( 42 | "--input-size", 43 | type=int, 44 | default=224, 45 | help="size of the input for model") 46 | parser.add_argument( 47 | "--resize-inv-factor", 48 | type=float, 49 | default=0.875, 50 | help="inverted ratio for input image crop") 51 | parser.add_argument( 52 | "--mean-rgb", 53 | nargs=3, 54 | type=float, 55 | default=(0.485, 0.456, 0.406), 56 | help="Mean of RGB channels in the dataset") 57 | parser.add_argument( 58 | "--std-rgb", 59 | nargs=3, 60 | type=float, 61 | default=(0.229, 0.224, 0.225), 62 | help="STD of RGB channels in the dataset") 63 | 64 | args = parser.parse_args() 65 | return args 66 | 67 | 68 | def main(): 69 | """ 70 | Main body of script. 71 | """ 72 | args = parse_args() 73 | 74 | # Load a testing image: 75 | image = cv2.imread(args.image, flags=cv2.IMREAD_COLOR) 76 | # cv2.imshow("image", image) 77 | # cv2.waitKey(0) 78 | # cv2.destroyAllWindows() 79 | image = cv2.cvtColor(image, code=cv2.COLOR_BGR2RGB) 80 | 81 | # Resize image with keeping aspect ratio: 82 | resize_value = int(math.ceil(float(args.input_size) / args.resize_inv_factor)) 83 | h, w = image.shape[:2] 84 | if not ((w == resize_value and w <= h) or (h == resize_value and h <= w)): 85 | resize_size = (resize_value, int(resize_value * h / w)) if w < h else (int(resize_value * w / h), resize_value) 86 | image = cv2.resize(image, dsize=resize_size, interpolation=cv2.INTER_LINEAR) 87 | 88 | # Center crop of the image: 89 | h, w = image.shape[:2] 90 | th, tw = args.input_size, args.input_size 91 | ih = int(round(0.5 * (h - th))) 92 | jw = int(round(0.5 * (w - tw))) 93 | image = image[ih:(ih + th), jw:(jw + tw), :] 94 | # cv2.imshow("image2", image) 95 | # cv2.waitKey(0) 96 | # cv2.destroyAllWindows() 97 | 98 | # Convert image to a float tensor and normalize it: 99 | x = image.astype(np.float32) 100 | x = x / 255.0 101 | x = (x - np.array(args.mean_rgb)) / np.array(args.std_rgb) 102 | 103 | # Create MXNet context: 104 | mx_ctx = [mx.gpu(i) for i in range(args.num_gpus)] if args.num_gpus > 0 else [mx.cpu()] 105 | 106 | # Convert the tensor to a MXNet nd-array (on the same context as the model): 107 | x = x.transpose(2, 0, 1) 108 | x = np.expand_dims(x, axis=0) 109 | x = mx.nd.array(x, ctx=mx_ctx[0]) 110 | 111 | # Create model with loading pretrained weights: 112 | net = glcv2_get_model(args.model, pretrained=True, ctx=mx_ctx) 113 | 114 | # Evaluate the network: 115 | y = net(x) 116 | probs = mx.nd.softmax(y) 117 | 118 | # Show results: 119 | top_k = 5 120 | probs_np = probs.asnumpy().squeeze(axis=0) 121 | top_k_inds = probs_np.argsort()[::-1][:top_k] 122 | classes = ImageNet1kAttr().classes 123 | print("The input picture is classified to be:") 124 | for k in range(top_k): 125 | print("{idx}: [{class_name}], with probability {prob:.3f}.".format( 126 | idx=(k + 1), 127 | class_name=classes[top_k_inds[k]], 128 | prob=probs_np[top_k_inds[k]])) 129 | 130 | 131 | if __name__ == "__main__": 132 | main() 133 | --------------------------------------------------------------------------------
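As a usage sketch for the Gluon demo script above (assuming the gluoncv2 package is installed; the model name and image path are illustrative):
python3 examples/demo_gl.py --model=resnet18 --image=cat.jpg --num-gpus=0
--------------------------------------------------------------------------------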
/examples/demo_pt.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script for evaluating trained model on PyTorch / ImageNet-1K (demo mode). 3 | """ 4 | 5 | import math 6 | import argparse 7 | import numpy as np 8 | import cv2 9 | import torch 10 | from gluoncv.data import ImageNet1kAttr 11 | from pytorchcv.model_provider import get_model as ptcv_get_model 12 | 13 | 14 | def parse_args(): 15 | """ 16 | Create python script parameters. 17 | 18 | Returns 19 | ------- 20 | ArgumentParser 21 | Resulted args. 22 | """ 23 | parser = argparse.ArgumentParser( 24 | description="Evaluate an ImageNet-1K model on PyTorch (demo mode)", 25 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 26 | parser.add_argument( 27 | "--model", 28 | type=str, 29 | required=True, 30 | help="type of model to use. see model_provider for options") 31 | parser.add_argument( 32 | "--image", 33 | type=str, 34 | required=True, 35 | help="path to testing image") 36 | parser.add_argument( 37 | "--num-gpus", 38 | type=int, 39 | default=0, 40 | help="number of gpus to use") 41 | parser.add_argument( 42 | "--input-size", 43 | type=int, 44 | default=224, 45 | help="size of the input for model") 46 | parser.add_argument( 47 | "--resize-inv-factor", 48 | type=float, 49 | default=0.875, 50 | help="inverted ratio for input image crop") 51 | parser.add_argument( 52 | "--mean-rgb", 53 | nargs=3, 54 | type=float, 55 | default=(0.485, 0.456, 0.406), 56 | help="Mean of RGB channels in the dataset") 57 | parser.add_argument( 58 | "--std-rgb", 59 | nargs=3, 60 | type=float, 61 | default=(0.229, 0.224, 0.225), 62 | help="STD of RGB channels in the dataset") 63 | 64 | args = parser.parse_args() 65 | return args 66 | 67 | 68 | def main(): 69 | """ 70 | Main body of script. 
71 | """ 72 | args = parse_args() 73 | 74 | # Load a testing image: 75 | image = cv2.imread(args.image, flags=cv2.IMREAD_COLOR) 76 | # cv2.imshow("image", image) 77 | # cv2.waitKey(0) 78 | # cv2.destroyAllWindows() 79 | image = cv2.cvtColor(image, code=cv2.COLOR_BGR2RGB) 80 | 81 | # Resize image with keeping aspect ratio: 82 | resize_value = int(math.ceil(float(args.input_size) / args.resize_inv_factor)) 83 | h, w = image.shape[:2] 84 | if not ((w == resize_value and w <= h) or (h == resize_value and h <= w)): 85 | resize_size = (resize_value, int(resize_value * h / w)) if w < h else (int(resize_value * w / h), resize_value) 86 | image = cv2.resize(image, dsize=resize_size, interpolation=cv2.INTER_LINEAR) 87 | 88 | # Center crop of the image: 89 | h, w = image.shape[:2] 90 | th, tw = args.input_size, args.input_size 91 | ih = int(round(0.5 * (h - th))) 92 | jw = int(round(0.5 * (w - tw))) 93 | image = image[ih:(ih + th), jw:(jw + tw), :] 94 | # cv2.imshow("image2", image) 95 | # cv2.waitKey(0) 96 | # cv2.destroyAllWindows() 97 | 98 | # Convert image to a float tensor and normalize it: 99 | x = image.astype(np.float32) 100 | x = x / 255.0 101 | x = (x - np.array(args.mean_rgb)) / np.array(args.std_rgb) 102 | 103 | # Create `use_cuda` flag: 104 | use_cuda = (args.num_gpus > 0) 105 | 106 | # Convert the tensor to a Pytorch tensor: 107 | x = x.transpose(2, 0, 1) 108 | x = np.expand_dims(x, axis=0) 109 | x = torch.FloatTensor(x) 110 | if use_cuda: 111 | x = x.cuda() 112 | 113 | # Create model with loading pretrained weights: 114 | net = ptcv_get_model(args.model, pretrained=True) 115 | net.eval() 116 | if use_cuda: 117 | net = net.cuda() 118 | 119 | # Evaluate the network: 120 | y = net(x) 121 | probs = torch.nn.Softmax(dim=-1)(y) 122 | 123 | # Show results: 124 | top_k = 5 125 | probs_np = probs.cpu().detach().numpy().squeeze(axis=0) 126 | top_k_inds = probs_np.argsort()[::-1][:top_k] 127 | classes = ImageNet1kAttr().classes 128 | print("The input picture is classified to be:") 129 | for k in range(top_k): 130 | print("{idx}: [{class_name}], with probability {prob:.3f}.".format( 131 | idx=(k + 1), 132 | class_name=classes[top_k_inds[k]], 133 | prob=probs_np[top_k_inds[k]])) 134 | 135 | 136 | if __name__ == "__main__": 137 | main() 138 | -------------------------------------------------------------------------------- /examples/demo_tf2.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script for evaluating trained model on TensorFlow 2.0 / ImageNet-1K (demo mode). 3 | """ 4 | 5 | import math 6 | import argparse 7 | import numpy as np 8 | import cv2 9 | import tensorflow as tf 10 | from gluoncv.data import ImageNet1kAttr 11 | from tf2cv.model_provider import get_model as tf2cv_get_model 12 | 13 | 14 | def parse_args(): 15 | """ 16 | Create python script parameters. 17 | 18 | Returns 19 | ------- 20 | ArgumentParser 21 | Resulted args. 22 | """ 23 | parser = argparse.ArgumentParser( 24 | description="Evaluate an ImageNet-1K model on TensorFlow 2.0 (demo mode)", 25 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 26 | parser.add_argument( 27 | "--model", 28 | type=str, 29 | required=True, 30 | help="type of model to use. 
see model_provider for options") 31 | parser.add_argument( 32 | "--image", 33 | type=str, 34 | required=True, 35 | help="path to testing image") 36 | parser.add_argument( 37 | "--num-gpus", 38 | type=int, 39 | default=0, 40 | help="number of gpus to use") 41 | parser.add_argument( 42 | "--input-size", 43 | type=int, 44 | default=224, 45 | help="size of the input for model") 46 | parser.add_argument( 47 | "--resize-inv-factor", 48 | type=float, 49 | default=0.875, 50 | help="inverted ratio for input image crop") 51 | parser.add_argument( 52 | "--mean-rgb", 53 | nargs=3, 54 | type=float, 55 | default=(0.485, 0.456, 0.406), 56 | help="Mean of RGB channels in the dataset") 57 | parser.add_argument( 58 | "--std-rgb", 59 | nargs=3, 60 | type=float, 61 | default=(0.229, 0.224, 0.225), 62 | help="STD of RGB channels in the dataset") 63 | 64 | args = parser.parse_args() 65 | return args 66 | 67 | 68 | def main(): 69 | """ 70 | Main body of script. 71 | """ 72 | args = parse_args() 73 | 74 | # Load a testing image: 75 | image = cv2.imread(args.image, flags=cv2.IMREAD_COLOR) 76 | # cv2.imshow("image", image) 77 | # cv2.waitKey(0) 78 | # cv2.destroyAllWindows() 79 | image = cv2.cvtColor(image, code=cv2.COLOR_BGR2RGB) 80 | 81 | # Resize image with keeping aspect ratio: 82 | resize_value = int(math.ceil(float(args.input_size) / args.resize_inv_factor)) 83 | h, w = image.shape[:2] 84 | if not ((w == resize_value and w <= h) or (h == resize_value and h <= w)): 85 | resize_size = (resize_value, int(resize_value * h / w)) if w < h else (int(resize_value * w / h), resize_value) 86 | image = cv2.resize(image, dsize=resize_size, interpolation=cv2.INTER_LINEAR) 87 | 88 | # Center crop of the image: 89 | h, w = image.shape[:2] 90 | th, tw = args.input_size, args.input_size 91 | ih = int(round(0.5 * (h - th))) 92 | jw = int(round(0.5 * (w - tw))) 93 | image = image[ih:(ih + th), jw:(jw + tw), :] 94 | # cv2.imshow("image2", image) 95 | # cv2.waitKey(0) 96 | # cv2.destroyAllWindows() 97 | 98 | # Convert image to a float tensor and normalize it: 99 | x = image.astype(np.float32) 100 | x = x / 255.0 101 | x = (x - np.array(args.mean_rgb)) / np.array(args.std_rgb) 102 | 103 | # Set No-GPU mode: 104 | if args.num_gpus == 0: 105 | tf.config.set_visible_devices([], "GPU") 106 | 107 | # Convert the tensor to a TF tensor: 108 | x = np.expand_dims(x, axis=0) 109 | x = tf.convert_to_tensor(x, dtype=np.float32) 110 | 111 | # Create model with loading pretrained weights: 112 | net = tf2cv_get_model(args.model, pretrained=True) 113 | 114 | # Evaluate the network: 115 | y = net(x) 116 | probs = tf.nn.softmax(y) 117 | 118 | # Show results: 119 | top_k = 5 120 | probs_np = probs.numpy().squeeze(axis=0) 121 | top_k_inds = probs_np.argsort()[::-1][:top_k] 122 | classes = ImageNet1kAttr().classes 123 | print("The input picture is classified to be:") 124 | for k in range(top_k): 125 | print("{idx}: [{class_name}], with probability {prob:.3f}.".format( 126 | idx=(k + 1), 127 | class_name=classes[top_k_inds[k]], 128 | prob=probs_np[top_k_inds[k]])) 129 | 130 | 131 | if __name__ == "__main__": 132 | main() 133 | -------------------------------------------------------------------------------- /gluon/LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018-2021 Oleg Sémery 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software 
without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /gluon/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/gluon/__init__.py -------------------------------------------------------------------------------- /gluon/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/gluon/datasets/__init__.py -------------------------------------------------------------------------------- /gluon/datasets/asr_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | Automatic Speech Recognition (ASR) abstract dataset. 3 | """ 4 | 5 | __all__ = ['AsrDataset', 'asr_test_transform'] 6 | 7 | from mxnet.gluon.data import dataset 8 | from mxnet.gluon.data.vision import transforms 9 | from gluon.gluoncv2.models.jasper import NemoAudioReader 10 | 11 | 12 | class AsrDataset(dataset.Dataset): 13 | """ 14 | Automatic Speech Recognition (ASR) abstract dataset. 15 | 16 | Parameters 17 | ---------- 18 | root : str 19 | Path to the folder stored the dataset. 20 | mode : str 21 | 'train', 'val', 'test', or 'demo'. 22 | transform : func 23 | A function that takes data and transforms it. 24 | """ 25 | def __init__(self, 26 | root, 27 | mode, 28 | transform): 29 | super(AsrDataset, self).__init__() 30 | assert (mode in ("train", "val", "test", "demo")) 31 | self.root = root 32 | self.mode = mode 33 | self._transform = transform 34 | self.data = [] 35 | self.audio_reader = NemoAudioReader() 36 | 37 | def __getitem__(self, index): 38 | wav_file_path, label_text = self.data[index] 39 | audio_data = self.audio_reader.read_from_file(wav_file_path) 40 | audio_len = audio_data.shape[0] 41 | return (audio_data, audio_len), label_text 42 | 43 | def __len__(self): 44 | return len(self.data) 45 | 46 | 47 | def asr_test_transform(ds_metainfo): 48 | assert (ds_metainfo is not None) 49 | return transforms.Compose([]) 50 | -------------------------------------------------------------------------------- /gluon/datasets/cifar100_cls_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | CIFAR-100 classification dataset. 3 | """ 4 | 5 | import os 6 | from mxnet.gluon.data.vision import CIFAR100 7 | from .cifar10_cls_dataset import CIFAR10MetaInfo 8 | 9 | 10 | class CIFAR100Fine(CIFAR100): 11 | """ 12 | CIFAR-100 image classification dataset. 
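The class fixes fine_label=True, i.e. samples are labeled with the 100 fine-grained classes rather than the 20 coarse superclasses.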
13 | 14 | 15 | Parameters 16 | ---------- 17 | root : str, default $MXNET_HOME/datasets/cifar100 18 | Path to temp folder for storing data. 19 | mode : str, default 'train' 20 | 'train', 'val', or 'test'. 21 | transform : function, default None 22 | A user defined callback that transforms each sample. 23 | """ 24 | def __init__(self, 25 | root=os.path.join("~", ".mxnet", "datasets", "cifar100"), 26 | mode="train", 27 | transform=None): 28 | super(CIFAR100Fine, self).__init__( 29 | root=root, 30 | fine_label=True, 31 | train=(mode == "train"), 32 | transform=transform) 33 | 34 | 35 | class CIFAR100MetaInfo(CIFAR10MetaInfo): 36 | def __init__(self): 37 | super(CIFAR100MetaInfo, self).__init__() 38 | self.label = "CIFAR100" 39 | self.root_dir_name = "cifar100" 40 | self.dataset_class = CIFAR100Fine 41 | self.num_classes = 100 42 | -------------------------------------------------------------------------------- /gluon/datasets/dataset_metainfo.py: -------------------------------------------------------------------------------- 1 | """ 2 | Base dataset metainfo class. 3 | """ 4 | 5 | import os 6 | 7 | 8 | class DatasetMetaInfo(object): 9 | """ 10 | Base descriptor of dataset. 11 | """ 12 | 13 | def __init__(self): 14 | self.use_imgrec = False 15 | self.do_transform = False 16 | self.do_transform_first = True 17 | self.last_batch = None 18 | self.batchify_fn = None 19 | self.label = None 20 | self.root_dir_name = None 21 | self.root_dir_path = None 22 | self.dataset_class = None 23 | self.dataset_class_extra_kwargs = None 24 | self.num_training_samples = None 25 | self.in_channels = None 26 | self.num_classes = None 27 | self.input_image_size = None 28 | self.train_metric_capts = None 29 | self.train_metric_names = None 30 | self.train_metric_extra_kwargs = None 31 | self.train_use_weighted_sampler = False 32 | self.val_metric_capts = None 33 | self.val_metric_names = None 34 | self.val_metric_extra_kwargs = None 35 | self.test_metric_capts = None 36 | self.test_metric_names = None 37 | self.test_metric_extra_kwargs = None 38 | self.saver_acc_ind = None 39 | self.ml_type = None 40 | self.allow_hybridize = True 41 | self.train_net_extra_kwargs = {"root": os.path.join("~", ".mxnet", "models")} 42 | self.test_net_extra_kwargs = None 43 | self.load_ignore_extra = False 44 | self.loss_name = None 45 | self.loss_extra_kwargs = None 46 | 47 | def add_dataset_parser_arguments(self, 48 | parser, 49 | work_dir_path): 50 | """ 51 | Create python script parameters (for dataset specific metainfo). 52 | 53 | Parameters 54 | ---------- 55 | parser : ArgumentParser 56 | ArgumentParser instance. 57 | work_dir_path : str 58 | Path to working directory. 59 | """ 60 | parser.add_argument( 61 | "--data-dir", 62 | type=str, 63 | default=os.path.join(work_dir_path, self.root_dir_name), 64 | help="path to directory with {} dataset".format(self.label)) 65 | parser.add_argument( 66 | "--num-classes", 67 | type=int, 68 | default=self.num_classes, 69 | help="number of classes") 70 | parser.add_argument( 71 | "--in-channels", 72 | type=int, 73 | default=self.in_channels, 74 | help="number of input channels") 75 | parser.add_argument( 76 | "--net-root", 77 | type=str, 78 | default=os.path.join("~", ".mxnet", "models"), 79 | help="root for pretrained net cache") 80 | 81 | def update(self, 82 | args): 83 | """ 84 | Update dataset metainfo after user customizing. 85 | 86 | Parameters 87 | ---------- 88 | args : ArgumentParser 89 | Main script arguments. 
90 | """ 91 | self.root_dir_path = args.data_dir 92 | self.num_classes = args.num_classes 93 | self.in_channels = args.in_channels 94 | self.train_net_extra_kwargs["root"] = args.net_root 95 | 96 | def update_from_dataset(self, 97 | dataset): 98 | """ 99 | Update dataset metainfo after a dataset class instance creation. 100 | 101 | Parameters 102 | ---------- 103 | dataset : obj 104 | A dataset class instance. 105 | """ 106 | pass 107 | -------------------------------------------------------------------------------- /gluon/datasets/imagenet1k_rec_cls_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | ImageNet-1K classification dataset (via MXNet image record iterators). 3 | """ 4 | 5 | import os 6 | import mxnet as mx 7 | from .imagenet1k_cls_dataset import ImageNet1KMetaInfo, calc_val_resize_value 8 | 9 | 10 | class ImageNet1KRecMetaInfo(ImageNet1KMetaInfo): 11 | def __init__(self): 12 | super(ImageNet1KRecMetaInfo, self).__init__() 13 | self.use_imgrec = True 14 | self.label = "ImageNet1K_rec" 15 | self.root_dir_name = "imagenet_rec" 16 | self.dataset_class = None 17 | self.num_training_samples = 1281167 18 | self.train_imgrec_file_path = "train.rec" 19 | self.train_imgidx_file_path = "train.idx" 20 | self.val_imgrec_file_path = "val.rec" 21 | self.val_imgidx_file_path = "val.idx" 22 | self.train_imgrec_iter = imagenet_train_imgrec_iter 23 | self.val_imgrec_iter = imagenet_val_imgrec_iter 24 | 25 | 26 | def imagenet_train_imgrec_iter(ds_metainfo, 27 | batch_size, 28 | num_workers, 29 | mean_rgb=(123.68, 116.779, 103.939), 30 | std_rgb=(58.393, 57.12, 57.375), 31 | jitter_param=0.4, 32 | lighting_param=0.1): 33 | assert (isinstance(ds_metainfo.input_image_size, tuple) and len(ds_metainfo.input_image_size) == 2) 34 | imgrec_file_path = os.path.join(ds_metainfo.root_dir_path, ds_metainfo.train_imgrec_file_path) 35 | imgidx_file_path = os.path.join(ds_metainfo.root_dir_path, ds_metainfo.train_imgidx_file_path) 36 | data_shape = (ds_metainfo.in_channels,) + ds_metainfo.input_image_size 37 | kwargs = { 38 | "path_imgrec": imgrec_file_path, 39 | "path_imgidx": imgidx_file_path, 40 | "preprocess_threads": num_workers, 41 | "shuffle": True, 42 | "batch_size": batch_size, 43 | "data_shape": data_shape, 44 | "mean_r": mean_rgb[0], 45 | "mean_g": mean_rgb[1], 46 | "mean_b": mean_rgb[2], 47 | "std_r": std_rgb[0], 48 | "std_g": std_rgb[1], 49 | "std_b": std_rgb[2], 50 | "rand_mirror": True, 51 | "random_resized_crop": True, 52 | "max_aspect_ratio": (4.0 / 3.0), 53 | "min_aspect_ratio": (3.0 / 4.0), 54 | "max_random_area": 1, 55 | "min_random_area": 0.08, 56 | "brightness": jitter_param, 57 | "saturation": jitter_param, 58 | "contrast": jitter_param, 59 | "pca_noise": lighting_param 60 | } 61 | if ds_metainfo.aug_type == "aug0": 62 | pass 63 | elif ds_metainfo.aug_type == "aug1": 64 | kwargs["inter_method"] = 10 65 | elif ds_metainfo.aug_type == "aug2": 66 | kwargs["inter_method"] = 10 67 | kwargs["max_rotate_angle"] = 30 68 | kwargs["max_shear_ratio"] = 0.05 69 | else: 70 | raise RuntimeError("Unknown augmentation type: {}\n".format(ds_metainfo.aug_type)) 71 | return mx.io.ImageRecordIter(**kwargs) 72 | 73 | 74 | def imagenet_val_imgrec_iter(ds_metainfo, 75 | batch_size, 76 | num_workers, 77 | mean_rgb=(123.68, 116.779, 103.939), 78 | std_rgb=(58.393, 57.12, 57.375)): 79 | assert (isinstance(ds_metainfo.input_image_size, tuple) and len(ds_metainfo.input_image_size) == 2) 80 | imgrec_file_path = os.path.join(ds_metainfo.root_dir_path, 
ds_metainfo.val_imgrec_file_path) 81 | imgidx_file_path = os.path.join(ds_metainfo.root_dir_path, ds_metainfo.val_imgidx_file_path) 82 | data_shape = (ds_metainfo.in_channels,) + ds_metainfo.input_image_size 83 | resize_value = calc_val_resize_value( 84 | input_image_size=ds_metainfo.input_image_size, 85 | resize_inv_factor=ds_metainfo.resize_inv_factor) 86 | return mx.io.ImageRecordIter( 87 | path_imgrec=imgrec_file_path, 88 | path_imgidx=imgidx_file_path, 89 | preprocess_threads=num_workers, 90 | shuffle=False, 91 | batch_size=batch_size, 92 | resize=resize_value, 93 | data_shape=data_shape, 94 | mean_r=mean_rgb[0], 95 | mean_g=mean_rgb[1], 96 | mean_b=mean_rgb[2], 97 | std_r=std_rgb[0], 98 | std_g=std_rgb[1], 99 | std_b=std_rgb[2]) 100 | -------------------------------------------------------------------------------- /gluon/datasets/seg_dataset.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import mxnet as mx 4 | from PIL import Image, ImageOps, ImageFilter 5 | from mxnet.gluon.data import dataset 6 | 7 | 8 | class SegDataset(dataset.Dataset): 9 | """ 10 | Segmentation base dataset. 11 | 12 | Parameters 13 | ---------- 14 | root : str 15 | Path to data folder. 16 | mode : str 17 | 'train', 'val', 'test', or 'demo'. 18 | transform : callable 19 | A function that transforms the image. 20 | """ 21 | def __init__(self, 22 | root, 23 | mode, 24 | transform, 25 | base_size=520, 26 | crop_size=480): 27 | assert (mode in ("train", "val", "test", "demo")) 28 | self.root = root 29 | self.mode = mode 30 | self.transform = transform 31 | self.base_size = base_size 32 | self.crop_size = crop_size 33 | 34 | def _val_sync_transform(self, image, mask, ctx=mx.cpu()): 35 | short_size = self.crop_size 36 | w, h = image.size 37 | if w > h: 38 | oh = short_size 39 | ow = int(float(w * oh) / h) 40 | else: 41 | ow = short_size 42 | oh = int(float(h * ow) / w) 43 | image = image.resize((ow, oh), Image.BILINEAR) 44 | mask = mask.resize((ow, oh), Image.NEAREST) 45 | # Center crop: 46 | outsize = self.crop_size 47 | x1 = int(round(0.5 * (ow - outsize))) 48 | y1 = int(round(0.5 * (oh - outsize))) 49 | image = image.crop((x1, y1, x1 + outsize, y1 + outsize)) 50 | mask = mask.crop((x1, y1, x1 + outsize, y1 + outsize)) 51 | # Final transform: 52 | image, mask = self._img_transform(image, ctx=ctx), self._mask_transform(mask, ctx=ctx) 53 | return image, mask 54 | 55 | def _train_sync_transform(self, image, mask, ctx=mx.cpu()): 56 | # Random mirror: 57 | if random.random() < 0.5: 58 | image = image.transpose(Image.FLIP_LEFT_RIGHT) 59 | mask = mask.transpose(Image.FLIP_LEFT_RIGHT) 60 | # Random scale (short edge): 61 | short_size = random.randint(int(self.base_size * 0.5), int(self.base_size * 2.0)) 62 | w, h = image.size 63 | if w > h: 64 | oh = short_size 65 | ow = int(float(w * oh) / h) 66 | else: 67 | ow = short_size 68 | oh = int(float(h * ow) / w) 69 | image = image.resize((ow, oh), Image.BILINEAR) 70 | mask = mask.resize((ow, oh), Image.NEAREST) 71 | # Pad crop: 72 | crop_size = self.crop_size 73 | if short_size < crop_size: 74 | padh = crop_size - oh if oh < crop_size else 0 75 | padw = crop_size - ow if ow < crop_size else 0 76 | image = ImageOps.expand(image, border=(0, 0, padw, padh), fill=0) 77 | mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0) 78 | # Random crop crop_size: 79 | w, h = image.size 80 | x1 = random.randint(0, w - crop_size) 81 | y1 = random.randint(0, h - crop_size) 82 | image = image.crop((x1, y1, 
x1 + crop_size, y1 + crop_size)) 83 | mask = mask.crop((x1, y1, x1 + crop_size, y1 + crop_size)) 84 | # Gaussian blur as in PSP: 85 | if random.random() < 0.5: 86 | image = image.filter(ImageFilter.GaussianBlur(radius=random.random())) 87 | # Final transform: 88 | image, mask = self._img_transform(image, ctx=ctx), self._mask_transform(mask, ctx=ctx) 89 | return image, mask 90 | 91 | @staticmethod 92 | def _img_transform(image, ctx=mx.cpu()): 93 | return mx.nd.array(np.array(image), ctx=ctx) 94 | 95 | @staticmethod 96 | def _mask_transform(mask, ctx=mx.cpu()): 97 | return mx.nd.array(np.array(mask), ctx=ctx, dtype=np.int32) 98 | -------------------------------------------------------------------------------- /gluon/datasets/svhn_cls_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | SVHN classification dataset. 3 | """ 4 | 5 | import os 6 | import numpy as np 7 | import mxnet as mx 8 | from mxnet import gluon 9 | from mxnet.gluon.utils import download, check_sha1 10 | from .cifar10_cls_dataset import CIFAR10MetaInfo 11 | 12 | 13 | class SVHN(gluon.data.dataset._DownloadedDataset): 14 | """ 15 | SVHN image classification dataset from http://ufldl.stanford.edu/housenumbers/. 16 | Each sample is an image (in 3D NDArray) with shape (32, 32, 3). 17 | Note: The SVHN dataset assigns the label `10` to the digit `0`. However, in this Dataset, 18 | we assign the label `0` to the digit `0`. 19 | 20 | Parameters 21 | ---------- 22 | root : str, default $MXNET_HOME/datasets/svhn 23 | Path to temp folder for storing data. 24 | mode : str, default 'train' 25 | 'train', 'val', or 'test'. 26 | transform : function, default None 27 | A user defined callback that transforms each sample. 28 | """ 29 | def __init__(self, 30 | root=os.path.join("~", ".mxnet", "datasets", "svhn"), 31 | mode="train", 32 | transform=None): 33 | self._mode = mode 34 | self._train_data = [("http://ufldl.stanford.edu/housenumbers/train_32x32.mat", "train_32x32.mat", 35 | "e6588cae42a1a5ab5efe608cc5cd3fb9aaffd674")] 36 | self._test_data = [("http://ufldl.stanford.edu/housenumbers/test_32x32.mat", "test_32x32.mat", 37 | "29b312382ca6b9fba48d41a7b5c19ad9a5462b20")] 38 | super(SVHN, self).__init__(root, transform) 39 | 40 | def _get_data(self): 41 | if any(not os.path.exists(path) or not check_sha1(path, sha1) for path, sha1 in 42 | ((os.path.join(self._root, name), sha1) for _, name, sha1 in self._train_data + self._test_data)): 43 | for url, _, sha1 in self._train_data + self._test_data: 44 | download(url=url, path=self._root, sha1_hash=sha1) 45 | 46 | if self._mode == "train": 47 | data_files = self._train_data[0] 48 | else: 49 | data_files = self._test_data[0] 50 | 51 | import scipy.io as sio 52 | 53 | loaded_mat = sio.loadmat(os.path.join(self._root, data_files[1])) 54 | 55 | data = loaded_mat["X"] 56 | data = np.transpose(data, (3, 0, 1, 2)) 57 | self._data = mx.nd.array(data, dtype=data.dtype) 58 | 59 | self._label = loaded_mat["y"].astype(np.int32).squeeze() 60 | np.place(self._label, self._label == 10, 0) 61 | 62 | 63 | class SVHNMetaInfo(CIFAR10MetaInfo): 64 | def __init__(self): 65 | super(SVHNMetaInfo, self).__init__() 66 | self.label = "SVHN" 67 | self.root_dir_name = "svhn" 68 | self.dataset_class = SVHN 69 | self.num_training_samples = 73257 70 | -------------------------------------------------------------------------------- /gluon/distillation.py: -------------------------------------------------------------------------------- 1 | """ 2 | DNN distillation routines. 
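The module currently provides the discriminator and adversarial loss used for MEAL V2 style distillation.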
3 | """ 4 | 5 | __all__ = ['MealDiscriminator', 'MealAdvLoss'] 6 | 7 | from mxnet.gluon import nn, HybridBlock 8 | from .gluoncv2.models.common import conv1x1, conv1x1_block 9 | from mxnet.gluon.loss import SigmoidBinaryCrossEntropyLoss 10 | 11 | 12 | class MealDiscriminator(HybridBlock): 13 | """ 14 | MEALv2 discriminator. 15 | 16 | Parameters 17 | ---------- 18 | classes : int, default 1000 19 | Number of classification classes. 20 | bn_use_global_stats : bool, default False 21 | Whether global moving statistics is used instead of local batch-norm for BatchNorm layers. 22 | bn_cudnn_off : bool, default False 23 | Whether to disable CUDNN batch normalization operator. 24 | """ 25 | def __init__(self, 26 | classes=1000, 27 | bn_use_global_stats=False, 28 | bn_cudnn_off=False, 29 | **kwargs): 30 | super(MealDiscriminator, self).__init__(**kwargs) 31 | in_channels = classes 32 | channels = [200, 40, 8] 33 | 34 | with self.name_scope(): 35 | self.features = nn.HybridSequential(prefix="") 36 | for out_channels in channels: 37 | self.features.add(conv1x1_block( 38 | in_channels=in_channels, 39 | out_channels=out_channels, 40 | bn_use_global_stats=bn_use_global_stats, 41 | bn_cudnn_off=bn_cudnn_off)) 42 | in_channels = out_channels 43 | 44 | self.output = nn.HybridSequential(prefix="") 45 | self.output.add(conv1x1( 46 | in_channels=in_channels, 47 | out_channels=1, 48 | use_bias=True)) 49 | self.output.add(nn.Flatten()) 50 | 51 | def hybrid_forward(self, F, x): 52 | x = x.expand_dims(-1).expand_dims(-1) 53 | x = self.features(x) 54 | x = self.output(x) 55 | x = x.squeeze(1) 56 | return x 57 | 58 | 59 | class MealAdvLoss(SigmoidBinaryCrossEntropyLoss): 60 | """ 61 | MEALv2 adversarial loss. 62 | 63 | Parameters 64 | ---------- 65 | from_sigmoid : bool, default is `False` 66 | Whether the input is from the output of sigmoid. Set this to false will make 67 | the loss calculate sigmoid and BCE together, which is more numerically 68 | stable through log-sum-exp trick. 69 | weight : float or None 70 | Global scalar weight for loss. 71 | batch_axis : int, default 0 72 | The axis that represents mini-batch. 
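Note: both inputs are expected to be discriminator outputs; `pred` is driven towards the 'fake' target (zeros) and `label` towards the 'real' target (ones).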
73 | """ 74 | def __init__(self, 75 | **kwargs): 76 | super(MealAdvLoss, self).__init__(**kwargs) 77 | 78 | def hybrid_forward(self, F, pred, label, sample_weight=None, pos_weight=None): 79 | z_pred = F.zeros_like(pred) 80 | loss_pred = super(MealAdvLoss, self).hybrid_forward(F, pred, z_pred) 81 | 82 | z_label = F.ones_like(label) 83 | loss_label = super(MealAdvLoss, self).hybrid_forward(F, label, z_label) 84 | 85 | return loss_pred + loss_label 86 | 87 | 88 | def _test(): 89 | import numpy as np 90 | import mxnet as mx 91 | 92 | model = MealDiscriminator 93 | net = model() 94 | 95 | ctx = mx.cpu() 96 | net.initialize(ctx=ctx) 97 | 98 | # net.hybridize() 99 | net_params = net.collect_params() 100 | weight_count = 0 101 | for param in net_params.values(): 102 | if (param.shape is None) or (not param._differentiable): 103 | continue 104 | weight_count += np.prod(param.shape) 105 | print("m={}, {}".format(model.__name__, weight_count)) 106 | # assert (model != MealDiscriminator or weight_count == 208834) 107 | 108 | batch = 14 109 | classes = 1000 110 | x = mx.nd.random.normal(shape=(batch, classes), ctx=ctx) 111 | y = net(x) 112 | assert (y.shape == (batch,)) 113 | 114 | loss = MealAdvLoss() 115 | z = loss(y, 1 - y) 116 | print(z) 117 | pass 118 | 119 | 120 | if __name__ == "__main__": 121 | _test() 122 | -------------------------------------------------------------------------------- /gluon/gluoncv2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/gluon/gluoncv2/__init__.py -------------------------------------------------------------------------------- /gluon/gluoncv2/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/gluon/gluoncv2/models/__init__.py -------------------------------------------------------------------------------- /gluon/gluoncv2/models/jasperdr.py: -------------------------------------------------------------------------------- 1 | """ 2 | Jasper DR (Dense Residual) for ASR, implemented in Gluon. 3 | Original paper: 'Jasper: An End-to-End Convolutional Neural Acoustic Model,' https://arxiv.org/abs/1904.03288. 4 | """ 5 | 6 | __all__ = ['jasperdr10x5_en', 'jasperdr10x5_en_nr'] 7 | 8 | from .jasper import get_jasper 9 | 10 | 11 | def jasperdr10x5_en(classes=29, **kwargs): 12 | """ 13 | Jasper DR 10x5 model for English language from 'Jasper: An End-to-End Convolutional Neural Acoustic Model,' 14 | https://arxiv.org/abs/1904.03288. 15 | 16 | Parameters 17 | ---------- 18 | classes : int, default 29 19 | Number of classification classes (number of graphemes). 20 | pretrained : bool, default False 21 | Whether to load the pretrained weights for model. 22 | ctx : Context, default CPU 23 | The context in which to load the pretrained weights. 24 | root : str, default '~/.mxnet/models' 25 | Location for keeping the model parameters. 26 | """ 27 | return get_jasper(classes=classes, version=("jasper", "10x5"), use_dr=True, model_name="jasperdr10x5_en", 28 | **kwargs) 29 | 30 | 31 | def jasperdr10x5_en_nr(classes=29, **kwargs): 32 | """ 33 | Jasper DR 10x5 model for English language (with presence of noise) from 'Jasper: An End-to-End Convolutional Neural 34 | Acoustic Model,' https://arxiv.org/abs/1904.03288. 
35 | 36 | Parameters 37 | ---------- 38 | classes : int, default 29 39 | Number of classification classes (number of graphemes). 40 | pretrained : bool, default False 41 | Whether to load the pretrained weights for model. 42 | ctx : Context, default CPU 43 | The context in which to load the pretrained weights. 44 | root : str, default '~/.mxnet/models' 45 | Location for keeping the model parameters. 46 | """ 47 | return get_jasper(classes=classes, version=("jasper", "10x5"), use_dr=True, model_name="jasperdr10x5_en_nr", 48 | **kwargs) 49 | 50 | 51 | def _calc_width(net): 52 | import numpy as np 53 | net_params = net.collect_params() 54 | weight_count = 0 55 | for param in net_params.values(): 56 | if (param.shape is None) or (not param._differentiable): 57 | continue 58 | weight_count += np.prod(param.shape) 59 | return weight_count 60 | 61 | 62 | def _test(): 63 | import numpy as np 64 | import mxnet as mx 65 | 66 | pretrained = False 67 | audio_features = 64 68 | 69 | models = [ 70 | jasperdr10x5_en, 71 | jasperdr10x5_en_nr, 72 | ] 73 | 74 | for model in models: 75 | 76 | net = model( 77 | in_channels=audio_features, 78 | pretrained=pretrained) 79 | 80 | ctx = mx.cpu() 81 | if not pretrained: 82 | net.initialize(ctx=ctx) 83 | 84 | # net.hybridize() 85 | weight_count = _calc_width(net) 86 | print("m={}, {}".format(model.__name__, weight_count)) 87 | assert (model != jasperdr10x5_en or weight_count == 332632349) 88 | assert (model != jasperdr10x5_en_nr or weight_count == 332632349) 89 | 90 | batch = 3 91 | seq_len = np.random.randint(60, 150, batch) 92 | seq_len_max = seq_len.max() + 2 93 | x = mx.nd.random.normal(shape=(batch, audio_features, seq_len_max), ctx=ctx) 94 | x_len = mx.nd.array(seq_len, ctx=ctx, dtype=np.int64) 95 | 96 | y, y_len = net(x, x_len) 97 | assert (y.shape[:2] == (batch, net.classes)) 98 | assert (y.shape[2] in [seq_len_max // 2, seq_len_max // 2 + 1]) 99 | 100 | 101 | if __name__ == "__main__": 102 | _test() 103 | -------------------------------------------------------------------------------- /gluon/gluoncv2/models/others/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/gluon/gluoncv2/models/others/__init__.py -------------------------------------------------------------------------------- /gluon/losses.py: -------------------------------------------------------------------------------- 1 | """ 2 | Loss functions. 3 | """ 4 | 5 | __all__ = ['SegSoftmaxCrossEntropyLoss', 'MixSoftmaxCrossEntropyLoss'] 6 | 7 | from mxnet.gluon.loss import Loss, _reshape_like 8 | 9 | 10 | class SegSoftmaxCrossEntropyLoss(Loss): 11 | """ 12 | SoftmaxCrossEntropyLoss with ignore labels (for segmentation task). 13 | 14 | Parameters 15 | ---------- 16 | axis : int, default -1 17 | The axis to sum over when computing softmax and entropy. 18 | sparse_label : bool, default True 19 | Whether label is an integer array instead of probability distribution. 20 | from_logits : bool, default False 21 | Whether input is a log probability (usually from log_softmax) instead of unnormalized numbers. 22 | weight : float or None 23 | Global scalar weight for loss. 24 | batch_axis : int, default 0 25 | The axis that represents mini-batch. 26 | ignore_label : int, default -1 27 | The label to ignore. 28 | size_average : bool, default True 29 | Whether to re-scale loss with regard to ignored labels.
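Pixels labeled with `ignore_label` are excluded from the loss computation.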
30 | """ 31 | def __init__(self, 32 | sparse_label=True, 33 | batch_axis=0, 34 | ignore_label=-1, 35 | size_average=True, 36 | **kwargs): 37 | super(SegSoftmaxCrossEntropyLoss, self).__init__(None, batch_axis, **kwargs) 38 | self._sparse_label = sparse_label 39 | self._ignore_label = ignore_label 40 | self._size_average = size_average 41 | 42 | def hybrid_forward(self, F, pred, label): 43 | """ 44 | Compute loss. 45 | """ 46 | softmaxout = F.SoftmaxOutput( 47 | pred, 48 | label.astype(pred.dtype), 49 | ignore_label=self._ignore_label, 50 | multi_output=self._sparse_label, 51 | use_ignore=True, 52 | normalization=("valid" if self._size_average else "null")) 53 | if self._sparse_label: 54 | loss = -F.pick(F.log(softmaxout), label, axis=1, keepdims=True) 55 | else: 56 | label = _reshape_like(F, label, pred) 57 | loss = -F.sum(F.log(softmaxout) * label, axis=-1, keepdims=True) 58 | loss = F.where(label.expand_dims(axis=1) == self._ignore_label, F.zeros_like(loss), loss) 59 | return F.mean(loss, axis=self._batch_axis, exclude=True) 60 | 61 | 62 | class MixSoftmaxCrossEntropyLoss(SegSoftmaxCrossEntropyLoss): 63 | """ 64 | SegSoftmaxCrossEntropyLoss with auxiliary loss support. 65 | 66 | Parameters 67 | ---------- 68 | aux : bool, default True 69 | Whether to use auxiliary loss. 70 | aux_weight : float, default 0.2 71 | The weight for aux loss. 72 | ignore_label : int, default -1 73 | The label to ignore. 74 | """ 75 | def __init__(self, 76 | aux=True, 77 | aux_weight=0.2, 78 | ignore_label=-1, 79 | **kwargs): 80 | super(MixSoftmaxCrossEntropyLoss, self).__init__(ignore_label=ignore_label, **kwargs) 81 | self.aux = aux 82 | self.aux_weight = aux_weight 83 | 84 | def _aux_forward(self, F, pred1, pred2, label): 85 | """ 86 | Compute loss including auxiliary output. 87 | """ 88 | loss1 = super(MixSoftmaxCrossEntropyLoss, self).hybrid_forward(F, pred1, label) 89 | loss2 = super(MixSoftmaxCrossEntropyLoss, self).hybrid_forward(F, pred2, label) 90 | return loss1 + self.aux_weight * loss2 91 | 92 | def hybrid_forward(self, F, preds, label, **kwargs): 93 | """ 94 | Compute loss. 95 | """ 96 | if self.aux: 97 | return self._aux_forward(F, preds[0], preds[1], label) 98 | else: 99 | return super(MixSoftmaxCrossEntropyLoss, self).hybrid_forward(F, preds, label) 100 | -------------------------------------------------------------------------------- /gluon/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/gluon/metrics/__init__.py -------------------------------------------------------------------------------- /gluon/metrics/asr_metrics.py: -------------------------------------------------------------------------------- 1 | """ 2 | Evaluation Metrics for Automatic Speech Recognition (ASR). 3 | """ 4 | 5 | import mxnet as mx 6 | 7 | __all__ = ['WER'] 8 | 9 | 10 | class WER(mx.metric.EvalMetric): 11 | """ 12 | Computes Word Error Rate (WER) for Automatic Speech Recognition (ASR). 13 | 14 | Parameters 15 | ---------- 16 | vocabulary : list of str 17 | Vocabulary of the dataset. 18 | name : str, default 'wer' 19 | Name of this metric instance for display. 20 | output_names : list of str, or None, default None 21 | Name of predictions that should be used when updating with update_dict. 22 | By default include all predictions. 23 | label_names : list of str, or None, default None 24 | Name of labels that should be used when updating with update_dict.
25 | By default include all labels. 26 | """ 27 | def __init__(self, 28 | vocabulary, 29 | name="wer", 30 | output_names=None, 31 | label_names=None): 32 | super(WER, self).__init__( 33 | name=name, 34 | output_names=output_names, 35 | label_names=label_names, 36 | has_global_stats=True) 37 | self.vocabulary = vocabulary 38 | self.ctc_decoder = CtcDecoder(vocabulary=vocabulary) 39 | 40 | def update(self, labels, preds): 41 | """ 42 | Updates the internal evaluation result. 43 | 44 | Parameters 45 | ---------- 46 | labels : list of `NDArray` 47 | The labels of the data. 48 | preds : list of `NDArray` 49 | Predicted values. 50 | """ 51 | import editdistance 52 | 53 | for labels_i, preds_i in zip(labels, preds): 54 | labels_code = labels_i.asnumpy() 55 | labels_i = [] 56 | for label_code in labels_code: 57 | label_text = "".join([self.ctc_decoder.labels_map[c] for c in label_code]) 58 | labels_i.append(label_text) 59 | 60 | preds_i = preds_i[0] 61 | greedy_predictions = preds_i.swapaxes(1, 2).log_softmax(axis=-1).argmax(axis=-1, keepdims=False).asnumpy() 62 | preds_i = self.ctc_decoder(greedy_predictions) 63 | 64 | assert (len(labels_i) == len(preds_i)) 65 | for pred, label in zip(preds_i, labels_i): 66 | pred = pred.split() 67 | label = label.split() 68 | 69 | word_error_count = editdistance.eval(label, pred) 70 | word_count = max(len(label), len(pred)) 71 | 72 | assert (word_error_count <= word_count) 73 | 74 | self.sum_metric += word_error_count 75 | self.global_sum_metric += word_error_count 76 | self.num_inst += word_count 77 | self.global_num_inst += word_count 78 | 79 | 80 | class CtcDecoder(object): 81 | """ 82 | CTC decoder (to decode a sequence of labels to words). 83 | 84 | Parameters 85 | ---------- 86 | vocabulary : list of str 87 | Vocabulary of the dataset. 88 | """ 89 | def __init__(self, 90 | vocabulary): 91 | super().__init__() 92 | self.blank_id = len(vocabulary) 93 | self.labels_map = dict([(i, vocabulary[i]) for i in range(len(vocabulary))]) 94 | 95 | def __call__(self, 96 | predictions): 97 | """ 98 | Decode a sequence of labels to words. 99 | 100 | Parameters 101 | ---------- 102 | predictions : np.array of int or list(list(int)) 103 | Tensor with predicted labels. 104 | 105 | Returns 106 | ------- 107 | list of str 108 | Words. 109 | """ 110 | hypotheses = [] 111 | for prediction in predictions: 112 | decoded_prediction = [] 113 | previous = self.blank_id 114 | for p in prediction: 115 | if (p != previous or previous == self.blank_id) and p != self.blank_id: 116 | decoded_prediction.append(p) 117 | previous = p 118 | hypothesis = "".join([self.labels_map[c] for c in decoded_prediction]) 119 | hypotheses.append(hypothesis) 120 | return hypotheses 121 | -------------------------------------------------------------------------------- /gluon/metrics/cls_metrics.py: -------------------------------------------------------------------------------- 1 | """ 2 | Evaluation Metrics for Image Classification. 3 | """ 4 | 5 | import mxnet as mx 6 | 7 | __all__ = ['Top1Error', 'TopKError'] 8 | 9 | 10 | class Top1Error(mx.metric.Accuracy): 11 | """ 12 | Computes top-1 error (inverted accuracy classification score). 13 | 14 | Parameters 15 | ---------- 16 | axis : int, default 1 17 | The axis that represents classes. 18 | name : str, default 'top_1_error' 19 | Name of this metric instance for display. 20 | output_names : list of str, or None, default None 21 | Name of predictions that should be used when updating with update_dict. 22 | By default include all predictions. 
23 | label_names : list of str, or None, default None 24 | Name of labels that should be used when updating with update_dict. 25 | By default include all labels. 26 | """ 27 | def __init__(self, 28 | axis=1, 29 | name="top_1_error", 30 | output_names=None, 31 | label_names=None): 32 | super(Top1Error, self).__init__( 33 | axis=axis, 34 | name=name, 35 | output_names=output_names, 36 | label_names=label_names) 37 | 38 | def get(self): 39 | """ 40 | Gets the current evaluation result. 41 | 42 | Returns 43 | ------- 44 | names : list of str 45 | Name of the metrics. 46 | values : list of float 47 | Value of the evaluations. 48 | """ 49 | if self.num_inst == 0: 50 | return self.name, float("nan") 51 | else: 52 | return self.name, 1.0 - self.sum_metric / self.num_inst 53 | 54 | 55 | class TopKError(mx.metric.TopKAccuracy): 56 | """ 57 | Computes top-k error (inverted top k predictions accuracy). 58 | 59 | Parameters 60 | ---------- 61 | top_k : int, default 1 62 | Number of top predictions within which the target must appear. 63 | name : str, default 'top_k_error' 64 | Name of this metric instance for display. 65 | output_names : list of str, or None, default None 66 | Name of predictions that should be used when updating with update_dict. 67 | By default include all predictions. 68 | label_names : list of str, or None, default None 69 | Name of labels that should be used when updating with update_dict. 70 | By default include all labels. 71 | """ 72 | def __init__(self, 73 | top_k=1, 74 | name="top_k_error", 75 | output_names=None, 76 | label_names=None): 77 | name_ = name 78 | super(TopKError, self).__init__( 79 | top_k=top_k, 80 | name=name, 81 | output_names=output_names, 82 | label_names=label_names) 83 | self.name = name_.replace("_k_", "_{}_".format(top_k)) 84 | 85 | def get(self): 86 | """ 87 | Gets the current evaluation result. 88 | 89 | Returns 90 | ------- 91 | names : list of str 92 | Name of the metrics. 93 | values : list of float 94 | Value of the evaluations. 95 | """ 96 | if self.num_inst == 0: 97 | return self.name, float("nan") 98 | else: 99 | return self.name, 1.0 - self.sum_metric / self.num_inst 100 | -------------------------------------------------------------------------------- /gluon/metrics/metrics.py: -------------------------------------------------------------------------------- 1 | """ 2 | Evaluation metrics for common tasks. 3 | """ 4 | 5 | import mxnet as mx 6 | if mx.__version__ < "2.0.0": 7 | from mxnet.metric import EvalMetric 8 | else: 9 | from mxnet.gluon.metric import EvalMetric 10 | 11 | __all__ = ['LossValue'] 12 | 13 | 14 | class LossValue(EvalMetric): 15 | """ 16 | Computes a simple pass-through metric that reports the average loss value. 17 | 18 | Parameters 19 | ---------- 20 | name : str 21 | Name of this metric instance for display. 22 | output_names : list of str, or None 23 | Name of predictions that should be used when updating with update_dict. 24 | By default include all predictions. 25 | label_names : list of str, or None 26 | Name of labels that should be used when updating with update_dict. 27 | By default include all labels. 28 | """ 29 | def __init__(self, 30 | name="loss", 31 | output_names=None, 32 | label_names=None): 33 | super(LossValue, self).__init__( 34 | name, 35 | output_names=output_names, 36 | label_names=label_names) 37 | 38 | def update(self, labels, preds): 39 | """ 40 | Updates the internal evaluation result. 41 | 42 | Parameters 43 | ---------- 44 | labels : None 45 | Unused argument. 46 | preds : list of `NDArray` 47 | Loss values.
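For intuition, the update() body that follows keeps a running mean: each call adds one averaged loss value to sum_metric and bumps num_inst by one, so get() reports the mean loss over all updates. A pure-Python sketch of that bookkeeping, with made-up loss values:

losses_per_update = [[0.7, 0.5], [0.6, 0.4]]  # stand-ins for the ll.mean().asscalar() values
sum_metric = 0.0
num_inst = 0
for preds in losses_per_update:
    sum_metric += sum(preds) / len(preds)  # one averaged loss per update() call
    num_inst += 1
print(sum_metric / num_inst)  # 0.55 -- what LossValue.get() would report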
48 | """ 49 | loss = sum([ll.mean().asscalar() for ll in preds]) / len(preds) 50 | self.sum_metric += loss 51 | self.global_sum_metric += loss 52 | self.num_inst += 1 53 | self.global_num_inst += 1 54 | -------------------------------------------------------------------------------- /gluon/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | license_file = LICENSE.txt 3 | 4 | [bdist_wheel] 5 | universal=1 -------------------------------------------------------------------------------- /gluon/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from os import path 3 | from io import open 4 | 5 | here = path.abspath(path.dirname(__file__)) 6 | with open(path.join(here, 'README.md'), encoding='utf-8') as f: 7 | long_description = f.read() 8 | 9 | setup( 10 | name='gluoncv2', 11 | version='0.0.64', 12 | description='Image classification and segmentation models for Gluon', 13 | license='MIT', 14 | long_description=long_description, 15 | long_description_content_type='text/markdown', 16 | url='https://github.com/osmr/imgclsmob', 17 | author='Oleg Sémery', 18 | author_email='osemery@gmail.com', 19 | classifiers=[ 20 | 'Development Status :: 3 - Alpha', 21 | 'Intended Audience :: Science/Research', 22 | 'License :: OSI Approved :: MIT License', 23 | 'Operating System :: OS Independent', 24 | 'Programming Language :: Python', 25 | 'Topic :: Scientific/Engineering :: Image Recognition', 26 | ], 27 | keywords='machine-learning deep-learning neuralnetwork image-classification mxnet gluon imagenet cifar svhn vgg ' 28 | 'resnet pyramidnet diracnet densenet condensenet wrn drn dpn darknet fishnet espnetv2 hrnet xdensnet ' 29 | 'squeezenet squeezenext shufflenet menet mobilenet igcv3 mnasnet darts xception inception polynet nasnet ' 30 | 'pnasnet ror proxylessnas dianet efficientnet mixnet image-segmentation voc ade20k cityscapes coco pspnet ' 31 | 'deeplabv3 fcn', 32 | packages=find_packages(exclude=['datasets', 'metrics', 'others', '*.others', 'others.*', '*.others.*']), 33 | include_package_data=True, 34 | install_requires=['numpy'], 35 | ) 36 | -------------------------------------------------------------------------------- /gluon/weighted_random_sampler.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dataset weighted random sampler. 3 | """ 4 | 5 | __all__ = ['WeightedRandomSampler'] 6 | 7 | import numpy as np 8 | import mxnet as mx 9 | from mxnet.gluon.data import Sampler 10 | 11 | 12 | class WeightedRandomSampler(Sampler): 13 | """ 14 | Samples elements from [0, length) randomly without replacement. 15 | 16 | Parameters 17 | ---------- 18 | length : int 19 | Length of the sequence. 20 | weights : np.array of float 21 | Normalized weights of samples. 
22 | """ 23 | def __init__(self, 24 | length, 25 | weights): 26 | assert (isinstance(length, int) and length > 0) 27 | assert (len(weights) == length) 28 | assert (np.abs(weights.sum() - 1.0) <= 1e-5) 29 | self._length = length 30 | self._weights = weights.copy() 31 | 32 | def __iter__(self): 33 | indices = mx.nd.random.multinomial(mx.nd.array(self._weights), shape=self._length).asnumpy() 34 | np.random.shuffle(indices) 35 | return iter(indices) 36 | 37 | def __len__(self): 38 | return self._length 39 | -------------------------------------------------------------------------------- /keras_/LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018-2024 Oleg Sémery 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /keras_/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/keras_/__init__.py -------------------------------------------------------------------------------- /keras_/kerascv/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/keras_/kerascv/__init__.py -------------------------------------------------------------------------------- /keras_/kerascv/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/keras_/kerascv/models/__init__.py -------------------------------------------------------------------------------- /keras_/kerascv/models/others/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/keras_/kerascv/models/others/__init__.py -------------------------------------------------------------------------------- /keras_/kerascv/models/zfnet.py: -------------------------------------------------------------------------------- 1 | """ 2 | ZFNet for ImageNet-1K, implemented in Keras. 3 | Original paper: 'Visualizing and Understanding Convolutional Networks,' https://arxiv.org/abs/1311.2901. 
4 | """ 5 | 6 | __all__ = ['zfnet', 'zfnetb'] 7 | 8 | import os 9 | from .common import is_channels_first 10 | from .alexnet import alexnet_model 11 | 12 | 13 | def get_zfnet(version="a", 14 | model_name=None, 15 | pretrained=False, 16 | root=os.path.join("~", ".keras", "models"), 17 | **kwargs): 18 | """ 19 | Create ZFNet model with specific parameters. 20 | 21 | Parameters 22 | ---------- 23 | version : str, default 'a' 24 | Version of ZFNet ('a' or 'b'). 25 | model_name : str or None, default None 26 | Model name for loading pretrained model. 27 | pretrained : bool, default False 28 | Whether to load the pretrained weights for model. 29 | root : str, default '~/.keras/models' 30 | Location for keeping the model parameters. 31 | """ 32 | if version == "a": 33 | channels = [[96], [256], [384, 384, 256]] 34 | kernel_sizes = [[7], [5], [3, 3, 3]] 35 | strides = [[2], [2], [1, 1, 1]] 36 | paddings = [[1], [0], [1, 1, 1]] 37 | use_lrn = True 38 | elif version == "b": 39 | channels = [[96], [256], [512, 1024, 512]] 40 | kernel_sizes = [[7], [5], [3, 3, 3]] 41 | strides = [[2], [2], [1, 1, 1]] 42 | paddings = [[1], [0], [1, 1, 1]] 43 | use_lrn = True 44 | else: 45 | raise ValueError("Unsupported ZFNet version {}".format(version)) 46 | 47 | net = alexnet_model( 48 | channels=channels, 49 | kernel_sizes=kernel_sizes, 50 | strides=strides, 51 | paddings=paddings, 52 | use_lrn=use_lrn, 53 | **kwargs) 54 | 55 | if pretrained: 56 | if (model_name is None) or (not model_name): 57 | raise ValueError("Parameter `model_name` should be properly initialized for loading pretrained model.") 58 | from .model_store import download_model 59 | download_model( 60 | net=net, 61 | model_name=model_name, 62 | local_model_store_dir_path=root) 63 | 64 | return net 65 | 66 | 67 | def zfnet(**kwargs): 68 | """ 69 | ZFNet model from 'Visualizing and Understanding Convolutional Networks,' https://arxiv.org/abs/1311.2901. 70 | 71 | Parameters 72 | ---------- 73 | pretrained : bool, default False 74 | Whether to load the pretrained weights for model. 75 | root : str, default '~/.keras/models' 76 | Location for keeping the model parameters. 77 | """ 78 | return get_zfnet(model_name="zfnet", **kwargs) 79 | 80 | 81 | def zfnetb(**kwargs): 82 | """ 83 | ZFNet-b model from 'Visualizing and Understanding Convolutional Networks,' https://arxiv.org/abs/1311.2901. 84 | 85 | Parameters 86 | ---------- 87 | pretrained : bool, default False 88 | Whether to load the pretrained weights for model. 89 | root : str, default '~/.keras/models' 90 | Location for keeping the model parameters. 
91 | """ 92 | return get_zfnet(version="b", model_name="zfnetb", **kwargs) 93 | 94 | 95 | def _test(): 96 | import numpy as np 97 | import keras 98 | 99 | pretrained = False 100 | 101 | models = [ 102 | zfnet, 103 | zfnetb, 104 | ] 105 | 106 | for model in models: 107 | 108 | net = model(pretrained=pretrained) 109 | # net.summary() 110 | weight_count = keras.utils.layer_utils.count_params(net.trainable_weights) 111 | print("m={}, {}".format(model.__name__, weight_count)) 112 | assert (model != zfnet or weight_count == 62357608) 113 | assert (model != zfnetb or weight_count == 107627624) 114 | 115 | if is_channels_first(): 116 | x = np.zeros((1, 3, 224, 224), np.float32) 117 | else: 118 | x = np.zeros((1, 224, 224, 3), np.float32) 119 | y = net.predict(x) 120 | assert (y.shape == (1, 1000)) 121 | 122 | 123 | if __name__ == "__main__": 124 | _test() 125 | -------------------------------------------------------------------------------- /keras_/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | license_file = LICENSE.txt 3 | 4 | [bdist_wheel] 5 | universal=1 -------------------------------------------------------------------------------- /keras_/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from os import path 3 | from io import open 4 | 5 | here = path.abspath(path.dirname(__file__)) 6 | with open(path.join(here, 'README.md'), encoding='utf-8') as f: 7 | long_description = f.read() 8 | 9 | setup( 10 | name='kerascv', 11 | version='0.0.40', 12 | description='Image classification models for Keras', 13 | license='MIT', 14 | long_description=long_description, 15 | long_description_content_type='text/markdown', 16 | url='https://github.com/osmr/imgclsmob', 17 | author='Oleg Sémery', 18 | author_email='osemery@gmail.com', 19 | classifiers=[ 20 | 'Development Status :: 3 - Alpha', 21 | 'Intended Audience :: Science/Research', 22 | 'License :: OSI Approved :: MIT License', 23 | 'Operating System :: OS Independent', 24 | 'Programming Language :: Python', 25 | 'Topic :: Scientific/Engineering :: Image Recognition', 26 | ], 27 | keywords='machine-learning deep-learning neuralnetwork image-classification keras keras-mxnet imagenet vgg resnet ' 28 | 'resnext senet densenet darknet squeezenet squeezenext shufflenet menet mobilenent igcv3 mnasnet ' 29 | 'efficientnet', 30 | packages=find_packages(exclude=['others', '*.others', 'others.*', '*.others.*']), 31 | include_package_data=True, 32 | install_requires=['h5py'], 33 | ) 34 | -------------------------------------------------------------------------------- /load_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script for downloading model weights. 
3 | """ 4 | 5 | import argparse 6 | import numpy as np 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser(description="Download model", 11 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 12 | parser.add_argument( 13 | "--model", 14 | type=str, 15 | required=True, 16 | help="model name") 17 | args = parser.parse_args() 18 | return args 19 | 20 | 21 | def main(): 22 | args = parse_args() 23 | 24 | from gluon.utils import prepare_model as prepare_model_gl 25 | prepare_model_gl( 26 | model_name=args.model, 27 | use_pretrained=True, 28 | pretrained_model_file_path="", 29 | dtype=np.float32) 30 | 31 | from pytorch.utils import prepare_model as prepare_model_pt 32 | prepare_model_pt( 33 | model_name=args.model, 34 | use_pretrained=True, 35 | pretrained_model_file_path="", 36 | use_cuda=False) 37 | 38 | from chainer_.utils import prepare_model as prepare_model_ch 39 | prepare_model_ch( 40 | model_name=args.model, 41 | use_pretrained=True, 42 | pretrained_model_file_path="") 43 | 44 | from tensorflow2.utils import prepare_model as prepare_model_tf2 45 | prepare_model_tf2( 46 | model_name=args.model, 47 | use_pretrained=True, 48 | pretrained_model_file_path="", 49 | use_cuda=False) 50 | 51 | 52 | if __name__ == '__main__': 53 | main() 54 | -------------------------------------------------------------------------------- /other/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/other/__init__.py -------------------------------------------------------------------------------- /other/chainer_/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/other/chainer_/__init__.py -------------------------------------------------------------------------------- /other/chainer_/top_k_accuracy1.py: -------------------------------------------------------------------------------- 1 | import six 2 | from chainer.backends import cuda 3 | from chainer.function import Function 4 | from chainer.utils import type_check 5 | 6 | 7 | class TopKAccuracy(Function): 8 | 9 | def __init__(self, k=1): 10 | self.k = k 11 | 12 | def check_type_forward(self, in_types): 13 | type_check._argname(in_types, ('x', 't')) 14 | x_type, t_type = in_types 15 | 16 | type_check.expect( 17 | x_type.dtype.kind == 'f', 18 | t_type.dtype.kind == 'i' 19 | ) 20 | 21 | t_ndim = type_check.eval(t_type.ndim) 22 | type_check.expect( 23 | x_type.ndim >= t_type.ndim, 24 | x_type.shape[0] == t_type.shape[0], 25 | x_type.shape[2: t_ndim + 1] == t_type.shape[1:] 26 | ) 27 | for i in six.moves.range(t_ndim + 1, type_check.eval(x_type.ndim)): 28 | type_check.expect(x_type.shape[i] == 1) 29 | 30 | def forward(self, inputs): 31 | xp = cuda.get_array_module(*inputs) 32 | y, t = inputs 33 | 34 | argsorted_pred = xp.argsort(y)[:, -self.k:] 35 | return xp.asarray(xp.any(argsorted_pred.T == t, axis=0).mean(dtype=xp.float32)), 36 | 37 | 38 | def top_k_accuracy(y, t, k=1): 39 | return TopKAccuracy(k=k)(y, t) 40 | -------------------------------------------------------------------------------- /other/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/other/datasets/__init__.py 
-------------------------------------------------------------------------------- /other/gluon/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/other/gluon/__init__.py -------------------------------------------------------------------------------- /other/gluon/khpa/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/other/gluon/khpa/__init__.py -------------------------------------------------------------------------------- /other/pytorch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/other/pytorch/__init__.py -------------------------------------------------------------------------------- /other/pytorch/cub200_2011_utils1.py: -------------------------------------------------------------------------------- 1 | """ 2 | CUB-200-2011 fine-grained classification dataset routines. 3 | """ 4 | 5 | __all__ = ['add_dataset_parser_arguments', 'get_train_data_loader', 'get_val_data_loader'] 6 | 7 | import math 8 | import torch.utils.data 9 | import torchvision.transforms as transforms 10 | from pytorch.datasets.cub200_2011_cls_dataset import CUB200_2011 11 | 12 | 13 | def add_dataset_parser_arguments(parser): 14 | parser.add_argument( 15 | '--data-dir', 16 | type=str, 17 | default='../imgclsmob_data/CUB_200_2011', 18 | help='path to directory with CUB-200-2011 dataset') 19 | parser.add_argument( 20 | '--input-size', 21 | type=int, 22 | default=448, 23 | help='size of the input for model') 24 | parser.add_argument( 25 | '--resize-inv-factor', 26 | type=float, 27 | default=0.74667, 28 | help='inverted ratio for input image crop') 29 | 30 | parser.add_argument( 31 | '--num-classes', 32 | type=int, 33 | default=200, 34 | help='number of classes') 35 | parser.add_argument( 36 | '--in-channels', 37 | type=int, 38 | default=3, 39 | help='number of input channels') 40 | 41 | 42 | def get_train_data_loader(dataset_dir, 43 | batch_size, 44 | num_workers, 45 | input_image_size=448): 46 | mean_rgb = (0.485, 0.456, 0.406) 47 | std_rgb = (0.229, 0.224, 0.225) 48 | jitter_param = 0.4 49 | 50 | transform_train = transforms.Compose([ 51 | transforms.RandomResizedCrop(input_image_size), 52 | transforms.RandomHorizontalFlip(), 53 | transforms.ColorJitter( 54 | brightness=jitter_param, 55 | contrast=jitter_param, 56 | saturation=jitter_param), 57 | transforms.ToTensor(), 58 | transforms.Normalize( 59 | mean=mean_rgb, 60 | std=std_rgb)]) 61 | 62 | dataset = CUB200_2011( 63 | root=dataset_dir, 64 | train=True, 65 | transform=transform_train) 66 | 67 | train_loader = torch.utils.data.DataLoader( 68 | dataset=dataset, 69 | batch_size=batch_size, 70 | shuffle=True, 71 | num_workers=num_workers, 72 | pin_memory=True) 73 | 74 | return train_loader 75 | 76 | 77 | def get_val_data_loader(dataset_dir, 78 | batch_size, 79 | num_workers, 80 | input_image_size=448, 81 | resize_inv_factor=0.74667): 82 | assert (resize_inv_factor > 0.0) 83 | resize_value = int(math.ceil(float(input_image_size) / resize_inv_factor)) 84 | 85 | mean_rgb = (0.485, 0.456, 0.406) 86 | std_rgb = (0.229, 0.224, 0.225) 87 | 88 | transform_val = transforms.Compose([ 89 | transforms.Resize(resize_value), 90 | transforms.CenterCrop(input_image_size), 91 | 
transforms.ToTensor(), 92 | transforms.Normalize( 93 | mean=mean_rgb, 94 | std=std_rgb) 95 | ]) 96 | 97 | dataset = CUB200_2011( 98 | root=dataset_dir, 99 | train=False, 100 | transform=transform_val) 101 | 102 | val_loader = torch.utils.data.DataLoader( 103 | dataset=dataset, 104 | batch_size=batch_size, 105 | shuffle=False, 106 | num_workers=num_workers, 107 | pin_memory=True) 108 | 109 | return val_loader 110 | -------------------------------------------------------------------------------- /pytorch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/pytorch/__init__.py -------------------------------------------------------------------------------- /pytorch/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/pytorch/datasets/__init__.py -------------------------------------------------------------------------------- /pytorch/datasets/asr_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | Automatic Speech Recognition (ASR) abstract dataset. 3 | """ 4 | 5 | __all__ = ['AsrDataset', 'asr_test_transform'] 6 | 7 | import torch.utils.data as data 8 | import torchvision.transforms as transforms 9 | from pytorchcv.models.jasper import NemoAudioReader 10 | 11 | 12 | class AsrDataset(data.Dataset): 13 | """ 14 | Automatic Speech Recognition (ASR) abstract dataset. 15 | 16 | Parameters 17 | ---------- 18 | root : str 19 | Path to the folder stored the dataset. 20 | mode : str 21 | 'train', 'val', 'test', or 'demo'. 22 | transform : func 23 | A function that takes data and transforms it. 24 | """ 25 | def __init__(self, 26 | root, 27 | mode, 28 | transform): 29 | super(AsrDataset, self).__init__() 30 | assert (mode in ("train", "val", "test", "demo")) 31 | self.root = root 32 | self.mode = mode 33 | self.transform = transform 34 | self.data = [] 35 | self.audio_reader = NemoAudioReader() 36 | 37 | def __getitem__(self, index): 38 | wav_file_path, label_text = self.data[index] 39 | audio_data = self.audio_reader.read_from_file(wav_file_path) 40 | audio_len = audio_data.shape[0] 41 | return (audio_data, audio_len), label_text 42 | 43 | def __len__(self): 44 | return len(self.data) 45 | 46 | 47 | def asr_test_transform(ds_metainfo): 48 | assert (ds_metainfo is not None) 49 | return transforms.Compose([ 50 | transforms.ToTensor(), 51 | ]) 52 | -------------------------------------------------------------------------------- /pytorch/datasets/cifar100_cls_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | CIFAR-100 classification dataset. 3 | """ 4 | 5 | import os 6 | from torchvision.datasets import CIFAR100 7 | from .cifar10_cls_dataset import CIFAR10MetaInfo 8 | 9 | 10 | class CIFAR100Fine(CIFAR100): 11 | """ 12 | CIFAR-100 image classification dataset. 13 | 14 | 15 | Parameters 16 | ---------- 17 | root : str, default '~/.torch/datasets/cifar100' 18 | Path to temp folder for storing data. 19 | mode : str, default 'train' 20 | 'train', 'val', or 'test'. 21 | transform : function, default None 22 | A function that takes data and label and transforms them. 
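Since CIFAR100Fine (defined here) is a thin wrapper over torchvision's CIFAR100 with download=True, a smoke test only needs a transform and a DataLoader. A hypothetical quick check, assuming the repository root is on PYTHONPATH:

import torch.utils.data
import torchvision.transforms as transforms
from pytorch.datasets.cifar100_cls_dataset import CIFAR100Fine

# Downloads CIFAR-100 into ~/.torch/datasets/cifar100 on first use.
dataset = CIFAR100Fine(mode="train", transform=transforms.ToTensor())
loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
images, labels = next(iter(loader))
print(images.shape)  # torch.Size([32, 3, 32, 32])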
23 | """ 24 | def __init__(self, 25 | root=os.path.join("~", ".torch", "datasets", "cifar100"), 26 | mode="train", 27 | transform=None): 28 | super(CIFAR100Fine, self).__init__( 29 | root=root, 30 | train=(mode == "train"), 31 | transform=transform, 32 | download=True) 33 | 34 | 35 | class CIFAR100MetaInfo(CIFAR10MetaInfo): 36 | def __init__(self): 37 | super(CIFAR100MetaInfo, self).__init__() 38 | self.label = "CIFAR100" 39 | self.root_dir_name = "cifar100" 40 | self.dataset_class = CIFAR100Fine 41 | self.num_classes = 100 42 | -------------------------------------------------------------------------------- /pytorch/datasets/cifar10_cls_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | CIFAR-10 classification dataset. 3 | """ 4 | 5 | import os 6 | from torchvision.datasets import CIFAR10 7 | import torchvision.transforms as transforms 8 | from .dataset_metainfo import DatasetMetaInfo 9 | 10 | 11 | class CIFAR10Fine(CIFAR10): 12 | """ 13 | CIFAR-10 image classification dataset. 14 | 15 | 16 | Parameters 17 | ---------- 18 | root : str, default '~/.torch/datasets/cifar10' 19 | Path to temp folder for storing data. 20 | mode : str, default 'train' 21 | 'train', 'val', or 'test'. 22 | transform : function, default None 23 | A function that takes data and label and transforms them. 24 | """ 25 | def __init__(self, 26 | root=os.path.join("~", ".torch", "datasets", "cifar10"), 27 | mode="train", 28 | transform=None): 29 | super(CIFAR10Fine, self).__init__( 30 | root=root, 31 | train=(mode == "train"), 32 | transform=transform, 33 | download=True) 34 | 35 | 36 | class CIFAR10MetaInfo(DatasetMetaInfo): 37 | def __init__(self): 38 | super(CIFAR10MetaInfo, self).__init__() 39 | self.label = "CIFAR10" 40 | self.short_label = "cifar" 41 | self.root_dir_name = "cifar10" 42 | self.dataset_class = CIFAR10Fine 43 | self.num_training_samples = 50000 44 | self.in_channels = 3 45 | self.num_classes = 10 46 | self.input_image_size = (32, 32) 47 | self.train_metric_capts = ["Train.Err"] 48 | self.train_metric_names = ["Top1Error"] 49 | self.train_metric_extra_kwargs = [{"name": "err"}] 50 | self.val_metric_capts = ["Val.Err"] 51 | self.val_metric_names = ["Top1Error"] 52 | self.val_metric_extra_kwargs = [{"name": "err"}] 53 | self.saver_acc_ind = 0 54 | self.train_transform = cifar10_train_transform 55 | self.val_transform = cifar10_val_transform 56 | self.test_transform = cifar10_val_transform 57 | self.ml_type = "imgcls" 58 | 59 | 60 | def cifar10_train_transform(ds_metainfo, 61 | mean_rgb=(0.4914, 0.4822, 0.4465), 62 | std_rgb=(0.2023, 0.1994, 0.2010), 63 | jitter_param=0.4): 64 | assert (ds_metainfo is not None) 65 | assert (ds_metainfo.input_image_size[0] == 32) 66 | return transforms.Compose([ 67 | transforms.RandomCrop( 68 | size=32, 69 | padding=4), 70 | transforms.RandomHorizontalFlip(), 71 | transforms.ColorJitter( 72 | brightness=jitter_param, 73 | contrast=jitter_param, 74 | saturation=jitter_param), 75 | transforms.ToTensor(), 76 | transforms.Normalize( 77 | mean=mean_rgb, 78 | std=std_rgb) 79 | ]) 80 | 81 | 82 | def cifar10_val_transform(ds_metainfo, 83 | mean_rgb=(0.4914, 0.4822, 0.4465), 84 | std_rgb=(0.2023, 0.1994, 0.2010)): 85 | assert (ds_metainfo is not None) 86 | return transforms.Compose([ 87 | transforms.ToTensor(), 88 | transforms.Normalize( 89 | mean=mean_rgb, 90 | std=std_rgb) 91 | ]) 92 | -------------------------------------------------------------------------------- /pytorch/datasets/dataset_metainfo.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Base dataset metainfo class. 3 | """ 4 | 5 | import os 6 | 7 | 8 | class DatasetMetaInfo(object): 9 | """ 10 | Base descriptor of dataset. 11 | """ 12 | 13 | def __init__(self): 14 | self.use_imgrec = False 15 | self.label = None 16 | self.root_dir_name = None 17 | self.root_dir_path = None 18 | self.dataset_class = None 19 | self.dataset_class_extra_kwargs = None 20 | self.num_training_samples = None 21 | self.in_channels = None 22 | self.num_classes = None 23 | self.input_image_size = None 24 | self.train_metric_capts = None 25 | self.train_metric_names = None 26 | self.train_metric_extra_kwargs = None 27 | self.train_use_weighted_sampler = False 28 | self.val_metric_capts = None 29 | self.val_metric_names = None 30 | self.val_metric_extra_kwargs = None 31 | self.test_metric_capts = None 32 | self.test_metric_names = None 33 | self.test_metric_extra_kwargs = None 34 | self.saver_acc_ind = None 35 | self.ml_type = None 36 | self.allow_hybridize = True 37 | self.train_net_extra_kwargs = None 38 | self.test_net_extra_kwargs = None 39 | self.load_ignore_extra = False 40 | 41 | def add_dataset_parser_arguments(self, 42 | parser, 43 | work_dir_path): 44 | """ 45 | Create python script parameters (for dataset specific metainfo). 46 | 47 | Parameters 48 | ---------- 49 | parser : ArgumentParser 50 | ArgumentParser instance. 51 | work_dir_path : str 52 | Path to working directory. 53 | """ 54 | parser.add_argument( 55 | "--data-dir", 56 | type=str, 57 | default=os.path.join(work_dir_path, self.root_dir_name), 58 | help="path to directory with {} dataset".format(self.label)) 59 | parser.add_argument( 60 | "--num-classes", 61 | type=int, 62 | default=self.num_classes, 63 | help="number of classes") 64 | parser.add_argument( 65 | "--in-channels", 66 | type=int, 67 | default=self.in_channels, 68 | help="number of input channels") 69 | 70 | def update(self, 71 | args): 72 | """ 73 | Update dataset metainfo after user customizing. 74 | 75 | Parameters 76 | ---------- 77 | args : argparse.Namespace 78 | Main script arguments. 79 | """ 80 | self.root_dir_path = args.data_dir 81 | self.num_classes = args.num_classes 82 | self.in_channels = args.in_channels 83 | 84 | def update_from_dataset(self, 85 | dataset): 86 | """ 87 | Update dataset metainfo after a dataset class instance creation. 88 | 89 | Parameters 90 | ---------- 91 | dataset : obj 92 | A dataset class instance. 93 | """ 94 | pass 95 | -------------------------------------------------------------------------------- /pytorch/datasets/seg_dataset.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | from PIL import Image, ImageOps, ImageFilter 4 | import torch.utils.data as data 5 | 6 | 7 | class SegDataset(data.Dataset): 8 | """ 9 | Segmentation base dataset. 10 | 11 | Parameters 12 | ---------- 13 | root : str 14 | Path to the folder stored the dataset. 15 | mode : str 16 | 'train', 'val', 'test', or 'demo'. 17 | transform : func 18 | A function that takes data and transforms it.
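The _val_sync_transform defined below first resizes the short edge to crop_size and then center-crops. The same geometry isolated for a single image (image-only; the mask follows identically, but with NEAREST resampling):

from PIL import Image

def resize_short_then_center_crop(image, outsize=480):
    # Mirror of SegDataset._val_sync_transform below, without the mask.
    w, h = image.size
    if w > h:
        oh = outsize
        ow = int(1.0 * w * oh / h)
    else:
        ow = outsize
        oh = int(1.0 * h * ow / w)
    image = image.resize((ow, oh), Image.BILINEAR)
    w, h = image.size
    x1 = int(round(0.5 * (w - outsize)))
    y1 = int(round(0.5 * (h - outsize)))
    return image.crop((x1, y1, x1 + outsize, y1 + outsize))

print(resize_short_then_center_crop(Image.new("RGB", (1024, 512))).size)  # (480, 480)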
19 | """ 20 | def __init__(self, 21 | root, 22 | mode, 23 | transform, 24 | base_size=520, 25 | crop_size=480): 26 | assert (mode in ("train", "val", "test", "demo")) 27 | self.root = root 28 | self.mode = mode 29 | self.transform = transform 30 | self.base_size = base_size 31 | self.crop_size = crop_size 32 | 33 | def _val_sync_transform(self, image, mask): 34 | outsize = self.crop_size 35 | short_size = outsize 36 | w, h = image.size 37 | if w > h: 38 | oh = short_size 39 | ow = int(1.0 * w * oh / h) 40 | else: 41 | ow = short_size 42 | oh = int(1.0 * h * ow / w) 43 | image = image.resize((ow, oh), Image.BILINEAR) 44 | mask = mask.resize((ow, oh), Image.NEAREST) 45 | # center crop 46 | w, h = image.size 47 | x1 = int(round(0.5 * (w - outsize))) 48 | y1 = int(round(0.5 * (h - outsize))) 49 | image = image.crop((x1, y1, x1 + outsize, y1 + outsize)) 50 | mask = mask.crop((x1, y1, x1 + outsize, y1 + outsize)) 51 | # final transform 52 | image, mask = self._img_transform(image), self._mask_transform(mask) 53 | return image, mask 54 | 55 | def _sync_transform(self, image, mask): 56 | # random mirror 57 | if random.random() < 0.5: 58 | image = image.transpose(Image.FLIP_LEFT_RIGHT) 59 | mask = mask.transpose(Image.FLIP_LEFT_RIGHT) 60 | crop_size = self.crop_size 61 | # random scale (short edge) 62 | short_size = random.randint(int(self.base_size * 0.5), int(self.base_size * 2.0)) 63 | w, h = image.size 64 | if h > w: 65 | ow = short_size 66 | oh = int(1.0 * h * ow / w) 67 | else: 68 | oh = short_size 69 | ow = int(1.0 * w * oh / h) 70 | image = image.resize((ow, oh), Image.BILINEAR) 71 | mask = mask.resize((ow, oh), Image.NEAREST) 72 | # pad crop 73 | if short_size < crop_size: 74 | padh = crop_size - oh if oh < crop_size else 0 75 | padw = crop_size - ow if ow < crop_size else 0 76 | image = ImageOps.expand(image, border=(0, 0, padw, padh), fill=0) 77 | mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0) 78 | # random crop crop_size 79 | w, h = image.size 80 | x1 = random.randint(0, w - crop_size) 81 | y1 = random.randint(0, h - crop_size) 82 | image = image.crop((x1, y1, x1 + crop_size, y1 + crop_size)) 83 | mask = mask.crop((x1, y1, x1 + crop_size, y1 + crop_size)) 84 | # gaussian blur as in PSP 85 | if random.random() < 0.5: 86 | image = image.filter(ImageFilter.GaussianBlur( 87 | radius=random.random())) 88 | # final transform 89 | image, mask = self._img_transform(image), self._mask_transform(mask) 90 | return image, mask 91 | 92 | @staticmethod 93 | def _img_transform(image): 94 | return np.array(image) 95 | 96 | @staticmethod 97 | def _mask_transform(mask): 98 | return np.array(mask).astype(np.int32) 99 | -------------------------------------------------------------------------------- /pytorch/datasets/svhn_cls_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | SVHN classification dataset. 3 | """ 4 | 5 | import os 6 | from torchvision.datasets import SVHN 7 | from .cifar10_cls_dataset import CIFAR10MetaInfo 8 | 9 | 10 | class SVHNFine(SVHN): 11 | """ 12 | SVHN image classification dataset from http://ufldl.stanford.edu/housenumbers/. 13 | Each sample is an image (in 3D NDArray) with shape (32, 32, 3). 14 | Note: The SVHN dataset assigns the label `10` to the digit `0`. However, in this Dataset, 15 | we assign the label `0` to the digit `0`. 16 | 17 | Parameters 18 | ---------- 19 | root : str, default '~/.torch/datasets/svhn' 20 | Path to temp folder for storing data. 
21 | mode : str, default 'train' 22 | 'train', 'val', or 'test'. 23 | transform : function, default None 24 | A function that takes data and label and transforms them. 25 | """ 26 | def __init__(self, 27 | root=os.path.join("~", ".torch", "datasets", "svhn"), 28 | mode="train", 29 | transform=None): 30 | super(SVHNFine, self).__init__( 31 | root=root, 32 | split=("train" if mode == "train" else "test"), 33 | transform=transform, 34 | download=True) 35 | 36 | 37 | class SVHNMetaInfo(CIFAR10MetaInfo): 38 | def __init__(self): 39 | super(SVHNMetaInfo, self).__init__() 40 | self.label = "SVHN" 41 | self.root_dir_name = "svhn" 42 | self.dataset_class = SVHNFine 43 | self.num_training_samples = 73257 44 | -------------------------------------------------------------------------------- /pytorch/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/pytorch/metrics/__init__.py -------------------------------------------------------------------------------- /pytorch/metrics/asr_metrics.py: -------------------------------------------------------------------------------- 1 | """ 2 | Evaluation Metrics for Automatic Speech Recognition (ASR). 3 | """ 4 | 5 | from .metric import EvalMetric 6 | 7 | __all__ = ['WER'] 8 | 9 | 10 | class WER(EvalMetric): 11 | """ 12 | Computes Word Error Rate (WER) for Automatic Speech Recognition (ASR). 13 | 14 | Parameters 15 | ---------- 16 | vocabulary : list of str 17 | Vocabulary of the dataset. 18 | name : str, default 'wer' 19 | Name of this metric instance for display. 20 | output_names : list of str, or None, default None 21 | Name of predictions that should be used when updating with update_dict. 22 | By default include all predictions. 23 | label_names : list of str, or None, default None 24 | Name of labels that should be used when updating with update_dict. 25 | By default include all labels. 26 | """ 27 | def __init__(self, 28 | vocabulary, 29 | name="wer", 30 | output_names=None, 31 | label_names=None): 32 | super(WER, self).__init__( 33 | name=name, 34 | output_names=output_names, 35 | label_names=label_names, 36 | has_global_stats=True) 37 | self.vocabulary = vocabulary 38 | self.ctc_decoder = CtcDecoder(vocabulary=vocabulary) 39 | 40 | def update(self, labels, preds): 41 | """ 42 | Updates the internal evaluation result. 43 | 44 | Parameters 45 | ---------- 46 | labels : torch.Tensor 47 | The labels of the data with class indices as values, one per sample. 48 | preds : torch.Tensor 49 | Prediction values for samples. Each prediction value can either be the class index, 50 | or a vector of likelihoods for all classes. 
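Before the CTC decoding code below, the greedy collapse rule on its own: merge repeated symbols, then drop blanks. A minimal sketch with a made-up blank id:

def ctc_greedy_collapse(prediction, blank_id):
    # Same rule as CtcDecoder below: a symbol is emitted only when it is not
    # a blank and not a (non-blank) repeat of the previous frame.
    decoded = []
    previous = blank_id
    for p in prediction:
        if (p != previous or previous == blank_id) and p != blank_id:
            decoded.append(p)
        previous = p
    return decoded

print(ctc_greedy_collapse([0, 0, 2, 2, 1, 1, 0], blank_id=2))  # [0, 1, 0]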
51 | """ 52 | import editdistance 53 | 54 | labels_code = labels.cpu().numpy() 55 | labels = [] 56 | for label_code in labels_code: 57 | label_text = "".join([self.ctc_decoder.labels_map[c] for c in label_code]) 58 | labels.append(label_text) 59 | 60 | preds = preds[0] 61 | greedy_predictions = preds.transpose(1, 2).log_softmax(dim=-1).argmax(dim=-1, keepdim=False).cpu().numpy() 62 | preds = self.ctc_decoder(greedy_predictions) 63 | 64 | assert (len(labels) == len(preds)) 65 | for pred, label in zip(preds, labels): 66 | pred = pred.split() 67 | label = label.split() 68 | 69 | word_error_count = editdistance.eval(label, pred) 70 | word_count = max(len(label), len(pred)) 71 | 72 | assert (word_error_count <= word_count) 73 | 74 | self.sum_metric += word_error_count 75 | self.global_sum_metric += word_error_count 76 | self.num_inst += word_count 77 | self.global_num_inst += word_count 78 | 79 | 80 | class CtcDecoder(object): 81 | """ 82 | CTC decoder (to decode a sequence of labels to words). 83 | 84 | Parameters 85 | ---------- 86 | vocabulary : list of str 87 | Vocabulary of the dataset. 88 | """ 89 | def __init__(self, 90 | vocabulary): 91 | super().__init__() 92 | self.blank_id = len(vocabulary) 93 | self.labels_map = dict([(i, vocabulary[i]) for i in range(len(vocabulary))]) 94 | 95 | def __call__(self, 96 | predictions): 97 | """ 98 | Decode a sequence of labels to words. 99 | 100 | Parameters 101 | ---------- 102 | predictions : np.array of int or list(list(int)) 103 | Tensor with predicted labels. 104 | 105 | Returns 106 | ------- 107 | list of str 108 | Words. 109 | """ 110 | hypotheses = [] 111 | for prediction in predictions: 112 | decoded_prediction = [] 113 | previous = self.blank_id 114 | for p in prediction: 115 | if (p != previous or previous == self.blank_id) and p != self.blank_id: 116 | decoded_prediction.append(p) 117 | previous = p 118 | hypothesis = "".join([self.labels_map[c] for c in decoded_prediction]) 119 | hypotheses.append(hypothesis) 120 | return hypotheses 121 | -------------------------------------------------------------------------------- /pytorch/metrics/hpe_metrics.py: -------------------------------------------------------------------------------- 1 | """ 2 | Evaluation Metrics for Human Pose Estimation. 3 | """ 4 | 5 | from .metric import EvalMetric 6 | 7 | __all__ = ['CocoHpeOksApMetric'] 8 | 9 | 10 | class CocoHpeOksApMetric(EvalMetric): 11 | """ 12 | Detection metric for COCO Keypoint task. 13 | 14 | Parameters 15 | ---------- 16 | coco_annotations_file_path : str 17 | COCO anotation file path. 18 | pose_postprocessing_fn : func 19 | An function for pose post-processing. 20 | use_file : bool, default False 21 | Whether to use temporary file for estimation. 22 | validation_ids : bool, default False 23 | Whether to use temporary file for estimation. 24 | name : str, default 'CocoOksAp' 25 | Name of this metric instance for display. 26 | """ 27 | def __init__(self, 28 | coco_annotations_file_path, 29 | pose_postprocessing_fn, 30 | validation_ids=None, 31 | use_file=False, 32 | name="CocoOksAp"): 33 | super(CocoHpeOksApMetric, self).__init__(name=name) 34 | self.coco_annotations_file_path = coco_annotations_file_path 35 | self.pose_postprocessing_fn = pose_postprocessing_fn 36 | self.validation_ids = validation_ids 37 | self.use_file = use_file 38 | self.coco_result = [] 39 | 40 | def reset(self): 41 | self.coco_result = [] 42 | 43 | def get(self): 44 | """ 45 | Get evaluation metrics. 
46 | """ 47 | import copy 48 | from pycocotools.coco import COCO 49 | gt = COCO(self.coco_annotations_file_path) 50 | 51 | if self.use_file: 52 | import tempfile 53 | import json 54 | with tempfile.NamedTemporaryFile(mode="w", suffix=".json") as f: 55 | json.dump(self.coco_result, f) 56 | f.flush() 57 | pred = gt.loadRes(f.name) 58 | else: 59 | def calc_pred(coco, anns): 60 | import numpy as np 61 | import copy 62 | 63 | pred = COCO() 64 | pred.dataset["images"] = [img for img in coco.dataset["images"]] 65 | 66 | annsImgIds = [ann["image_id"] for ann in anns] 67 | assert set(annsImgIds) == (set(annsImgIds) & set(coco.getImgIds())) 68 | 69 | pred.dataset["categories"] = copy.deepcopy(coco.dataset["categories"]) 70 | for id, ann in enumerate(anns): 71 | s = ann["keypoints"] 72 | x = s[0::3] 73 | y = s[1::3] 74 | x0, x1, y0, y1 = np.min(x), np.max(x), np.min(y), np.max(y) 75 | ann["area"] = (x1 - x0) * (y1 - y0) 76 | ann["id"] = id + 1 77 | ann["bbox"] = [x0, y0, x1 - x0, y1 - y0] 78 | 79 | pred.dataset["annotations"] = anns 80 | pred.createIndex() 81 | return pred 82 | pred = calc_pred(gt, copy.deepcopy(self.coco_result)) 83 | 84 | from pycocotools.cocoeval import COCOeval 85 | coco_eval = COCOeval(gt, pred, "keypoints") 86 | if self.validation_ids is not None: 87 | coco_eval.params.imgIds = self.validation_ids 88 | coco_eval.params.useSegm = None 89 | coco_eval.evaluate() 90 | coco_eval.accumulate() 91 | coco_eval.summarize() 92 | 93 | return self.name, tuple(coco_eval.stats[:3]) 94 | 95 | def update(self, labels, preds): 96 | """ 97 | Updates the internal evaluation result. 98 | 99 | Parameters 100 | ---------- 101 | labels : torch.Tensor 102 | The labels of the data. 103 | preds : torch.Tensor 104 | Predicted values. 105 | """ 106 | label = labels.cpu().detach().numpy() 107 | pred = preds.cpu().detach().numpy() 108 | 109 | pred_pts_score, pred_person_score, label_img_id = self.pose_postprocessing_fn(pred, label) 110 | 111 | for idx in range(len(pred_pts_score)): 112 | image_id = int(label_img_id[idx]) 113 | kpt = pred_pts_score[idx].flatten().tolist() 114 | score = float(pred_person_score[idx]) 115 | self.coco_result.append({ 116 | "image_id": image_id, 117 | "category_id": 1, 118 | "keypoints": kpt, 119 | "score": score}) 120 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | requests 3 | mxnet>=1.2.1 4 | torch>=0.4.0 5 | torchvision>=0.2.1 6 | chainer>=5.0.0b4 7 | chainercv>=0.10.0 8 | git+https://github.com/awslabs/keras-apache-mxnet.git 9 | tensorflow-gpu 10 | tensorflow_addons 11 | git+https://github.com/tensorpack/tensorpack.git 12 | opencv-python 13 | pandas 14 | Pillow 15 | imgaug 16 | scipy 17 | tqdm 18 | Cython 19 | cvutil -------------------------------------------------------------------------------- /sotabench.py: -------------------------------------------------------------------------------- 1 | from torchbench.image_classification import ImageNet 2 | from pytorchcv.models.common.model_store import get_model_metainfo_dict 3 | from pytorchcv.model_provider import get_model as ptcv_get_model 4 | import torchvision.transforms as transforms 5 | import torch 6 | import math 7 | from sys import version_info 8 | 9 | 10 | model_metainfo_dict = get_model_metainfo_dict() 11 | for model_name, model_metainfo in (model_metainfo_dict.items() if version_info[0] >= 3 else model_metainfo_dict.iteritems()): # noqa 12 | caption, paper, ds, img_size, 
scale, batch, rem = model_metainfo[4:] 13 | net = ptcv_get_model(model_name, pretrained=True) 14 | if (ds != "in1k") or (img_size == 0) or ((len(rem) > 0) and (rem[-1] == "*")): 15 | continue 16 | paper_model_name = caption 17 | paper_arxiv_id = paper 18 | input_image_size = img_size 19 | resize_inv_factor = scale 20 | batch_size = batch 21 | model_description = "pytorch" + (rem if rem == "" else ", " + rem) 22 | assert (not hasattr(net, "in_size")) or (input_image_size == net.in_size[0]) 23 | ImageNet.benchmark( 24 | model=net, 25 | model_description=model_description, 26 | paper_model_name=paper_model_name, 27 | paper_arxiv_id=paper_arxiv_id, 28 | input_transform=transforms.Compose([ 29 | transforms.Resize(int(math.ceil(float(input_image_size) / resize_inv_factor))), 30 | transforms.CenterCrop(input_image_size), 31 | transforms.ToTensor(), 32 | transforms.Normalize( 33 | mean=[0.485, 0.456, 0.406], 34 | std=[0.229, 0.224, 0.225]), 35 | ]), 36 | batch_size=batch_size, 37 | num_gpu=1, 38 | # data_root=os.path.join("..", "imgclsmob_data", "imagenet") 39 | ) 40 | torch.cuda.empty_cache() 41 | -------------------------------------------------------------------------------- /tensorflow2/LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018-2021 Oleg Sémery 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /tensorflow2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/tensorflow2/__init__.py -------------------------------------------------------------------------------- /tensorflow2/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/tensorflow2/datasets/__init__.py -------------------------------------------------------------------------------- /tensorflow2/datasets/cifar100_cls_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | CIFAR-100 classification dataset. 
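The generators below wrap Keras' built-in CIFAR-100 loader. They can be exercised standalone with a stock ImageDataGenerator (the rescaling here is purely illustrative, not the repository's preprocessing):

from tensorflow.keras.datasets import cifar100
from tensorflow.keras.preprocessing.image import ImageDataGenerator

(x_train, y_train), _ = cifar100.load_data()  # downloads on first use
data_generator = ImageDataGenerator(rescale=1.0 / 255)
generator = data_generator.flow(x=x_train, y=y_train, batch_size=32, shuffle=False)
images, labels = next(generator)
print(images.shape, labels.shape)  # (32, 32, 32, 3) (32, 1)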
3 | """ 4 | 5 | from tensorflow.keras.datasets import cifar100 6 | from .cifar10_cls_dataset import CIFAR10MetaInfo 7 | 8 | 9 | class CIFAR100MetaInfo(CIFAR10MetaInfo): 10 | def __init__(self): 11 | super(CIFAR100MetaInfo, self).__init__() 12 | self.label = "CIFAR100" 13 | self.root_dir_name = "cifar100" 14 | self.num_classes = 100 15 | self.train_generator = cifar100_train_generator 16 | self.val_generator = cifar100_val_generator 17 | self.test_generator = cifar100_val_generator 18 | 19 | 20 | def cifar100_train_generator(data_generator, 21 | ds_metainfo, 22 | batch_size): 23 | """ 24 | Create image generator for training subset. 25 | 26 | Parameters 27 | ---------- 28 | data_generator : ImageDataGenerator 29 | Image transform sequence. 30 | ds_metainfo : DatasetMetaInfo 31 | ImageNet-1K dataset metainfo. 32 | batch_size : int 33 | Batch size. 34 | 35 | Returns 36 | ------- 37 | Sequential 38 | Image transform sequence. 39 | """ 40 | assert (ds_metainfo is not None) 41 | (x_train, y_train), _ = cifar100.load_data() 42 | generator = data_generator.flow( 43 | x=x_train, 44 | y=y_train, 45 | batch_size=batch_size, 46 | shuffle=False) 47 | return generator 48 | 49 | 50 | def cifar100_val_generator(data_generator, 51 | ds_metainfo, 52 | batch_size): 53 | """ 54 | Create image generator for validation subset. 55 | 56 | Parameters 57 | ---------- 58 | data_generator : ImageDataGenerator 59 | Image transform sequence. 60 | ds_metainfo : DatasetMetaInfo 61 | ImageNet-1K dataset metainfo. 62 | batch_size : int 63 | Batch size. 64 | 65 | Returns 66 | ------- 67 | Sequential 68 | Image transform sequence. 69 | """ 70 | assert (ds_metainfo is not None) 71 | _, (x_test, y_test) = cifar100.load_data() 72 | generator = data_generator.flow( 73 | x=x_test, 74 | y=y_test, 75 | batch_size=batch_size, 76 | shuffle=False) 77 | return generator 78 | -------------------------------------------------------------------------------- /tensorflow2/datasets/cls_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classification dataset routines. 3 | """ 4 | 5 | __all__ = ['img_normalization'] 6 | 7 | import numpy as np 8 | 9 | 10 | def img_normalization(img, 11 | mean_rgb, 12 | std_rgb): 13 | """ 14 | Normalization as in the ImageNet-1K validation procedure. 15 | 16 | Parameters 17 | ---------- 18 | img : np.array 19 | input image. 20 | mean_rgb : tuple of 3 float 21 | Mean of RGB channels in the dataset. 22 | std_rgb : tuple of 3 float 23 | STD of RGB channels in the dataset. 24 | 25 | Returns 26 | ------- 27 | np.array 28 | Output image. 29 | """ 30 | # print(img.max()) 31 | mean_rgb = np.array(mean_rgb, np.float32) * 255.0 32 | std_rgb = np.array(std_rgb, np.float32) * 255.0 33 | img = (img - mean_rgb) / std_rgb 34 | return img 35 | -------------------------------------------------------------------------------- /tensorflow2/datasets/dataset_metainfo.py: -------------------------------------------------------------------------------- 1 | """ 2 | Base dataset metainfo class. 3 | """ 4 | 5 | import os 6 | 7 | 8 | class DatasetMetaInfo(object): 9 | """ 10 | Base descriptor of dataset. 
11 | """ 12 | 13 | def __init__(self): 14 | self.use_imgrec = False 15 | self.label = None 16 | self.root_dir_name = None 17 | self.root_dir_path = None 18 | self.dataset_class = None 19 | self.dataset_class_extra_kwargs = None 20 | self.num_training_samples = None 21 | self.in_channels = None 22 | self.num_classes = None 23 | self.input_image_size = None 24 | self.train_metric_capts = None 25 | self.train_metric_names = None 26 | self.train_metric_extra_kwargs = None 27 | self.train_use_weighted_sampler = False 28 | self.val_metric_capts = None 29 | self.val_metric_names = None 30 | self.val_metric_extra_kwargs = None 31 | self.test_metric_capts = None 32 | self.test_metric_names = None 33 | self.test_metric_extra_kwargs = None 34 | self.saver_acc_ind = None 35 | self.ml_type = None 36 | self.allow_hybridize = True 37 | self.train_net_extra_kwargs = None 38 | self.test_net_extra_kwargs = None 39 | self.load_ignore_extra = False 40 | 41 | def add_dataset_parser_arguments(self, 42 | parser, 43 | work_dir_path): 44 | """ 45 | Create python script parameters (for dataset specific metainfo). 46 | 47 | Parameters 48 | ---------- 49 | parser : ArgumentParser 50 | ArgumentParser instance. 51 | work_dir_path : str 52 | Path to working directory. 53 | """ 54 | parser.add_argument( 55 | "--data-dir", 56 | type=str, 57 | default=os.path.join(work_dir_path, self.root_dir_name), 58 | help="path to directory with {} dataset".format(self.label)) 59 | parser.add_argument( 60 | "--num-classes", 61 | type=int, 62 | default=self.num_classes, 63 | help="number of classes") 64 | parser.add_argument( 65 | "--in-channels", 66 | type=int, 67 | default=self.in_channels, 68 | help="number of input channels") 69 | 70 | def update(self, 71 | args): 72 | """ 73 | Update dataset metainfo after user customizing. 74 | 75 | Parameters 76 | ---------- 77 | args : ArgumentParser 78 | Main script arguments. 79 | """ 80 | self.root_dir_path = args.data_dir 81 | self.num_classes = args.num_classes 82 | self.in_channels = args.in_channels 83 | 84 | def update_from_dataset(self, 85 | dataset): 86 | """ 87 | Update dataset metainfo after a dataset class instance creation. 88 | 89 | Parameters 90 | ---------- 91 | args : obj 92 | A dataset class instance. 93 | """ 94 | pass 95 | -------------------------------------------------------------------------------- /tensorflow2/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/tensorflow2/metrics/__init__.py -------------------------------------------------------------------------------- /tensorflow2/metrics/hpe_metrics.py: -------------------------------------------------------------------------------- 1 | """ 2 | Evaluation Metrics for Human Pose Estimation. 3 | """ 4 | 5 | from .metric import EvalMetric 6 | 7 | __all__ = ['CocoHpeOksApMetric'] 8 | 9 | 10 | class CocoHpeOksApMetric(EvalMetric): 11 | """ 12 | Detection metric for COCO bbox task. 13 | 14 | Parameters 15 | ---------- 16 | coco_annotations_file_path : str 17 | COCO anotation file path. 18 | pose_postprocessing_fn : func 19 | An function for pose post-processing. 20 | use_file : bool, default False 21 | Whether to use temporary file for estimation. 22 | validation_ids : bool, default False 23 | Whether to use temporary file for estimation. 24 | name : str, default 'CocoOksAp' 25 | Name of this metric instance for display. 
26 | """ 27 | def __init__(self, 28 | coco_annotations_file_path, 29 | pose_postprocessing_fn, 30 | validation_ids=None, 31 | use_file=False, 32 | name="CocoOksAp"): 33 | super(CocoHpeOksApMetric, self).__init__(name=name) 34 | self.coco_annotations_file_path = coco_annotations_file_path 35 | self.pose_postprocessing_fn = pose_postprocessing_fn 36 | self.validation_ids = validation_ids 37 | self.use_file = use_file 38 | self.coco_result = [] 39 | 40 | def reset(self): 41 | self.coco_result = [] 42 | 43 | def get(self): 44 | """ 45 | Get evaluation metrics. 46 | """ 47 | import copy 48 | from pycocotools.coco import COCO 49 | gt = COCO(self.coco_annotations_file_path) 50 | 51 | if self.use_file: 52 | import tempfile 53 | import json 54 | with tempfile.NamedTemporaryFile(mode="w", suffix=".json") as f: 55 | json.dump(self.coco_result, f) 56 | f.flush() 57 | pred = gt.loadRes(f.name) 58 | else: 59 | def calc_pred(coco, anns): 60 | import numpy as np 61 | import copy 62 | 63 | pred = COCO() 64 | pred.dataset["images"] = [img for img in coco.dataset["images"]] 65 | 66 | annsImgIds = [ann["image_id"] for ann in anns] 67 | assert set(annsImgIds) == (set(annsImgIds) & set(coco.getImgIds())) 68 | 69 | pred.dataset["categories"] = copy.deepcopy(coco.dataset["categories"]) 70 | for id, ann in enumerate(anns): 71 | s = ann["keypoints"] 72 | x = s[0::3] 73 | y = s[1::3] 74 | x0, x1, y0, y1 = np.min(x), np.max(x), np.min(y), np.max(y) 75 | ann["area"] = (x1 - x0) * (y1 - y0) 76 | ann["id"] = id + 1 77 | ann["bbox"] = [x0, y0, x1 - x0, y1 - y0] 78 | 79 | pred.dataset["annotations"] = anns 80 | pred.createIndex() 81 | return pred 82 | pred = calc_pred(gt, copy.deepcopy(self.coco_result)) 83 | 84 | from pycocotools.cocoeval import COCOeval 85 | coco_eval = COCOeval(gt, pred, "keypoints") 86 | if self.validation_ids is not None: 87 | coco_eval.params.imgIds = self.validation_ids 88 | coco_eval.params.useSegm = None 89 | coco_eval.evaluate() 90 | coco_eval.accumulate() 91 | coco_eval.summarize() 92 | 93 | return self.name, tuple(coco_eval.stats[:3]) 94 | 95 | def update(self, labels, preds): 96 | """ 97 | Updates the internal evaluation result. 98 | 99 | Parameters 100 | ---------- 101 | labels : tensor 102 | The labels of the data. 103 | preds : tensor 104 | Predicted values. 
105 | """ 106 | label = labels.numpy() 107 | pred = preds.numpy() 108 | 109 | pred_pts_score, pred_person_score, label_img_id = self.pose_postprocessing_fn(pred, label) 110 | 111 | for idx in range(len(pred_pts_score)): 112 | image_id = int(label_img_id[idx]) 113 | kpt = pred_pts_score[idx].flatten().tolist() 114 | score = float(pred_person_score[idx]) 115 | self.coco_result.append({ 116 | "image_id": image_id, 117 | "category_id": 1, 118 | "keypoints": kpt, 119 | "score": score}) 120 | -------------------------------------------------------------------------------- /tensorflow2/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | license_file = LICENSE.txt 3 | 4 | [bdist_wheel] 5 | universal=1 -------------------------------------------------------------------------------- /tensorflow2/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from os import path 3 | from io import open 4 | 5 | here = path.abspath(path.dirname(__file__)) 6 | with open(path.join(here, 'README.md'), encoding='utf-8') as f: 7 | long_description = f.read() 8 | 9 | setup( 10 | name='tf2cv', 11 | version='0.0.18', 12 | description='Image classification models for TensorFlow 2.0', 13 | license='MIT', 14 | long_description=long_description, 15 | long_description_content_type='text/markdown', 16 | url='https://github.com/osmr/imgclsmob', 17 | author='Oleg Sémery', 18 | author_email='osemery@gmail.com', 19 | classifiers=[ 20 | 'Development Status :: 3 - Alpha', 21 | 'Intended Audience :: Science/Research', 22 | 'License :: OSI Approved :: MIT License', 23 | 'Operating System :: OS Independent', 24 | 'Programming Language :: Python', 25 | 'Topic :: Scientific/Engineering :: Image Recognition', 26 | ], 27 | keywords='machine-learning deep-learning neuralnetwork image-classification tensorflow imagenet vgg resnet resnext ' 28 | 'senet densenet darknet squeezenet squeezenext shufflenet menet mobilenent igcv3 mnasnet', 29 | packages=find_packages(exclude=['datasets', 'metrics', 'others', '*.others', 'others.*', '*.others.*']), 30 | include_package_data=True, 31 | install_requires=['numpy', 'requests'], 32 | ) 33 | -------------------------------------------------------------------------------- /tensorflow2/tf2cv/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/tensorflow2/tf2cv/__init__.py -------------------------------------------------------------------------------- /tensorflow2/tf2cv/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/tensorflow2/tf2cv/models/__init__.py -------------------------------------------------------------------------------- /tensorflow2/tf2cv/models/jasperdr.py: -------------------------------------------------------------------------------- 1 | """ 2 | Jasper DR (Dense Residual) for ASR, implemented in TensorFlow. 3 | Original paper: 'Jasper: An End-to-End Convolutional Neural Acoustic Model,' https://arxiv.org/abs/1904.03288. 
4 | """ 5 | 6 | __all__ = ['jasperdr10x5_en', 'jasperdr10x5_en_nr'] 7 | 8 | from .jasper import get_jasper 9 | from .common import is_channels_first 10 | 11 | 12 | def jasperdr10x5_en(classes=29, **kwargs): 13 | """ 14 | Jasper DR 10x5 model for English language from 'Jasper: An End-to-End Convolutional Neural Acoustic Model,' 15 | https://arxiv.org/abs/1904.03288. 16 | 17 | Parameters 18 | ---------- 19 | classes : int, default 29 20 | Number of classification classes (number of graphemes). 21 | pretrained : bool, default False 22 | Whether to load the pretrained weights for model. 23 | root : str, default '~/.tensorflow/models' 24 | Location for keeping the model parameters. 25 | """ 26 | return get_jasper(classes=classes, version=("jasper", "10x5"), use_dr=True, model_name="jasperdr10x5_en", 27 | **kwargs) 28 | 29 | 30 | def jasperdr10x5_en_nr(classes=29, **kwargs): 31 | """ 32 | Jasper DR 10x5 model for English language (with presence of noise) from 'Jasper: An End-to-End Convolutional Neural 33 | Acoustic Model,' https://arxiv.org/abs/1904.03288. 34 | 35 | Parameters 36 | ---------- 37 | classes : int, default 29 38 | Number of classification classes (number of graphemes). 39 | pretrained : bool, default False 40 | Whether to load the pretrained weights for model. 41 | root : str, default '~/.tensorflow/models' 42 | Location for keeping the model parameters. 43 | """ 44 | return get_jasper(classes=classes, version=("jasper", "10x5"), use_dr=True, model_name="jasperdr10x5_en_nr", 45 | **kwargs) 46 | 47 | 48 | def _test(): 49 | import numpy as np 50 | import tensorflow.keras.backend as K 51 | import tensorflow as tf 52 | 53 | data_format = "channels_last" 54 | # data_format = "channels_first" 55 | pretrained = False 56 | audio_features = 64 57 | classes = 29 58 | 59 | models = [ 60 | jasperdr10x5_en, 61 | jasperdr10x5_en_nr, 62 | ] 63 | 64 | for model in models: 65 | 66 | net = model( 67 | in_channels=audio_features, 68 | pretrained=pretrained, 69 | data_format=data_format) 70 | 71 | batch = 3 72 | seq_len = np.random.randint(60, 150, batch) 73 | seq_len_max = seq_len.max() + 2 74 | x = tf.random.normal((batch, audio_features, seq_len_max) if is_channels_first(data_format) else 75 | (batch, seq_len_max, audio_features)) 76 | x_len = tf.convert_to_tensor(seq_len.astype(np.long)) 77 | 78 | y, y_len = net(x, x_len) 79 | assert (y.shape.as_list()[0] == batch) 80 | if is_channels_first(data_format): 81 | assert (y.shape.as_list()[1] == classes) 82 | assert (y.shape.as_list()[2] in [seq_len_max // 2, seq_len_max // 2 + 1]) 83 | else: 84 | assert (y.shape.as_list()[1] in [seq_len_max // 2, seq_len_max // 2 + 1]) 85 | assert (y.shape.as_list()[2] == classes) 86 | 87 | weight_count = sum([np.prod(K.get_value(w).shape) for w in net.trainable_weights]) 88 | print("m={}, {}".format(model.__name__, weight_count)) 89 | assert (model != jasperdr10x5_en or weight_count == 332632349) 90 | assert (model != jasperdr10x5_en_nr or weight_count == 332632349) 91 | 92 | 93 | if __name__ == "__main__": 94 | _test() 95 | -------------------------------------------------------------------------------- /tensorflow2/tf2cv/models/mobilenetb.py: -------------------------------------------------------------------------------- 1 | """ 2 | MobileNet(B) with simplified depthwise separable convolution block for ImageNet-1K, implemented in TensorFlow. 3 | Original paper: 'MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications,' 4 | https://arxiv.org/abs/1704.04861. 
5 | """ 6 | 7 | __all__ = ['mobilenetb_w1', 'mobilenetb_w3d4', 'mobilenetb_wd2', 'mobilenetb_wd4'] 8 | 9 | from .mobilenet import get_mobilenet 10 | 11 | 12 | def mobilenetb_w1(**kwargs): 13 | """ 14 | 1.0 MobileNet(B)-224 model with simplified depthwise separable convolution block from 'MobileNets: Efficient 15 | Convolutional Neural Networks for Mobile Vision Applications,' https://arxiv.org/abs/1704.04861. 16 | 17 | Parameters 18 | ---------- 19 | pretrained : bool, default False 20 | Whether to load the pretrained weights for model. 21 | root : str, default '~/.tensorflow/models' 22 | Location for keeping the model parameters. 23 | """ 24 | return get_mobilenet(width_scale=1.0, dws_simplified=True, model_name="mobilenetb_w1", **kwargs) 25 | 26 | 27 | def mobilenetb_w3d4(**kwargs): 28 | """ 29 | 0.75 MobileNet(B)-224 model with simplified depthwise separable convolution block from 'MobileNets: Efficient 30 | Convolutional Neural Networks for Mobile Vision Applications,' https://arxiv.org/abs/1704.04861. 31 | 32 | Parameters 33 | ---------- 34 | pretrained : bool, default False 35 | Whether to load the pretrained weights for model. 36 | root : str, default '~/.tensorflow/models' 37 | Location for keeping the model parameters. 38 | """ 39 | return get_mobilenet(width_scale=0.75, dws_simplified=True, model_name="mobilenetb_w3d4", **kwargs) 40 | 41 | 42 | def mobilenetb_wd2(**kwargs): 43 | """ 44 | 0.5 MobileNet(B)-224 model with simplified depthwise separable convolution block from 'MobileNets: Efficient 45 | Convolutional Neural Networks for Mobile Vision Applications,' https://arxiv.org/abs/1704.04861. 46 | 47 | Parameters 48 | ---------- 49 | pretrained : bool, default False 50 | Whether to load the pretrained weights for model. 51 | root : str, default '~/.tensorflow/models' 52 | Location for keeping the model parameters. 53 | """ 54 | return get_mobilenet(width_scale=0.5, dws_simplified=True, model_name="mobilenetb_wd2", **kwargs) 55 | 56 | 57 | def mobilenetb_wd4(**kwargs): 58 | """ 59 | 0.25 MobileNet(B)-224 model with simplified depthwise separable convolution block from 'MobileNets: Efficient 60 | Convolutional Neural Networks for Mobile Vision Applications,' https://arxiv.org/abs/1704.04861. 61 | 62 | Parameters 63 | ---------- 64 | pretrained : bool, default False 65 | Whether to load the pretrained weights for model. 66 | root : str, default '~/.tensorflow/models' 67 | Location for keeping the model parameters. 
68 | """ 69 | return get_mobilenet(width_scale=0.25, dws_simplified=True, model_name="mobilenetb_wd4", **kwargs) 70 | 71 | 72 | def _test(): 73 | import numpy as np 74 | import tensorflow as tf 75 | import tensorflow.keras.backend as K 76 | 77 | pretrained = False 78 | 79 | models = [ 80 | mobilenetb_w1, 81 | mobilenetb_w3d4, 82 | mobilenetb_wd2, 83 | mobilenetb_wd4, 84 | ] 85 | 86 | for model in models: 87 | 88 | net = model(pretrained=pretrained) 89 | 90 | batch = 14 91 | x = tf.random.normal((batch, 224, 224, 3)) 92 | y = net(x) 93 | assert (tuple(y.shape.as_list()) == (batch, 1000)) 94 | 95 | weight_count = sum([np.prod(K.get_value(w).shape) for w in net.trainable_weights]) 96 | print("m={}, {}".format(model.__name__, weight_count)) 97 | assert (model != mobilenetb_w1 or weight_count == 4222056) 98 | assert (model != mobilenetb_w3d4 or weight_count == 2578120) 99 | assert (model != mobilenetb_wd2 or weight_count == 1326632) 100 | assert (model != mobilenetb_wd4 or weight_count == 467592) 101 | 102 | 103 | if __name__ == "__main__": 104 | _test() 105 | -------------------------------------------------------------------------------- /tensorflow2/tf2cv/models/zfnet.py: -------------------------------------------------------------------------------- 1 | """ 2 | ZFNet for ImageNet-1K, implemented in TensorFlow. 3 | Original paper: 'Visualizing and Understanding Convolutional Networks,' https://arxiv.org/abs/1311.2901. 4 | """ 5 | 6 | __all__ = ['zfnet', 'zfnetb'] 7 | 8 | import os 9 | import tensorflow as tf 10 | from .alexnet import AlexNet 11 | 12 | 13 | def get_zfnet(version="a", 14 | model_name=None, 15 | pretrained=False, 16 | root=os.path.join("~", ".tensorflow", "models"), 17 | **kwargs): 18 | """ 19 | Create ZFNet model with specific parameters. 20 | 21 | Parameters 22 | ---------- 23 | version : str, default 'a' 24 | Version of ZFNet ('a' or 'b'). 25 | model_name : str or None, default None 26 | Model name for loading pretrained model. 27 | pretrained : bool, default False 28 | Whether to load the pretrained weights for model. 29 | root : str, default '~/.tensorflow/models' 30 | Location for keeping the model parameters. 
31 | """ 32 | if version == "a": 33 | channels = [[96], [256], [384, 384, 256]] 34 | kernel_sizes = [[7], [5], [3, 3, 3]] 35 | strides = [[2], [2], [1, 1, 1]] 36 | paddings = [[1], [0], [1, 1, 1]] 37 | use_lrn = True 38 | elif version == "b": 39 | channels = [[96], [256], [512, 1024, 512]] 40 | kernel_sizes = [[7], [5], [3, 3, 3]] 41 | strides = [[2], [2], [1, 1, 1]] 42 | paddings = [[1], [0], [1, 1, 1]] 43 | use_lrn = True 44 | else: 45 | raise ValueError("Unsupported ZFNet version {}".format(version)) 46 | 47 | net = AlexNet( 48 | channels=channels, 49 | kernel_sizes=kernel_sizes, 50 | strides=strides, 51 | paddings=paddings, 52 | use_lrn=use_lrn, 53 | **kwargs) 54 | 55 | if pretrained: 56 | if (model_name is None) or (not model_name): 57 | raise ValueError("Parameter `model_name` should be properly initialized for loading pretrained model.") 58 | from .model_store import get_model_file 59 | in_channels = kwargs["in_channels"] if ("in_channels" in kwargs) else 3 60 | input_shape = (1,) + (in_channels,) + net.in_size if net.data_format == "channels_first" else\ 61 | (1,) + net.in_size + (in_channels,) 62 | net.build(input_shape=input_shape) 63 | net.load_weights( 64 | filepath=get_model_file( 65 | model_name=model_name, 66 | local_model_store_dir_path=root)) 67 | 68 | return net 69 | 70 | 71 | def zfnet(**kwargs): 72 | """ 73 | ZFNet model from 'Visualizing and Understanding Convolutional Networks,' https://arxiv.org/abs/1311.2901. 74 | 75 | Parameters 76 | ---------- 77 | pretrained : bool, default False 78 | Whether to load the pretrained weights for model. 79 | root : str, default '~/.tensorflow/models' 80 | Location for keeping the model parameters. 81 | """ 82 | return get_zfnet(model_name="zfnet", **kwargs) 83 | 84 | 85 | def zfnetb(**kwargs): 86 | """ 87 | ZFNet-b model from 'Visualizing and Understanding Convolutional Networks,' https://arxiv.org/abs/1311.2901. 88 | 89 | Parameters 90 | ---------- 91 | pretrained : bool, default False 92 | Whether to load the pretrained weights for model. 93 | root : str, default '~/.tensorflow/models' 94 | Location for keeping the model parameters. 
95 | """ 96 | return get_zfnet(version="b", model_name="zfnetb", **kwargs) 97 | 98 | 99 | def _test(): 100 | import numpy as np 101 | import tensorflow.keras.backend as K 102 | 103 | pretrained = False 104 | 105 | models = [ 106 | zfnet, 107 | zfnetb, 108 | ] 109 | 110 | for model in models: 111 | 112 | net = model(pretrained=pretrained) 113 | 114 | batch = 14 115 | x = tf.random.normal((batch, 224, 224, 3)) 116 | y = net(x) 117 | assert (tuple(y.shape.as_list()) == (batch, 1000)) 118 | 119 | weight_count = sum([np.prod(K.get_value(w).shape) for w in net.trainable_weights]) 120 | print("m={}, {}".format(model.__name__, weight_count)) 121 | assert (model != zfnet or weight_count == 62357608) 122 | assert (model != zfnetb or weight_count == 107627624) 123 | 124 | 125 | if __name__ == "__main__": 126 | _test() 127 | -------------------------------------------------------------------------------- /tensorflow_/LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Oleg Sémery 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 
--------------------------------------------------------------------------------
/tensorflow_/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/tensorflow_/__init__.py
--------------------------------------------------------------------------------
/tensorflow_/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | license_file = LICENSE.txt
3 | 
4 | [bdist_wheel]
5 | universal=1
--------------------------------------------------------------------------------
/tensorflow_/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | from os import path
3 | from io import open
4 | 
5 | here = path.abspath(path.dirname(__file__))
6 | with open(path.join(here, 'README.md'), encoding='utf-8') as f:
7 |     long_description = f.read()
8 | 
9 | setup(
10 |     name='tensorflowcv',
11 |     version='0.0.38',
12 |     description='Image classification models for TensorFlow',
13 |     license='MIT',
14 |     long_description=long_description,
15 |     long_description_content_type='text/markdown',
16 |     url='https://github.com/osmr/imgclsmob',
17 |     author='Oleg Sémery',
18 |     author_email='osemery@gmail.com',
19 |     classifiers=[
20 |         'Development Status :: 3 - Alpha',
21 |         'Intended Audience :: Science/Research',
22 |         'License :: OSI Approved :: MIT License',
23 |         'Operating System :: OS Independent',
24 |         'Programming Language :: Python',
25 |         'Topic :: Scientific/Engineering :: Image Recognition',
26 |     ],
27 |     keywords='machine-learning deep-learning neuralnetwork image-classification tensorflow imagenet vgg resnet resnext '
28 |              'senet densenet darknet squeezenet squeezenext shufflenet menet mobilenet igcv3 mnasnet',
29 |     packages=find_packages(exclude=['others', '*.others', 'others.*', '*.others.*']),
30 |     include_package_data=True,
31 |     install_requires=['numpy', 'requests'],
32 | )
33 | 
--------------------------------------------------------------------------------
/tensorflow_/tensorflowcv/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/tensorflow_/tensorflowcv/__init__.py
--------------------------------------------------------------------------------
/tensorflow_/tensorflowcv/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/tensorflow_/tensorflowcv/models/__init__.py
--------------------------------------------------------------------------------
/tensorflow_/tensorflowcv/models/others/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/tensorflow_/tensorflowcv/models/others/__init__.py
--------------------------------------------------------------------------------
/tensorflow_/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | 
4 | from .tensorflowcv.model_provider import get_model
5 | from .tensorflowcv.models.common import is_channels_first
6 | 
7 | 
8 | def save_model_params(sess,
9 |                       file_path):
10 |     # assert file_path.endswith('.npz')
11 |     param_dict = {v.name: v.eval(sess) for v in tf.global_variables()}
12 |     np.savez_compressed(file_path, **param_dict)
13 | 
14 | 
15 | def load_model_params(net,
16 |                       param_dict,
17 |                       sess,
18 |                       ignore_missing=False):
19 |     for param_name, param_data in param_dict.items():
20 |         with tf.variable_scope(param_name, reuse=True):
21 |             try:
22 |                 var = tf.get_variable(param_name)
23 |                 sess.run(var.assign(param_data))
24 |             except ValueError:
25 |                 if not ignore_missing:
26 |                     raise
27 | 
28 | 
29 | def prepare_model(model_name,
30 |                   use_pretrained,
31 |                   pretrained_model_file_path):
32 |     data_format = "channels_first"
33 |     kwargs = {"pretrained": use_pretrained, "data_format": data_format}
34 | 
35 |     net = get_model(model_name, **kwargs)
36 |     input_image_size = net.in_size[0] if hasattr(net, 'in_size') else 224
37 | 
38 |     x_shape = (None, 3, input_image_size, input_image_size) if is_channels_first(data_format) else\
39 |         (None, input_image_size, input_image_size, 3)
40 |     x = tf.placeholder(
41 |         dtype=tf.float32,
42 |         shape=x_shape,
43 |         name='xx')
44 |     y_net = net(x)
45 | 
46 |     if use_pretrained or pretrained_model_file_path:
47 |         from .tensorflowcv.model_provider import init_variables_from_state_dict
48 |         with tf.Session() as sess:
49 |             from .tensorflowcv.model_provider import load_state_dict
50 |             if pretrained_model_file_path:
51 |                 init_variables_from_state_dict(
52 |                     sess=sess,
53 |                     state_dict=load_state_dict(file_path=pretrained_model_file_path))
54 |             else:
55 |                 init_variables_from_state_dict(sess=sess, state_dict=net.state_dict)
56 | 
57 |     return y_net
58 | 
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osmr/imgclsmob/c1a844dde061f6d72f25011dc1af9a2e35b1bdfc/tests/__init__.py
--------------------------------------------------------------------------------
/tests/convert_gl2pt_batchnorm.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import mxnet as mx
3 | import torch
4 | from torch.autograd import Variable
5 | 
6 | LENGTH = 64
7 | 
8 | 
9 | class GluonModel(mx.gluon.HybridBlock):
10 | 
11 |     def __init__(self,
12 |                  **kwargs):
13 |         super(GluonModel, self).__init__(**kwargs)
14 | 
15 |         with self.name_scope():
16 |             self.bn = mx.gluon.nn.BatchNorm(
17 |                 momentum=0.9,
18 |                 epsilon=1e-5,
19 |                 in_channels=LENGTH,
20 |                 use_global_stats=False)
21 | 
22 |     def hybrid_forward(self, F, x):
23 |         x = self.bn(x)
24 |         return x
25 | 
26 | 
27 | class PytorchModel(torch.nn.Module):
28 | 
29 |     def __init__(self):
30 |         super(PytorchModel, self).__init__()
31 | 
32 |         self.bn = torch.nn.BatchNorm2d(
33 |             num_features=LENGTH,
34 |             eps=1e-5,
35 |             momentum=0.9)
36 | 
37 |     def forward(self, x):
38 |         x = self.bn(x)
39 |         return x
40 | 
41 | 
42 | def main():
43 | 
44 |     success = True
45 |     for i in range(10):
46 |         g = np.random.randn(LENGTH, ).astype(np.float32)
47 |         b = np.random.randn(LENGTH, ).astype(np.float32)
48 |         m = np.random.randn(LENGTH, ).astype(np.float32)
49 |         v = np.random.randn(LENGTH, ).astype(np.float32)
50 |         b = b - b.min() + 1.0
51 |         v = v - v.min() + 1.0
52 | 
53 |         IMG_SIZE = 224
54 |         x = np.random.randn(1, LENGTH, IMG_SIZE, IMG_SIZE).astype(np.float32)
55 | 
56 |         gl_model = GluonModel()
57 | 
58 |         # ctx = mx.cpu()
59 |         ctx = mx.gpu(0)
60 |         gl_params = gl_model._collect_params_with_prefix()
61 |         gl_params['bn.gamma']._load_init(mx.nd.array(g, ctx), ctx)
62 |         gl_params['bn.beta']._load_init(mx.nd.array(b, ctx), ctx)
63 |
gl_params['bn.running_mean']._load_init(mx.nd.array(m, ctx), ctx) 64 | gl_params['bn.running_var']._load_init(mx.nd.array(v, ctx), ctx) 65 | # gl_model.initialize() 66 | 67 | gl_x = mx.nd.array(x, ctx) 68 | gl_y = gl_model(gl_x).asnumpy() 69 | 70 | pt_model = PytorchModel() 71 | pt_model.eval() 72 | 73 | pt_params = pt_model.state_dict() 74 | pt_params['bn.weight'] = torch.from_numpy(g) 75 | pt_params['bn.bias'] = torch.from_numpy(b) 76 | pt_params['bn.running_mean'] = torch.from_numpy(m) 77 | pt_params['bn.running_var'] = torch.from_numpy(v) 78 | pt_model.load_state_dict(pt_params) 79 | 80 | pt_model = pt_model.cuda() 81 | 82 | pt_x = Variable(torch.from_numpy(x)).cuda() 83 | pt_y = pt_model(pt_x).detach().cpu().numpy() 84 | 85 | diff = np.abs(gl_y - pt_y) 86 | dist = np.sum(diff) 87 | if dist > 1e-5: 88 | success = False 89 | print("i={}, dist={}".format(i, dist)) 90 | # print(gl_y) 91 | # print(pt_y) 92 | 93 | if success: 94 | print("All ok.") 95 | 96 | 97 | if __name__ == '__main__': 98 | main() 99 | -------------------------------------------------------------------------------- /tests/convert_gl2pt_conv2d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mxnet as mx 3 | import torch 4 | from torch.autograd import Variable 5 | 6 | 7 | class GluonModel(mx.gluon.HybridBlock): 8 | 9 | def __init__(self, 10 | **kwargs): 11 | super(GluonModel, self).__init__(**kwargs) 12 | 13 | with self.name_scope(): 14 | self.conv = mx.gluon.nn.Conv2D( 15 | channels=64, 16 | kernel_size=7, 17 | strides=2, 18 | padding=3, 19 | use_bias=True, 20 | in_channels=3) 21 | 22 | def hybrid_forward(self, F, x): 23 | x = self.conv(x) 24 | return x 25 | 26 | 27 | class PytorchModel(torch.nn.Module): 28 | 29 | def __init__(self): 30 | super(PytorchModel, self).__init__() 31 | 32 | self.conv = torch.nn.Conv2d( 33 | in_channels=3, 34 | out_channels=64, 35 | kernel_size=7, 36 | stride=2, 37 | padding=3, 38 | bias=True) 39 | 40 | def forward(self, x): 41 | x = self.conv(x) 42 | return x 43 | 44 | 45 | def main(): 46 | 47 | success = True 48 | for i in range(10): 49 | # w = np.random.randint(10, size=(64, 3, 7, 7)).astype(np.float32) 50 | # x = np.random.randint(10, size=(1, 3, 224, 224)).astype(np.float32) 51 | w = np.random.randn(64, 3, 7, 7).astype(np.float32) 52 | b = np.random.randn(64, ).astype(np.float32) 53 | x = np.random.randn(10, 3, 224, 224).astype(np.float32) 54 | 55 | gl_model = GluonModel() 56 | 57 | # ctx = mx.cpu() 58 | ctx = mx.gpu(0) 59 | gl_params = gl_model._collect_params_with_prefix() 60 | gl_params['conv.weight']._load_init(mx.nd.array(w, ctx), ctx) 61 | gl_params['conv.bias']._load_init(mx.nd.array(b, ctx), ctx) 62 | 63 | gl_x = mx.nd.array(x, ctx) 64 | gl_y = gl_model(gl_x).asnumpy() 65 | 66 | pt_model = PytorchModel() 67 | pt_model.eval() 68 | 69 | pt_params = pt_model.state_dict() 70 | pt_params['conv.weight'] = torch.from_numpy(w) 71 | pt_params['conv.bias'] = torch.from_numpy(b) 72 | pt_model.load_state_dict(pt_params) 73 | 74 | pt_model = pt_model.cuda() 75 | 76 | pt_x = Variable(torch.from_numpy(x)).cuda() 77 | pt_y = pt_model(pt_x).detach().cpu().numpy() 78 | 79 | dist = np.sum(np.abs(gl_y - pt_y)) 80 | if dist > 1e-5: 81 | success = False 82 | print("i={}, dist={}".format(i, dist)) 83 | # print(gl_y) 84 | # print(tf_y) 85 | 86 | if success: 87 | print("All ok.") 88 | 89 | 90 | if __name__ == '__main__': 91 | main() 92 | -------------------------------------------------------------------------------- 
/tests/convert_gl2pt_dense.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mxnet as mx 3 | import torch 4 | from torch.autograd import Variable 5 | 6 | 7 | class GluonModel(mx.gluon.HybridBlock): 8 | 9 | def __init__(self, 10 | **kwargs): 11 | super(GluonModel, self).__init__(**kwargs) 12 | 13 | with self.name_scope(): 14 | self.dense = mx.gluon.nn.Dense( 15 | units=1000, 16 | use_bias=False, 17 | in_units=1024) 18 | 19 | def hybrid_forward(self, F, x): 20 | x = self.dense(x) 21 | return x 22 | 23 | 24 | class PytorchModel(torch.nn.Module): 25 | 26 | def __init__(self): 27 | super(PytorchModel, self).__init__() 28 | 29 | self.dense = torch.nn.Linear( 30 | in_features=1024, 31 | out_features=1000, 32 | bias=False) 33 | 34 | def forward(self, x): 35 | x = self.dense(x) 36 | return x 37 | 38 | 39 | def main(): 40 | 41 | success = True 42 | for i in range(10): 43 | w = np.random.randn(1000, 1024).astype(np.float32) 44 | # b = np.random.randn(1000, ).astype(np.float32) 45 | x = np.random.randn(1, 1024).astype(np.float32) 46 | 47 | gl_model = GluonModel() 48 | 49 | # ctx = mx.cpu() 50 | ctx = mx.gpu(0) 51 | gl_params = gl_model._collect_params_with_prefix() 52 | gl_params['dense.weight']._load_init(mx.nd.array(w, ctx), ctx) 53 | # gl_params['dense.bias']._load_init(mx.nd.array(b, ctx), ctx) 54 | 55 | gl_x = mx.nd.array(x, ctx) 56 | gl_y = gl_model(gl_x).asnumpy() 57 | 58 | pt_model = PytorchModel() 59 | pt_model.eval() 60 | 61 | pt_params = pt_model.state_dict() 62 | pt_params['dense.weight'] = torch.from_numpy(w) 63 | # pt_params['dense.bias'] = torch.from_numpy(b) 64 | pt_model.load_state_dict(pt_params) 65 | 66 | pt_model = pt_model.cuda() 67 | 68 | pt_x = Variable(torch.from_numpy(x)).cuda() 69 | pt_y = pt_model(pt_x).detach().cpu().numpy() 70 | 71 | dist = np.sum(np.abs(gl_y - pt_y)) 72 | if dist > 1e-5: 73 | success = False 74 | print("i={}, dist={}".format(i, dist)) 75 | # print(gl_y) 76 | # print(pt_y) 77 | y = np.matmul(w.astype(np.float64), x[0].astype(np.float64)) 78 | # y = np.dot(w, x[0]) 79 | gl_dist = np.sum(np.abs(gl_y - y)) 80 | pt_dist = np.sum(np.abs(pt_y - y)) 81 | print("i={}, gl_dist={}".format(i, gl_dist)) 82 | print("i={}, pt_dist={}".format(i, pt_dist)) 83 | 84 | if success: 85 | print("All ok.") 86 | 87 | 88 | if __name__ == '__main__': 89 | main() 90 | -------------------------------------------------------------------------------- /tests/convert_gl2tf2_conv2d_b.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import tensorflow.keras.layers as nn 4 | 5 | 6 | def is_channels_first(data_format): 7 | """ 8 | Is tested data format channels first. 9 | 10 | Parameters 11 | ---------- 12 | data_format : str, default 'channels_last' 13 | The ordering of the dimensions in tensors. 14 | 15 | Returns 16 | ------- 17 | bool 18 | A flag. 
19 | """ 20 | return data_format == "channels_first" 21 | 22 | 23 | class TF2Model(tf.keras.Model): 24 | 25 | def __init__(self, 26 | data_format="channels_last", 27 | **kwargs): 28 | super(TF2Model, self).__init__(**kwargs) 29 | self.conv = nn.Conv2D( 30 | filters=64, 31 | kernel_size=(7, 7), 32 | strides=1, 33 | padding="same", 34 | data_format=data_format, 35 | dilation_rate=1, 36 | use_bias=False, 37 | name="conv") 38 | 39 | def call(self, x): 40 | x = self.conv(x) 41 | return x 42 | 43 | 44 | def gl_calc(gl_w, x): 45 | import mxnet as mx 46 | 47 | class GluonModel(mx.gluon.HybridBlock): 48 | 49 | def __init__(self, 50 | **kwargs): 51 | super(GluonModel, self).__init__(**kwargs) 52 | 53 | with self.name_scope(): 54 | self.conv = mx.gluon.nn.Conv2D( 55 | channels=64, 56 | kernel_size=(7, 7), 57 | strides=1, 58 | padding=(3, 3), 59 | use_bias=False, 60 | in_channels=3) 61 | 62 | def hybrid_forward(self, F, x): 63 | x = self.conv(x) 64 | return x 65 | 66 | gl_model = GluonModel() 67 | 68 | # ctx = mx.cpu() 69 | ctx = mx.gpu(0) 70 | gl_params = gl_model._collect_params_with_prefix() 71 | # gl_w = np.transpose(tf2_w, axes=(3, 2, 0, 1)) 72 | gl_params['conv.weight']._load_init(mx.nd.array(gl_w, ctx), ctx) 73 | # gl_params['conv.bias']._load_init(mx.nd.array(b, ctx), ctx) 74 | 75 | gl_x = mx.nd.array(x, ctx) 76 | gl_y = gl_model(gl_x).asnumpy() 77 | 78 | return gl_y 79 | 80 | 81 | def main(): 82 | gpus = tf.config.experimental.list_physical_devices("GPU") 83 | if gpus: 84 | for gpu in gpus: 85 | tf.config.experimental.set_memory_growth(gpu, True) 86 | 87 | success = True 88 | for i in range(10): 89 | gl_w = np.random.randn(64, 3, 7, 7).astype(np.float32) 90 | # tf2_w = np.random.randn(7, 7, 3, 64).astype(np.float32) 91 | b = np.random.randn(64, ).astype(np.float32) 92 | x = np.random.randn(10, 3, 224, 256).astype(np.float32) 93 | assert (b is not None) 94 | 95 | data_format = "channels_last" 96 | # data_format = "channels_first" 97 | tf2_use_cuda = True 98 | 99 | if not tf2_use_cuda: 100 | with tf.device("/cpu:0"): 101 | tf2_model = TF2Model(data_format=data_format) 102 | else: 103 | tf2_model = TF2Model(data_format=data_format) 104 | input_shape = (1, 224, 256, 3) if data_format == "channels_last" else (1, 3, 224, 256) 105 | tf2_model.build(input_shape=input_shape) 106 | tf2_params = {v.name: v for v in tf2_model.weights} 107 | # print(tf2_params["conv/kernel:0"].shape) 108 | # tf2_w = np.transpose(gl_w, axes=(2, 3, 1, 0)) 109 | tf2_w = np.transpose(gl_w, axes=(2, 3, 1, 0)) 110 | tf2_params["conv/kernel:0"].assign(tf2_w) 111 | # tf2_params["conv/bias:0"].assign(b) 112 | 113 | tf2_x = x.transpose((0, 2, 3, 1)) if data_format == "channels_last" else x 114 | tf2_x = tf.convert_to_tensor(tf2_x) 115 | tf2_y = tf2_model(tf2_x).numpy() 116 | if data_format == "channels_last": 117 | tf2_y = tf2_y.transpose((0, 3, 1, 2)) 118 | 119 | gl_y = gl_calc(gl_w, x) 120 | 121 | dist = np.sum(np.abs(gl_y - tf2_y)) 122 | if dist > 1e-5: 123 | success = False 124 | print("i={}, dist={}".format(i, dist)) 125 | # print(gl_y) 126 | # print(tf_y) 127 | 128 | if success: 129 | print("All ok.") 130 | 131 | 132 | if __name__ == "__main__": 133 | main() 134 | -------------------------------------------------------------------------------- /tests/convert_gl2tf2_dwconv2d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import tensorflow.keras.layers as nn 4 | 5 | channels = 12 6 | 7 | 8 | def is_channels_first(data_format): 9 | """ 
10 | Is tested data format channels first. 11 | 12 | Parameters 13 | ---------- 14 | data_format : str, default 'channels_last' 15 | The ordering of the dimensions in tensors. 16 | 17 | Returns 18 | ------- 19 | bool 20 | A flag. 21 | """ 22 | return data_format == "channels_first" 23 | 24 | 25 | class TF2Model(tf.keras.Model): 26 | 27 | def __init__(self, 28 | data_format="channels_last", 29 | **kwargs): 30 | super(TF2Model, self).__init__(**kwargs) 31 | self.conv = nn.DepthwiseConv2D( 32 | # filters=channels, 33 | kernel_size=(7, 7), 34 | strides=2, 35 | padding="same", 36 | data_format=data_format, 37 | dilation_rate=1, 38 | use_bias=False, 39 | name="conv") 40 | 41 | def call(self, x): 42 | x = self.conv(x) 43 | return x 44 | 45 | 46 | def gl_calc(gl_w, x): 47 | import mxnet as mx 48 | 49 | class GluonModel(mx.gluon.HybridBlock): 50 | 51 | def __init__(self, 52 | **kwargs): 53 | super(GluonModel, self).__init__(**kwargs) 54 | 55 | with self.name_scope(): 56 | self.conv = mx.gluon.nn.Conv2D( 57 | channels=channels, 58 | kernel_size=(7, 7), 59 | strides=2, 60 | padding=(3, 3), 61 | groups=channels, 62 | use_bias=False, 63 | in_channels=channels) 64 | 65 | def hybrid_forward(self, F, x): 66 | x = self.conv(x) 67 | return x 68 | 69 | gl_model = GluonModel() 70 | 71 | # ctx = mx.cpu() 72 | ctx = mx.gpu(0) 73 | gl_params = gl_model._collect_params_with_prefix() 74 | # gl_w = np.transpose(tf2_w, axes=(3, 2, 0, 1)) 75 | gl_params['conv.weight']._load_init(mx.nd.array(gl_w, ctx), ctx) 76 | # gl_params['conv.bias']._load_init(mx.nd.array(b, ctx), ctx) 77 | 78 | gl_x = mx.nd.array(x, ctx) 79 | gl_y = gl_model(gl_x).asnumpy() 80 | 81 | return gl_y 82 | 83 | 84 | def main(): 85 | gpus = tf.config.experimental.list_physical_devices("GPU") 86 | if gpus: 87 | for gpu in gpus: 88 | tf.config.experimental.set_memory_growth(gpu, True) 89 | 90 | success = True 91 | for i in range(10): 92 | gl_w = np.random.randn(channels, 1, 7, 7).astype(np.float32) 93 | # tf2_w = np.random.randn(7, 7, 1, channels).astype(np.float32) 94 | b = np.random.randn(channels, ).astype(np.float32) 95 | x = np.random.randn(10, channels, 224, 256).astype(np.float32) 96 | assert (b is not None) 97 | 98 | data_format = "channels_last" 99 | # data_format = "channels_first" 100 | tf2_use_cuda = True 101 | 102 | if not tf2_use_cuda: 103 | with tf.device("/cpu:0"): 104 | tf2_model = TF2Model(data_format=data_format) 105 | else: 106 | tf2_model = TF2Model(data_format=data_format) 107 | input_shape = (1, 224, 256, channels) if data_format == "channels_last" else (1, channels, 224, 256) 108 | tf2_model.build(input_shape=input_shape) 109 | tf2_params = {v.name: v for v in tf2_model.weights} 110 | # print(tf2_params["conv/kernel:0"].shape) 111 | # tf2_w = np.transpose(gl_w, axes=(2, 3, 1, 0)) 112 | tf2_w = np.transpose(gl_w, axes=(2, 3, 0, 1)) 113 | tf2_params["conv/depthwise_kernel:0"].assign(tf2_w) 114 | # tf2_params["conv/bias:0"].assign(b) 115 | 116 | tf2_x = x.transpose((0, 2, 3, 1)) if data_format == "channels_last" else x 117 | tf2_x = tf.convert_to_tensor(tf2_x) 118 | tf2_y = tf2_model(tf2_x).numpy() 119 | if data_format == "channels_last": 120 | tf2_y = tf2_y.transpose((0, 3, 1, 2)) 121 | 122 | gl_y = gl_calc(gl_w, x) 123 | 124 | dist = np.sum(np.abs(gl_y - tf2_y)) 125 | if dist > 1e-5: 126 | success = False 127 | print("i={}, dist={}".format(i, dist)) 128 | # print(gl_y) 129 | # print(tf_y) 130 | 131 | if success: 132 | print("All ok.") 133 | 134 | 135 | if __name__ == "__main__": 136 | main() 137 | 
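The TF2 conversion tests above both hinge on the same detail: Gluon and tf.keras store convolution kernels with different axis orders, so a weight tensor must be transposed before assignment. Below is a minimal standalone sketch (not part of the repository; the helper names are illustrative) summarizing the layout conversions exercised in convert_gl2tf2_conv2d_b.py and convert_gl2tf2_dwconv2d.py, using NumPy only.

import numpy as np


def gluon_conv_kernel_to_tf2(w):
    # Gluon Conv2D stores weights as (out_channels, in_channels, kH, kW);
    # tf.keras.layers.Conv2D expects (kH, kW, in_channels, out_channels).
    return np.transpose(w, axes=(2, 3, 1, 0))


def gluon_dwconv_kernel_to_tf2(w):
    # Gluon depthwise Conv2D (groups == channels) stores (channels, 1, kH, kW);
    # tf.keras.layers.DepthwiseConv2D expects (kH, kW, channels, depth_multiplier).
    return np.transpose(w, axes=(2, 3, 0, 1))


w = np.random.randn(64, 3, 7, 7).astype(np.float32)
assert gluon_conv_kernel_to_tf2(w).shape == (7, 7, 3, 64)

wd = np.random.randn(12, 1, 7, 7).astype(np.float32)
assert gluon_dwconv_kernel_to_tf2(wd).shape == (7, 7, 12, 1)

The dense case in convert_gl2tf_dense.py further below is the two-dimensional analogue: Gluon's Dense stores (out_units, in_units) while tf.layers.dense expects (in_units, out_units), hence the single (1, 0) transpose there.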
-------------------------------------------------------------------------------- /tests/convert_gl2tf_avgpool2d.py: -------------------------------------------------------------------------------- 1 | # import math 2 | import numpy as np 3 | import mxnet as mx 4 | import tensorflow as tf 5 | 6 | 7 | class GluonModel(mx.gluon.HybridBlock): 8 | 9 | def __init__(self, 10 | **kwargs): 11 | super(GluonModel, self).__init__(**kwargs) 12 | 13 | with self.name_scope(): 14 | self.pool = mx.gluon.nn.AvgPool2D( 15 | pool_size=2, 16 | strides=2, 17 | padding=0) 18 | 19 | def hybrid_forward(self, F, x): 20 | x = self.pool(x) 21 | return x 22 | 23 | 24 | # def avgpool2d(x, 25 | # pool_size, 26 | # strides, 27 | # padding=0, 28 | # ceil_mode=False, 29 | # name=None): 30 | # """ 31 | # Average pooling operation for two dimensional (spatial) data. 32 | # 33 | # Parameters 34 | # ---------- 35 | # x : Tensor 36 | # Input tensor. 37 | # pool_size : int or tuple(int, int) 38 | # Size of the max pooling windows. 39 | # strides : int or tuple(int, int) 40 | # Strides of the pooling. 41 | # padding : int or tuple(int, int), default 0 42 | # Padding value for convolution layer. 43 | # ceil_mode : bool, default False 44 | # When `True`, will use ceil instead of floor to compute the output shape. 45 | # name : str, default 'conv2d' 46 | # Layer name. 47 | # 48 | # Returns 49 | # ------- 50 | # Tensor 51 | # Resulted tensor. 52 | # """ 53 | # if isinstance(padding, int): 54 | # padding = (padding, padding) 55 | # 56 | # if ceil_mode: 57 | # height = x.shape[2] 58 | # out_height = float(height + 2 * padding[0] - pool_size[0]) / strides[0] + 1.0 59 | # if math.ceil(out_height) > math.floor(out_height): 60 | # padding[0] += 1 61 | # width = x.shape[3] 62 | # out_width = float(width + 2 * padding[1] - pool_size[1]) / strides[1] + 1.0 63 | # if math.ceil(out_width) > math.floor(out_width): 64 | # padding[1] += 1 65 | # 66 | # if (padding[0] > 0) or (padding[1] > 0): 67 | # x = tf.pad(x, [[0, 0], [0, 0], list(padding), list(padding)], mode="REFLECT") 68 | # 69 | # x = tf.layers.average_pooling2d( 70 | # inputs=x, 71 | # pool_size=pool_size, 72 | # strides=strides, 73 | # padding='valid', 74 | # data_format='channels_first', 75 | # name=name) 76 | # return x 77 | 78 | 79 | def tensorflow_model(x): 80 | 81 | x = tf.layers.average_pooling2d( 82 | inputs=x, 83 | pool_size=2, 84 | strides=2, 85 | padding='valid', 86 | data_format='channels_first', 87 | name="pool") 88 | # x = avgpool2d( 89 | # x=x, 90 | # pool_size=2, 91 | # strides=2, 92 | # padding=1, 93 | # ceil_mode=False, 94 | # name="pool") 95 | return x 96 | 97 | 98 | def main(): 99 | 100 | success = True 101 | for i in range(10): 102 | x = np.random.randn(10, 10, 224, 224).astype(np.float32) 103 | 104 | gl_model = GluonModel() 105 | 106 | # ctx = mx.cpu() 107 | ctx = mx.gpu(0) 108 | 109 | gl_x = mx.nd.array(x, ctx) 110 | gl_y = gl_model(gl_x).asnumpy() 111 | 112 | xx = tf.placeholder( 113 | dtype=tf.float32, 114 | shape=(None, 10, 224, 224), 115 | name='xx') 116 | tf_model = tensorflow_model(xx) 117 | with tf.Session() as sess: 118 | tf_y = sess.run(tf_model, feed_dict={xx: x}) 119 | tf.reset_default_graph() 120 | 121 | dist = np.sum(np.abs(gl_y - tf_y)) 122 | if dist > 1e-5: 123 | success = False 124 | print("i={}, dist={}".format(i, dist)) 125 | # print(gl_y) 126 | # print(tf_y) 127 | 128 | if success: 129 | print("All ok.") 130 | 131 | 132 | if __name__ == '__main__': 133 | main() 134 | -------------------------------------------------------------------------------- 
/tests/convert_gl2tf_batchnorm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mxnet as mx 3 | import tensorflow as tf 4 | 5 | LENGTH = 64 6 | 7 | 8 | class GluonModel(mx.gluon.HybridBlock): 9 | 10 | def __init__(self, 11 | **kwargs): 12 | super(GluonModel, self).__init__(**kwargs) 13 | 14 | with self.name_scope(): 15 | self.bn = mx.gluon.nn.BatchNorm( 16 | momentum=0.9, 17 | epsilon=1e-5, 18 | in_channels=LENGTH, 19 | use_global_stats=False) 20 | 21 | def hybrid_forward(self, F, x): 22 | x = self.bn(x) 23 | return x 24 | 25 | 26 | def batchnorm(x, 27 | momentum=0.9, 28 | epsilon=1e-5, 29 | training=False, 30 | name=None): 31 | """ 32 | Batch normalization layer. 33 | 34 | Parameters 35 | ---------- 36 | x : Tensor 37 | Input tensor. 38 | momentum : float, default 0.9 39 | Momentum for the moving average. 40 | epsilon : float, default 1e-5 41 | Small float added to variance to avoid dividing by zero. 42 | training : bool, or a TensorFlow boolean scalar tensor, default False 43 | Whether to return the output in training mode or in inference mode. 44 | name : str, default 'conv2d' 45 | Layer name. 46 | 47 | Returns 48 | ------- 49 | Tensor 50 | Resulted tensor. 51 | """ 52 | x = tf.layers.batch_normalization( 53 | inputs=x, 54 | axis=1, 55 | momentum=momentum, 56 | epsilon=epsilon, 57 | training=training, 58 | name=name) 59 | return x 60 | 61 | 62 | def tensorflow_model(x): 63 | 64 | x = batchnorm( 65 | x=x, 66 | training=False, 67 | name="bn") 68 | return x 69 | 70 | 71 | def main(): 72 | 73 | success = True 74 | for i in range(10): 75 | g = np.random.randn(LENGTH, ).astype(np.float32) 76 | b = np.random.randn(LENGTH, ).astype(np.float32) 77 | m = np.random.randn(LENGTH, ).astype(np.float32) 78 | v = np.random.randn(LENGTH, ).astype(np.float32) 79 | b = b - b.min() + 1.0 80 | v = v - v.min() + 1.0 81 | 82 | IMG_SIZE = 224 83 | x = np.random.randn(10, LENGTH, IMG_SIZE, IMG_SIZE).astype(np.float32) 84 | 85 | gl_model = GluonModel() 86 | 87 | # ctx = mx.cpu() 88 | ctx = mx.gpu(0) 89 | gl_params = gl_model._collect_params_with_prefix() 90 | gl_params['bn.gamma']._load_init(mx.nd.array(g, ctx), ctx) 91 | gl_params['bn.beta']._load_init(mx.nd.array(b, ctx), ctx) 92 | gl_params['bn.running_mean']._load_init(mx.nd.array(m, ctx), ctx) 93 | gl_params['bn.running_var']._load_init(mx.nd.array(v, ctx), ctx) 94 | # gl_model.initialize() 95 | 96 | gl_x = mx.nd.array(x, ctx) 97 | gl_y = gl_model(gl_x).asnumpy() 98 | 99 | xx = tf.placeholder( 100 | dtype=tf.float32, 101 | shape=(None, LENGTH, IMG_SIZE, IMG_SIZE), 102 | name='xx') 103 | tf_model = tensorflow_model(xx) 104 | tf_params = {v.name: v for v in tf.global_variables()} 105 | with tf.Session() as sess: 106 | sess.run(tf_params['bn/gamma:0'].assign(g)) 107 | sess.run(tf_params['bn/beta:0'].assign(b)) 108 | sess.run(tf_params['bn/moving_mean:0'].assign(m)) 109 | sess.run(tf_params['bn/moving_variance:0'].assign(v)) 110 | 111 | tf_y = sess.run(tf_model, feed_dict={xx: x}) 112 | tf.reset_default_graph() 113 | 114 | diff = np.abs(gl_y - tf_y) 115 | dist = np.sum(diff) 116 | if dist > 1e-5: 117 | success = False 118 | print("i={}, dist={}".format(i, dist)) 119 | # print(gl_y) 120 | # print(tf_y) 121 | 122 | if success: 123 | print("All ok.") 124 | 125 | 126 | if __name__ == '__main__': 127 | main() 128 | -------------------------------------------------------------------------------- /tests/convert_gl2tf_dense.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mxnet as mx 3 | import tensorflow as tf 4 | # import tensorflow.contrib.slim as slim 5 | 6 | 7 | class GluonModel(mx.gluon.HybridBlock): 8 | 9 | def __init__(self, 10 | **kwargs): 11 | super(GluonModel, self).__init__(**kwargs) 12 | 13 | with self.name_scope(): 14 | self.dense = mx.gluon.nn.Dense( 15 | units=1000, 16 | use_bias=False, 17 | flatten=True, 18 | in_units=1024) 19 | 20 | def hybrid_forward(self, F, x): 21 | x = self.dense(x) 22 | return x 23 | 24 | 25 | def tensorflow_model(x): 26 | 27 | # x = slim.fully_connected( 28 | # inputs=x, 29 | # num_outputs=1000, 30 | # activation_fn=None, 31 | # scope='dense') 32 | x = tf.layers.dense( 33 | inputs=x, 34 | units=1000, 35 | use_bias=False, 36 | name="dense") 37 | return x 38 | 39 | 40 | def main(): 41 | 42 | success = True 43 | for i in range(10): 44 | # gl_w = np.random.randn(1000, 1024).astype(np.float32) 45 | tf_w = np.random.randn(1024, 1000).astype(np.float32) 46 | # b = np.random.randn(1000, ).astype(np.float32) 47 | x = np.random.randn(1, 1024).astype(np.float32) 48 | 49 | gl_model = GluonModel() 50 | 51 | # ctx = mx.cpu() 52 | ctx = mx.gpu(0) 53 | gl_params = gl_model._collect_params_with_prefix() 54 | gl_w = np.transpose(tf_w, axes=(1, 0)) 55 | gl_params['dense.weight']._load_init(mx.nd.array(gl_w, ctx), ctx) 56 | # gl_params['dense.bias']._load_init(mx.nd.array(b, ctx), ctx) 57 | 58 | gl_x = mx.nd.array(x, ctx) 59 | gl_y = gl_model(gl_x).asnumpy() 60 | 61 | xx = tf.placeholder( 62 | dtype=tf.float32, 63 | shape=(None, 1024), 64 | name='xx') 65 | tf_model = tensorflow_model(xx) 66 | tf_params = {v.name: v for v in tf.global_variables()} 67 | with tf.Session() as sess: 68 | # tf_w = np.transpose(gl_w, axes=(1, 0)) 69 | sess.run(tf_params['dense/kernel:0'].assign(tf_w)) 70 | # sess.run(tf_params['dense/bias:0'].assign(b)) 71 | # sess.run(tf_params['dense/weights:0'].assign(tf_w)) 72 | # sess.run(tf_params['dense/biases:0'].assign(b)) 73 | 74 | tf_y = sess.run(tf_model, feed_dict={xx: x}) 75 | tf.reset_default_graph() 76 | 77 | dist = np.sum(np.abs(gl_y - tf_y)) 78 | if dist > 1e-5: 79 | success = False 80 | print("i={}, dist={}".format(i, dist)) 81 | # print(gl_y) 82 | # print(tf_y) 83 | y = np.matmul(gl_w.astype(np.float64), x[0].astype(np.float64)) 84 | # y = np.dot(w, x[0]) 85 | gl_dist = np.sum(np.abs(gl_y - y)) 86 | tf_dist = np.sum(np.abs(tf_y - y)) 87 | print("i={}, gl_dist={}".format(i, gl_dist)) 88 | print("i={}, tf_dist={}".format(i, tf_dist)) 89 | 90 | if success: 91 | print("All ok.") 92 | 93 | 94 | if __name__ == '__main__': 95 | main() 96 | -------------------------------------------------------------------------------- /tests/convert_gl2tf_maxpool2d.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import mxnet as mx 4 | import tensorflow as tf 5 | 6 | 7 | class GluonModel(mx.gluon.HybridBlock): 8 | 9 | def __init__(self, 10 | **kwargs): 11 | super(GluonModel, self).__init__(**kwargs) 12 | 13 | with self.name_scope(): 14 | self.pool = mx.gluon.nn.MaxPool2D( 15 | pool_size=2, 16 | strides=2, 17 | padding=0) 18 | 19 | def hybrid_forward(self, F, x): 20 | x = self.pool(x) 21 | return x 22 | 23 | 24 | def maxpool2d(x, 25 | pool_size, 26 | strides, 27 | padding=0, 28 | ceil_mode=False, 29 | name=None): 30 | """ 31 | Max pooling operation for two dimensional (spatial) data. 
32 | 
33 |     Parameters
34 |     ----------
35 |     x : Tensor
36 |         Input tensor.
37 |     pool_size : int or tuple(int, int)
38 |         Size of the max pooling windows.
39 |     strides : int or tuple(int, int)
40 |         Strides of the pooling.
41 |     padding : int or tuple(int, int), default 0
42 |         Padding value for pooling layer.
43 |     ceil_mode : bool, default False
44 |         When `True`, will use ceil instead of floor to compute the output shape.
45 |     name : str or None, default None
46 |         Layer name.
47 | 
48 |     Returns
49 |     -------
50 |     Tensor
51 |         Resulted tensor.
52 |     """
53 |     padding = [padding, padding] if isinstance(padding, int) else list(padding)
54 |     pool_size = (pool_size, pool_size) if isinstance(pool_size, int) else pool_size
55 |     strides = (strides, strides) if isinstance(strides, int) else strides
56 |     if ceil_mode:
57 |         height = x.shape[2]
58 |         out_height = float(height + 2 * padding[0] - pool_size[0]) / strides[0] + 1.0
59 |         if math.ceil(out_height) > math.floor(out_height):
60 |             padding[0] += 1
61 |         width = x.shape[3]
62 |         out_width = float(width + 2 * padding[1] - pool_size[1]) / strides[1] + 1.0
63 |         if math.ceil(out_width) > math.floor(out_width):
64 |             padding[1] += 1
65 | 
66 |     if (padding[0] > 0) or (padding[1] > 0):
67 |         x = tf.pad(x, [[0, 0], [0, 0], list(padding), list(padding)], mode="REFLECT")
68 | 
69 |     x = tf.layers.max_pooling2d(
70 |         inputs=x,
71 |         pool_size=pool_size,
72 |         strides=strides,
73 |         padding='valid',
74 |         data_format='channels_first',
75 |         name=name)
76 | 
77 |     # if isinstance(pool_size, int):
78 |     #     pool_size = (pool_size, pool_size)
79 |     # if isinstance(strides, int):
80 |     #     strides = (strides, strides)
81 |     # x = tf.nn.max_pool(
82 |     #     value=x,
83 |     #     ksize=(1, 1) + pool_size,
84 |     #     strides=(1, 1) + strides,
85 |     #     padding='VALID',
86 |     #     data_format='NCHW',
87 |     #     name=name)
88 | 
89 |     return x
90 | 
91 | 
92 | def tensorflow_model(x):
93 | 
94 |     x = maxpool2d(
95 |         x=x,
96 |         pool_size=2,
97 |         strides=2,
98 |         padding=0,
99 |         ceil_mode=False,
100 |         name="pool")
101 |     return x
102 | 
103 | 
104 | def main():
105 | 
106 |     success = True
107 |     for i in range(10):
108 |         x = np.random.randn(10, 10, 224, 224).astype(np.float32)
109 | 
110 |         gl_model = GluonModel()
111 | 
112 |         # ctx = mx.cpu()
113 |         ctx = mx.gpu(0)
114 | 
115 |         gl_x = mx.nd.array(x, ctx)
116 |         gl_y = gl_model(gl_x).asnumpy()
117 | 
118 |         xx = tf.placeholder(
119 |             dtype=tf.float32,
120 |             shape=(None, 10, 224, 224),
121 |             name='xx')
122 |         tf_model = tensorflow_model(xx)
123 |         with tf.Session() as sess:
124 |             tf_y = sess.run(tf_model, feed_dict={xx: x})
125 |         tf.reset_default_graph()
126 | 
127 |         dist = np.sum(np.abs(gl_y - tf_y))
128 |         if dist > 1e-5:
129 |             success = False
130 |             print("i={}, dist={}".format(i, dist))
131 |             # print(gl_y)
132 |             # print(tf_y)
133 | 
134 |     if success:
135 |         print("All ok.")
136 | 
137 | 
138 | if __name__ == '__main__':
139 |     main()
140 | 
--------------------------------------------------------------------------------
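A closing note on the ceil_mode handling in maxpool2d above: Gluon computes the pooled output size as floor (or, with ceil_mode, ceil) of (in + 2 * pad - kernel) / stride + 1, while TensorFlow's 'valid' pooling always floors. Padding one extra pixel on a side whenever ceil would round up therefore makes the floor-based result match, which is what the padding[0] += 1 / padding[1] += 1 branch implements. A minimal standalone sketch of that arithmetic (the function name is illustrative, not part of the repository):

import math


def pool_out_size(in_size, kernel, stride, pad, ceil_mode=False):
    # Gluon-style pooling output size: floor/ceil((in + 2*pad - kernel)/stride) + 1.
    v = (in_size + 2 * pad - kernel) / stride + 1.0
    return math.ceil(v) if ceil_mode else math.floor(v)


# A 3x3 window with stride 2 and no padding on a 224-pixel side:
assert pool_out_size(224, 3, 2, 0, ceil_mode=False) == 111
assert pool_out_size(224, 3, 2, 0, ceil_mode=True) == 112
# One extra padded pixel on one side recovers the ceil_mode size with floor rounding:
assert pool_out_size(224 + 1, 3, 2, 0, ceil_mode=False) == 112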