├── LICENSE ├── README.md ├── doc ├── fastgpu.md └── image │ ├── ecluster_kill.png │ ├── ecluster_ls.png │ ├── ecluster_ls_bert.jpeg │ ├── ecluster_start.png │ ├── ecluster_stop.png │ ├── ecluster_tmux_bert.jpeg │ ├── figure1.png │ ├── figure2.png │ └── figure3.png ├── mxnet ├── faster-rcnn │ ├── README.md │ ├── gluon-cv │ │ ├── README.md │ │ ├── demo_faster_rcnn.py │ │ ├── eval_faster_rcnn.py │ │ ├── mscoco.py │ │ ├── requirements.txt │ │ ├── run-1-8.sh │ │ ├── save-parameter │ │ │ ├── custom_faster_rcnn_fpn_resnet50_v1b_coco_eval_2020_03_31_15_03_27.json │ │ │ └── custom_faster_rcnn_fpn_resnet50_v1b_coco_train.log │ │ ├── train-perseus.sh │ │ └── train_faster_rcnn.py │ └── train_faster_rcnn.py └── insightface │ ├── README.md │ ├── ecluster_ls_insightface.png │ ├── ecluster_tmux_insightface.jpg │ ├── insightface │ ├── .gitignore │ ├── .gitmodules │ ├── LICENSE │ ├── README.md │ └── src │ │ ├── age_iter.py │ │ ├── align │ │ ├── __init__.py │ │ ├── align_celeb.py │ │ ├── align_dataset.py │ │ ├── align_dataset_mtcnn.py │ │ ├── align_dlib.py │ │ ├── align_facescrub.py │ │ ├── align_insight.py │ │ ├── align_lfw.py │ │ ├── align_megaface.py │ │ ├── det1.npy │ │ ├── det2.npy │ │ ├── det3.npy │ │ └── detect_face.py │ │ ├── api │ │ ├── app.py │ │ └── face_model.py │ │ ├── common │ │ ├── __init__.py │ │ ├── face_image.py │ │ ├── face_preprocess.py │ │ └── noise_sgd.py │ │ ├── data.py │ │ ├── data │ │ ├── age_merge.py │ │ ├── agedb2pack.py │ │ ├── agedb2pack2.py │ │ ├── cfp2pack.py │ │ ├── dataset_c2c.py │ │ ├── dataset_clean.py │ │ ├── dataset_info.py │ │ ├── dataset_merge.py │ │ ├── dataset_relabel.py │ │ ├── dir2lst.py │ │ ├── dir2lst_ytf.py │ │ ├── dir2rec.py │ │ ├── face2rec2.py │ │ ├── glint2lst.py │ │ └── lfw2pack.py │ │ ├── eval │ │ ├── do_ver.sh │ │ ├── gen_glint.py │ │ ├── lfw.py │ │ ├── verification.py │ │ ├── ytf.py │ │ └── ytf_badcases.py │ │ ├── image_iter.py │ │ ├── losses │ │ └── center_loss.py │ │ ├── megaface │ │ ├── README.md │ │ ├── facescrub_noises.txt │ │ ├── gen_megaface.py │ │ ├── megaface_noises.txt │ │ └── remove_noises.py │ │ ├── model_parallel │ │ ├── data_split_iter.py │ │ ├── executor_manager.py │ │ └── model.py │ │ ├── requirements.txt │ │ ├── run-mpi-1-8.sh │ │ ├── run-mpi-pdb.sh │ │ ├── symbols │ │ ├── fdensenet.py │ │ ├── fdpn.py │ │ ├── finception_resnet_v2.py │ │ ├── fmobilefacenet.py │ │ ├── fmobilenet.py │ │ ├── fmobilenetv2.py │ │ ├── fnasnet.py │ │ ├── fresnet.py │ │ ├── fxception.py │ │ ├── spherenet.py │ │ └── symbol_utils.py │ │ ├── train-pdb.sh │ │ ├── train-perseus.sh │ │ ├── train.py │ │ ├── train.sh │ │ ├── train_softmax.py │ │ ├── train_triplet.py │ │ ├── triplet_image_iter.py │ │ └── utils │ │ └── benchmark.py │ └── train_insightface.py ├── pytorch ├── GTC │ ├── LossFunction.py │ ├── enhance-data.py │ ├── inference.py │ ├── pre-processing.py │ ├── run-perseus.sh │ ├── train-perseus.sh │ └── train.py ├── README.md ├── fastgpu_script.py └── gtc-demo │ ├── GTC │ ├── LossFunction.py │ ├── enhance-data.py │ ├── inference.py │ ├── pre-processing.py │ ├── run-perseus.sh │ ├── train-perseus.sh │ └── train.py │ ├── README.md │ └── fastgpu.py └── tensorflow ├── bert ├── README.md ├── README_SPOT.md ├── docs │ ├── ecluster_kill.jpg │ ├── ecluster_ls_display.jpg │ └── training_output_log.jpg ├── fastgpu_ls_display.jpg ├── perseus-bert │ ├── CONTRIBUTING.md │ ├── LICENSE │ ├── README.md │ ├── RUN_CLASSIFIER_1CARD_vs_8CARD.md │ ├── __init__.py │ ├── create_pretraining_data.py │ ├── extract_features.py │ ├── fused_layer_norm.py │ ├── modeling.py │ ├── 
modeling_test.py │ ├── multilingual.md │ ├── optimization.py │ ├── optimization_test.py │ ├── predict_client_grpc.py │ ├── predicting_movie_reviews_with_bert_on_tf_hub.ipynb │ ├── requirements.txt │ ├── run_classifier.py │ ├── run_classifier_inference.py │ ├── run_classifier_util.py │ ├── run_classifier_with_tfhub.py │ ├── run_pretraining.py │ ├── run_squad.py │ ├── sample_text.txt │ ├── sentence_segmentation.py │ ├── test.py │ ├── tokenization.py │ └── tokenization_test.py ├── train_news_classifier.py └── training_output_log.jpg └── image_classification ├── README.md ├── command.sh ├── docs ├── ResNet50_batchsize256.png ├── ResNet50_batchsize64.png └── VGG16_batchsize64.png ├── fastgpu_script.py ├── resnet50_bs256_command.sh ├── resnet50_bs64_command.sh ├── rn50_bs256_command.sh ├── rn50_bs64_command.sh ├── scripts └── tf_cnn_benchmarks │ ├── README.md │ ├── all_reduce_benchmark.py │ ├── all_reduce_benchmark_test.py │ ├── allreduce.py │ ├── allreduce_test.py │ ├── batch_allreduce.py │ ├── benchmark_cnn.py │ ├── benchmark_cnn_distributed_test.py │ ├── benchmark_cnn_distributed_test_runner.py │ ├── benchmark_cnn_test.py │ ├── cnn_util.py │ ├── cnn_util_test.py │ ├── coco_metric.py │ ├── constants.py │ ├── convnet_builder.py │ ├── datasets.py │ ├── flags.py │ ├── leading_indicators_test.py │ ├── mlperf.py │ ├── mlperf_test.py │ ├── models │ ├── __init__.py │ ├── alexnet_model.py │ ├── densenet_model.py │ ├── experimental │ │ ├── __init__.py │ │ ├── deepspeech.py │ │ └── official_ncf_model.py │ ├── googlenet_model.py │ ├── inception_model.py │ ├── lenet_model.py │ ├── mobilenet.py │ ├── mobilenet_conv_blocks.py │ ├── mobilenet_test.py │ ├── mobilenet_v2.py │ ├── model.py │ ├── model_config.py │ ├── nasnet_model.py │ ├── nasnet_test.py │ ├── nasnet_utils.py │ ├── official_resnet_model.py │ ├── overfeat_model.py │ ├── resnet_model.py │ ├── resnet_model_test.py │ ├── ssd_model.py │ ├── tf1_only │ │ ├── __init__.py │ │ ├── mobilenet.py │ │ ├── mobilenet_conv_blocks.py │ │ ├── mobilenet_test.py │ │ ├── mobilenet_v2.py │ │ ├── nasnet_model.py │ │ ├── nasnet_test.py │ │ ├── nasnet_utils.py │ │ └── ssd_model.py │ ├── trivial_model.py │ └── vgg_model.py │ ├── platforms │ ├── __init__.py │ ├── default │ │ ├── __init__.py │ │ └── util.py │ └── util.py │ ├── preprocessing.py │ ├── run_tests.py │ ├── ssd_constants.py │ ├── ssd_dataloader.py │ ├── test_data │ ├── __init__.py │ ├── fake_tf_record_data │ │ ├── train-00000-of-00008 │ │ ├── train-00001-of-00008 │ │ ├── train-00002-of-00008 │ │ ├── train-00003-of-00008 │ │ ├── train-00004-of-00008 │ │ ├── train-00005-of-00008 │ │ ├── train-00006-of-00008 │ │ ├── train-00007-of-00008 │ │ ├── validation-00000-of-00002 │ │ └── validation-00001-of-00002 │ ├── images │ │ ├── black_image.jpg │ │ └── white_image.jpg │ └── tfrecord_image_generator.py │ ├── test_util.py │ ├── tf_cnn_benchmarks.py │ ├── variable_mgr.py │ ├── variable_mgr_util.py │ └── variable_mgr_util_test.py └── vgg16_bs64_command.sh /README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | AIACC demos aimed to provide high performance and friendly deployment of end to end AI training and inference tasks using infrastructure of Alibaba Cloud. 3 | 4 | ## FastGPU 5 | 6 | FastGPU is a set of fast one-click deployment tools for artificial intelligence computing built on Alibaba Cloud. 
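For example, every demo in this repository follows the same FastGPU workflow: export your Alibaba Cloud credentials, run the demo's deployment script, then monitor the cloud tasks. A minimal sketch (the deployment script and task names below are the mxnet faster-rcnn demo's; each demo's README lists its own):

```bash
# Credentials of the Alibaba Cloud account that will create the instances
export ALIYUN_ACCESS_KEY_ID=xxxxx
export ALIYUN_ACCESS_KEY_SECRET=xxxxx
export ALIYUN_DEFAULT_REGION=cn-beijing

# Deploy and start a demo (here: mxnet/faster-rcnn)
python train_faster_rcnn.py

# List the created cloud machines and attach to the training console
fastgpu ls
fastgpu tmux task0.perseus-faster-rcnn
```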
7 | [Usage of FastGPU](https://help.aliyun.com/document_detail/203740.html) 8 | [Cloud Shell](https://shell.aliyun.com/?__source=sls.console.aliyun.com#/) 9 | 10 | ## AIACC-Training 11 | The AI training acceleration tool AIACC-Training (formerly Ali-Perseus-Training) is a unified distributed deep learning training acceleration engine launched by Alibaba Cloud. It supports four mainstream distributed training frameworks, namely Tensorflow, Pytorch, MXNet and Caffe. 12 | [AIACC Training 1.5](https://help.aliyun.com/document_detail/198783.html?spm=a2c4g.200965.0.0.197578865QJKJq#section-o8s-hpj-7z8) 13 | [AIACC Training 2.0 - ACSpeed](https://help.aliyun.com/document_detail/462422.html?spm=a2c4g.462031.0.0.3d7c4208xw3dr7) + 14 | [AGSpeed](https://help.aliyun.com/document_detail/468640.html?spm=a2c4g.462422.0.0.599618d6ZeRPmS) 15 | 16 | ## Open Source Solutions 17 | Current solutions: 18 | - pytorch 19 | + gtc-demo 20 | - tensorflow 21 | + bert 22 | + image-classification 23 | - mxnet 24 | + insightface 25 | + faster-rcnn 26 | -------------------------------------------------------------------------------- /doc/image/ecluster_kill.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/doc/image/ecluster_kill.png -------------------------------------------------------------------------------- /doc/image/ecluster_ls.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/doc/image/ecluster_ls.png -------------------------------------------------------------------------------- /doc/image/ecluster_ls_bert.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/doc/image/ecluster_ls_bert.jpeg -------------------------------------------------------------------------------- /doc/image/ecluster_start.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/doc/image/ecluster_start.png -------------------------------------------------------------------------------- /doc/image/ecluster_stop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/doc/image/ecluster_stop.png -------------------------------------------------------------------------------- /doc/image/ecluster_tmux_bert.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/doc/image/ecluster_tmux_bert.jpeg -------------------------------------------------------------------------------- /doc/image/figure1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/doc/image/figure1.png -------------------------------------------------------------------------------- /doc/image/figure2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/doc/image/figure2.png -------------------------------------------------------------------------------- /doc/image/figure3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/doc/image/figure3.png -------------------------------------------------------------------------------- /mxnet/faster-rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Object Detection using Faster R-CNN 2 | 3 | ## Prerequisites 4 | * Aliyun account 5 | * fastgpu package installed. 6 | 7 | ## Training 8 | * 1. Register your Aliyun account using the commands below. 9 | ```Bash 10 | export ALIYUN_ACCESS_KEY_ID=xxxxx 11 | export ALIYUN_ACCESS_KEY_SECRET=xxxxx 12 | export ALIYUN_DEFAULT_REGION=cn-beijing 13 | ``` 14 | 15 | * 2. Run the training job with 16 | ```Bash 17 | python train_faster_rcnn.py 18 | ``` 19 | After the training job is deployed to the cloud, the console displays the following. 20 | ```Bash 21 | Logging to /fastgpu/runs/perseus-faster-rcnn-1 22 | training deploy time is: xxxs. 23 | ``` 24 | 25 | * 3. Use `fastgpu ls` to list the cloud machines. 26 | 27 | * 4. Attach to the running console using `fastgpu tmux task0.perseus-faster-rcnn`. 28 | 29 | ## Time 30 | Deployment takes about 35 min and training takes about 15 min per epoch, so the first epoch finishes roughly 50 min after launch. 31 | 32 | 33 | -------------------------------------------------------------------------------- /mxnet/faster-rcnn/gluon-cv/README.md: -------------------------------------------------------------------------------- 1 | # Faster R-CNN: Towards real-time object detection with region proposal networks. [1] 2 | 3 | [GluonCV Model Zoo](http://gluon-cv.mxnet.io/model_zoo/index.html#object-detection) 4 | 5 | Place these files under: gluon-cv/scripts/detection/faster_rcnn 6 | 7 | - `--amp` Use [Automatic Mixed Precision training](https://mxnet.incubator.apache.org/versions/master/tutorials/amp/amp_tutorial.html), automatically casting to FP16 where safe. 8 | - `--horovod` Use [Horovod](https://github.com/horovod/horovod) for distributed training, with a network-agnostic wrapper for the optimizer, allowing efficient allreduce using OpenMPI and NCCL (an illustrative launch command is sketched after the references). 9 | 10 | 11 | ## References 12 | 1. Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. "Faster R-CNN: Towards real-time object detection with region proposal networks." In IEEE Transactions on Pattern Analysis and Machine Intelligence, 2016. 13 | 2. Ross Girshick. "Fast R-CNN." In Proceedings of the IEEE International Conference on Computer Vision, 2015.
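To make the two flags above concrete, a multi-GPU launch of this script could look roughly like the sketch below. This is an illustrative command only (the exact flag set and launcher are assumptions); in this repo the training is actually driven through `run-1-8.sh` and `train-perseus.sh`, which use the `dist-sync-perseus` kvstore instead of Horovod.

```bash
# Illustrative 8-GPU launch with Horovod and mixed precision (not the demo's own
# entry point; see run-1-8.sh / train-perseus.sh for the perseus-based launch).
mpirun -allow-run-as-root -np 8 -npernode 8 \
    python train_faster_rcnn.py --dataset coco --horovod --amp
```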
14 | -------------------------------------------------------------------------------- /mxnet/faster-rcnn/gluon-cv/demo_faster_rcnn.py: -------------------------------------------------------------------------------- 1 | """Faster RCNN Demo script.""" 2 | import os 3 | import argparse 4 | import mxnet as mx 5 | import gluoncv as gcv 6 | gcv.utils.check_version('0.6.0') 7 | from gluoncv.data.transforms import presets 8 | from matplotlib import pyplot as plt 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser(description='Test with Faster RCNN networks.') 12 | parser.add_argument('--network', type=str, default='faster_rcnn_resnet50_v1b_coco', 13 | help="Faster RCNN full network name") 14 | parser.add_argument('--images', type=str, default='', 15 | help='Test images, use comma to split multiple.') 16 | parser.add_argument('--gpus', type=str, default='', 17 | help='Training with GPUs, you can specify 1,3 for example.') 18 | parser.add_argument('--pretrained', type=str, default='True', 19 | help='Load weights from previously saved parameters. You can specify parameter file name.') 20 | parser.add_argument('--thresh', type=float, default=0.5, 21 | help='Threshold of object score when visualize the bboxes.') 22 | args = parser.parse_args() 23 | return args 24 | 25 | if __name__ == '__main__': 26 | args = parse_args() 27 | # context list 28 | ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()] 29 | ctx = [mx.cpu()] if not ctx else ctx 30 | 31 | # grab some image if not specified 32 | if not args.images.strip(): 33 | gcv.utils.download('https://github.com/dmlc/web-data/blob/master/' + 34 | 'gluoncv/detection/biking.jpg?raw=true', 'biking.jpg') 35 | image_list = ['biking.jpg'] 36 | else: 37 | image_list = [x.strip() for x in args.images.split(',') if x.strip()] 38 | 39 | if args.pretrained.lower() in ['true', '1', 'yes', 't']: 40 | net = gcv.model_zoo.get_model(args.network, pretrained=True) 41 | else: 42 | net = gcv.model_zoo.get_model(args.network, pretrained=False, pretrained_base=False) 43 | net.load_parameters(args.pretrained) 44 | net.set_nms(0.3, 200) 45 | net.collect_params().reset_ctx(ctx = ctx) 46 | 47 | for image in image_list: 48 | ax = None 49 | x, img = presets.rcnn.load_test(image, short=net.short, max_size=net.max_size) 50 | x = x.as_in_context(ctx[0]) 51 | ids, scores, bboxes = [xx[0].asnumpy() for xx in net(x)] 52 | ax = gcv.utils.viz.plot_bbox(img, bboxes, scores, ids, thresh=args.thresh, 53 | class_names=net.classes, ax=ax) 54 | plt.show() 55 | -------------------------------------------------------------------------------- /mxnet/faster-rcnn/gluon-cv/mscoco.py: -------------------------------------------------------------------------------- 1 | """Prepare MS COCO datasets""" 2 | import os 3 | import shutil 4 | import argparse 5 | import zipfile 6 | from gluoncv.utils import download, makedirs 7 | from gluoncv.data.mscoco.utils import try_import_pycocotools 8 | 9 | _TARGET_DIR = os.path.expanduser('~/.mxnet/datasets/coco') 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser( 13 | description='Initialize MS COCO dataset.', 14 | epilog='Example: python mscoco.py --download-dir ~/mscoco', 15 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 16 | parser.add_argument('--download-dir', type=str, default='~/mscoco/', help='dataset directory on disk') 17 | parser.add_argument('--no-download', action='store_true', help='disable automatic download if set') 18 | parser.add_argument('--overwrite', action='store_true', help='overwrite downloaded 
files if set, in case they are corrupted') 19 | args = parser.parse_args() 20 | return args 21 | 22 | def download_coco(path, overwrite=False): 23 | _DOWNLOAD_URLS = [ 24 | ('http://images.cocodataset.org/zips/train2017.zip', 25 | '10ad623668ab00c62c096f0ed636d6aff41faca5'), 26 | ('http://images.cocodataset.org/annotations/annotations_trainval2017.zip', 27 | '8551ee4bb5860311e79dace7e79cb91e432e78b3'), 28 | ('http://images.cocodataset.org/zips/val2017.zip', 29 | '4950dc9d00dbe1c933ee0170f5797584351d2a41'), 30 | # ('http://images.cocodataset.org/annotations/stuff_annotations_trainval2017.zip', 31 | # '46cdcf715b6b4f67e980b529534e79c2edffe084'), 32 | # test2017.zip, for those who want to attend the competition. 33 | # ('http://images.cocodataset.org/zips/test2017.zip', 34 | # '4e443f8a2eca6b1dac8a6c57641b67dd40621a49'), 35 | ] 36 | makedirs(path) 37 | for url, checksum in _DOWNLOAD_URLS: 38 | filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum) 39 | # extract 40 | with zipfile.ZipFile(filename) as zf: 41 | zf.extractall(path=path) 42 | 43 | if __name__ == '__main__': 44 | args = parse_args() 45 | path = os.path.expanduser(args.download_dir) 46 | if not os.path.isdir(path) or not os.path.isdir(os.path.join(path, 'train2017')) \ 47 | or not os.path.isdir(os.path.join(path, 'val2017')) \ 48 | or not os.path.isdir(os.path.join(path, 'annotations')): 49 | if args.no_download: 50 | raise ValueError(('{} is not a valid directory, make sure it is present.' 51 | ' Or you should not disable "--no-download" to grab it'.format(path))) 52 | else: 53 | download_coco(path, overwrite=args.overwrite) 54 | 55 | # make symlink 56 | makedirs(os.path.expanduser('~/.mxnet/datasets')) 57 | if os.path.isdir(_TARGET_DIR): 58 | os.remove(_TARGET_DIR) 59 | os.symlink(path, _TARGET_DIR) 60 | try_import_pycocotools() 61 | -------------------------------------------------------------------------------- /mxnet/faster-rcnn/gluon-cv/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.17 2 | gluoncv==0.7.0 3 | -------------------------------------------------------------------------------- /mxnet/faster-rcnn/gluon-cv/run-1-8.sh: -------------------------------------------------------------------------------- 1 | mpirun -allow-run-as-root -np 8 -npernode 8 ./train-perseus.sh 2 | -------------------------------------------------------------------------------- /mxnet/faster-rcnn/gluon-cv/save-parameter/custom_faster_rcnn_fpn_resnet50_v1b_coco_eval_2020_03_31_15_03_27.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/mxnet/faster-rcnn/gluon-cv/save-parameter/custom_faster_rcnn_fpn_resnet50_v1b_coco_eval_2020_03_31_15_03_27.json -------------------------------------------------------------------------------- /mxnet/faster-rcnn/gluon-cv/train-perseus.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #export MXNET_CPU_WORKER_NTHREADS=8 3 | export MXNET_CUDNN_AUTOTUNE_DEFAULT=0 4 | export MXNET_ENGINE_TYPE=ThreadedEnginePerDevice 5 | 6 | # Disable the openmp tuning, as perseus is using one process 7 | # per GPU, the openmp tuning function assume each process own 8 | # all the CPU resources which cause very long time tuning and 9 | # is harmful for performance actually. 
10 | export MXNET_USE_OPERATOR_TUNING=0 11 | export MXNET_USE_NUM_CORES_OPERATOR_TUNING=1 12 | export OMP_NUM_THREADS=1 13 | 14 | export MXNET_GPU_WORKER_NSTREAMS=1 15 | 16 | # Force perseus to use hybrid allreduce 17 | export PERSEUS_ALLREDUCE_MODE=0 18 | # Set maximum perseus stream count to 6 19 | export PERSEUS_ALLREDUCE_DTYPE=1 20 | export PERSEUS_ALLREDUCE_NANCHECK=0 21 | 22 | #export NCCL_DEBUG=INFO 23 | #export NCCL_P2P_DISABLE=1 24 | 25 | # mxnet profiler 26 | #export MXNET_PROFILER_AUTOSTART=1 27 | 28 | EPOCH=2 29 | BS=2 30 | NORM_LAYER='syncbn' # bn syncbn perseussyncbn 31 | #NORM_LAYER='perseussyncbn' 32 | SAVE_PATH='save-parameter/' 33 | DATASET="coco" 34 | #KVSTORE='nccl' #local device nccl 35 | #KVSTORE='device' 36 | #KVSTORE='dist-device-sync' 37 | KVSTORE='dist-sync-perseus' 38 | #export CUDA_VISIBLE_DEVICES='0,1,2,3,4,5,6,7' 39 | 40 | GPUS='0' # only for nccl 41 | #/root/anaconda/envs/mxnet_1.5.1.post0_cu10.0_py36/bin/python -u ./train_faster_rcnn.py \ 42 | python -u ./train_faster_rcnn.py \ 43 | --epochs $EPOCH \ 44 | --batch-size $BS \ 45 | --kv-store $KVSTORE \ 46 | --save-prefix $SAVE_PATH \ 47 | --dataset $DATASET \ 48 | --gpus $GPUS \ 49 | --norm-layer $NORM_LAYER \ 50 | custom-model 51 | -------------------------------------------------------------------------------- /mxnet/insightface/README.md: -------------------------------------------------------------------------------- 1 | # Face Recognition using InsightFace 2 | 3 | ## Prerequisites 4 | * Aliyun account 5 | * ncluster package installed. 6 | 7 | ## Training 8 | * 1. Register your Aliyun account using the commands below. 9 | ```Bash 10 | export ALIYUN_ACCESS_KEY_ID=xxxxx 11 | export ALIYUN_ACCESS_KEY_SECRET=xxxxx 12 | export ALIYUN_DEFAULT_REGION=cn-beijing 13 | ``` 14 | 15 | * 2. Run the training job with 16 | ```Bash 17 | python train_insightface.py 18 | ``` 19 | After the training job is deployed to the cloud, the console displays the following. 20 | ```Bash 21 | Logging to /ncluster/runs/perseus-insightface-1 22 | training deploy time is: xxxs. 23 | ``` 24 | 25 | * 3. Use `ecluster ls` to list the cloud machines. 26 | ![](./ecluster_ls_insightface.png) 27 | 28 | * 4. Attach to the running console using `ecluster tmux task0.perseus-insightface`. 29 | ![](./ecluster_tmux_insightface.jpg) 30 | 31 | ## Time 32 | Deployment takes about 5 min and training takes about 2600 s per epoch, so one full epoch takes about 45 min in total. 33 | 34 | ## Accuracy 35 | On an instance with 8x V100: ResNet-101, per-GPU batch size 32, embedding size 512, class_num 85164. 36 | With a fixed learning rate, the results are as follows: 37 | - After one epoch, the accuracy is 0.90. 38 | - After three epochs, the accuracy reaches 0.97.
39 | 40 | ## Speedup 41 | Speedup results on instances with 8x V100 (gn6v-c10g1.20xlarge). 42 | Condition: ResNet-101, per-GPU batch size 32, embedding size 512, class_num 2,000,000 (200w) 43 | 44 | | machines-GPUs | per-GPU training speed | GPU memory | per-GPU scaling efficiency | 45 | | ------------- | ---------------------- | ---------- | -------------------------- | 46 | | 1-8 | 150 samples/s | 12429 MiB | 1 | 47 | | 2-16 | 140 samples/s | 10309 MiB | 0.93 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /mxnet/insightface/ecluster_ls_insightface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/mxnet/insightface/ecluster_ls_insightface.png -------------------------------------------------------------------------------- /mxnet/insightface/ecluster_tmux_insightface.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/mxnet/insightface/ecluster_tmux_insightface.jpg -------------------------------------------------------------------------------- /mxnet/insightface/insightface/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | src/logs/ 104 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "alignment/SDUNet"] 2 | path = alignment/SDUNet 3 | url = https://github.com/deepinsight/SDUNet 4 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Jiankang Deng and Jia Guo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/mxnet/insightface/insightface/src/align/__init__.py -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/align/det1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/mxnet/insightface/insightface/src/align/det1.npy -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/align/det2.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/mxnet/insightface/insightface/src/align/det2.npy -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/align/det3.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/mxnet/insightface/insightface/src/align/det3.npy -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/api/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | import face_model 3 | import argparse 4 | import json 5 | import base64 6 | #import requests 7 | import numpy as np 8 | import urllib 9 | import cv2 10 | from flask import Flask, render_template, request, jsonify 11 | 12 | 13 | parser = argparse.ArgumentParser(description='do verification') 14 | # general 15 | parser.add_argument('--image-size', default='112,112', help='') 16 | parser.add_argument('--model', default='../model/softmax,50', help='path to load model.') 17 | parser.add_argument('--gpu', default=0, type=int, help='gpu id') 18 | parser.add_argument('--threshold', default=1.24, type=float, help='ver dist threshold') 19 | args = parser.parse_args() 20 | 21 | model = face_model.FaceModel(args) 22 | 23 | app = Flask(__name__) 24 | 25 | @app.route('/') 26 | def hello_world(): 27 | return 'Hello, This is InsightFace!' 
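# Illustrative usage note (an added sketch, not part of the original script): the
# /ver endpoint defined below expects a JSON body whose 'source' and 'target'
# entries each hold either an image 'url' (an http URL or a local path) or
# base64-encoded image bytes under 'data'. It replies with the similarity score
# formatted like "0.712", or '-1' on any error, e.g.:
#
#   curl -X POST http://127.0.0.1:18080/ver \
#        -d '{"source": {"url": "a.jpg"}, "target": {"url": "b.jpg"}}'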
28 | 29 | def image_resize(image): 30 | m = min(image.shape[0], image.shape[1]) 31 | f = 640.0/m 32 | if f<1.0: 33 | image = cv2.resize(image, (int(image.shape[1]*f), int(image.shape[0]*f))) 34 | return image 35 | 36 | def get_image(data): 37 | image = None 38 | if 'url' in data: 39 | url = data['url'] 40 | if url.startswith('http'): 41 | resp = urllib.urlopen(url) 42 | image = np.asarray(bytearray(resp.read()), dtype="uint8") 43 | image = cv2.imdecode(image, cv2.IMREAD_COLOR) 44 | else: 45 | image = cv2.imread(url, cv2.IMREAD_COLOR) 46 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 47 | image = image_resize(image) 48 | elif 'data' in data: 49 | _bin = data['data'] 50 | if _bin is not None: 51 | if not isinstance(_bin, list): 52 | _bin = base64.b64decode(_bin) 53 | _bin = np.fromstring(_bin, np.uint8) 54 | image = cv2.imdecode(_bin, cv2.IMREAD_COLOR) 55 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 56 | image = image_resize(image) 57 | else: 58 | image = [] 59 | for __bin in _bin: 60 | __bin = base64.b64decode(__bin) 61 | __bin = np.fromstring(__bin, np.uint8) 62 | _image = cv2.imdecode(__bin, cv2.IMREAD_COLOR) 63 | _image = cv2.cvtColor(_image, cv2.COLOR_BGR2RGB) 64 | _image = image_resize(_image) 65 | image.append(_image) 66 | 67 | return image 68 | 69 | @app.route('/ver', methods=['POST']) 70 | def ver(): 71 | try: 72 | data = request.data 73 | values = json.loads(data) 74 | source_image = get_image(values['source']) 75 | if source_image is None: 76 | print('source image is None') 77 | return '-1' 78 | assert not isinstance(source_image, list) 79 | print(source_image.shape) 80 | target_image = get_image(values['target']) 81 | if target_image is None: 82 | print('target image is None') 83 | return '-1' 84 | #print(target_image.shape) 85 | if not isinstance(target_image, list): 86 | target_image = [target_image] 87 | #print('before call') 88 | #ret = model.is_same_id(source_image, target_image) 89 | ret = model.sim(source_image, target_image) 90 | except Exception as ex: 91 | print(ex) 92 | return '-1' 93 | 94 | #return str(int(ret)) 95 | print('sim', ret) 96 | return "%1.3f"%ret 97 | 98 | if __name__ == '__main__': 99 | app.run('0.0.0.0', port=18080, debug=False) 100 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/mxnet/insightface/insightface/src/common/__init__.py -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/common/face_preprocess.py: -------------------------------------------------------------------------------- 1 | 2 | import cv2 3 | import numpy as np 4 | from skimage import transform as trans 5 | 6 | def parse_lst_line(line): 7 | vec = line.strip().split("\t") 8 | assert len(vec)>=3 9 | aligned = int(vec[0]) 10 | image_path = vec[1] 11 | label = int(vec[2]) 12 | bbox = None 13 | landmark = None 14 | #print(vec) 15 | if len(vec)>3: 16 | bbox = np.zeros( (4,), dtype=np.int32) 17 | for i in xrange(3,7): 18 | bbox[i-3] = int(vec[i]) 19 | landmark = None 20 | if len(vec)>7: 21 | _l = [] 22 | for i in xrange(7,17): 23 | _l.append(float(vec[i])) 24 | landmark = np.array(_l).reshape( (2,5) ).T 25 | #print(aligned) 26 | return image_path, label, bbox, landmark, aligned 27 | 28 | 29 | 30 | 31 | def read_image(img_path, **kwargs): 32 | mode = 
kwargs.get('mode', 'rgb') 33 | layout = kwargs.get('layout', 'HWC') 34 | if mode=='gray': 35 | img = cv2.imread(img_path, cv2.CV_LOAD_IMAGE_GRAYSCALE) 36 | else: 37 | img = cv2.imread(img_path, cv2.CV_LOAD_IMAGE_COLOR) 38 | if mode=='rgb': 39 | #print('to rgb') 40 | img = img[...,::-1] 41 | if layout=='CHW': 42 | img = np.transpose(img, (2,0,1)) 43 | return img 44 | 45 | 46 | def preprocess(img, bbox=None, landmark=None, **kwargs): 47 | if isinstance(img, str): 48 | img = read_image(img, **kwargs) 49 | M = None 50 | image_size = [] 51 | str_image_size = kwargs.get('image_size', '') 52 | if len(str_image_size)>0: 53 | image_size = [int(x) for x in str_image_size.split(',')] 54 | if len(image_size)==1: 55 | image_size = [image_size[0], image_size[0]] 56 | assert len(image_size)==2 57 | assert image_size[0]==112 58 | assert image_size[0]==112 or image_size[1]==96 59 | if landmark is not None: 60 | assert len(image_size)==2 61 | src = np.array([ 62 | [30.2946, 51.6963], 63 | [65.5318, 51.5014], 64 | [48.0252, 71.7366], 65 | [33.5493, 92.3655], 66 | [62.7299, 92.2041] ], dtype=np.float32 ) 67 | if image_size[1]==112: 68 | src[:,0] += 8.0 69 | dst = landmark.astype(np.float32) 70 | 71 | tform = trans.SimilarityTransform() 72 | tform.estimate(dst, src) 73 | M = tform.params[0:2,:] 74 | #M = cv2.estimateRigidTransform( dst.reshape(1,5,2), src.reshape(1,5,2), False) 75 | 76 | if M is None: 77 | if bbox is None: #use center crop 78 | det = np.zeros(4, dtype=np.int32) 79 | det[0] = int(img.shape[1]*0.0625) 80 | det[1] = int(img.shape[0]*0.0625) 81 | det[2] = img.shape[1] - det[0] 82 | det[3] = img.shape[0] - det[1] 83 | else: 84 | det = bbox 85 | margin = kwargs.get('margin', 44) 86 | bb = np.zeros(4, dtype=np.int32) 87 | bb[0] = np.maximum(det[0]-margin/2, 0) 88 | bb[1] = np.maximum(det[1]-margin/2, 0) 89 | bb[2] = np.minimum(det[2]+margin/2, img.shape[1]) 90 | bb[3] = np.minimum(det[3]+margin/2, img.shape[0]) 91 | ret = img[bb[1]:bb[3],bb[0]:bb[2],:] 92 | if len(image_size)>0: 93 | ret = cv2.resize(ret, (image_size[1], image_size[0])) 94 | return ret 95 | else: #do align using landmark 96 | assert len(image_size)==2 97 | 98 | #src = src[0:3,:] 99 | #dst = dst[0:3,:] 100 | 101 | 102 | #print(src.shape, dst.shape) 103 | #print(src) 104 | #print(dst) 105 | #print(M) 106 | warped = cv2.warpAffine(img,M,(image_size[1],image_size[0]), borderValue = 0.0) 107 | 108 | #tform3 = trans.ProjectiveTransform() 109 | #tform3.estimate(src, dst) 110 | #warped = trans.warp(img, tform3, output_shape=_shape) 111 | return warped 112 | 113 | 114 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/common/noise_sgd.py: -------------------------------------------------------------------------------- 1 | import mxnet.optimizer as optimizer 2 | from mxnet import ndarray as nd 3 | 4 | class NoiseSGD(optimizer.SGD): 5 | """Noise SGD. 6 | 7 | 8 | This optimizer accepts the same arguments as :class:`.SGD`. 
9 | """ 10 | def __init__(self, scale, **kwargs): 11 | super(NoiseSGD, self).__init__(**kwargs) 12 | print('init noise sgd with', scale) 13 | self.scale = scale 14 | 15 | def update(self, index, weight, grad, state): 16 | assert(isinstance(weight, NDArray)) 17 | assert(isinstance(grad, NDArray)) 18 | self._update_count(index) 19 | lr = self._get_lr(index) 20 | wd = self._get_wd(index) 21 | 22 | grad = grad * self.rescale_grad 23 | if self.clip_gradient is not None: 24 | grad = clip(grad, -self.clip_gradient, self.clip_gradient) 25 | noise = nd.random.normal(scale = self.scale, shape = grad.shape, dtype=grad.dtype, ctx = grad.context) 26 | grad += noise 27 | 28 | if state is not None: 29 | mom = state 30 | mom[:] *= self.momentum 31 | grad += wd * weight 32 | mom[:] += grad 33 | grad[:] += self.momentum * mom 34 | weight[:] += -lr * grad 35 | else: 36 | assert self.momentum == 0.0 37 | weight[:] += -lr * (grad + wd * weight) 38 | 39 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/data/age_merge.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | import os 5 | import sys 6 | import mxnet as mx 7 | from mxnet import ndarray as nd 8 | import random 9 | import argparse 10 | import cv2 11 | import time 12 | import sklearn 13 | import numpy as np 14 | 15 | 16 | 17 | def main(args): 18 | if not os.path.exists(args.output): 19 | os.makedirs(args.output) 20 | train_writer = mx.recordio.MXIndexedRecordIO(os.path.join(args.output, 'train.idx'), os.path.join(args.output, 'train.rec'), 'w') 21 | val_writer = mx.recordio.MXIndexedRecordIO(os.path.join(args.output, 'val.idx'), os.path.join(args.output, 'val.rec'), 'w') 22 | train_widx = [0] 23 | val_widx = [0] 24 | stat = [0,0] 25 | #for ds in ['ms1m', 'megaage', 'imdb']: 26 | for ds in ['megaage', 'imdb']: 27 | for n in ['train', 'val']: 28 | #if ds=='ms1m' or ds=='imdb': 29 | # continue 30 | repeat = 1 31 | if args.mode=='age': 32 | if args.lite: 33 | if ds!='megaage': 34 | continue 35 | if n=='val' and ds!='megaage': 36 | continue 37 | if n=='train' and ds=='megaage': 38 | if args.lite==0: 39 | repeat = 10 40 | if n=='train' and ds=='imdb': 41 | repeat = 1 42 | elif args.mode=='gender': 43 | if ds!='imdb': 44 | continue 45 | else: 46 | if n=='train' and ds=='megaage': 47 | repeat = 10 48 | writer = train_writer 49 | widx = train_widx 50 | if n=='val': 51 | writer = val_writer 52 | widx = val_widx 53 | path = os.path.join(args.input, ds, '%s.rec'%n) 54 | if not os.path.exists(path): 55 | continue 56 | imgrec = mx.recordio.MXIndexedRecordIO(path[:-3]+'idx', path, 'r') # pylint: disable=redefined-variable-type 57 | if ds=='ms1m': 58 | s = imgrec.read_idx(0) 59 | header, _ = mx.recordio.unpack(s) 60 | assert header.flag>0 61 | print('header0 label', header.label) 62 | header0 = (int(header.label[0]), int(header.label[1])) 63 | #assert(header.flag==1) 64 | imgidx = range(1, int(header.label[0])) 65 | else: 66 | imgidx = list(imgrec.keys) 67 | for idx in imgidx: 68 | if ds=='megaage' and idx==0: 69 | continue 70 | print('info', ds, n, idx) 71 | s = imgrec.read_idx(idx) 72 | _header, _content = mx.recordio.unpack(s) 73 | stat[0]+=1 74 | try: 75 | img = mx.image.imdecode(_content) 76 | except: 77 | stat[1]+=1 78 | print('error', ds, n, idx) 79 | continue 80 | #print(img.shape) 81 | if ds=='ms1m': 82 | nlabel = [_header.label] 83 | 
nlabel += [-1]*101 84 | elif ds=='megaage': 85 | #nlabel = [-1, -1] 86 | nlabel = [] 87 | age_label = [0]*100 88 | age = int(_header.label[0]) 89 | if age>100 or age<0: 90 | continue 91 | age = max(0, min(100, age)) 92 | #print('age', age) 93 | 94 | for a in xrange(0, age): 95 | age_label[a] = 1 96 | nlabel += age_label 97 | elif ds=='imdb': 98 | gender = int(_header.label[1]) 99 | if args.mode=='gender': 100 | nlabel = [gender] 101 | else: 102 | age_label = [0]*100 103 | age = int(_header.label[0]) 104 | age = max(0, min(100, age)) 105 | for a in xrange(0, age): 106 | age_label[a] = 1 107 | nlabel = age_label 108 | #nlabel += age_label 109 | for r in xrange(repeat): 110 | nheader = mx.recordio.IRHeader(0, nlabel, widx[0], 0) 111 | s = mx.recordio.pack(nheader, _content) 112 | writer.write_idx(widx[0], s) 113 | widx[0]+=1 114 | print('stat', stat) 115 | 116 | if __name__ == '__main__': 117 | parser = argparse.ArgumentParser(description='do dataset merge') 118 | # general 119 | parser.add_argument('--input', default='', type=str, help='') 120 | parser.add_argument('--output', default='', type=str, help='') 121 | parser.add_argument('--mode', default='age', type=str, help='') 122 | parser.add_argument('--lite', default=1, type=int, help='') 123 | args = parser.parse_args() 124 | main(args) 125 | 126 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/data/agedb2pack2.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | #import mxnet as mx 5 | #from mxnet import ndarray as nd 6 | import argparse 7 | import cv2 8 | import pickle 9 | import numpy as np 10 | import sys 11 | from scipy import misc 12 | import os 13 | import tensorflow as tf 14 | from scipy.io import loadmat 15 | sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'align')) 16 | #sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common')) 17 | import detect_face 18 | import face_image 19 | import face_preprocess 20 | #import lfw 21 | 22 | def to_rgb(img): 23 | w, h = img.shape 24 | ret = np.empty((w, h, 3), dtype=np.uint8) 25 | ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img 26 | return ret 27 | 28 | 29 | def IOU(Reframe,GTframe): 30 | x1 = Reframe[0]; 31 | y1 = Reframe[1]; 32 | width1 = Reframe[2]-Reframe[0]; 33 | height1 = Reframe[3]-Reframe[1]; 34 | 35 | x2 = GTframe[0] 36 | y2 = GTframe[1] 37 | width2 = GTframe[2]-GTframe[0] 38 | height2 = GTframe[3]-GTframe[1] 39 | 40 | endx = max(x1+width1,x2+width2) 41 | startx = min(x1,x2) 42 | width = width1+width2-(endx-startx) 43 | 44 | endy = max(y1+height1,y2+height2) 45 | starty = min(y1,y2) 46 | height = height1+height2-(endy-starty) 47 | 48 | if width <=0 or height <= 0: 49 | ratio = 0 50 | else: 51 | Area = width*height 52 | Area1 = width1*height1 53 | Area2 = width2*height2 54 | ratio = Area*1./(Area1+Area2-Area) 55 | return ratio 56 | 57 | parser = argparse.ArgumentParser(description='Package AgeDB images') 58 | # general 59 | parser.add_argument('--data-dir', default='', help='') 60 | parser.add_argument('--image-size', type=str, default='112,96', help='') 61 | parser.add_argument('--output', default='./', help='path to save.') 62 | args = parser.parse_args() 63 | 64 | 65 | for part in [ ('04_FINAL_protocol_30_years.mat', 'agedb_30') ]: 66 | mat_file = os.path.join(args.data_dir, part[0]) 67 | mat_data = loadmat(mat_file) 68 | 
print(mat_data.__class__) 69 | data = mat_data['splits'] 70 | 71 | bins = [] 72 | issame_list = [] 73 | nrof = [0, 0, 0] 74 | print('processing', part[1]) 75 | pp = 0 76 | for i in xrange(data.shape[0]): 77 | split = data[i][0][0][0][0] 78 | print(split.shape) 79 | for c in xrange(split.shape[1]): 80 | last_name = '' 81 | for r in xrange(split.shape[0]): 82 | pp+=1 83 | if pp%10==0: 84 | print('processing', pp, nrof) 85 | item = split[r][c][0][0] 86 | path = str(item[0][0]) 87 | vec = path.split('_') 88 | assert len(vec)>=5 89 | name = vec[0] 90 | if r==1: 91 | issame = False 92 | if name==last_name: 93 | issame = True 94 | #print(issame) 95 | issame_list.append(issame) 96 | last_name = name 97 | age = int(item[1]) 98 | #print(path, age) 99 | #sys.exit(0) 100 | img_path = os.path.join(args.data_dir, '03_Protocol_Images', path+".jpg") 101 | #print(img_path) 102 | img = misc.imread(img_path) 103 | if img.ndim == 2: 104 | img = to_rgb(img) 105 | assert img.ndim==3 106 | assert img.shape[2]==3 107 | #img = img[:,:,0:3] 108 | all_landmark = np.zeros( (68,2), dtype=np.float32) 109 | pts_file = img_path[0:-3]+"pts" 110 | pp=0 111 | 112 | for line in open(pts_file, 'r'): 113 | pp+=1 114 | pointid = pp-3 115 | if pointid<1 or pointid>68: 116 | continue 117 | point = [float(x) for x in line.strip().split()] 118 | assert len(point)==2 119 | point = np.array(point).reshape( (1,2) ) 120 | #print(pointid) 121 | all_landmark[pointid-1,:] = point 122 | 123 | 124 | _landmark = np.zeros( (5,2), dtype=np.float32) 125 | _landmark[0,:] = (all_landmark[36,:]+all_landmark[39,:])/2 126 | _landmark[1,:] = (all_landmark[42,:]+all_landmark[45,:])/2 127 | _landmark[2,:] = all_landmark[33,:] 128 | _landmark[3,:] = all_landmark[48,:] 129 | _landmark[4,:] = all_landmark[54,:] 130 | _bbox = None 131 | warped = face_preprocess.preprocess(img, bbox=_bbox, landmark = _landmark, image_size=args.image_size) 132 | warped = warped[...,::-1] #to bgr 133 | _, s = cv2.imencode('.jpg', warped) 134 | bins.append(s) 135 | print(nrof) 136 | outname = os.path.join(args.output, part[1]+'.bin') 137 | with open(outname, 'wb') as f: 138 | pickle.dump((bins, issame_list), f, protocol=pickle.HIGHEST_PROTOCOL) 139 | 140 | 141 | 142 | 143 | 144 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/data/dataset_c2c.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | import os 5 | import sys 6 | import mxnet as mx 7 | from mxnet import ndarray as nd 8 | import random 9 | import argparse 10 | import cv2 11 | import time 12 | import sklearn 13 | from sklearn.decomposition import PCA 14 | from easydict import EasyDict as edict 15 | from sklearn.cluster import DBSCAN 16 | import numpy as np 17 | 18 | sys.path.append(os.path.join(os.path.dirname(__file__),'..', 'common')) 19 | import face_image 20 | 21 | 22 | def main(args): 23 | ctx = [] 24 | cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip() 25 | if len(cvd)>0: 26 | for i in xrange(len(cvd.split(','))): 27 | ctx.append(mx.gpu(i)) 28 | if len(ctx)==0: 29 | ctx = [mx.cpu()] 30 | print('use cpu') 31 | else: 32 | print('gpu num:', len(ctx)) 33 | ctx_num = len(ctx) 34 | path_imgrec = os.path.join(args.input, 'train.rec') 35 | path_imgidx = os.path.join(args.input, 'train.idx') 36 | imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r') # pylint: 
disable=redefined-variable-type 37 | outf = open(os.path.join(args.input, 'c2c'), 'w') 38 | s = imgrec.read_idx(0) 39 | header, _ = mx.recordio.unpack(s) 40 | assert header.flag>0 41 | print('header0 label', header.label) 42 | header0 = (int(header.label[0]), int(header.label[1])) 43 | #assert(header.flag==1) 44 | imgidx = range(1, int(header.label[0])) 45 | id2range = {} 46 | seq_identity = range(int(header.label[0]), int(header.label[1])) 47 | for identity in seq_identity: 48 | s = imgrec.read_idx(identity) 49 | header, _ = mx.recordio.unpack(s) 50 | id2range[identity] = (int(header.label[0]), int(header.label[1])) 51 | print('id2range', len(id2range)) 52 | prop = face_image.load_property(args.input) 53 | image_size = prop.image_size 54 | print('image_size', image_size) 55 | vec = args.model.split(',') 56 | prefix = vec[0] 57 | epoch = int(vec[1]) 58 | print('loading',prefix, epoch) 59 | model = mx.mod.Module.load(prefix, epoch, context = ctx) 60 | model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))]) 61 | nrof_images = 0 62 | nrof_removed = 0 63 | idx = 1 64 | id2label = {} 65 | pp = 0 66 | for _id, v in id2range.iteritems(): 67 | pp+=1 68 | if pp%100==0: 69 | print('processing id', pp) 70 | _list = range(*v) 71 | ocontents = [] 72 | for i in xrange(len(_list)): 73 | _idx = _list[i] 74 | #print('_idx', _id, _idx) 75 | s = imgrec.read_idx(_idx) 76 | ocontents.append(s) 77 | #continue 78 | embeddings = None 79 | headers = [None]*len(ocontents) 80 | #print(len(ocontents)) 81 | ba = 0 82 | while True: 83 | bb = min(ba+args.batch_size, len(ocontents)) 84 | if ba>=bb: 85 | break 86 | _batch_size = bb-ba 87 | _batch_size2 = max(_batch_size, ctx_num) 88 | data = nd.zeros( (_batch_size2,3, image_size[0], image_size[1]) ) 89 | label = nd.zeros( (_batch_size2,) ) 90 | count = bb-ba 91 | ii=0 92 | for i in xrange(ba, bb): 93 | header, img = mx.recordio.unpack(ocontents[i]) 94 | headers[i] = header 95 | img = mx.image.imdecode(img) 96 | img = nd.transpose(img, axes=(2, 0, 1)) 97 | data[ii][:] = img 98 | label[ii][:] = header.label[0] 99 | ii+=1 100 | while ii<_batch_size2: 101 | data[ii][:] = data[0][:] 102 | label[ii][:] = label[0][:] 103 | ii+=1 104 | db = mx.io.DataBatch(data=(data,), label=(label,)) 105 | model.forward(db, is_train=False) 106 | net_out = model.get_outputs() 107 | net_out = net_out[0].asnumpy() 108 | if embeddings is None: 109 | embeddings = np.zeros( (len(ocontents), net_out.shape[1])) 110 | embeddings[ba:bb,:] = net_out[0:_batch_size,:] 111 | ba = bb 112 | embeddings = sklearn.preprocessing.normalize(embeddings) 113 | emb_mean = np.mean(embeddings, axis=0, keepdims=True) 114 | emb_mean = sklearn.preprocessing.normalize(emb_mean) 115 | sim = np.dot(embeddings, emb_mean.T) 116 | #print(sim.shape) 117 | sims = sim.flatten() 118 | assert len(_list)==len(sims) 119 | assert len(_list)==len(ocontents) 120 | for i in xrange(len(ocontents)): 121 | _sim = sims[i] 122 | _idx = _list[i] 123 | _header = headers[i] 124 | #TODO 125 | outf.write("%d,%f,%d\n"%(_idx, _sim, int(_header.label[1]))) 126 | outf.close() 127 | 128 | if __name__ == '__main__': 129 | parser = argparse.ArgumentParser(description='') 130 | # general 131 | parser.add_argument('--input', default='', type=str, help='') 132 | parser.add_argument('--model', default='../model/softmax,50', help='path to load model.') 133 | parser.add_argument('--batch-size', default=32, type=int, help='') 134 | args = parser.parse_args() 135 | main(args) 136 | 
137 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/data/dataset_info.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | import os 5 | import sys 6 | import mxnet as mx 7 | from mxnet import ndarray as nd 8 | import random 9 | import argparse 10 | import cv2 11 | import time 12 | import sklearn 13 | from sklearn.decomposition import PCA 14 | from easydict import EasyDict as edict 15 | from sklearn.cluster import DBSCAN 16 | import numpy as np 17 | 18 | sys.path.append(os.path.join(os.path.dirname(__file__),'..', 'common')) 19 | import face_image 20 | 21 | 22 | def main(args): 23 | path_imgrec = os.path.join(args.input, 'train.rec') 24 | path_imgidx = os.path.join(args.input, 'train.idx') 25 | imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r') # pylint: disable=redefined-variable-type 26 | s = imgrec.read_idx(0) 27 | header, _ = mx.recordio.unpack(s) 28 | assert header.flag>0 29 | print('header0 label', header.label) 30 | header0 = (int(header.label[0]), int(header.label[1])) 31 | print('identities', header0[1]-header0[0]) 32 | print('images', header0[0]) 33 | 34 | if __name__ == '__main__': 35 | parser = argparse.ArgumentParser(description='') 36 | # general 37 | parser.add_argument('--input', default='', type=str, help='') 38 | args = parser.parse_args() 39 | main(args) 40 | 41 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/data/dataset_relabel.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | import os 5 | import sys 6 | import mxnet as mx 7 | from mxnet import ndarray as nd 8 | import random 9 | import argparse 10 | import cv2 11 | import time 12 | import sklearn 13 | from sklearn.decomposition import PCA 14 | from easydict import EasyDict as edict 15 | from sklearn.cluster import DBSCAN 16 | import numpy as np 17 | 18 | sys.path.append(os.path.join(os.path.dirname(__file__),'..', 'common')) 19 | import face_image 20 | 21 | def main(args): 22 | include_datasets = args.include.split(',') 23 | rec_list = [] 24 | for ds in include_datasets: 25 | path_imgrec = os.path.join(ds, 'train.rec') 26 | path_imgidx = os.path.join(ds, 'train.idx') 27 | imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r') # pylint: disable=redefined-variable-type 28 | rec_list.append(imgrec) 29 | if not os.path.exists(args.output): 30 | os.makedirs(args.output) 31 | writer = mx.recordio.MXIndexedRecordIO(os.path.join(args.output, 'train.idx'), os.path.join(args.output, 'train.rec'), 'w') 32 | for ds_id in xrange(len(rec_list)): 33 | id_list = [] 34 | imgrec = rec_list[ds_id] 35 | s = imgrec.read_idx(0) 36 | writer.write_idx(0, s) 37 | header, _ = mx.recordio.unpack(s) 38 | assert header.flag>0 39 | print('header0 label', header.label) 40 | header0 = (int(header.label[0]), int(header.label[1])) 41 | seq_identity = range(int(header.label[0]), int(header.label[1])) 42 | pp=0 43 | nlabel = -1 44 | for identity in seq_identity: 45 | pp+=1 46 | if pp%10==0: 47 | print('processing id', pp) 48 | s = imgrec.read_idx(identity) 49 | writer.write_idx(identity, s) 50 | header, _ = mx.recordio.unpack(s) 51 | nlabel+=1 52 | for _idx in 
xrange(int(header.label[0]), int(header.label[1])): 53 | s = imgrec.read_idx(_idx) 54 | _header, _content = mx.recordio.unpack(s) 55 | nheader = mx.recordio.IRHeader(0, nlabel, _idx, 0) 56 | s = mx.recordio.pack(nheader, _content) 57 | writer.write_idx(_idx, s) 58 | 59 | print('max label', nlabel) 60 | 61 | 62 | 63 | if __name__ == '__main__': 64 | parser = argparse.ArgumentParser(description='do dataset merge') 65 | # general 66 | parser.add_argument('--include', default='', type=str, help='') 67 | parser.add_argument('--output', default='', type=str, help='') 68 | args = parser.parse_args() 69 | main(args) 70 | 71 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/data/dir2lst.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | import os 4 | import face_image 5 | 6 | input_dir = sys.argv[1] 7 | 8 | dataset = face_image.get_dataset_common(input_dir, 2) 9 | 10 | for item in dataset: 11 | print("%d\t%s\t%d" % (1, item.image_path, int(item.classname))) 12 | 13 | 14 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/data/dir2lst_ytf.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | from easydict import EasyDict as edict 4 | 5 | input_dir = '/raid5data/dplearn/YTF/aligned_images_DB' 6 | ret = [] 7 | label = 0 8 | person_names = [] 9 | for person_name in os.listdir(input_dir): 10 | person_names.append(person_name) 11 | person_names = sorted(person_names) 12 | for person_name in person_names: 13 | _subdir = os.path.join(input_dir, person_name) 14 | if not os.path.isdir(_subdir): 15 | continue 16 | for _subdir2 in os.listdir(_subdir): 17 | _subdir2 = os.path.join(_subdir, _subdir2) 18 | if not os.path.isdir(_subdir2): 19 | continue 20 | _ret = [] 21 | for img in os.listdir(_subdir2): 22 | fimage = edict() 23 | fimage.id = os.path.join(_subdir2, img) 24 | fimage.classname = str(label) 25 | fimage.image_path = os.path.join(_subdir2, img) 26 | fimage.bbox = None 27 | fimage.landmark = None 28 | _ret.append(fimage) 29 | ret += _ret 30 | label+=1 31 | for item in ret: 32 | print("%d\t%s\t%d" % (1, item.image_path, int(item.classname))) 33 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/data/glint2lst.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | import os 4 | import numpy as np 5 | 6 | input_dir = sys.argv[1] 7 | targets = sys.argv[2] 8 | targets = targets.strip().split(',') 9 | lmap = {} 10 | 11 | for ds in targets: 12 | #image_dir = os.path.join(input_dir, ds) 13 | lmk_file = os.path.join(input_dir, "%s_lmk"%(ds)) 14 | if not os.path.exists(lmk_file): 15 | lmk_file = os.path.join(input_dir, "%s_lmk.txt"%(ds)) 16 | if not os.path.exists(lmk_file): 17 | continue 18 | #print(ds) 19 | idx = 0 20 | for line in open(lmk_file, 'r'): 21 | idx+=1 22 | vec = line.strip().split(' ') 23 | assert len(vec)==12 or len(vec)==11 24 | image_file = os.path.join(input_dir, vec[0]) 25 | assert image_file.endswith('.jpg') 26 | vlabel = -1 #test mode 27 | if len(vec)==12: 28 | label = int(vec[1]) 29 | if label in lmap: 30 | vlabel = lmap[label] 31 | else: 32 | vlabel = len(lmap) 33 | lmap[label] = vlabel 34 | lmk = np.array([float(x) for x in vec[2:]], dtype=np.float32) 35 | else: 36 | lmk = np.array([float(x) for x in vec[1:]], 
dtype=np.float32) 37 | lmk = lmk.reshape( (5,2) ).T 38 | lmk_str = "\t".join( [str(x) for x in lmk.flatten()] ) 39 | print("0\t%s\t%d\t0\t0\t0\t0\t%s"%(image_file, vlabel, lmk_str)) 40 | #if idx>10: 41 | # break 42 | 43 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/data/lfw2pack.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | from mxnet import ndarray as nd 3 | import argparse 4 | import pickle 5 | import sys 6 | import os 7 | sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'eval')) 8 | import lfw 9 | 10 | parser = argparse.ArgumentParser(description='Package LFW images') 11 | # general 12 | parser.add_argument('--data-dir', default='', help='') 13 | parser.add_argument('--image-size', type=str, default='112,96', help='') 14 | parser.add_argument('--output', default='', help='path to save.') 15 | args = parser.parse_args() 16 | lfw_dir = args.data_dir 17 | image_size = [int(x) for x in args.image_size.split(',')] 18 | lfw_pairs = lfw.read_pairs(os.path.join(lfw_dir, 'pairs.txt')) 19 | lfw_paths, issame_list = lfw.get_paths(lfw_dir, lfw_pairs, 'jpg') 20 | lfw_bins = [] 21 | #lfw_data = nd.empty((len(lfw_paths), 3, image_size[0], image_size[1])) 22 | i = 0 23 | for path in lfw_paths: 24 | with open(path, 'rb') as fin: 25 | _bin = fin.read() 26 | lfw_bins.append(_bin) 27 | #img = mx.image.imdecode(_bin) 28 | #img = nd.transpose(img, axes=(2, 0, 1)) 29 | #lfw_data[i][:] = img 30 | i+=1 31 | if i%1000==0: 32 | print('loading lfw', i) 33 | 34 | with open(args.output, 'wb') as f: 35 | pickle.dump((lfw_bins, issame_list), f, protocol=pickle.HIGHEST_PROTOCOL) 36 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/eval/do_ver.sh: -------------------------------------------------------------------------------- 1 | 2 | #python -u verification.py --gpu 0 --data-dir /opt/jiaguo/faces_vgg_112x112 --image-size 112,112 --model '../../model/softmax1010d3-r101-p0_0_96_112_0,21|22|32' --target agedb_30 3 | python -u verification.py --gpu 0 --data-dir /opt/jiaguo/faces_normed --image-size 112,96 --model '../../model31/sphere-m51-p0_0_96_112_0,90' --target agedb_30 --batch-size 128 4 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/eval/ytf_badcases.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import sys 6 | import os 7 | import numpy as np 8 | import cv2 9 | 10 | pairs_file = '/raid5data/dplearn/YTF/splits2.txt' 11 | stat = [0,0] 12 | for line in open(pairs_file, 'r'): 13 | line = line.strip() 14 | if line.startswith('split'): 15 | continue 16 | vec = line.split(',') 17 | issame = int(vec[-1]) 18 | if issame: 19 | stat[0]+=1 20 | else: 21 | stat[1]+=1 22 | print('stat', stat) 23 | 24 | image_dir = '/raid5data/dplearn/YTF/images' 25 | 26 | def get_img(name, vid): 27 | input_dir = os.path.join(image_dir, name, str(vid)) 28 | paths = [] 29 | for img in os.listdir(input_dir): 30 | path = os.path.join(input_dir, img) 31 | paths.append(path) 32 | paths = sorted(paths) 33 | parts = 8 34 | assert len(paths)>=parts 35 | gap = len(paths)//parts 36 | img = None 37 | for i in xrange(parts): 38 | idx = gap*i 39 | path = paths[idx] 40 | _img = cv2.imread(path) 41 | 
#print(_img.shape) 42 | if img is None: 43 | img = _img 44 | else: 45 | img = np.concatenate( (img, _img), axis=1) 46 | return img 47 | 48 | 49 | text_color = (153,255,51) 50 | for input in ['ytf_false_positive', 'ytf_false_negative']: 51 | all_img = None 52 | pp = 0 53 | for line in open(input+".log", 'r'): 54 | if line.startswith("\t"): 55 | break 56 | vec = line.strip().split(',') 57 | img1 = get_img(vec[0], int(vec[1])) 58 | img2 = get_img(vec[2], int(vec[3])) 59 | img = np.concatenate( (img1, img2), axis=0) 60 | if all_img is None: 61 | all_img = img 62 | else: 63 | all_img = np.concatenate( (all_img, img), axis=0) 64 | blank_img = np.zeros( (20, 112*8,3), dtype=np.uint8) 65 | blank_img[:,:,:] = 255 66 | font = cv2.FONT_HERSHEY_SIMPLEX 67 | k = "centre-distance:%.3f"%(float(vec[4])) 68 | #print(k) 69 | cv2.putText(blank_img,k,(350,blank_img.shape[0]-4), font, 0.6, text_color, 2) 70 | all_img = np.concatenate( (all_img, blank_img), axis=0) 71 | pp+=1 72 | 73 | filename = os.path.join('badcases', input+".png") 74 | cv2.imwrite(filename, all_img) 75 | 76 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/losses/center_loss.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # MXNET_CPU_WORKER_NTHREADS must be greater than 1 for custom op to work on CPU 4 | #os.environ['MXNET_CPU_WORKER_NTHREADS'] = '2' 5 | import mxnet as mx 6 | 7 | 8 | # define metric of accuracy 9 | class Accuracy(mx.metric.EvalMetric): 10 | def __init__(self, num=None): 11 | super(Accuracy, self).__init__('accuracy', num) 12 | 13 | def update(self, labels, preds): 14 | mx.metric.check_label_shapes(labels, preds) 15 | 16 | if self.num is not None: 17 | assert len(labels) == self.num 18 | 19 | pred_label = mx.nd.argmax_channel(preds[0]).asnumpy().astype('int32') 20 | label = labels[0].asnumpy().astype('int32') 21 | 22 | mx.metric.check_label_shapes(label, pred_label) 23 | 24 | self.sum_metric += (pred_label.flat == label.flat).sum() 25 | self.num_inst += len(pred_label.flat) 26 | 27 | 28 | # define some metric of center_loss 29 | class CenterLossMetric(mx.metric.EvalMetric): 30 | def __init__(self): 31 | super(CenterLossMetric, self).__init__('center_loss') 32 | 33 | def update(self, labels, preds): 34 | self.sum_metric += preds[1].asnumpy()[0] 35 | self.num_inst += 1 36 | 37 | 38 | # see details: 39 | # 40 | class CenterLoss(mx.operator.CustomOp): 41 | def __init__(self, ctx, shapes, dtypes, num_class, alpha, scale=1.0): 42 | if not len(shapes[0]) == 2: 43 | raise ValueError('dim for input_data shoudl be 2 for CenterLoss') 44 | 45 | self.alpha = alpha 46 | self.batch_size = shapes[0][0] 47 | self.num_class = num_class 48 | self.scale = scale 49 | 50 | def forward(self, is_train, req, in_data, out_data, aux): 51 | labels = in_data[1].asnumpy() 52 | diff = aux[0] 53 | center = aux[1] 54 | 55 | # store x_i - c_yi 56 | for i in range(self.batch_size): 57 | diff[i] = in_data[0][i] - center[int(labels[i])] 58 | 59 | loss = mx.nd.sum(mx.nd.square(diff)) / self.batch_size / 2 60 | self.assign(out_data[0], req[0], loss) 61 | 62 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 63 | diff = aux[0] 64 | center = aux[1] 65 | sum_ = aux[2] 66 | 67 | # back grad is just scale * ( x_i - c_yi) 68 | grad_scale = float(self.scale/self.batch_size) 69 | self.assign(in_grad[0], req[0], diff * grad_scale) 70 | 71 | # update the center 72 | labels = in_data[1].asnumpy() 73 | label_occur = dict() 74 | 
for i, label in enumerate(labels): 75 | label_occur.setdefault(int(label), []).append(i) 76 | 77 | for label, sample_index in label_occur.items(): 78 | sum_[:] = 0 79 | for i in sample_index: 80 | sum_ = sum_ + diff[i] 81 | delta_c = sum_ / (1 + len(sample_index)) 82 | center[label] += self.alpha * delta_c 83 | 84 | 85 | @mx.operator.register("centerloss") 86 | class CenterLossProp(mx.operator.CustomOpProp): 87 | def __init__(self, num_class, alpha, scale=1.0, batchsize=64): 88 | super(CenterLossProp, self).__init__(need_top_grad=False) 89 | 90 | # convert it to numbers 91 | self.num_class = int(num_class) 92 | self.alpha = float(alpha) 93 | self.scale = float(scale) 94 | self.batchsize = int(batchsize) 95 | 96 | def list_arguments(self): 97 | return ['data', 'label'] 98 | 99 | def list_outputs(self): 100 | return ['output'] 101 | 102 | def list_auxiliary_states(self): 103 | # call them 'bias' for zero initialization 104 | return ['diff_bias', 'center_bias', 'sum_bias'] 105 | 106 | def infer_shape(self, in_shape): 107 | data_shape = in_shape[0] 108 | label_shape = (in_shape[0][0],) 109 | 110 | # store diff , same shape as input batch 111 | diff_shape = [self.batchsize, data_shape[1]] 112 | 113 | # store the center of each class , should be ( num_class, d ) 114 | center_shape = [self.num_class, diff_shape[1]] 115 | 116 | # computation buf 117 | sum_shape = [diff_shape[1],] 118 | 119 | output_shape = [1, ] 120 | return [data_shape, label_shape], [output_shape], [diff_shape, center_shape, sum_shape] 121 | 122 | def create_operator(self, ctx, shapes, dtypes): 123 | return CenterLoss(ctx, shapes, dtypes, self.num_class, self.alpha, self.scale) 124 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/megaface/README.md: -------------------------------------------------------------------------------- 1 | [2018.12.26] Now you can take a look at the new MegaFace testing tool at ``https://github.com/deepinsight/insightface/tree/master/Evaluation/Megaface``. It is easier to use. 2 | 3 | Please strictly follow these rules if you want to use our MegaFace noises list. 4 | 5 | * Please cite our paper and git repo if you want to use this list in your paper. 6 | * Please include the information like `We used the noises list proposed by InsightFace, at https://github.com/deepinsight/insightface` if you want to submit the result to MegaFace challenge. 7 | * To be fair, if you want to submit a MegaFace result, please ensure there are no training-set overlaps with FaceScrub identities. You can do this by removing every identity from your training set whose cosine similarity with any FaceScrub identity is larger than 0.4, comparing their centre feature vectors (see the sketch below). 8 | * If you find more overlap noise, please open an issue at InsightFace.
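A minimal sketch of the overlap check described above (this is only an illustration, not part of the InsightFace tooling; the embedding dictionaries, helper names, and the 0.4 default are assumptions):

```python
import numpy as np

def centre_feature(embeddings):
    """Mean embedding of one identity, L2-normalised."""
    c = np.mean(np.asarray(embeddings, dtype=np.float32), axis=0)
    return c / np.linalg.norm(c)

def overlapping_identities(train_embs, facescrub_embs, threshold=0.4):
    """Training identities whose centre feature has cosine similarity > threshold
    with the centre feature of any FaceScrub identity; both arguments map an
    identity name to a list of embedding vectors."""
    fs_centres = np.stack([centre_feature(v) for v in facescrub_embs.values()])
    overlaps = []
    for name, embs in train_embs.items():
        c = centre_feature(embs)
        # dot product of unit vectors == cosine similarity
        if float(np.max(fs_centres @ c)) > threshold:
            overlaps.append(name)
    return overlaps
```

Identities returned by `overlapping_identities` would then be dropped from the training set before it is packed into `train.rec`.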
9 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.16 2 | matplotlib==2.2.2 3 | networkx==1.8.1 4 | dask==0.18.2 5 | scikit-image==0.12.3 6 | easydict==1.9 7 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/run-mpi-1-8.sh: -------------------------------------------------------------------------------- 1 | mpirun -allow-run-as-root -np 8 -npernode 8 ./train-perseus.sh 2 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/run-mpi-pdb.sh: -------------------------------------------------------------------------------- 1 | mpirun -allow-run-as-root -np 8 -npernode 8 ./train-pdb.sh 2 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/symbols/fmobilefacenet.py: -------------------------------------------------------------------------------- 1 | 2 | import mxnet as mx 3 | import symbol_utils 4 | 5 | bn_mom = 0.9 6 | #bn_mom = 0.9997 7 | 8 | def Act(data, act_type, name): 9 | #ignore param act_type, set it in this function 10 | body = mx.sym.LeakyReLU(data = data, act_type='prelu', name = name) 11 | #body = mx.sym.Activation(data=data, act_type='relu', name=name) 12 | return body 13 | 14 | def Conv(data, num_filter=1, kernel=(1, 1), stride=(1, 1), pad=(0, 0), num_group=1, name=None, suffix=''): 15 | conv = mx.sym.Convolution(data=data, num_filter=num_filter, kernel=kernel, num_group=num_group, stride=stride, pad=pad, no_bias=True, name='%s%s_conv2d' %(name, suffix)) 16 | bn = mx.sym.BatchNorm(data=conv, name='%s%s_batchnorm' %(name, suffix), fix_gamma=False,momentum=bn_mom) 17 | act = Act(data=bn, act_type='relu', name='%s%s_relu' %(name, suffix)) 18 | return act 19 | 20 | def Linear(data, num_filter=1, kernel=(1, 1), stride=(1, 1), pad=(0, 0), num_group=1, name=None, suffix=''): 21 | conv = mx.sym.Convolution(data=data, num_filter=num_filter, kernel=kernel, num_group=num_group, stride=stride, pad=pad, no_bias=True, name='%s%s_conv2d' %(name, suffix)) 22 | bn = mx.sym.BatchNorm(data=conv, name='%s%s_batchnorm' %(name, suffix), fix_gamma=False,momentum=bn_mom) 23 | return bn 24 | 25 | def ConvOnly(data, num_filter=1, kernel=(1, 1), stride=(1, 1), pad=(0, 0), num_group=1, name=None, suffix=''): 26 | conv = mx.sym.Convolution(data=data, num_filter=num_filter, kernel=kernel, num_group=num_group, stride=stride, pad=pad, no_bias=True, name='%s%s_conv2d' %(name, suffix)) 27 | return conv 28 | 29 | 30 | def DResidual(data, num_out=1, kernel=(3, 3), stride=(2, 2), pad=(1, 1), num_group=1, name=None, suffix=''): 31 | conv = Conv(data=data, num_filter=num_group, kernel=(1, 1), pad=(0, 0), stride=(1, 1), name='%s%s_conv_sep' %(name, suffix)) 32 | conv_dw = Conv(data=conv, num_filter=num_group, num_group=num_group, kernel=kernel, pad=pad, stride=stride, name='%s%s_conv_dw' %(name, suffix)) 33 | proj = Linear(data=conv_dw, num_filter=num_out, kernel=(1, 1), pad=(0, 0), stride=(1, 1), name='%s%s_conv_proj' %(name, suffix)) 34 | return proj 35 | 36 | def Residual(data, num_block=1, num_out=1, kernel=(3, 3), stride=(1, 1), pad=(1, 1), num_group=1, name=None, suffix=''): 37 | identity=data 38 | for i in range(num_block): 39 | shortcut=identity 40 | conv=DResidual(data=identity, num_out=num_out, kernel=kernel, stride=stride, pad=pad, 
num_group=num_group, name='%s%s_block' %(name, suffix), suffix='%d'%i) 41 | identity=conv+shortcut 42 | return identity 43 | 44 | 45 | def get_symbol(num_classes, **kwargs): 46 | global bn_mom 47 | bn_mom = kwargs.get('bn_mom', 0.9) 48 | wd_mult = kwargs.get('wd_mult', 1.) 49 | version_output = kwargs.get('version_output', 'GNAP') 50 | #assert version_output=='GDC' or version_output=='GNAP' 51 | fc_type = version_output 52 | data = mx.symbol.Variable(name="data") 53 | data = data-127.5 54 | data = data*0.0078125 55 | conv_1 = Conv(data, num_filter=64, kernel=(3, 3), pad=(1, 1), stride=(2, 2), name="conv_1") 56 | conv_2_dw = Conv(conv_1, num_group=64, num_filter=64, kernel=(3, 3), pad=(1, 1), stride=(1, 1), name="conv_2_dw") 57 | conv_23 = DResidual(conv_2_dw, num_out=64, kernel=(3, 3), stride=(2, 2), pad=(1, 1), num_group=128, name="dconv_23") 58 | conv_3 = Residual(conv_23, num_block=4, num_out=64, kernel=(3, 3), stride=(1, 1), pad=(1, 1), num_group=128, name="res_3") 59 | conv_34 = DResidual(conv_3, num_out=128, kernel=(3, 3), stride=(2, 2), pad=(1, 1), num_group=256, name="dconv_34") 60 | conv_4 = Residual(conv_34, num_block=6, num_out=128, kernel=(3, 3), stride=(1, 1), pad=(1, 1), num_group=256, name="res_4") 61 | conv_45 = DResidual(conv_4, num_out=128, kernel=(3, 3), stride=(2, 2), pad=(1, 1), num_group=512, name="dconv_45") 62 | conv_5 = Residual(conv_45, num_block=2, num_out=128, kernel=(3, 3), stride=(1, 1), pad=(1, 1), num_group=256, name="res_5") 63 | conv_6_sep = Conv(conv_5, num_filter=512, kernel=(1, 1), pad=(0, 0), stride=(1, 1), name="conv_6sep") 64 | 65 | fc1 = symbol_utils.get_fc1(conv_6_sep, num_classes, fc_type) 66 | return fc1 67 | 68 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/symbols/fmobilenetv2.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import mxnet.ndarray as nd 3 | import mxnet.gluon as gluon 4 | import mxnet.gluon.nn as nn 5 | import mxnet.autograd as ag 6 | import symbol_utils 7 | 8 | def ConvBlock(channels, kernel_size, strides): 9 | out = nn.HybridSequential() 10 | out.add( 11 | nn.Conv2D(channels, kernel_size, strides=strides, padding=1, use_bias=False), 12 | nn.BatchNorm(scale=True), 13 | nn.Activation('relu') 14 | ) 15 | return out 16 | 17 | def Conv1x1(channels, is_linear=False): 18 | out = nn.HybridSequential() 19 | out.add( 20 | nn.Conv2D(channels, 1, padding=0, use_bias=False), 21 | nn.BatchNorm(scale=True) 22 | ) 23 | if not is_linear: 24 | out.add(nn.Activation('relu')) 25 | return out 26 | 27 | def DWise(channels, stride): 28 | out = nn.HybridSequential() 29 | out.add( 30 | nn.Conv2D(channels, 3, strides=stride, padding=1, groups=channels, use_bias=False), 31 | nn.BatchNorm(scale=True), 32 | nn.Activation('relu') 33 | ) 34 | return out 35 | 36 | class InvertedResidual(nn.HybridBlock): 37 | def __init__(self, t, e, c, s, same_shape=True, **kwargs): 38 | super(InvertedResidual, self).__init__(**kwargs) 39 | self.same_shape = same_shape 40 | self.stride = s 41 | with self.name_scope(): 42 | self.bottleneck = nn.HybridSequential() 43 | self.bottleneck.add( 44 | Conv1x1(e*t), 45 | DWise(e*t, self.stride), 46 | Conv1x1(c, is_linear=True) 47 | ) 48 | if self.stride == 1 and not self.same_shape: 49 | self.conv_res = Conv1x1(c) 50 | def hybrid_forward(self, F, x): 51 | out = self.bottleneck(x) 52 | #if self.stride == 1 and self.same_shape: 53 | # out = F.elemwise_add(out, x) 54 | if self.stride == 1: 55 | if not 
self.same_shape: 56 | x = self.conv_res(x) 57 | out = F.elemwise_add(out, x) 58 | return out 59 | 60 | class MobilenetV2(nn.HybridBlock): 61 | def __init__(self, num_classes=1000, width_mult=1.0, **kwargs): 62 | super(MobilenetV2, self).__init__(**kwargs) 63 | 64 | self.w = width_mult 65 | 66 | self.cn = [int(x*self.w) for x in [32, 16, 24, 32, 64, 96, 160, 320]] 67 | 68 | def InvertedResidualSequence(t, cn_id, n, s): 69 | seq = nn.HybridSequential() 70 | seq.add(InvertedResidual(t, self.cn[cn_id-1], self.cn[cn_id], s, same_shape=False)) 71 | for _ in range(n-1): 72 | seq.add(InvertedResidual(t, self.cn[cn_id-1], self.cn[cn_id], 1)) 73 | return seq 74 | 75 | self.b0 = ConvBlock(self.cn[0], 3, 1) 76 | self.b1 = InvertedResidualSequence(1, 1, 1, 1) 77 | self.b2 = InvertedResidualSequence(6, 2, 2, 2) 78 | self.b3 = InvertedResidualSequence(6, 3, 3, 2) 79 | self.b4 = InvertedResidualSequence(6, 4, 4, 1) 80 | self.b5 = InvertedResidualSequence(6, 5, 3, 2) 81 | self.b6 = InvertedResidualSequence(6, 6, 3, 2) 82 | self.b7 = InvertedResidualSequence(6, 7, 1, 1) 83 | 84 | self.last_channels = int(1280*self.w) if self.w > 1.0 else 1280 85 | with self.name_scope(): 86 | self.features = nn.HybridSequential() 87 | with self.features.name_scope(): 88 | self.features.add(self.b0, self.b1, self.b2, self.b3, self.b4, self.b5, self.b6, self.b7) 89 | self.features.add(Conv1x1(self.last_channels)) 90 | #self.features.add(nn.GlobalAvgPool2D()) 91 | #self.features.add(nn.Flatten()) 92 | #self.output = nn.Dense(num_classes) 93 | def hybrid_forward(self, F, x): 94 | x = self.features(x) 95 | #x = self.output(x) 96 | return x 97 | 98 | def get_symbol(num_classes): 99 | net = MobilenetV2(num_classes, 1) 100 | data = mx.sym.Variable(name='data') 101 | data = data-127.5 102 | data = data*0.0078125 103 | body = net(data) 104 | fc1 = symbol_utils.get_fc1(body, num_classes, 'E') 105 | return fc1 106 | 107 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/symbols/spherenet.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import numpy as np 3 | import math 4 | from mxnet.base import _Null 5 | 6 | def conv_main(data, units, filters, workspace): 7 | body = data 8 | for i in xrange(len(units)): 9 | f = filters[i] 10 | _weight = mx.symbol.Variable("conv%d_%d_weight"%(i+1, 1), init=mx.init.Normal(0.01)) 11 | _bias = mx.symbol.Variable("conv%d_%d_bias"%(i+1, 1), lr_mult=2.0, wd_mult=0.0, init=mx.init.Constant(0.0)) 12 | body = mx.sym.Convolution(data=body, weight = _weight, bias = _bias, num_filter=f, kernel=(3, 3), stride=(2,2), pad=(1, 1), 13 | name= "conv%d_%d"%(i+1, 1), workspace=workspace) 14 | 15 | body = mx.sym.LeakyReLU(data = body, act_type='prelu', name = "relu%d_%d" % (i+1, 1)) 16 | idx = 2 17 | for j in xrange(units[i]): 18 | _weight = mx.symbol.Variable("conv%d_%d_weight"%(i+1, idx), init=mx.init.Normal(0.01)) 19 | _body = mx.sym.Convolution(data=body, weight=_weight, no_bias=True, num_filter=f, kernel=(3, 3), stride=(1,1), pad=(1, 1), 20 | name= "conv%d_%d"%(i+1, idx), workspace=workspace) 21 | 22 | _body = mx.sym.LeakyReLU(data = _body, act_type='prelu', name = "relu%d_%d" % (i+1, idx)) 23 | idx+=1 24 | _weight = mx.symbol.Variable("conv%d_%d_weight"%(i+1, idx), init=mx.init.Normal(0.01)) 25 | _body = mx.sym.Convolution(data=_body, weight=_weight, no_bias=True, num_filter=f, kernel=(3, 3), stride=(1,1), pad=(1, 1), 26 | name= "conv%d_%d"%(i+1, idx), workspace=workspace) 27 | _body = 
mx.sym.LeakyReLU(data = _body, act_type='prelu', name = "relu%d_%d" % (i+1, idx)) 28 | idx+=1 29 | body = body+_body 30 | 31 | return body 32 | 33 | def get_symbol(num_classes, num_layers, conv_workspace=256, **kwargs): 34 | if num_layers==64: 35 | units = [3,8,16,3] 36 | filters = [64,128,256,512] 37 | elif num_layers==20: 38 | units = [1,2,4,1] 39 | filters = [64,128,256,512] 40 | #filters = [64, 256, 512, 1024] 41 | elif num_layers==36: 42 | units = [2,4,8,2] 43 | filters = [64,128,256,512] 44 | #filters = [64, 256, 512, 1024] 45 | elif num_layers==60: 46 | units = [3,8,14,3] 47 | filters = [64,128,256,512] 48 | elif num_layers==104: 49 | units = [3,8,36,3] 50 | filters = [64,128,256,512] 51 | #filters = [64, 256, 512, 1024] 52 | data = mx.symbol.Variable('data') 53 | data = data-127.5 54 | data = data*0.0078125 55 | body = conv_main(data = data, units = units, filters = filters, workspace = conv_workspace) 56 | 57 | _weight = mx.symbol.Variable("fc1_weight", lr_mult=1.0) 58 | _bias = mx.symbol.Variable("fc1_bias", lr_mult=2.0, wd_mult=0.0) 59 | fc1 = mx.sym.FullyConnected(data=body, weight=_weight, bias=_bias, num_hidden=num_classes, name='fc1') 60 | return fc1 61 | 62 | 63 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/train-pdb.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export MXNET_CPU_WORKER_NTHREADS=8 3 | export MXNET_CUDNN_AUTOTUNE_DEFAULT=0 4 | export MXNET_ENGINE_TYPE=ThreadedEnginePerDevice 5 | 6 | # Disable the openmp tuning, as perseus is using one process 7 | # per GPU, the openmp tuning function assume each process own 8 | # all the CPU resources which cause very long time tuning and 9 | # is harmful for performance actually. 
10 | export MXNET_USE_OPERATOR_TUNING=0 11 | export MXNET_USE_NUM_CORES_OPERATOR_TUNING=4 12 | # Force perseus to use hybrid allreduce 13 | export PERSEUS_ALLREDUCE_MODE=1 14 | # Set maximum perseus stream count to 6 15 | export PERSEUS_ALLREDUCE_STREAMS=6 16 | 17 | DATA_DIR=/ncluster/data/faces_ms1m_112x112/ 18 | 19 | NETWORK=r100 20 | JOB=softmax1e3 21 | MODELDIR="../save-results/model-$NETWORK-$JOB" 22 | mkdir -p "$MODELDIR" 23 | PREFIX="$MODELDIR/model" 24 | LOGFILE="$MODELDIR/log" 25 | 26 | KVSTORE='dist-sync-perseus' 27 | #TRAIN_MODE='data-parallel' 28 | TRAIN_MODE='data-and-model-parallel' 29 | TARGET='lfw' 30 | LR=0.02 31 | PER_BATCH_SIZE=32 32 | MARGIN_M=0.0 33 | MARGIN_S=64 34 | EASY_MARGIN=0 35 | EMB_SIZE=512 36 | END_EPOCH=50 37 | NUM_CLASSES=80000 38 | LOCAL_RUN=0 39 | 40 | cmd=CUDA_VISIBLE_DEVICES='0,1,2,3,4,5,6,7' /root/anaconda/envs/mxnet_1.5_cu10.0_py27/bin/python -u ./train.py --data-dir $DATA_DIR --network "$NETWORK" \ 41 | --loss-type 4 \ 42 | --prefix "$PREFIX" \ 43 | --per-batch-size $PER_BATCH_SIZE \ 44 | --kvstore $KVSTORE \ 45 | --train-mode $TRAIN_MODE \ 46 | --target $TARGET \ 47 | --lr $LR \ 48 | --per-batch-size $PER_BATCH_SIZE \ 49 | --margin-m $MARGIN_M \ 50 | --margin-s $MARGIN_S \ 51 | --easy-margin $EASY_MARGIN \ 52 | --emb-size $EMB_SIZE \ 53 | --end-epoch $END_EPOCH \ 54 | --num-classes $NUM_CLASSES \ 55 | --local-run $LOCAL_RUN 56 | # > "$LOGFILE" 2>&1 & 57 | cmd2=CUDA_VISIBLE_DEVICES='0,1,2,3,4,5,6,7' /root/anaconda/envs/mxnet_1.5_cu10.0_py27/bin/python -m pdb -u ./train.py --data-dir $DATA_DIR --network "$NETWORK" \ 58 | --loss-type 4 \ 59 | --prefix "$PREFIX" \ 60 | --per-batch-size $PER_BATCH_SIZE \ 61 | --kvstore $KVSTORE \ 62 | --train-mode $TRAIN_MODE \ 63 | --target $TARGET \ 64 | --lr $LR \ 65 | --per-batch-size $PER_BATCH_SIZE \ 66 | --margin-m $MARGIN_M \ 67 | --margin-s $MARGIN_S \ 68 | --easy-margin $EASY_MARGIN \ 69 | --emb-size $EMB_SIZE \ 70 | --end-epoch $END_EPOCH \ 71 | --num-classes $NUM_CLASSES \ 72 | --local-run $LOCAL_RUN 73 | if [ $OMPI_COMM_WORLD_RANK -eq 0 ] ; then 74 | cmd2 75 | else 76 | echo "common..." 77 | cmd 78 | fi 79 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/train-perseus.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export MXNET_CPU_WORKER_NTHREADS=8 3 | export MXNET_CUDNN_AUTOTUNE_DEFAULT=0 4 | export MXNET_ENGINE_TYPE=ThreadedEnginePerDevice 5 | 6 | # Disable the openmp tuning, as perseus is using one process 7 | # per GPU, the openmp tuning function assume each process own 8 | # all the CPU resources which cause very long time tuning and 9 | # is harmful for performance actually. 
10 | export MXNET_USE_OPERATOR_TUNING=0 11 | export MXNET_USE_NUM_CORES_OPERATOR_TUNING=4 12 | # Force perseus to use hybrid allreduce 13 | export PERSEUS_ALLREDUCE_MODE=1 14 | # Set maximum perseus stream count to 6 15 | export PERSEUS_ALLREDUCE_STREAMS=6 16 | 17 | export PERSEUS_ALLREDUCE_DTYPE=2 18 | export PERSEUS_ALLREDUCE_NANCHECK=1 19 | 20 | DATA_DIR=/root/faces_ms1m_112x112/ 21 | 22 | NETWORK=r101 23 | JOB=softmax1e3 24 | MODELDIR="../save-results/model-$NETWORK-$JOB" 25 | mkdir -p "$MODELDIR" 26 | PREFIX="$MODELDIR/model" 27 | LOGFILE="$MODELDIR/log" 28 | 29 | KVSTORE='dist-sync-perseus' 30 | #TRAIN_MODE='data-parallel' 31 | TRAIN_MODE='data-and-model-parallel' 32 | TARGET='lfw' 33 | LR=0.02 34 | PER_BATCH_SIZE=32 35 | MARGIN_M=0.0 36 | MARGIN_S=64 37 | EASY_MARGIN=0 38 | EMB_SIZE=512 39 | END_EPOCH=50 40 | NUM_CLASSES=85000 41 | PREFETCH=0 42 | 43 | CUDA_VISIBLE_DEVICES='0,1,2,3,4,5,6,7' /root/anaconda/envs/mxnet_1.5.1.post0_cu10.0_py27/bin/python -u ./train.py --data-dir $DATA_DIR --network "$NETWORK" \ 44 | --loss-type 4 \ 45 | --prefix "$PREFIX" \ 46 | --per-batch-size $PER_BATCH_SIZE \ 47 | --kvstore $KVSTORE \ 48 | --train-mode $TRAIN_MODE \ 49 | --target $TARGET \ 50 | --lr $LR \ 51 | --margin-m $MARGIN_M \ 52 | --margin-s $MARGIN_S \ 53 | --easy-margin $EASY_MARGIN \ 54 | --emb-size $EMB_SIZE \ 55 | --end-epoch $END_EPOCH \ 56 | --num-classes $NUM_CLASSES \ 57 | --prefetch $PREFETCH 58 | # > "$LOGFILE" 2>&1 & 59 | 60 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | export MXNET_CPU_WORKER_NTHREADS=8 3 | export MXNET_CUDNN_AUTOTUNE_DEFAULT=0 4 | export MXNET_ENGINE_TYPE=ThreadedEnginePerDevice 5 | 6 | # Disable the openmp tuning, as perseus is using one process 7 | # per GPU, the openmp tuning function assume each process own 8 | # all the CPU resources which cause very long time tuning and 9 | # is harmful for performance actually. 
10 | export MXNET_USE_OPERATOR_TUNING=0 11 | export MXNET_USE_NUM_CORES_OPERATOR_TUNING=4 12 | # Force perseus to use hybrid allreduce 13 | export PERSEUS_ALLREDUCE_MODE=1 14 | # Set maximum perseus stream count to 6 15 | export PERSEUS_ALLREDUCE_STREAMS=6 16 | 17 | DATA_DIR=/ncluster/dpx/dpx_github_code_2/insightface/datasets/ms1m-retinaface-t1/ 18 | 19 | NETWORK=r100 20 | JOB=softmax1e3 21 | MODELDIR="../save-results/model-$NETWORK-$JOB" 22 | mkdir -p "$MODELDIR" 23 | PREFIX="$MODELDIR/model" 24 | LOGFILE="$MODELDIR/log" 25 | 26 | KVSTORE='dist-sync-perseus' 27 | #TRAIN_MODE='data-parallel' 28 | TRAIN_MODE='data-and-model-parallel' 29 | TARGET='lfw' 30 | LR=0.02 31 | PER_BATCH_SIZE=16 32 | MARGIN_M=0.0 33 | MARGIN_S=64 34 | EASY_MARGIN=0 35 | EMB_SIZE=512 36 | END_EPOCH=50 37 | NUM_CLASSES=200000 38 | 39 | CUDA_VISIBLE_DEVICES='0,1,2,3,4,5,6,7' python -u /ncluster/dpx/dpx_github_code_2/insightface/src/train.py --data-dir $DATA_DIR --network "$NETWORK" \ 40 | --loss-type 4 \ 41 | --prefix "$PREFIX" \ 42 | --per-batch-size $PER_BATCH_SIZE \ 43 | --kvstore $KVSTORE \ 44 | --train-mode $TRAIN_MODE \ 45 | --target $TARGET \ 46 | --lr $LR \ 47 | --per-batch-size $PER_BATCH_SIZE \ 48 | --margin-m $MARGIN_M \ 49 | --margin-s $MARGIN_S \ 50 | --easy-margin $EASY_MARGIN \ 51 | --emb-size $EMB_SIZE \ 52 | --end-epoch $END_EPOCH \ 53 | --num-classes $NUM_CLASSES #\ 54 | # > "$LOGFILE" 2>&1 & 55 | 56 | -------------------------------------------------------------------------------- /mxnet/insightface/insightface/src/utils/benchmark.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | import os 5 | import sys 6 | import datetime 7 | import mxnet as mx 8 | from mxnet import ndarray as nd 9 | import random 10 | import argparse 11 | import cv2 12 | import time 13 | import sklearn 14 | from sklearn.decomposition import PCA 15 | from easydict import EasyDict as edict 16 | from sklearn.cluster import DBSCAN 17 | import numpy as np 18 | sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common')) 19 | import face_image 20 | 21 | def ch_dev(arg_params, aux_params, ctx): 22 | new_args = dict() 23 | new_auxs = dict() 24 | for k, v in arg_params.items(): 25 | new_args[k] = v.as_in_context(ctx) 26 | for k, v in aux_params.items(): 27 | new_auxs[k] = v.as_in_context(ctx) 28 | return new_args, new_auxs 29 | 30 | 31 | def main(args): 32 | ctx = mx.gpu(args.gpu) 33 | args.ctx_num = 1 34 | prop = face_image.load_property(args.data) 35 | image_size = prop.image_size 36 | print('image_size', image_size) 37 | vec = args.model.split(',') 38 | prefix = vec[0] 39 | epoch = int(vec[1]) 40 | print('loading',prefix, epoch) 41 | sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch) 42 | arg_params, aux_params = ch_dev(arg_params, aux_params, ctx) 43 | all_layers = sym.get_internals() 44 | sym = all_layers['fc1_output'] 45 | #model = mx.mod.Module.load(prefix, epoch, context = ctx) 46 | model = mx.mod.Module(symbol=sym, context=ctx, label_names = None) 47 | #model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))]) 48 | model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))]) 49 | model.set_params(arg_params, aux_params) 50 | path_imgrec = os.path.join(args.data, 'train.rec') 51 | path_imgidx = os.path.join(args.data, 'train.idx') 52 | 
imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r') # pylint: disable=redefined-variable-type 53 | s = imgrec.read_idx(0) 54 | header, _ = mx.recordio.unpack(s) 55 | assert header.flag>0 56 | print('header0 label', header.label) 57 | header0 = (int(header.label[0]), int(header.label[1])) 58 | #assert(header.flag==1) 59 | imgidx = range(1, int(header.label[0])) 60 | stat = [] 61 | count = 0 62 | data = nd.zeros( (1 ,3, image_size[0], image_size[1]) ) 63 | label = nd.zeros( (1,) ) 64 | for idx in imgidx: 65 | if len(stat)%100==0: 66 | print('processing', len(stat)) 67 | s = imgrec.read_idx(idx) 68 | header, img = mx.recordio.unpack(s) 69 | img = mx.image.imdecode(img) 70 | img = nd.transpose(img, axes=(2, 0, 1)) 71 | data[0][:] = img 72 | #input_blob = np.expand_dims(img.asnumpy(), axis=0) 73 | #arg_params["data"] = mx.nd.array(input_blob, ctx) 74 | #arg_params["softmax_label"] = mx.nd.empty((1,), ctx) 75 | time_now = datetime.datetime.now() 76 | #exe = sym.bind(ctx, arg_params ,args_grad=None, grad_req="null", aux_states=aux_params) 77 | #exe.forward(is_train=False) 78 | #_embedding = exe.outputs[0].asnumpy().flatten() 79 | #db = mx.io.DataBatch(data=(data,), label=(label,)) 80 | db = mx.io.DataBatch(data=(data,)) 81 | model.forward(db, is_train=False) 82 | net_out = model.get_outputs()[0].asnumpy() 83 | time_now2 = datetime.datetime.now() 84 | diff = time_now2 - time_now 85 | stat.append(diff.total_seconds()) 86 | if len(stat)==args.param1: 87 | break 88 | stat = stat[10:] 89 | print('avg infer time', np.mean(stat)) 90 | 91 | if __name__ == '__main__': 92 | parser = argparse.ArgumentParser(description='do network benchmark') 93 | # general 94 | parser.add_argument('--gpu', default=0, type=int, help='') 95 | parser.add_argument('--data', default='', type=str, help='') 96 | parser.add_argument('--model', default='../model/softmax,50', help='path to load model.') 97 | parser.add_argument('--batch-size', default=1, type=int, help='') 98 | parser.add_argument('--param1', default=1010, type=int, help='') 99 | args = parser.parse_args() 100 | main(args) 101 | 102 | -------------------------------------------------------------------------------- /mxnet/insightface/train_insightface.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import ncluster 5 | import os 6 | import time 7 | 8 | INSTANCE_TYPE = 'ecs.gn6v-c10g1.20xlarge' # V100 9 | NUM_GPUS = 8 10 | 11 | ncluster.set_backend('aliyun') 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument('--name', type=str, default='perseus-insightface-test', 14 | help="name of the current run, used for machine naming and tensorboard visualization") 15 | parser.add_argument('--machines', type=int, default=1, 16 | help="how many machines to use") 17 | args = parser.parse_args() 18 | 19 | def main(): 20 | start_time = time.time() 21 | # 1. Create infrastructure 22 | supported_regions = ['cn-huhehaote', 'cn-zhangjiakou', 'cn-shanghai', 'cn-hangzhou', 'cn-beijing'] 23 | assert ncluster.get_region() in supported_regions, f"required AMI {IMAGE_NAME} has only been made available in regions {supported_regions}, but your current region is {ncluster.get_region()} (set $ALYUN_DEFAULT_REGION)" 24 | 25 | job = ncluster.make_job(name=args.name, 26 | run_name=f"{args.name}-{args.machines}", 27 | num_tasks=args.machines, 28 | instance_type=INSTANCE_TYPE) 29 | # 2. Upload perseus insightface code. 
30 | job.run('yum -y install unzip') 31 | job.upload('insightface') 32 | job.run('conda activate mxnet_1.5.1.post0_cu10.0_py27') 33 | 34 | # 3. Download pretrain model and dataset. 35 | DATA_DIR = '/root/faces_ms1m_112x112' 36 | job.run('cd /root && wget -c -t 10 https://public-ai-datasets.oss-cn-huhehaote.aliyuncs.com/mxnet-deepinsight/faces_ms1m_112x112.zip && unzip faces_ms1m_112x112.zip') 37 | 38 | # 4. install requirements. 39 | job.run('cd /root/insightface/src') 40 | job.run('pip install -r requirements.txt') 41 | 42 | # 5. Run the training job. 43 | hosts = [task.ip + f':{NUM_GPUS}' for task in job.tasks] 44 | host_str = ','.join(hosts) 45 | 46 | mpi_cmd = ['mpirun --allow-run-as-root', 47 | f'-np {args.machines * NUM_GPUS}', 48 | f'--npernode {NUM_GPUS}', 49 | f'--host {host_str}', 50 | '--bind-to none', 51 | '-x NCCL_DEBUG=INFO', 52 | '-x PATH', 53 | '-x LD_LIBRARY_PATH',] 54 | 55 | insightface_cmd = './train-perseus.sh' 56 | 57 | cmd = mpi_cmd 58 | cmd = " ".join(cmd) + " " + insightface_cmd 59 | job.tasks[0].run(f'echo {cmd} > {job.logdir}/task-cmd') 60 | job.tasks[0].run(cmd, non_blocking=True) 61 | print(f"Logging to {job.logdir}") 62 | 63 | eclapse_time = time.time() - start_time 64 | print(f'training deploy time is: {eclapse_time} s.') 65 | 66 | 67 | if __name__ == '__main__': 68 | main() 69 | 70 | -------------------------------------------------------------------------------- /pytorch/GTC/LossFunction.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from torch import nn 4 | import torch 5 | from torch.nn import functional as F 6 | 7 | class FocalLoss(nn.Module): 8 | 9 | def __init__(self, gamma=0): 10 | super(FocalLoss, self).__init__() 11 | self.gamma = gamma 12 | self.ce = torch.nn.CrossEntropyLoss() 13 | 14 | def forward(self, input, target): 15 | logp = self.ce(input, target) 16 | p = torch.exp(-logp) 17 | loss = (1 - p) ** self.gamma * logp 18 | return loss.mean() 19 | 20 | def margin(cos, label, m, s): 21 | #m = 0.35 22 | #s = 30. 
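    # The lines below apply an additive cosine margin (CosFace / AM-Softmax style):
    # the margin m is subtracted from the target-class cosine only, the other class
    # logits are left unchanged, and every logit is then scaled by s before it is
    # (typically) fed to a softmax cross-entropy loss.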
23 | phi = cos - m 24 | label = label.view(-1, 1) 25 | index = cos.data * 0.0 26 | index.scatter_(1, label.data.view(-1, 1), 1) 27 | index = index.byte() 28 | output = cos * 1.0 29 | output[index] = phi[index] 30 | output *= s 31 | return output 32 | -------------------------------------------------------------------------------- /pytorch/GTC/enhance-data.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import os 3 | import os.path 4 | import glob 5 | import time 6 | from torchvision import transforms as transforms 7 | 8 | start_time = time.time() 9 | 10 | def convertjpg(jpgfile,outdir,width=224,height=224): 11 | img=Image.open(jpgfile) 12 | try: 13 | new_img=img.resize((width,height),Image.BILINEAR) 14 | save_img = os.path.join(outdir, os.path.basename(jpgfile)) 15 | new_img.save(save_img) 16 | print('save', jpgfile, 'to', save_img, ' done.') 17 | except Exception as e: 18 | print(e) 19 | def enhance(jpgfile, outdir): 20 | im = Image.open(jpgfile) 21 | # 进行随机的灰度化 22 | new_im = transforms.RandomGrayscale(p=0.5)(im) # 以0.5的概率进行灰度化 23 | save_gray = os.path.join(outdir, 'gray_' + os.path.basename(jpgfile)) 24 | new_im.save(save_gray) 25 | # 色度、亮度、饱和度、对比度的变化 26 | new_im = transforms.ColorJitter(brightness=1)(im) 27 | new_im = transforms.ColorJitter(contrast=1)(im) 28 | new_im = transforms.ColorJitter(saturation=0.5)(im) 29 | new_im = transforms.ColorJitter(hue=0.5)(im) 30 | save_color = os.path.join(outdir, 'color_' + os.path.basename(jpgfile)) 31 | new_im.save(save_color) 32 | # 随机角度旋转 33 | new_im = transforms.RandomRotation(45)(im) #随机旋转45度 34 | save_rotate = os.path.join(outdir, 'rotate_' + os.path.basename(jpgfile)) 35 | new_im.save(save_rotate) 36 | # 随机水平/垂直翻转 37 | new_im = transforms.RandomHorizontalFlip(p=1)(im) # p表示概率 38 | save_hor = os.path.join(outdir, 'hor_'+os.path.basename(jpgfile)) 39 | new_im.save(save_hor) 40 | new_im = transforms.RandomVerticalFlip(p=1)(im) 41 | save_ver = os.path.join(outdir, 'ver_' + os.path.basename(jpgfile)) 42 | new_im.save(save_ver) 43 | 44 | def merge_file(from_path, to_path): 45 | os.system('mkdir -p ' + to_path + '/scissors') 46 | os.system('mkdir -p ' + to_path + '/rock') 47 | os.system('mkdir -p ' + to_path + '/paper') 48 | os.system('cp ' + from_path + '/scissors/* ' + to_path + '/scissors/') 49 | os.system('cp ' + from_path + '/rock/* ' + to_path + '/rock/') 50 | os.system('cp ' + from_path + '/paper/* ' + to_path + '/paper/') 51 | 52 | # main process 53 | import sys 54 | stage = 1 if len(sys.argv) <= 1 else int(sys.argv[1]) 55 | raw_dataset = "mini-rps-dataset" if len(sys.argv) <= 2 else sys.argv[2] 56 | enhance_dataset = "enhance-mini-rps-dataset" if len(sys.argv) <= 2 else sys.argv[3] 57 | merge_from_dataset = "enhance-test-mini-dataset" if len(sys.argv) <= 2 else sys.argv[2] 58 | merge_to_dataset = "merge-dataset" if len(sys.argv) <= 2 else sys.argv[3] 59 | 60 | if stage <= 1: 61 | for classfile in glob.glob(raw_dataset + "/*"): 62 | for file_type in ['jpg', 'png', 'JPG', 'PNG']: 63 | for jpgfile in glob.glob(classfile+"/*." 
+ file_type): 64 | save_path = enhance_dataset + "/" + os.path.basename(classfile) 65 | os.system('mkdir -p ' + save_path) 66 | #convertjpg(jpgfile, save_path) 67 | enhance(jpgfile, save_path) 68 | else: 69 | merge_file(merge_from_dataset, merge_to_dataset) 70 | 71 | print('cost time:', time.time() - start_time) 72 | print('finish!') 73 | -------------------------------------------------------------------------------- /pytorch/GTC/inference.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import torch 3 | import torchvision 4 | from torchvision import transforms as transforms 5 | import PIL.Image as Image 6 | import cv2 7 | import numpy as np 8 | import sys 9 | import datetime 10 | import glob 11 | 12 | #model = torchvision.models.alexnet(pretrained=False) 13 | #model.classifier[6] = torch.nn.Linear(model.classifier[6].in_features, 3) 14 | model = torchvision.models.resnet50(pretrained=False) 15 | model.eval() 16 | model.fc = torch.nn.Linear(2048, 3) 17 | 18 | model.load_state_dict(torch.load('save_model.pth')) 19 | 20 | device = torch.device('cuda') 21 | model = model.to(device) 22 | 23 | mean = 255.0 * np.array([0.485, 0.456, 0.406]) 24 | stdev = 255.0 * np.array([0.229, 0.224, 0.225]) 25 | 26 | normalize = torchvision.transforms.Normalize(mean, stdev) 27 | 28 | def preprocess(img): 29 | x = transforms.Compose([ 30 | transforms.Resize((224,224)), 31 | transforms.ToTensor(), 32 | transforms.Normalize(mean/255.0, stdev/255.0), 33 | ])(img) 34 | x = x[None, ...] 35 | return x 36 | 37 | print('usage: python inference.py test-dataset 0 JPG') 38 | 39 | print(str(sys.argv)) 40 | test_path = 'test-dataset' if len(sys.argv) <= 2 else sys.argv[1] 41 | test_type = '0' if len(sys.argv) <= 2 else sys.argv[2] 42 | image_type = 'jpg' if len(sys.argv) <= 3 else sys.argv[3] 43 | 44 | img_list = [] 45 | if test_type == '0': 46 | img_list = glob.glob(test_path + '/scissors/*.'+image_type) #IMG_*.JPG') 47 | elif test_type == '1': 48 | img_list = glob.glob(test_path + '/rock/*.' + image_type) #IMG_*.JPG') 49 | else: 50 | img_list = glob.glob(test_path + '/paper/*.' + image_type) #IMG_*.JPG') 51 | ''' 52 | for hand in ['scissors', 'rock', 'paper']: 53 | for types in ['jpg', 'JPG', 'png', 'PNG']: 54 | img_list.extend(glob.glob(test_path + '/' + hand + '/*.' 
+ image_type)) 55 | ''' 56 | 57 | for_gtc_demo = True 58 | if for_gtc_demo and len(sys.argv) <= 2: 59 | img_list = ['test.JPG'] 60 | 61 | count = 0 62 | count_paper = 0 63 | count_rock = 0 64 | count_scissors = 0 65 | res = [] 66 | for img_file in img_list: 67 | count += 1 68 | img = Image.open(img_file) 69 | # convert RGBA to RGB 70 | img = img.convert("RGB") 71 | x = preprocess(img).to(device) 72 | begin = datetime.datetime.now() 73 | y = model(x) 74 | import torch.nn.functional as F 75 | y = F.softmax(y, dim=1) 76 | 77 | predict = y.argmax(1) 78 | print(predict) 79 | 80 | a=[float(y.flatten()[0]),float(y.flatten()[1]),float(y.flatten()[2])] 81 | end = datetime.datetime.now() 82 | k = end - begin 83 | m = np.where(a==np.max(a)) 84 | p_paper = float(y.flatten()[0]) 85 | p_rock = float(y.flatten()[1]) 86 | p_scissors = float(y.flatten()[2]) 87 | 88 | print('image:', img_file) 89 | print("布的概率:"+str(float(y.flatten()[0]))) 90 | print("石头的概率:"+str(str(float(y.flatten()[1])))) 91 | print("剪刀的概率:"+str(float(y.flatten()[2]))) 92 | label_ = int(m[0][0]) 93 | label_ = predict 94 | assert predict == int(m[0][0]) 95 | if label_ == 0: 96 | res.append(img_file) 97 | if label_ == 0: 98 | filename = "test.JPG" 99 | count_paper += 1 100 | print("你出的是布") 101 | 102 | if label_ == 1: 103 | filename="test2.JPG" 104 | count_rock += 1 105 | print("你出的是石头") 106 | 107 | if label_ == 2: 108 | filename="test1.JPG" 109 | count_scissors += 1 110 | print("你出的是剪刀") 111 | print("推理时间:"+str(k.total_seconds()*1000)+"毫秒") 112 | 113 | if len(sys.argv) > 2: 114 | print('paper acc:', 1.0 * count_paper / count) #len(img_list)) 115 | print('rock acc:', 1.0*count_rock / count) #len(img_list)) 116 | print('scissors acc:', 1.0*count_scissors / count) #len(img_list)) 117 | 118 | #print('res:', res) 119 | -------------------------------------------------------------------------------- /pytorch/GTC/pre-processing.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import os 3 | import os.path 4 | import glob 5 | import time 6 | 7 | start_time = time.time() 8 | 9 | def convertjpg(jpgfile,outdir,width=224,height=224): 10 | img=Image.open(jpgfile) 11 | try: 12 | new_img=img.resize((width,height),Image.BILINEAR) 13 | save_img = os.path.join(outdir, os.path.basename(jpgfile)) 14 | new_img.save(save_img) 15 | print('save', jpgfile, 'to', save_img, ' done.') 16 | except Exception as e: 17 | print(e) 18 | 19 | for classfile in glob.glob("rps/*"): 20 | for file_type in ['jpb', 'png', 'JPG', 'PNG']: 21 | for jpgfile in glob.glob(classfile+"/*." 
+ file_type): 22 | save_path = "mini-rps-dataset/" + os.path.basename(classfile) 23 | os.system('mkdir -p ' + save_path) 24 | convertjpg(jpgfile, save_path) 25 | 26 | print('cost time:', time.time() - start_time) 27 | print('finish!') 28 | -------------------------------------------------------------------------------- /pytorch/GTC/run-perseus.sh: -------------------------------------------------------------------------------- 1 | mpirun -allow-run-as-root -np 1 -npernode 1 sh ./train-perseus.sh 2 | -------------------------------------------------------------------------------- /pytorch/GTC/train-perseus.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python3 train.py 4 | -------------------------------------------------------------------------------- /pytorch/fastgpu_script.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import fastgpu 5 | import os 6 | import time 7 | from fastgpu import fastgpu_globals 8 | 9 | # setting parameters 10 | INSTANCE_TYPE = 'ecs.gn6v-c8g1.16xlarge' 11 | NUM_GPUS = 8 12 | IMAGE_TYPE = 'aiacc' 13 | 14 | fastgpu.set_backend('aliyun') 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--name', type=str, default='fastgpu-gtc-demo', 17 | help="name of the current run, used for machine naming and tensorboard visualization") 18 | 19 | args = parser.parse_args() 20 | 21 | def main(): 22 | print('start job ...') 23 | start_time = time.time() 24 | 25 | # 0. setup 26 | supported_regions = ['cn-huhehaote', 'cn-shanghai', 'cn-zhangjiakou', 'cn-hangzhou', 'cn-beijing', 'cn-shenzhen'] 27 | assert fastgpu.get_region() in supported_regions, f"required AMI {IMAGE_TYPE} has only been made available in regions {supported_regions}, but your current region is {fastgpu.get_region()} (set $ALYUN_DEFAULT_REGION)" 28 | fastgpu_globals.set_should_disable_nas(True) 29 | 30 | # 1. create a job 31 | job = fastgpu.make_job(name=args.name, 32 | run_name=f"{args.name}-1", 33 | num_tasks=1, 34 | instance_type=INSTANCE_TYPE, 35 | image_type=IMAGE_TYPE, 36 | disable_nas=True, 37 | spot=True, 38 | install_script='') 39 | 40 | init_fastgpu = time.time() 41 | print('init fastgpu: %.2fs'%(init_fastgpu - start_time)) 42 | 43 | # 2. upload GTC code 44 | job.run('apt install -y unzip') 45 | job.upload('GTC') 46 | job.run("chmod -R 744 GTC") 47 | job.run('cd GTC && wget http://public-ai-datasets.oss-cn-huhehaote.aliyuncs.com/gtc-demo/dataset.zip ' + 48 | '&& wget http://public-ai-datasets.oss-cn-huhehaote.aliyuncs.com/gtc-demo/test.JPG ' + 49 | '&& wget http://public-ai-datasets.oss-cn-huhehaote.aliyuncs.com/gtc-demo/resnet50-19c8e357.pth ') 50 | upload_data = time.time() 51 | print('upload_data time: %.2fs'%(upload_data - init_fastgpu)) 52 | 53 | # 3. prepare the dataset 54 | job.run('unzip -o dataset.zip') 55 | unzip_time = time.time() 56 | print('unzip data: %.2fs'%(unzip_time - upload_data)) 57 | 58 | # 4. run the training job 59 | job.run('conda init bash && conda activate torch_1.3.0_cu10.0_py36') 60 | 61 | job.run('pip install opencv-python') 62 | job.run("pip install 'pillow<7.0.0'") 63 | 64 | job.tasks[0].run('./run-perseus.sh', show_realtime=True) 65 | train_time = time.time() 66 | print('training time: %.2f'%(train_time - unzip_time)) 67 | 68 | # 5. 
run the inference job 69 | job.tasks[0].run('python inference.py', show_realtime=True) 70 | print('inference time: %.2fs'%(time.time() - train_time)) 71 | 72 | eclapse_time = time.time() - start_time 73 | print(f'training and inference deploy time is: %.2fs.'%eclapse_time) 74 | 75 | # 6. stop the instance 76 | job.stop() 77 | 78 | # 7. kill the instance (optional) 79 | job.kill() 80 | 81 | if __name__ == '__main__': 82 | main() 83 | 84 | -------------------------------------------------------------------------------- /pytorch/gtc-demo/GTC/LossFunction.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from torch import nn 4 | import torch 5 | from torch.nn import functional as F 6 | 7 | class FocalLoss(nn.Module): 8 | 9 | def __init__(self, gamma=0): 10 | super(FocalLoss, self).__init__() 11 | self.gamma = gamma 12 | self.ce = torch.nn.CrossEntropyLoss() 13 | 14 | def forward(self, input, target): 15 | logp = self.ce(input, target) 16 | p = torch.exp(-logp) 17 | loss = (1 - p) ** self.gamma * logp 18 | return loss.mean() 19 | 20 | def margin(cos, label, m, s): 21 | #m = 0.35 22 | #s = 30. 23 | phi = cos - m 24 | label = label.view(-1, 1) 25 | index = cos.data * 0.0 26 | index.scatter_(1, label.data.view(-1, 1), 1) 27 | index = index.byte() 28 | output = cos * 1.0 29 | output[index] = phi[index] 30 | output *= s 31 | return output 32 | -------------------------------------------------------------------------------- /pytorch/gtc-demo/GTC/enhance-data.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import os 3 | import os.path 4 | import glob 5 | import time 6 | from torchvision import transforms as transforms 7 | 8 | start_time = time.time() 9 | 10 | def convertjpg(jpgfile,outdir,width=224,height=224): 11 | img=Image.open(jpgfile) 12 | try: 13 | new_img=img.resize((width,height),Image.BILINEAR) 14 | save_img = os.path.join(outdir, os.path.basename(jpgfile)) 15 | new_img.save(save_img) 16 | print('save', jpgfile, 'to', save_img, ' done.') 17 | except Exception as e: 18 | print(e) 19 | def enhance(jpgfile, outdir): 20 | im = Image.open(jpgfile) 21 | # 进行随机的灰度化 22 | new_im = transforms.RandomGrayscale(p=0.5)(im) # 以0.5的概率进行灰度化 23 | save_gray = os.path.join(outdir, 'gray_' + os.path.basename(jpgfile)) 24 | new_im.save(save_gray) 25 | # 色度、亮度、饱和度、对比度的变化 26 | new_im = transforms.ColorJitter(brightness=1)(im) 27 | new_im = transforms.ColorJitter(contrast=1)(im) 28 | new_im = transforms.ColorJitter(saturation=0.5)(im) 29 | new_im = transforms.ColorJitter(hue=0.5)(im) 30 | save_color = os.path.join(outdir, 'color_' + os.path.basename(jpgfile)) 31 | new_im.save(save_color) 32 | # 随机角度旋转 33 | new_im = transforms.RandomRotation(45)(im) #随机旋转45度 34 | save_rotate = os.path.join(outdir, 'rotate_' + os.path.basename(jpgfile)) 35 | new_im.save(save_rotate) 36 | # 随机水平/垂直翻转 37 | new_im = transforms.RandomHorizontalFlip(p=1)(im) # p表示概率 38 | save_hor = os.path.join(outdir, 'hor_'+os.path.basename(jpgfile)) 39 | new_im.save(save_hor) 40 | new_im = transforms.RandomVerticalFlip(p=1)(im) 41 | save_ver = os.path.join(outdir, 'ver_' + os.path.basename(jpgfile)) 42 | new_im.save(save_ver) 43 | 44 | def merge_file(from_path, to_path): 45 | os.system('mkdir -p ' + to_path + '/scissors') 46 | os.system('mkdir -p ' + to_path + '/rock') 47 | os.system('mkdir -p ' + to_path + '/paper') 48 | os.system('cp ' + from_path + '/scissors/* ' + to_path + '/scissors/') 49 | os.system('cp ' + 
from_path + '/rock/* ' + to_path + '/rock/') 50 | os.system('cp ' + from_path + '/paper/* ' + to_path + '/paper/') 51 | 52 | # main process 53 | import sys 54 | stage = 1 if len(sys.argv) <= 1 else int(sys.argv[1]) 55 | raw_dataset = "mini-rps-dataset" if len(sys.argv) <= 2 else sys.argv[2] 56 | enhance_dataset = "enhance-mini-rps-dataset" if len(sys.argv) <= 2 else sys.argv[3] 57 | merge_from_dataset = "enhance-test-mini-dataset" if len(sys.argv) <= 2 else sys.argv[2] 58 | merge_to_dataset = "merge-dataset" if len(sys.argv) <= 2 else sys.argv[3] 59 | 60 | if stage <= 1: 61 | for classfile in glob.glob(raw_dataset + "/*"): 62 | for file_type in ['jpg', 'png', 'JPG', 'PNG']: 63 | for jpgfile in glob.glob(classfile+"/*." + file_type): 64 | save_path = enhance_dataset + "/" + os.path.basename(classfile) 65 | os.system('mkdir -p ' + save_path) 66 | #convertjpg(jpgfile, save_path) 67 | enhance(jpgfile, save_path) 68 | else: 69 | merge_file(merge_from_dataset, merge_to_dataset) 70 | 71 | print('cost time:', time.time() - start_time) 72 | print('finish!') 73 | -------------------------------------------------------------------------------- /pytorch/gtc-demo/GTC/inference.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import torch 3 | import torchvision 4 | from torchvision import transforms as transforms 5 | import PIL.Image as Image 6 | import cv2 7 | import numpy as np 8 | import sys 9 | import datetime 10 | import glob 11 | 12 | #model = torchvision.models.alexnet(pretrained=False) 13 | #model.classifier[6] = torch.nn.Linear(model.classifier[6].in_features, 3) 14 | model = torchvision.models.resnet50(pretrained=False) 15 | model.eval() 16 | model.fc = torch.nn.Linear(2048, 3) 17 | 18 | model.load_state_dict(torch.load('save_model.pth')) 19 | 20 | device = torch.device('cuda') 21 | model = model.to(device) 22 | 23 | mean = 255.0 * np.array([0.485, 0.456, 0.406]) 24 | stdev = 255.0 * np.array([0.229, 0.224, 0.225]) 25 | 26 | normalize = torchvision.transforms.Normalize(mean, stdev) 27 | 28 | def preprocess(img): 29 | x = transforms.Compose([ 30 | transforms.Resize((224,224)), 31 | transforms.ToTensor(), 32 | transforms.Normalize(mean/255.0, stdev/255.0), 33 | ])(img) 34 | x = x[None, ...] 35 | return x 36 | 37 | print('usage: python inference.py test-dataset 0 JPG') 38 | 39 | print(str(sys.argv)) 40 | test_path = 'test-dataset' if len(sys.argv) <= 2 else sys.argv[1] 41 | test_type = '0' if len(sys.argv) <= 2 else sys.argv[2] 42 | image_type = 'jpg' if len(sys.argv) <= 3 else sys.argv[3] 43 | 44 | img_list = [] 45 | if test_type == '0': 46 | img_list = glob.glob(test_path + '/scissors/*.'+image_type) #IMG_*.JPG') 47 | elif test_type == '1': 48 | img_list = glob.glob(test_path + '/rock/*.' + image_type) #IMG_*.JPG') 49 | else: 50 | img_list = glob.glob(test_path + '/paper/*.' + image_type) #IMG_*.JPG') 51 | ''' 52 | for hand in ['scissors', 'rock', 'paper']: 53 | for types in ['jpg', 'JPG', 'png', 'PNG']: 54 | img_list.extend(glob.glob(test_path + '/' + hand + '/*.' 
+ image_type)) 55 | ''' 56 | 57 | for_gtc_demo = True 58 | if for_gtc_demo and len(sys.argv) <= 2: 59 | img_list = ['test.JPG'] 60 | 61 | count = 0 62 | count_paper = 0 63 | count_rock = 0 64 | count_scissors = 0 65 | res = [] 66 | for img_file in img_list: 67 | count += 1 68 | img = Image.open(img_file) 69 | # convert RGBA to RGB 70 | img = img.convert("RGB") 71 | x = preprocess(img).to(device) 72 | begin = datetime.datetime.now() 73 | y = model(x) 74 | import torch.nn.functional as F 75 | y = F.softmax(y, dim=1) 76 | 77 | predict = y.argmax(1) 78 | print(predict) 79 | 80 | a=[float(y.flatten()[0]),float(y.flatten()[1]),float(y.flatten()[2])] 81 | end = datetime.datetime.now() 82 | k = end - begin 83 | m = np.where(a==np.max(a)) 84 | p_paper = float(y.flatten()[0]) 85 | p_rock = float(y.flatten()[1]) 86 | p_scissors = float(y.flatten()[2]) 87 | 88 | print('image:', img_file) 89 | print("布的概率:"+str(float(y.flatten()[0]))) 90 | print("石头的概率:"+str(str(float(y.flatten()[1])))) 91 | print("剪刀的概率:"+str(float(y.flatten()[2]))) 92 | label_ = int(m[0][0]) 93 | label_ = predict 94 | assert predict == int(m[0][0]) 95 | if label_ == 0: 96 | res.append(img_file) 97 | if label_ == 0: 98 | filename = "test.JPG" 99 | count_paper += 1 100 | print("你出的是布") 101 | 102 | if label_ == 1: 103 | filename="test2.JPG" 104 | count_rock += 1 105 | print("你出的是石头") 106 | 107 | if label_ == 2: 108 | filename="test1.JPG" 109 | count_scissors += 1 110 | print("你出的是剪刀") 111 | print("推理时间:"+str(k.total_seconds()*1000)+"毫秒") 112 | 113 | if len(sys.argv) > 2: 114 | print('paper acc:', 1.0 * count_paper / count) #len(img_list)) 115 | print('rock acc:', 1.0*count_rock / count) #len(img_list)) 116 | print('scissors acc:', 1.0*count_scissors / count) #len(img_list)) 117 | 118 | #print('res:', res) 119 | -------------------------------------------------------------------------------- /pytorch/gtc-demo/GTC/pre-processing.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import os 3 | import os.path 4 | import glob 5 | import time 6 | 7 | start_time = time.time() 8 | 9 | def convertjpg(jpgfile,outdir,width=224,height=224): 10 | img=Image.open(jpgfile) 11 | try: 12 | new_img=img.resize((width,height),Image.BILINEAR) 13 | save_img = os.path.join(outdir, os.path.basename(jpgfile)) 14 | new_img.save(save_img) 15 | print('save', jpgfile, 'to', save_img, ' done.') 16 | except Exception as e: 17 | print(e) 18 | 19 | for classfile in glob.glob("rps/*"): 20 | for file_type in ['jpb', 'png', 'JPG', 'PNG']: 21 | for jpgfile in glob.glob(classfile+"/*." 
+ file_type): 22 | save_path = "mini-rps-dataset/" + os.path.basename(classfile) 23 | os.system('mkdir -p ' + save_path) 24 | convertjpg(jpgfile, save_path) 25 | 26 | print('cost time:', time.time() - start_time) 27 | print('finish!') 28 | -------------------------------------------------------------------------------- /pytorch/gtc-demo/GTC/run-perseus.sh: -------------------------------------------------------------------------------- 1 | mpirun -allow-run-as-root -np 1 -npernode 1 ./train-perseus.sh 2 | -------------------------------------------------------------------------------- /pytorch/gtc-demo/GTC/train-perseus.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python3 train.py 4 | -------------------------------------------------------------------------------- /pytorch/gtc-demo/fastgpu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import ncluster 5 | import os 6 | import time 7 | from ncluster import ncluster_globals 8 | 9 | # setting parameters 10 | INSTANCE_TYPE = 'ecs.gn6v-c8g1.2xlarge' 11 | NUM_GPUS = 1 12 | 13 | ncluster.set_backend('aliyun') 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--name', type=str, default='fastgpu-gtc-demo', 16 | help="name of the current run, used for machine naming and tensorboard visualization") 17 | parser.add_argument('--machines', type=int, default=1, 18 | help="how many machines to use") 19 | args = parser.parse_args() 20 | 21 | def main(): 22 | print('start job ...') 23 | start_time = time.time() 24 | 25 | # 1. create infrastructure 26 | supported_regions = ['cn-huhehaote', 'cn-shanghai', 'cn-zhangjiakou', 'cn-hangzhou', 'cn-beijing'] 27 | assert ncluster.get_region() in supported_regions, f"required AMI {IMAGE_NAME} has only been made available in regions {supported_regions}, but your current region is {ncluster.get_region()} (set $ALYUN_DEFAULT_REGION)" 28 | 29 | ncluster_globals.set_should_disable_nas(True) 30 | 31 | job = ncluster.make_job(name=args.name, 32 | run_name=f"{args.name}-{args.machines}", 33 | num_tasks=args.machines, 34 | instance_type=INSTANCE_TYPE, 35 | #image_name='aiacc-dlimg-centos7:1.3.0.post3', 36 | disable_nas=True, 37 | spot=True, 38 | install_script='') 39 | 40 | init_ncluster = time.time() 41 | print('init ncluster:', init_ncluster - start_time) 42 | 43 | # 2. upload GTC code 44 | job.run('yum install -y unzip') 45 | job.upload('GTC') 46 | job.run('cd GTC && wget http://public-ai-datasets.oss-cn-huhehaote.aliyuncs.com/gtc-demo/dataset.zip ' + 47 | '&& wget http://public-ai-datasets.oss-cn-huhehaote.aliyuncs.com/gtc-demo/test.JPG ' + 48 | '&& wget http://public-ai-datasets.oss-cn-huhehaote.aliyuncs.com/gtc-demo/resnet50-19c8e357.pth ' + 49 | '&& conda activate torch_1.3_cu10.0_py36') 50 | upload_data = time.time() 51 | print('upload_data time:', upload_data - init_ncluster) 52 | 53 | # 3. prepare the dataset 54 | job.run('unzip -o dataset.zip') 55 | unzip_time = time.time() 56 | print('unzip data:', unzip_time - upload_data) 57 | 58 | # 4. run the training job 59 | job.tasks[0].run('conda activate torch_1.3_cu10.0_py36') 60 | job.run('pip install opencv-python') 61 | job.run("pip install 'pillow<7.0.0'") 62 | job.tasks[0].run('./run-perseus.sh 2>&1 | tee logs.log', non_blocking=False) 63 | train_time = time.time() 64 | print('training time:', train_time - unzip_time) 65 | 66 | # 5. 
run the inference job 67 | job.tasks[0].run('python inference.py 2>&1 | tee logs.inference.log', non_blocking=False) 68 | print('inference time:', time.time() - train_time) 69 | 70 | elapsed_time = time.time() - start_time 71 | print(f'training and inference deploy time is: {elapsed_time} s.') 72 | 73 | # 6. stop the instance (optional) 74 | #job.stop() 75 | 76 | if __name__ == '__main__': 77 | main() 78 | 79 | -------------------------------------------------------------------------------- /tensorflow/bert/README.md: -------------------------------------------------------------------------------- 1 | # News classifier example using BERT fine-tuning 2 | 3 | ## Prerequisite 4 | 5 | * Aliyun account 6 | * fastgpu package installed. 7 | 8 | ## Training 9 | 10 | 1. Register your Aliyun account using the commands below. 11 | 12 | ```Bash 13 | export ALIYUN_ACCESS_KEY_ID=xxxxx 14 | export ALIYUN_ACCESS_KEY_SECRET=xxxxx 15 | export ALIYUN_DEFAULT_REGION=cn-beijing 16 | ``` 17 | 18 | 2. Run the training job with 19 | 20 | ```Bash 21 | python train_news_classifier.py 22 | ``` 23 | 24 | After the training job is deployed to the cloud, the console displays output like the following. 25 | 26 | ```Bash 27 | training deploy time is: 196.9283847808838 s. 28 | ``` 29 | 30 | 3. Use `fastgpu ls` to list the cloud machines. 31 | ``` 32 | In Region cn-beijing 33 | Running Instances: 34 | ------------------------------------------------------------------------------------------------ 35 | Instance Name | Age(hr) | Public IP | Private IP | GPU | Instance Type 36 | ------------------------------------------------------------------------------------------------ 37 | task0.aiacc-bert | 125.4 | 112.125.xxx.xxx | 192.168.xxx.xxx | V100 x 8 | ecs.gn6v-c8g1.16xlarge 38 | ------------------------------------------------------------------------------------------------ 39 | task1.aiacc-bert | 125.4 | 112.125.xxx.xxx | 192.168.xxx.xxx | V100 x 8 | ecs.gn6v-c8g1.16xlarge 40 | ------------------------------------------------------------------------------------------------ 41 | ``` 42 | 4. Attach to the running console using `fastgpu tmux task0.aiacc-bert`. The running log is displayed there; the result looks like this: 43 | ![img](training_output_log.jpg) 44 | 45 | ## Accuracy 46 | 47 | The final accuracy is 0.888; 8-card training reaches this accuracy with no loss compared to 1-card training. 48 | 49 | ## Time 50 | 51 | The deploy time is about 3.5 min and the training time is about 11.5 min, so the total time is about 15 min. 52 | 53 | ## Benchmark 54 | 55 | On an instance with 8x V100 (gn6v-c10g1.20xlarge), the benchmark results below are in examples/sec.
56 | 57 | | method | 1 GPU | 8 GPU | speed up | 58 | | -------- | ----- | ------ | -------- | 59 | | base | 118.5 | 865.6 | 7.3 | 60 | | amp+xla | 400 | 2720.8 | 6.8 | 61 | | speed up | 3.37 | 3.14 | | 62 | -------------------------------------------------------------------------------- /tensorflow/bert/docs/ecluster_kill.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/bert/docs/ecluster_kill.jpg -------------------------------------------------------------------------------- /tensorflow/bert/docs/ecluster_ls_display.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/bert/docs/ecluster_ls_display.jpg -------------------------------------------------------------------------------- /tensorflow/bert/docs/training_output_log.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/bert/docs/training_output_log.jpg -------------------------------------------------------------------------------- /tensorflow/bert/fastgpu_ls_display.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/bert/fastgpu_ls_display.jpg -------------------------------------------------------------------------------- /tensorflow/bert/perseus-bert/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | BERT needs to maintain permanent compatibility with the pre-trained model files, 4 | so we do not plan to make any major changes to this library (other than what was 5 | promised in the README). However, we can accept small patches related to 6 | re-factoring and documentation. To submit contributes, there are just a few 7 | small guidelines you need to follow. 8 | 9 | ## Contributor License Agreement 10 | 11 | Contributions to this project must be accompanied by a Contributor License 12 | Agreement. You (or your employer) retain the copyright to your contribution; 13 | this simply gives us permission to use and redistribute your contributions as 14 | part of the project. Head over to to see 15 | your current agreements on file or to sign a new one. 16 | 17 | You generally only need to submit a CLA once, so if you've already submitted one 18 | (even if it was for a different project), you probably don't need to do it 19 | again. 20 | 21 | ## Code reviews 22 | 23 | All submissions, including submissions by project members, require review. We 24 | use GitHub pull requests for this purpose. Consult 25 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 26 | information on using pull requests. 27 | 28 | ## Community Guidelines 29 | 30 | This project follows 31 | [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/). 32 | -------------------------------------------------------------------------------- /tensorflow/bert/perseus-bert/RUN_CLASSIFIER_1CARD_vs_8CARD.md: -------------------------------------------------------------------------------- 1 | # Using Toutiao dataset to classify the news data. 
2 | * Dataset: https://public-ai-datasets.oss-cn-huhehaote.aliyuncs.com/toutiao_data.tgz 3 | * pretrain_model: https://storage.googleapis.com/bert_models/2018_11_03/chinese_L-12_H-768_A-12.zip 4 | 5 | ## Training script with 1 card. 6 | 7 | ```Bash 8 | export BERT_CHINESE_BASE_DIR=/root/chinese_L-12_H-768_A-12 9 | export DATA_DIR=/root/toutiao_data 10 | python run_classifier.py \ 11 | --task_name=news \ 12 | --do_train=true \ 13 | --do_eval=true \ 14 | --do_predict=true \ 15 | --data_dir=$DATA_DIR \ 16 | --vocab_file=$BERT_CHINESE_BASE_DIR/vocab.txt \ 17 | --bert_config_file=$BERT_CHINESE_BASE_DIR/bert_config.json \ 18 | --init_checkpoint=$BERT_CHINESE_BASE_DIR/bert_model.ckpt \ 19 | --max_seq_length=128 \ 20 | --train_batch_size=48 \ 21 | --learning_rate=2e-5 \ 22 | --num_train_epochs=3.0 \ 23 | --output_dir=/tmp/news_output/ \ 24 | --use_tpu=false \ 25 | --use_fp16=false \ 26 | --use_perseus=true \ 27 | --use_amp=true 28 | ``` 29 | 30 | ## Training script with 8 card. 31 | 32 | ```Bash 33 | export BERT_CHINESE_BASE_DIR=/root/chinese_L-12_H-768_A-12 34 | export DATA_DIR=/root/toutiao_data 35 | mpirun --allow-run-as-root -np 8 \ 36 | python run_classifier.py \ 37 | --task_name=news \ 38 | --do_train=true \ 39 | --do_eval=true \ 40 | --do_predict=true \ 41 | --data_dir=$DATA_DIR \ 42 | --vocab_file=$BERT_CHINESE_BASE_DIR/vocab.txt \ 43 | --bert_config_file=$BERT_CHINESE_BASE_DIR/bert_config.json \ 44 | --init_checkpoint=$BERT_CHINESE_BASE_DIR/bert_model.ckpt \ 45 | --max_seq_length=128 \ 46 | --train_batch_size=48 \ 47 | --learning_rate=8e-5 \ 48 | --num_train_epochs=3.0 \ 49 | --warmup_proportion=0.8 \ 50 | --output_dir=/ncluster/news_output/ \ 51 | --use_tpu=false \ 52 | --use_perseus=true \ 53 | --use_amp=true 54 | 55 | ``` 56 | 57 | ## Result. 58 | The final result after 3 epoch with 1 card training and 8 card training can both reach to 0.88, below is the final result of 8 card training. 59 | ``` 60 | eval_accuracy = 0.8886091 61 | eval_loss = 0.40453392 62 | global_step = 1619 63 | loss = 0.40447024 64 | ``` 65 | 66 | ## Benchmark. 67 | On Instance of 8x V100 (gn6v-c10g1.20xlarge) the benchmark result in examples/sec. 68 | 69 | | method | 1 GPU | 8 GPU | speed up | 70 | | ------ | ----- | ----- | -------- | 71 | | base | 118.5 | 865.6 | 7.3 | 72 | | amp+xla | 400 | 2720.8 | 6.8 | 73 | | speed up | 3.37 | 3.14 | | 74 | 75 | 76 | -------------------------------------------------------------------------------- /tensorflow/bert/perseus-bert/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /tensorflow/bert/perseus-bert/optimization_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import optimization 20 | import tensorflow as tf 21 | 22 | 23 | class OptimizationTest(tf.test.TestCase): 24 | 25 | def test_adam(self): 26 | with self.test_session() as sess: 27 | w = tf.get_variable( 28 | "w", 29 | shape=[3], 30 | initializer=tf.constant_initializer([0.1, -0.2, -0.1])) 31 | x = tf.constant([0.4, 0.2, -0.5]) 32 | loss = tf.reduce_mean(tf.square(x - w)) 33 | tvars = tf.trainable_variables() 34 | grads = tf.gradients(loss, tvars) 35 | global_step = tf.train.get_or_create_global_step() 36 | optimizer = optimization.AdamWeightDecayOptimizer(learning_rate=0.2) 37 | train_op = optimizer.apply_gradients(zip(grads, tvars), global_step) 38 | init_op = tf.group(tf.global_variables_initializer(), 39 | tf.local_variables_initializer()) 40 | sess.run(init_op) 41 | for _ in range(100): 42 | sess.run(train_op) 43 | w_np = sess.run(w) 44 | self.assertAllClose(w_np.flat, [0.4, 0.2, -0.5], rtol=1e-2, atol=1e-2) 45 | 46 | 47 | if __name__ == "__main__": 48 | tf.test.main() 49 | -------------------------------------------------------------------------------- /tensorflow/bert/perseus-bert/predict_client_grpc.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Send news text to tensorflow_model_server loaded with BERT model. 16 | 17 | """ 18 | 19 | from __future__ import print_function 20 | 21 | # This is a placeholder for a Google-internal import. 
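# NOTE (added illustration, not part of the original script): a hedged usage
# sketch of this gRPC client. The flag names below are the ones defined
# further down in this file; the served model name "bert_news" is an
# assumption, and the script presumes a fine-tuned BERT classifier has
# already been exported to a running tensorflow_model_server instance.
#
#   python predict_client_grpc.py \
#       --server=localhost:8500 \
#       --model_name=bert_news \
#       --vocab_file=/root/chinese_L-12_H-768_A-12/vocab.txt \
#       --text_dir=/root/toutiao_data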
22 | 23 | import grpc 24 | import requests 25 | import tensorflow as tf 26 | import os 27 | import re 28 | import numpy as np 29 | import time 30 | 31 | import tokenization 32 | from run_classifier_util import NewsProcessor, convert_examples_to_features, serving_input_fn, InputExample 33 | 34 | from tensorflow_serving.apis import predict_pb2 35 | from tensorflow_serving.apis import prediction_service_pb2_grpc 36 | 37 | 38 | # The image URL is the location of the image we should send to the server 39 | IMAGE_URL = 'https://tensorflow.org/images/blogs/serving/cat.jpg' 40 | 41 | tf.app.flags.DEFINE_string('server', 'localhost:8500', 42 | 'PredictionService host:port') 43 | tf.app.flags.DEFINE_string('text_dir', '', 'path to text dir, the text file format is tsv.') 44 | tf.app.flags.DEFINE_string('model_name', '', 'Model name to do inference.') 45 | tf.app.flags.DEFINE_string('vocab_file', '', 'path of vocab file.') 46 | FLAGS = tf.app.flags.FLAGS 47 | 48 | 49 | def main(_): 50 | processor = NewsProcessor() 51 | label_list = processor.get_labels() 52 | predict_examples = processor.get_test_examples(FLAGS.text_dir) 53 | tokenizer = tokenization.FullTokenizer( 54 | vocab_file=FLAGS.vocab_file, do_lower_case=True) 55 | features = convert_examples_to_features(predict_examples, label_list, 128, tokenizer) 56 | 57 | 58 | channel = grpc.insecure_channel(FLAGS.server) 59 | stub = prediction_service_pb2_grpc.PredictionServiceStub(channel) 60 | # Send request 61 | # See prediction_service.proto for gRPC request/response details. 62 | request = predict_pb2.PredictRequest() 63 | request.model_spec.name = FLAGS.model_name 64 | request.model_spec.signature_name = 'serving_default' 65 | # request.inputs = features 66 | for i, feature in enumerate(features): 67 | text = predict_examples[i].text_a 68 | request.inputs['input_ids'].CopyFrom( 69 | tf.contrib.util.make_tensor_proto(feature.input_ids, shape=[1, 128])) 70 | request.inputs['input_mask'].CopyFrom( 71 | tf.contrib.util.make_tensor_proto(feature.input_mask, shape=[1, 128])) 72 | request.inputs['segment_ids'].CopyFrom( 73 | tf.contrib.util.make_tensor_proto(feature.segment_ids, shape=[1, 128])) 74 | request.inputs['label_ids'].CopyFrom( 75 | tf.contrib.util.make_tensor_proto(feature.label_id, shape=[1])) 76 | # start = time.time() 77 | result = stub.Predict(request, 10.0) # 10 secs timeout 78 | predictions = tf.make_ndarray(result.outputs['probabilities']) 79 | predictions = np.squeeze(predictions) 80 | top_k = predictions.argsort()[-5:][::-1] 81 | print('input text: ' + text + ' -> result class is: ' + label_list[top_k[0]]) 82 | # print(result.outputs['probabilities']) 83 | # stop = time.time() 84 | 85 | 86 | if __name__ == '__main__': 87 | tf.app.run() 88 | -------------------------------------------------------------------------------- /tensorflow/bert/perseus-bert/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow >= 1.11.0 # CPU Version of TensorFlow. 2 | # tensorflow-gpu >= 1.11.0 # GPU version of TensorFlow. 3 | -------------------------------------------------------------------------------- /tensorflow/bert/perseus-bert/sample_text.txt: -------------------------------------------------------------------------------- 1 | This text is included to make sure Unicode is handled properly: 力加勝北区ᴵᴺᵀᵃছজটডণত 2 | Text should be one-sentence-per-line, with empty lines between documents. 3 | This sample text is public domain and was randomly selected from Project Guttenberg. 
4 | 5 | The rain had only ceased with the gray streaks of morning at Blazing Star, and the settlement awoke to a moral sense of cleanliness, and the finding of forgotten knives, tin cups, and smaller camp utensils, where the heavy showers had washed away the debris and dust heaps before the cabin doors. 6 | Indeed, it was recorded in Blazing Star that a fortunate early riser had once picked up on the highway a solid chunk of gold quartz which the rain had freed from its incumbering soil, and washed into immediate and glittering popularity. 7 | Possibly this may have been the reason why early risers in that locality, during the rainy season, adopted a thoughtful habit of body, and seldom lifted their eyes to the rifted or india-ink washed skies above them. 8 | "Cass" Beard had risen early that morning, but not with a view to discovery. 9 | A leak in his cabin roof,--quite consistent with his careless, improvident habits,--had roused him at 4 A. M., with a flooded "bunk" and wet blankets. 10 | The chips from his wood pile refused to kindle a fire to dry his bed-clothes, and he had recourse to a more provident neighbor's to supply the deficiency. 11 | This was nearly opposite. 12 | Mr. Cassius crossed the highway, and stopped suddenly. 13 | Something glittered in the nearest red pool before him. 14 | Gold, surely! 15 | But, wonderful to relate, not an irregular, shapeless fragment of crude ore, fresh from Nature's crucible, but a bit of jeweler's handicraft in the form of a plain gold ring. 16 | Looking at it more attentively, he saw that it bore the inscription, "May to Cass." 17 | Like most of his fellow gold-seekers, Cass was superstitious. 18 | 19 | The fountain of classic wisdom, Hypatia herself. 20 | As the ancient sage--the name is unimportant to a monk--pumped water nightly that he might study by day, so I, the guardian of cloaks and parasols, at the sacred doors of her lecture-room, imbibe celestial knowledge. 21 | From my youth I felt in me a soul above the matter-entangled herd. 22 | She revealed to me the glorious fact, that I am a spark of Divinity itself. 23 | A fallen star, I am, sir!' continued he, pensively, stroking his lean stomach--'a fallen star!--fallen, if the dignity of philosophy will allow of the simile, among the hogs of the lower world--indeed, even into the hog-bucket itself. Well, after all, I will show you the way to the Archbishop's. 24 | There is a philosophic pleasure in opening one's treasures to the modest young. 25 | Perhaps you will assist me by carrying this basket of fruit?' And the little man jumped up, put his basket on Philammon's head, and trotted off up a neighbouring street. 26 | Philammon followed, half contemptuous, half wondering at what this philosophy might be, which could feed the self-conceit of anything so abject as his ragged little apish guide; 27 | but the novel roar and whirl of the street, the perpetual stream of busy faces, the line of curricles, palanquins, laden asses, camels, elephants, which met and passed him, and squeezed him up steps and into doorways, as they threaded their way through the great Moon-gate into the ample street beyond, drove everything from his mind but wondering curiosity, and a vague, helpless dread of that great living wilderness, more terrible than any dead wilderness of sand which he had left behind. 28 | Already he longed for the repose, the silence of the Laura--for faces which knew him and smiled upon him; but it was too late to turn back now. 
29 | His guide held on for more than a mile up the great main street, crossed in the centre of the city, at right angles, by one equally magnificent, at each end of which, miles away, appeared, dim and distant over the heads of the living stream of passengers, the yellow sand-hills of the desert; 30 | while at the end of the vista in front of them gleamed the blue harbour, through a network of countless masts. 31 | At last they reached the quay at the opposite end of the street; 32 | and there burst on Philammon's astonished eyes a vast semicircle of blue sea, ringed with palaces and towers. 33 | He stopped involuntarily; and his little guide stopped also, and looked askance at the young monk, to watch the effect which that grand panorama should produce on him. 34 | -------------------------------------------------------------------------------- /tensorflow/bert/perseus-bert/sentence_segmentation.py: -------------------------------------------------------------------------------- 1 | # The code used to do prepare docs to align with bert pretrain usage. 2 | # It use spacy module to do sentences separation. 3 | # Used below command to prepare the running code env. 4 | # pip install spacy 5 | # python -m spacy download en_core_web_lg 6 | 7 | '''Example of adding a pipeline component to prohibit sentence boundaries 8 | before certain tokens. 9 | 10 | What we do is write to the token.is_sent_start attribute, which 11 | takes values in {True, False, None}. The default value None allows the parser 12 | to predict sentence segments. The value False prohibits the parser from inserting 13 | a sentence boundary before that token. Note that fixing the sentence segmentation 14 | should also improve the parse quality. 15 | 16 | The specific example here is drawn from https://github.com/explosion/spaCy/issues/2627 17 | Other versions of the model may not make the original mistake, so the specific 18 | example might not be apt for future versions. 
19 | ''' 20 | import plac 21 | import spacy 22 | import codecs 23 | import os 24 | 25 | def prevent_sentence_boundaries(doc): 26 | for token in doc: 27 | if not can_be_sentence_start(token): 28 | token.is_sent_start = False 29 | return doc 30 | 31 | def can_be_sentence_start(token): 32 | if token.i == 0: 33 | return True 34 | elif token.is_title: 35 | return True 36 | elif token.nbor(-1).is_punct: 37 | return True 38 | elif token.nbor(-1).is_space: 39 | return True 40 | else: 41 | return False 42 | 43 | def main(): 44 | nlp = spacy.load('en_core_web_lg') 45 | nlp.add_pipe(prevent_sentence_boundaries, before='parser') 46 | nlp.max_length = 200000000 47 | input_dir = '/mnt/newcpfs/wikiextractor/wikiextractor/enwiki' 48 | for root, dirs, files in os.walk(input_dir): 49 | father, child = os.path.split(root) 50 | new_father = father+'_processed' 51 | new_root = os.path.join(new_father, child) 52 | if not os.path.exists(new_root): 53 | os.makedirs(new_root) 54 | for file in files: 55 | abs_file = os.path.join(root, file) 56 | new_abs_file = os.path.join(new_root, file) 57 | raw_text = open(abs_file).read().decode('utf8') 58 | fo = open("test.txt", "w") 59 | # doc = nlp(raw_text) 60 | # sentences = [sent.string.strip() for sent in doc.sents] 61 | # print(sentences) 62 | # nlp.add_pipe(prevent_sentence_boundaries, before='parser') 63 | doc = nlp(raw_text) 64 | sentences = [sent.string.strip() for sent in doc.sents] 65 | with codecs.open(new_abs_file ,'w', 'utf-8') as f: 66 | for sentence in sentences: 67 | if '' in sentence: 68 | f.write('\n') 69 | elif ' {job.logdir}/task-cmd') 86 | # print(f"Logging to {job.logdir}") 87 | job.tasks[0].run(cmd) 88 | 89 | eclapse_time = time.time() - start_time 90 | print(f'training deploy time is: {eclapse_time} s.') 91 | 92 | # 5. stop the instance 93 | job.stop() 94 | # 6. 
Terminate Instances (Optional) 95 | job.kill() 96 | 97 | if __name__ == '__main__': 98 | main() -------------------------------------------------------------------------------- /tensorflow/bert/training_output_log.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/bert/training_output_log.jpg -------------------------------------------------------------------------------- /tensorflow/image_classification/command.sh: -------------------------------------------------------------------------------- 1 | COMMAND='python scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py --image_size=224 --model=resnet50_v1.5 --batch_size=128 --display_every=100 --data_name=imagenet --nodistortions --variable_update=perseus --num_gpus=1 --use_fp16=True --fp16_gradient=True --batch_group_size=4 --num_inter_threads=8 --datasets_use_prefetch=True --datasets_num_private_threads=5 --device=gpu --xla=True --allow_growth=True --optimizer=momentum --momentum=0.9 --weight_decay=1e-4 --use_datasets=False --num_eval_epochs=1 --eval_during_training_every_n_epochs=1 --num_warmup_batches=500 --num_batches=1500' 2 | if [ $OMPI_COMM_WORLD_RANK -eq 0 ] ; then 3 | $COMMAND 4 | else 5 | $COMMAND >> /dev/null 2>&1 6 | fi 7 | -------------------------------------------------------------------------------- /tensorflow/image_classification/docs/ResNet50_batchsize256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/image_classification/docs/ResNet50_batchsize256.png -------------------------------------------------------------------------------- /tensorflow/image_classification/docs/ResNet50_batchsize64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/image_classification/docs/ResNet50_batchsize64.png -------------------------------------------------------------------------------- /tensorflow/image_classification/docs/VGG16_batchsize64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/image_classification/docs/VGG16_batchsize64.png -------------------------------------------------------------------------------- /tensorflow/image_classification/resnet50_bs256_command.sh: -------------------------------------------------------------------------------- 1 | python fastgpu_script.py --name=tfbenchmark --model=resnet50 --batch_size=256 --machines=2 --gpus=8 -------------------------------------------------------------------------------- /tensorflow/image_classification/resnet50_bs64_command.sh: -------------------------------------------------------------------------------- 1 | python fastgpu_script.py --name=tfbenchmark --model=resnet50 --batch_size=64 --machines=2 --gpus=8 -------------------------------------------------------------------------------- /tensorflow/image_classification/rn50_bs256_command.sh: -------------------------------------------------------------------------------- 1 | python fastgpu_script.py --name=tfbenchmark --model=resnet50 --batch_size=256 --machines=4 --gpus=8 
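The `*_command.sh` wrappers above only vary the model, batch size, and machine count passed to `fastgpu_script.py`. The benchmark command itself lives in `command.sh`, whose `$OMPI_COMM_WORLD_RANK` check implies it is meant to be launched through `mpirun` with one process per GPU. A minimal hand-launched sketch is shown below; the hostfile name and the 2-machine x 8-GPU layout are assumptions, not values taken from this repository.

```Bash
# Hypothetical manual launch of command.sh on 2 machines with 8 GPUs each;
# "hosts" is an assumed hostfile listing the instances' private IPs.
mpirun --allow-run-as-root -hostfile hosts -np 16 -npernode 8 ./command.sh
```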
-------------------------------------------------------------------------------- /tensorflow/image_classification/rn50_bs64_command.sh: -------------------------------------------------------------------------------- 1 | python fastgpu_script.py --name=tfbenchmark --model=resnet50 --batch_size=64 --machines=4 --gpus=8 -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/README.md: -------------------------------------------------------------------------------- 1 | # tf_cnn_benchmarks: High performance benchmarks 2 | 3 | **Note: tf_cnn_benchmarks is no longer maintained.** 4 | 5 | tf_cnn_benchmarks contains TensorFlow 1 implementations of several popular 6 | convolutional models, and is designed to be as fast as possible. 7 | tf_cnn_benchmarks supports both running on a single machine or running in 8 | distributed mode across multiple hosts. 9 | 10 | tf_cnn_benchmarks is no longer maintained. Although it will run with TensorFlow 11 | 2, it was written and optimized for TensorFlow 1, and has not been maintained 12 | since TensorFlow 2 was released. For clean and easy-to-read TensorFlow 2 models, 13 | please see the [TensorFlow Official 14 | Models](https://github.com/tensorflow/models/tree/master/official). 15 | 16 | ## Getting Started 17 | 18 | To run ResNet50 with synthetic data without distortions with a single GPU, run 19 | 20 | ``` 21 | python tf_cnn_benchmarks.py --num_gpus=1 --batch_size=32 --model=resnet50 --variable_update=parameter_server 22 | ``` 23 | 24 | Note that the master branch of tf_cnn_benchmarks occasionally requires the 25 | latest nightly version of TensorFlow. You can install the nightly version by 26 | running `pip install tf-nightly-gpu` in a clean environment, or by installing 27 | TensorFlow from source. We sometimes will create a branch of tf_cnn_benchmarks, 28 | in the form of cnn_tf_vX.Y_compatible, that is compatible with TensorFlow 29 | version X.Y. For example, branch 30 | [cnn_tf_v1.9_compatible](https://github.com/tensorflow/benchmarks/tree/cnn_tf_v1.9_compatible/scripts/tf_cnn_benchmarks) 31 | works with TensorFlow 1.9. However, as tf_cnn_benchmarks is no longer 32 | maintained, we will likely no longer create new branches. 33 | 34 | Some important flags are 35 | 36 | * model: Model to use, e.g. resnet50, inception3, vgg16, and alexnet. 37 | * num_gpus: Number of GPUs to use. 38 | * data_dir: Path to data to process. If not set, synthetic data is used. To 39 | use Imagenet data use these 40 | [instructions](https://github.com/tensorflow/models/tree/master/research/inception#getting-started) 41 | as a starting point. 42 | * batch_size: Batch size for each GPU. 43 | * variable_update: The method for managing variables: parameter_server 44 | ,replicated, distributed_replicated, independent 45 | * local_parameter_device: Device to use as parameter server: cpu or gpu. 46 | 47 | To see the full list of flags, run `python tf_cnn_benchmarks.py --help`. 48 | 49 | To run ResNet50 with real data with 8 GPUs, run: 50 | 51 | ``` 52 | python tf_cnn_benchmarks.py --data_format=NCHW --batch_size=256 \ 53 | --model=resnet50 --optimizer=momentum --variable_update=replicated \ 54 | --nodistortions --gradient_repacking=8 --num_gpus=8 \ 55 | --num_epochs=90 --weight_decay=1e-4 --data_dir=${DATA_DIR} --use_fp16 \ 56 | --train_dir=${CKPT_DIR} 57 | ``` 58 | This will train a ResNet-50 model on ImageNet with 2048 batch size on 8 59 | GPUs. The model should train to around 76% accuracy. 
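For reference, this repository drives the same script through AIACC: the `command.sh` wrapper in the image_classification directory uses `--variable_update=perseus`. A minimal single-GPU sketch in that style is shown below; it assumes the AIACC (Perseus) TensorFlow plugin is installed, since `perseus` is not a `variable_update` mode in stock tf_cnn_benchmarks.

```
python tf_cnn_benchmarks.py --num_gpus=1 --batch_size=64 --model=resnet50 --variable_update=perseus --use_fp16=True
```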
60 | 61 | ## Running the tests 62 | 63 | To run the tests, run 64 | 65 | ```bash 66 | pip install portpicker 67 | python run_tests.py && python run_tests.py --run_distributed_tests 68 | ``` 69 | 70 | Note the tests require portpicker. 71 | 72 | The command above runs a subset of tests that is both fast and fairly 73 | comprehensive. Alternatively, all the tests can be run, but this will take a 74 | long time: 75 | 76 | ```bash 77 | python run_tests.py --full_tests && python run_tests.py --full_tests --run_distributed_tests 78 | ``` 79 | 80 | We will run all tests on every PR before merging them, so it is not necessary 81 | to pass `--full_tests` when running tests yourself. 82 | 83 | To run an individual test, such as method `testParameterServer` of test class 84 | `TfCnnBenchmarksTest` of module `benchmark_cnn_test`, run 85 | 86 | ```bash 87 | python -m unittest -v benchmark_cnn_test.TfCnnBenchmarksTest.testParameterServer 88 | ``` 89 | -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/all_reduce_benchmark_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for all_reduce_benchmark.py.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow.compat.v1 as tf 22 | 23 | import all_reduce_benchmark 24 | import benchmark_cnn 25 | import test_util 26 | 27 | 28 | class AllReduceBenchmarkTest(tf.test.TestCase): 29 | """Tests the all-reduce benchmark.""" 30 | 31 | def _test_run_benchmark(self, params): 32 | """Tests that run_benchmark() runs successfully with the params.""" 33 | logs = [] 34 | with test_util.monkey_patch(all_reduce_benchmark, 35 | log_fn=test_util.print_and_add_to_list(logs)): 36 | bench_cnn = benchmark_cnn.BenchmarkCNN(params) 37 | all_reduce_benchmark.run_benchmark(bench_cnn, num_iters=5) 38 | self.assertRegex(logs[-1], '^Average time per step: [0-9.]+$') 39 | 40 | def test_run_benchmark(self): 41 | """Tests that run_benchmark() runs successfully.""" 42 | params = benchmark_cnn.make_params(num_batches=10, 43 | variable_update='replicated', 44 | num_gpus=2) 45 | self._test_run_benchmark(params) 46 | params = params._replace(hierarchical_copy=True, gradient_repacking=8, 47 | num_gpus=8) 48 | self._test_run_benchmark(params) 49 | 50 | if __name__ == '__main__': 51 | tf.disable_v2_behavior() 52 | tf.test.main() 53 | -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/benchmark_cnn_distributed_test_runner.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Used to run benchmark_cnn for distributed tests. 17 | 18 | In distributed tests, we spawn processes to run tf_cnn_benchmark tasks. We could 19 | directly spawn tf_cnn_benchmark processes, but we want some added functionality, 20 | such as being able to inject custom images during training. So instead, this 21 | file is spawned as a Python process, which supports the added functionality. 22 | """ 23 | 24 | from __future__ import absolute_import 25 | from __future__ import division 26 | from __future__ import print_function 27 | 28 | from absl import flags as absl_flags 29 | import numpy as np 30 | import tensorflow.compat.v1 as tf 31 | import benchmark_cnn 32 | import flags 33 | import preprocessing 34 | import test_util 35 | 36 | 37 | absl_flags.DEFINE_string('fake_input', 'none', 38 | """What fake input to inject into benchmark_cnn. This 39 | is ignored if --model=test_model. 40 | Options are: 41 | none: Do not inject any fake input. 42 | zeros_and_ones: Half the images will be all 0s with 43 | a label of 0. 
Half the images will be all 1s with a 44 | label of 1.""") 45 | 46 | flags.define_flags() 47 | FLAGS = flags.FLAGS 48 | 49 | 50 | def get_test_image_preprocessor(batch_size, params): 51 | """Returns the preprocessing.TestImagePreprocessor that should be injected. 52 | 53 | Returns None if no preprocessor should be injected. 54 | 55 | Args: 56 | batch_size: The batch size across all GPUs. 57 | params: BenchmarkCNN's parameters. 58 | Returns: 59 | Returns the preprocessing.TestImagePreprocessor that should be injected. 60 | Raises: 61 | ValueError: Flag --fake_input is an invalid value. 62 | """ 63 | if FLAGS.fake_input == 'none': 64 | return None 65 | elif FLAGS.fake_input == 'zeros_and_ones': 66 | half_batch_size = batch_size // 2 67 | images = np.zeros((batch_size, 227, 227, 3), dtype=np.float32) 68 | images[half_batch_size:, :, :, :] = 1 69 | labels = np.array([0] * half_batch_size + [1] * half_batch_size, 70 | dtype=np.int32) 71 | preprocessor = preprocessing.TestImagePreprocessor( 72 | batch_size, [227, 227, 3], params.num_gpus, 73 | benchmark_cnn.get_data_type(params)) 74 | preprocessor.set_fake_data(images, labels) 75 | preprocessor.expected_subset = 'validation' if params.eval else 'train' 76 | return preprocessor 77 | else: 78 | raise ValueError('Invalid --fake_input: %s' % FLAGS.fake_input) 79 | 80 | 81 | def run_with_real_model(params): 82 | """Runs tf_cnn_benchmarks with a real model.""" 83 | bench = benchmark_cnn.BenchmarkCNN(params) 84 | bench.print_info() 85 | preprocessor = get_test_image_preprocessor(bench.batch_size, params) 86 | if preprocessor is not None: 87 | # The test image preprocessor requires queue runners. Since this file is 88 | # used for testing, it is OK to access protected members. 89 | # pylint: disable=protected-access 90 | bench.dataset._queue_runner_required = True 91 | # pylint: enable=protected-access 92 | bench.input_preprocessor = preprocessor 93 | bench.run() 94 | 95 | 96 | def run_with_test_model(params): 97 | """Runs tf_cnn_benchmarks with a test model.""" 98 | model = test_util.TestCNNModel() 99 | inputs = test_util.get_fake_var_update_inputs() 100 | with test_util.monkey_patch(benchmark_cnn, 101 | LOSS_AND_ACCURACY_DIGITS_TO_SHOW=15): 102 | bench = benchmark_cnn.BenchmarkCNN(params, dataset=test_util.TestDataSet(), 103 | model=model) 104 | # The test model does not use labels when computing loss, so the label 105 | # values do not matter as long as it's the right shape. 106 | labels = np.array([1] * inputs.shape[0]) 107 | bench.input_preprocessor.set_fake_data(inputs, labels) 108 | bench.run() 109 | 110 | 111 | def main(_): 112 | params = benchmark_cnn.make_params_from_flags() 113 | params = benchmark_cnn.setup(params) 114 | if params.model == 'test_model': 115 | run_with_test_model(params) 116 | else: 117 | run_with_real_model(params) 118 | 119 | 120 | if __name__ == '__main__': 121 | tf.disable_v2_behavior() 122 | tf.app.run() 123 | -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/cnn_util_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for tf_cnn_benchmarks.cnn_util.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import threading 23 | import time 24 | 25 | import tensorflow.compat.v1 as tf 26 | 27 | import cnn_util 28 | 29 | 30 | class CnnUtilBarrierTest(tf.test.TestCase): 31 | 32 | def testBarrier(self): 33 | num_tasks = 20 34 | num_waits = 4 35 | barrier = cnn_util.Barrier(num_tasks) 36 | threads = [] 37 | sync_matrix = [] 38 | for i in range(num_tasks): 39 | sync_times = [0] * num_waits 40 | thread = threading.Thread( 41 | target=self._run_task, args=(barrier, sync_times)) 42 | thread.start() 43 | threads.append(thread) 44 | sync_matrix.append(sync_times) 45 | for thread in threads: 46 | thread.join() 47 | for wait_index in range(num_waits - 1): 48 | # Max of times at iteration i < min of times at iteration i + 1 49 | self.assertLessEqual( 50 | max([sync_matrix[i][wait_index] for i in range(num_tasks)]), 51 | min([sync_matrix[i][wait_index + 1] for i in range(num_tasks)])) 52 | 53 | def _run_task(self, barrier, sync_times): 54 | for wait_index in range(len(sync_times)): 55 | sync_times[wait_index] = time.time() 56 | barrier.wait() 57 | 58 | def testBarrierAbort(self): 59 | num_tasks = 2 60 | num_waits = 1 61 | sync_times = [0] * num_waits 62 | barrier = cnn_util.Barrier(num_tasks) 63 | thread = threading.Thread( 64 | target=self._run_task, args=(barrier, sync_times)) 65 | thread.start() 66 | barrier.abort() 67 | # thread won't be blocked by done barrier. 68 | thread.join() 69 | 70 | 71 | class ImageProducerTest(tf.test.TestCase): 72 | 73 | def _slow_tensorflow_op(self): 74 | """Returns a TensorFlow op that takes approximately 0.1s to complete.""" 75 | def slow_func(v): 76 | time.sleep(0.1) 77 | return v 78 | return tf.py_func(slow_func, [tf.constant(0.)], tf.float32).op 79 | 80 | def _test_image_producer(self, batch_group_size, put_slower_than_get): 81 | # We use the variable x to simulate a staging area of images. x represents 82 | # the number of batches in the staging area. 83 | x = tf.Variable(0, dtype=tf.int32) 84 | if put_slower_than_get: 85 | put_dep = self._slow_tensorflow_op() 86 | get_dep = tf.no_op() 87 | else: 88 | put_dep = tf.no_op() 89 | get_dep = self._slow_tensorflow_op() 90 | with tf.control_dependencies([put_dep]): 91 | put_op = x.assign_add(batch_group_size, use_locking=True) 92 | with tf.control_dependencies([get_dep]): 93 | get_op = x.assign_sub(1, use_locking=True) 94 | with self.test_session() as sess: 95 | sess.run(tf.variables_initializer([x])) 96 | image_producer = cnn_util.ImageProducer(sess, put_op, batch_group_size, 97 | use_python32_barrier=False) 98 | image_producer.start() 99 | for _ in range(5 * batch_group_size): 100 | sess.run(get_op) 101 | # We assert x is nonnegative, to ensure image_producer never causes 102 | # an unstage op to block. 
We assert x is at most 2 * batch_group_size, 103 | # to ensure it doesn't use too much memory by storing too many batches 104 | # in the staging area. 105 | self.assertGreaterEqual(sess.run(x), 0) 106 | self.assertLessEqual(sess.run(x), 2 * batch_group_size) 107 | image_producer.notify_image_consumption() 108 | self.assertGreaterEqual(sess.run(x), 0) 109 | self.assertLessEqual(sess.run(x), 2 * batch_group_size) 110 | 111 | image_producer.done() 112 | time.sleep(0.1) 113 | self.assertGreaterEqual(sess.run(x), 0) 114 | self.assertLessEqual(sess.run(x), 2 * batch_group_size) 115 | 116 | def test_image_producer(self): 117 | self._test_image_producer(1, False) 118 | self._test_image_producer(1, True) 119 | self._test_image_producer(2, False) 120 | self._test_image_producer(2, True) 121 | self._test_image_producer(3, False) 122 | self._test_image_producer(3, True) 123 | self._test_image_producer(8, False) 124 | self._test_image_producer(8, True) 125 | 126 | 127 | if __name__ == '__main__': 128 | tf.disable_v2_behavior() 129 | tf.test.main() 130 | -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Constants used in tf_cnn_benchmarks.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | from enum import Enum 22 | 23 | # Results fetched with this prefix will not be reduced. Instead, they will be 24 | # passed as matrices to model's postprocess function. 25 | UNREDUCED_ACCURACY_OP_PREFIX = "tensor:" 26 | 27 | # Eval result values with this name prefix will be included in summary. 28 | SIMPLE_VALUE_RESULT_PREFIX = "simple_value:" 29 | 30 | 31 | class BenchmarkMode(object): 32 | """Benchmark running mode.""" 33 | TRAIN = "training" 34 | EVAL = "evaluation" 35 | TRAIN_AND_EVAL = "training + evaluation" 36 | FORWARD_ONLY = "forward only" 37 | 38 | 39 | class NetworkTopology(str, Enum): 40 | """Network topology describes how multiple GPUs are inter-connected. 41 | """ 42 | # DGX-1 uses hybrid cube mesh topology with the following device peer to peer 43 | # matrix: 44 | # DMA: 0 1 2 3 4 5 6 7 45 | # 0: Y Y Y Y Y N N N 46 | # 1: Y Y Y Y N Y N N 47 | # 2: Y Y Y Y N N Y N 48 | # 3: Y Y Y Y N N N Y 49 | # 4: Y N N N Y Y Y Y 50 | # 5: N Y N N Y Y Y Y 51 | # 6: N N Y N Y Y Y Y 52 | # 7: N N N Y Y Y Y Y 53 | DGX1 = "dgx1" 54 | 55 | # V100 in GCP are connected with the following device peer to peer matrix. 56 | # In this topology, bandwidth of the connection depends on if it uses NVLink 57 | # or PCIe link. 
58 | # DMA: 0 1 2 3 4 5 6 7 59 | # 0: Y Y Y Y N Y N N 60 | # 1: Y Y Y Y N N N N 61 | # 2: Y Y Y Y N N N Y 62 | # 3: Y Y Y Y N N N N 63 | # 4: N N N N Y Y Y Y 64 | # 5: Y N N N Y Y Y Y 65 | # 6: N N N N Y Y Y Y 66 | # 7: N N Y N Y Y Y Y 67 | GCP_V100 = "gcp_v100" 68 | -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/flags.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains functions to define flags and params. 16 | 17 | Calling a DEFINE_* function will add a ParamSpec namedtuple to the param_spec 18 | dict. The DEFINE_* arguments match those in absl. Calling define_flags() creates 19 | a command-line flag for every ParamSpec defined by a DEFINE_* functions. 20 | 21 | The reason we don't use absl flags directly is that we want to be able to use 22 | tf_cnn_benchmarks as a library. When using it as a library, we don't want to 23 | define any flags, but instead pass parameters to the BenchmarkCNN constructor. 24 | """ 25 | 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | 30 | from collections import namedtuple 31 | 32 | from absl import flags as absl_flags 33 | import six 34 | 35 | 36 | FLAGS = absl_flags.FLAGS 37 | 38 | 39 | # ParamSpec describes one of benchmark_cnn.BenchmarkCNN's parameters. 40 | ParamSpec = namedtuple('_ParamSpec', 41 | ['flag_type', 'default_value', 'description', 42 | 'kwargs']) 43 | 44 | 45 | # Maps from parameter name to its ParamSpec. 
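# ---------------------------------------------------------------------------
# Added illustration (not in the original file): when tf_cnn_benchmarks is
# used as a library, as described in the module docstring above, parameters
# are passed programmatically instead of through command-line flags, e.g.
# (mirroring the usage in all_reduce_benchmark_test.py and
# benchmark_cnn_distributed_test_runner.py in this directory):
#
#   import benchmark_cnn
#   params = benchmark_cnn.make_params(num_gpus=1,
#                                      batch_size=32,
#                                      model='resnet50',
#                                      variable_update='parameter_server')
#   bench = benchmark_cnn.BenchmarkCNN(params)
#   bench.run()
# ---------------------------------------------------------------------------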
46 | param_specs = {} 47 | 48 | 49 | def DEFINE_string(name, default, help): # pylint: disable=invalid-name,redefined-builtin 50 | param_specs[name] = ParamSpec('string', default, help, {}) 51 | 52 | 53 | def DEFINE_boolean(name, default, help): # pylint: disable=invalid-name,redefined-builtin 54 | param_specs[name] = ParamSpec('boolean', default, help, {}) 55 | 56 | 57 | def DEFINE_integer(name, default, help, lower_bound=None, upper_bound=None): # pylint: disable=invalid-name,redefined-builtin 58 | kwargs = {'lower_bound': lower_bound, 'upper_bound': upper_bound} 59 | param_specs[name] = ParamSpec('integer', default, help, kwargs) 60 | 61 | 62 | def DEFINE_float(name, default, help, lower_bound=None, upper_bound=None): # pylint: disable=invalid-name,redefined-builtin 63 | kwargs = {'lower_bound': lower_bound, 'upper_bound': upper_bound} 64 | param_specs[name] = ParamSpec('float', default, help, kwargs) 65 | 66 | 67 | def DEFINE_enum(name, default, enum_values, help): # pylint: disable=invalid-name,redefined-builtin 68 | kwargs = {'enum_values': enum_values} 69 | param_specs[name] = ParamSpec('enum', default, help, kwargs) 70 | 71 | 72 | def DEFINE_list(name, default, help): # pylint: disable=invalid-name,redefined-builtin 73 | param_specs[name] = ParamSpec('list', default, help, {}) 74 | 75 | 76 | def define_flags(specs=None): 77 | """Define a command line flag for each ParamSpec in flags.param_specs.""" 78 | specs = specs or param_specs 79 | define_flag = { 80 | 'boolean': absl_flags.DEFINE_boolean, 81 | 'float': absl_flags.DEFINE_float, 82 | 'integer': absl_flags.DEFINE_integer, 83 | 'string': absl_flags.DEFINE_string, 84 | 'enum': absl_flags.DEFINE_enum, 85 | 'list': absl_flags.DEFINE_list 86 | } 87 | for name, param_spec in six.iteritems(specs): 88 | if param_spec.flag_type not in define_flag: 89 | raise ValueError('Unknown flag_type %s' % param_spec.flag_type) 90 | else: 91 | define_flag[param_spec.flag_type](name, param_spec.default_value, 92 | help=param_spec.description, 93 | **param_spec.kwargs) 94 | -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/image_classification/scripts/tf_cnn_benchmarks/models/__init__.py -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/models/alexnet_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Alexnet model configuration. 
16 | 17 | References: 18 | Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton 19 | ImageNet Classification with Deep Convolutional Neural Networks 20 | Advances in Neural Information Processing Systems. 2012 21 | """ 22 | 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | import tensorflow.compat.v1 as tf 28 | from models import model 29 | 30 | 31 | class AlexnetModel(model.CNNModel): 32 | """Alexnet cnn model.""" 33 | 34 | def __init__(self, params=None): 35 | super(AlexnetModel, self).__init__( 36 | 'alexnet', 224 + 3, 512, 0.005, params=params) 37 | 38 | def add_inference(self, cnn): 39 | # Note: VALID requires padding the images by 3 in width and height 40 | cnn.conv(64, 11, 11, 4, 4, 'VALID') 41 | cnn.mpool(3, 3, 2, 2) 42 | cnn.conv(192, 5, 5) 43 | cnn.mpool(3, 3, 2, 2) 44 | cnn.conv(384, 3, 3) 45 | cnn.conv(384, 3, 3) 46 | cnn.conv(256, 3, 3) 47 | cnn.mpool(3, 3, 2, 2) 48 | cnn.reshape([-1, 256 * 6 * 6]) 49 | cnn.affine(4096) 50 | cnn.dropout() 51 | cnn.affine(4096) 52 | cnn.dropout() 53 | 54 | 55 | class AlexnetCifar10Model(model.CNNModel): 56 | """Alexnet cnn model for cifar datasets. 57 | 58 | The model architecture follows the one defined in the tensorflow tutorial 59 | model. 60 | 61 | Reference model: tensorflow/models/tutorials/image/cifar10/cifar10.py 62 | Paper: http://www.cs.toronto.edu/~kriz/learning-features-2009-TR.pdf 63 | """ 64 | 65 | def __init__(self, params=None): 66 | super(AlexnetCifar10Model, self).__init__( 67 | 'alexnet', 32, 128, 0.1, params=params) 68 | 69 | def add_inference(self, cnn): 70 | cnn.conv(64, 5, 5, 1, 1, 'SAME', stddev=5e-2) 71 | cnn.mpool(3, 3, 2, 2, mode='SAME') 72 | cnn.lrn(depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75) 73 | cnn.conv(64, 5, 5, 1, 1, 'SAME', bias=0.1, stddev=5e-2) 74 | cnn.lrn(depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75) 75 | cnn.mpool(3, 3, 2, 2, mode='SAME') 76 | shape = cnn.top_layer.get_shape().as_list() 77 | flat_dim = shape[1] * shape[2] * shape[3] 78 | cnn.reshape([-1, flat_dim]) 79 | cnn.affine(384, stddev=0.04, bias=0.1) 80 | cnn.affine(192, stddev=0.04, bias=0.1) 81 | 82 | def get_learning_rate(self, global_step, batch_size): 83 | num_examples_per_epoch = 50000 84 | num_epochs_per_decay = 100 85 | decay_steps = ( 86 | num_epochs_per_decay * num_examples_per_epoch // batch_size) 87 | decay_factor = 0.1 88 | return tf.train.exponential_decay( 89 | self.learning_rate, 90 | global_step, 91 | decay_steps, 92 | decay_factor, 93 | staircase=True) 94 | -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/models/densenet_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Densenet model configuration. 17 | 18 | References: 19 | "Densely Connected Convolutional Networks": https://arxiv.org/pdf/1608.06993 20 | """ 21 | 22 | from __future__ import absolute_import 23 | from __future__ import division 24 | from __future__ import print_function 25 | 26 | import numpy as np 27 | from six.moves import xrange # pylint: disable=redefined-builtin 28 | import tensorflow.compat.v1 as tf 29 | from models import model as model_lib 30 | 31 | 32 | class DensenetCifar10Model(model_lib.CNNModel): 33 | """Densenet cnn network configuration.""" 34 | 35 | def __init__(self, model, layer_counts, growth_rate, params=None): 36 | self.growth_rate = growth_rate 37 | super(DensenetCifar10Model, self).__init__( 38 | model, 32, 64, 0.1, layer_counts=layer_counts, params=params) 39 | self.batch_norm_config = {'decay': 0.9, 'epsilon': 1e-5, 'scale': True} 40 | 41 | def dense_block(self, cnn, growth_rate): 42 | input_layer = cnn.top_layer 43 | c = cnn.batch_norm(input_layer, **self.batch_norm_config) 44 | c = tf.nn.relu(c) 45 | c = cnn.conv(growth_rate, 3, 3, 1, 1, stddev=np.sqrt(2.0/9/growth_rate), 46 | activation=None, input_layer=c) 47 | channel_index = 3 if cnn.channel_pos == 'channels_last' else 1 48 | cnn.top_layer = tf.concat([input_layer, c], channel_index) 49 | cnn.top_size += growth_rate 50 | 51 | def transition_layer(self, cnn): 52 | in_size = cnn.top_size 53 | cnn.batch_norm(**self.batch_norm_config) 54 | cnn.top_layer = tf.nn.relu(cnn.top_layer) 55 | cnn.conv(in_size, 1, 1, 1, 1, stddev=np.sqrt(2.0/9/in_size)) 56 | cnn.apool(2, 2, 2, 2) 57 | 58 | def add_inference(self, cnn): 59 | if self.layer_counts is None: 60 | raise ValueError('Layer counts not specified for %s' % self.get_model()) 61 | if self.growth_rate is None: 62 | raise ValueError('Growth rate not specified for %s' % self.get_model()) 63 | 64 | cnn.conv(16, 3, 3, 1, 1, activation=None) 65 | # Block 1 66 | for _ in xrange(self.layer_counts[0]): 67 | self.dense_block(cnn, self.growth_rate) 68 | self.transition_layer(cnn) 69 | # Block 2 70 | for _ in xrange(self.layer_counts[1]): 71 | self.dense_block(cnn, self.growth_rate) 72 | self.transition_layer(cnn) 73 | # Block 3 74 | for _ in xrange(self.layer_counts[2]): 75 | self.dense_block(cnn, self.growth_rate) 76 | cnn.batch_norm(**self.batch_norm_config) 77 | cnn.top_layer = tf.nn.relu(cnn.top_layer) 78 | channel_index = 3 if cnn.channel_pos == 'channels_last' else 1 79 | cnn.top_size = cnn.top_layer.get_shape().as_list()[channel_index] 80 | cnn.spatial_mean() 81 | 82 | def get_learning_rate(self, global_step, batch_size): 83 | num_batches_per_epoch = 50000 // batch_size 84 | boundaries = num_batches_per_epoch * np.array([150, 225, 300], 85 | dtype=np.int64) 86 | boundaries = [x for x in boundaries] 87 | values = [0.1, 0.01, 0.001, 0.0001] 88 | return tf.train.piecewise_constant(global_step, boundaries, values) 89 | 90 | 91 | def create_densenet40_k12_model(): 92 | return DensenetCifar10Model('densenet40_k12', (12, 12, 12), 12) 93 | 94 | 95 | def create_densenet100_k12_model(): 96 | return DensenetCifar10Model('densenet100_k12', (32, 32, 32), 12) 97 | 98 | 99 | def create_densenet100_k24_model(): 100 | return DensenetCifar10Model('densenet100_k24', (32, 32, 32), 24) 101 | -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/models/experimental/__init__.py: 
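Similarly, the piecewise-constant schedule in DensenetCifar10Model.get_learning_rate above can be sanity-checked in plain Python (batch size 64 assumed, the model's default):

```python
def densenet_lr(step, batch_size=64):
    """Mirror of the piecewise_constant schedule defined above."""
    num_batches_per_epoch = 50000 // batch_size
    boundaries = [num_batches_per_epoch * e for e in (150, 225, 300)]
    values = [0.1, 0.01, 0.001, 0.0001]
    for boundary, value in zip(boundaries, values):
        if step <= boundary:
            return value
    return values[-1]

print(densenet_lr(0))        # 0.1
print(densenet_lr(200000))   # 0.001 (past the epoch-225 boundary)
```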
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/image_classification/scripts/tf_cnn_benchmarks/models/experimental/__init__.py -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/models/googlenet_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Googlenet model configuration. 16 | 17 | References: 18 | Szegedy, Christian, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed, 19 | Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, and Andrew Rabinovich 20 | Going deeper with convolutions 21 | arXiv preprint arXiv:1409.4842 (2014) 22 | """ 23 | 24 | from __future__ import absolute_import 25 | from __future__ import division 26 | from __future__ import print_function 27 | 28 | from models import model 29 | 30 | 31 | class GooglenetModel(model.CNNModel): 32 | """GoogLeNet.""" 33 | 34 | def __init__(self, params=None): 35 | super(GooglenetModel, self).__init__( 36 | 'googlenet', 224, 32, 0.005, params=params) 37 | 38 | def add_inference(self, cnn): 39 | 40 | def inception_v1(cnn, k, l, m, n, p, q): 41 | cols = [[('conv', k, 1, 1)], [('conv', l, 1, 1), ('conv', m, 3, 3)], 42 | [('conv', n, 1, 1), ('conv', p, 5, 5)], 43 | [('mpool', 3, 3, 1, 1, 'SAME'), ('conv', q, 1, 1)]] 44 | cnn.inception_module('incept_v1', cols) 45 | 46 | cnn.conv(64, 7, 7, 2, 2) 47 | cnn.mpool(3, 3, 2, 2, mode='SAME') 48 | cnn.conv(64, 1, 1) 49 | cnn.conv(192, 3, 3) 50 | cnn.mpool(3, 3, 2, 2, mode='SAME') 51 | inception_v1(cnn, 64, 96, 128, 16, 32, 32) 52 | inception_v1(cnn, 128, 128, 192, 32, 96, 64) 53 | cnn.mpool(3, 3, 2, 2, mode='SAME') 54 | inception_v1(cnn, 192, 96, 208, 16, 48, 64) 55 | inception_v1(cnn, 160, 112, 224, 24, 64, 64) 56 | inception_v1(cnn, 128, 128, 256, 24, 64, 64) 57 | inception_v1(cnn, 112, 144, 288, 32, 64, 64) 58 | inception_v1(cnn, 256, 160, 320, 32, 128, 128) 59 | cnn.mpool(3, 3, 2, 2, mode='SAME') 60 | inception_v1(cnn, 256, 160, 320, 32, 128, 128) 61 | inception_v1(cnn, 384, 192, 384, 48, 128, 128) 62 | cnn.apool(7, 7, 1, 1, mode='VALID') 63 | cnn.reshape([-1, 1024]) 64 | -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/models/lenet_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
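A quick way to read the inception_v1(cnn, k, l, m, n, p, q) calls above: only the k, m, p and q branches reach the concatenated output (l and n are the 1x1 reductions feeding the 3x3 and 5x5 convolutions), so the module's output depth is k + m + p + q. A small check:

```python
def inception_out_channels(k, l, m, n, p, q):
    # l and n feed the 3x3/5x5 branches and do not appear in the concat.
    return k + m + p + q

print(inception_out_channels(64, 96, 128, 16, 32, 32))      # 256, first module
print(inception_out_channels(384, 192, 384, 48, 128, 128))  # 1024, matches reshape([-1, 1024])
```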
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Lenet model configuration. 17 | 18 | References: 19 | LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick Haffner 20 | Gradient-based learning applied to document recognition 21 | Proceedings of the IEEE (1998) 22 | """ 23 | 24 | from __future__ import absolute_import 25 | from __future__ import division 26 | from __future__ import print_function 27 | 28 | from models import model 29 | 30 | 31 | class Lenet5Model(model.CNNModel): 32 | """Lenet5.""" 33 | 34 | def __init__(self, params=None): 35 | super(Lenet5Model, self).__init__('lenet5', 28, 32, 0.005, params=params) 36 | 37 | def add_inference(self, cnn): 38 | # Note: This matches TF's MNIST tutorial model 39 | cnn.conv(32, 5, 5) 40 | cnn.mpool(2, 2) 41 | cnn.conv(64, 5, 5) 42 | cnn.mpool(2, 2) 43 | cnn.reshape([-1, 64 * 7 * 7]) 44 | cnn.affine(512) 45 | -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/models/official_resnet_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Import official resnet models.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow.compat.v1 as tf 22 | import datasets 23 | from models import model as model_lib 24 | 25 | 26 | class ImagenetResnetModel(model_lib.CNNModel): 27 | """Official resnet models.""" 28 | 29 | def __init__(self, resnet_size, version=2, params=None): 30 | """These are the parameters that work for Imagenet data. 31 | 32 | Args: 33 | resnet_size: The number of convolutional layers needed in the model. 34 | version: 1 or 2 for v1 or v2, respectively. 35 | params: params passed by BenchmarkCNN. 
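The hard-coded reshape to [-1, 64 * 7 * 7] in Lenet5Model above follows from the 28x28 MNIST input: the SAME-padded convolutions keep the spatial size and each 2x2 max-pool halves it:

```python
size = 28                 # MNIST input
for _ in range(2):        # two mpool(2, 2) layers; the convs are SAME-padded
    size //= 2
print(size, 64 * size * size)   # 7 3136  -> reshape([-1, 64 * 7 * 7])
```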
36 | """ 37 | default_batch_sizes = { 38 | 50: 128, 39 | 101: 32, 40 | 152: 32 41 | } 42 | batch_size = default_batch_sizes.get(resnet_size, 32) 43 | default_learning_rate = 0.0125 * batch_size / 32 44 | model_name = 'official_resnet_{}_v{}'.format(resnet_size, version) 45 | super(ImagenetResnetModel, self).__init__( 46 | model_name, 224, batch_size, default_learning_rate, params=params) 47 | self.resnet_size = resnet_size 48 | self.version = version 49 | 50 | def get_learning_rate(self, global_step, batch_size): 51 | num_batches_per_epoch = ( 52 | float(datasets.IMAGENET_NUM_TRAIN_IMAGES) / batch_size) 53 | boundaries = [int(num_batches_per_epoch * x) for x in [30, 60, 80, 90]] 54 | values = [1, 0.1, 0.01, 0.001, 0.0001] 55 | adjusted_learning_rate = ( 56 | self.learning_rate / self.default_batch_size * batch_size) 57 | values = [v * adjusted_learning_rate for v in values] 58 | return tf.train.piecewise_constant(global_step, boundaries, values) 59 | 60 | def build_network(self, images, phase_train=True, nclass=1001, 61 | data_type=tf.float32): 62 | # pylint: disable=g-import-not-at-top 63 | try: 64 | from official.resnet.r1.imagenet_main import ImagenetModel 65 | except ImportError: 66 | tf.logging.fatal('Please include tensorflow/models to the PYTHONPATH.') 67 | raise 68 | images = tf.cast(images, data_type) 69 | model_class = ImagenetModel(resnet_size=self.resnet_size, 70 | resnet_version=self.version, 71 | # The official model dtype seems to be ignored, 72 | # as the dtype it uses is the dtype of the input 73 | # images. Doesn't hurt to set it though. 74 | dtype=data_type) 75 | logits = model_class(images, phase_train) 76 | logits = tf.cast(logits, tf.float32) 77 | return model_lib.BuildNetworkResult(logits=logits, extra_info=None) 78 | -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/models/overfeat_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Overfeat model configuration. 
16 | 17 | References: 18 | OverFeat: Integrated Recognition, Localization and Detection using 19 | Convolutional Networks 20 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus, 21 | Yann LeCun, 2014 22 | http://arxiv.org/abs/1312.6229 23 | """ 24 | 25 | from __future__ import absolute_import 26 | from __future__ import division 27 | from __future__ import print_function 28 | 29 | from models import model 30 | 31 | 32 | class OverfeatModel(model.CNNModel): 33 | """OverfeatModel.""" 34 | 35 | def __init__(self, params=None): 36 | super(OverfeatModel, self).__init__( 37 | 'overfeat', 231, 32, 0.005, params=params) 38 | 39 | def add_inference(self, cnn): 40 | # Note: VALID requires padding the images by 3 in width and height 41 | cnn.conv(96, 11, 11, 4, 4, mode='VALID') 42 | cnn.mpool(2, 2) 43 | cnn.conv(256, 5, 5, 1, 1, mode='VALID') 44 | cnn.mpool(2, 2) 45 | cnn.conv(512, 3, 3) 46 | cnn.conv(1024, 3, 3) 47 | cnn.conv(1024, 3, 3) 48 | cnn.mpool(2, 2) 49 | cnn.reshape([-1, 1024 * 6 * 6]) 50 | cnn.affine(3072) 51 | cnn.dropout() 52 | cnn.affine(4096) 53 | cnn.dropout() 54 | -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/models/resnet_model_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
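The reshape to [-1, 1024 * 6 * 6] in OverfeatModel above can be traced from the 231x231 input declared in the constructor: VALID convolutions shrink the feature map, SAME ones preserve it, and each 2x2 pool halves it.

```python
def valid_conv(size, kernel, stride=1):
    return (size - kernel) // stride + 1

size = 231
size = valid_conv(size, 11, 4)   # conv 11x11/4 VALID -> 56
size //= 2                       # mpool 2x2          -> 28
size = valid_conv(size, 5)       # conv 5x5 VALID     -> 24
size //= 2                       # mpool 2x2          -> 12
# three SAME 3x3 convs keep the size at 12
size //= 2                       # mpool 2x2          -> 6
print(size)                      # 6, hence 1024 * 6 * 6 flattened features
```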
14 | # ============================================================================== 15 | """Tests for resnet_model.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import mock 22 | import tensorflow.compat.v1 as tf 23 | 24 | from models import resnet_model 25 | 26 | 27 | class ResNetModelTest(tf.test.TestCase): 28 | 29 | def testGetScaledBaseLearningRateOneGpuLrFromParams(self): 30 | """Verifies setting params.resnet_base_lr pipes through.""" 31 | lr = self._get_scaled_base_learning_rate(1, 32 | 'parameter_server', 33 | 256, 34 | base_lr=.050) 35 | self.assertEqual(lr, .050) 36 | 37 | def testGetScaledBaseLearningRateOneGpu(self): 38 | lr = self._get_scaled_base_learning_rate(1, 'parameter_server', 128) 39 | self.assertEqual(lr, .064) 40 | 41 | def testGetScaledBaseLearningRateEightGpuReplicated(self): 42 | lr = self._get_scaled_base_learning_rate(8, 'replicated', 256 * 8) 43 | self.assertEqual(lr, .128) 44 | 45 | def testGetScaledBaseLearningRateTwoGpuParameter(self): 46 | lr = self._get_scaled_base_learning_rate(2, 'parameter_server', 256 * 2) 47 | self.assertEqual(lr, .256) 48 | 49 | def testGetScaledBaseLearningRateTwoGpuUneven(self): 50 | lr = self._get_scaled_base_learning_rate(2, 'replicated', 13) 51 | self.assertEqual(lr, 0.0032500000000000003) 52 | 53 | def _get_scaled_base_learning_rate(self, 54 | num_gpus, 55 | variable_update, 56 | batch_size, 57 | base_lr=None): 58 | """Simplifies testing different learning rate calculations. 59 | 60 | Args: 61 | num_gpus: Number of GPUs to be used. 62 | variable_update: Type of variable update used. 63 | batch_size: Total batch size. 64 | base_lr: Base learning rate before scaling. 65 | 66 | Returns: 67 | Base learning rate that would be used to create lr schedule. 68 | """ 69 | params = mock.Mock() 70 | params.num_gpus = num_gpus 71 | params.variable_update = variable_update 72 | if base_lr: 73 | params.resnet_base_lr = base_lr 74 | resnet50_model = resnet_model.ResnetModel('resnet50', 50, params=params) 75 | return resnet50_model.get_scaled_base_learning_rate(batch_size) 76 | 77 | 78 | if __name__ == '__main__': 79 | tf.disable_v2_behavior() 80 | tf.test.main() 81 | -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/models/tf1_only/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/image_classification/scripts/tf_cnn_benchmarks/models/tf1_only/__init__.py -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/models/trivial_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Trivial model configuration.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow.compat.v1 as tf 22 | from models import model 23 | 24 | 25 | class TrivialModel(model.CNNModel): 26 | """Trivial model configuration.""" 27 | 28 | def __init__(self, params=None): 29 | super(TrivialModel, self).__init__( 30 | 'trivial', 224 + 3, 32, 0.005, params=params) 31 | 32 | def add_inference(self, cnn): 33 | cnn.reshape([-1, 227 * 227 * 3]) 34 | cnn.affine(1) 35 | cnn.affine(4096) 36 | 37 | 38 | class TrivialCifar10Model(model.CNNModel): 39 | """Trivial cifar10 model configuration.""" 40 | 41 | def __init__(self, params=None): 42 | super(TrivialCifar10Model, self).__init__( 43 | 'trivial', 32, 32, 0.005, params=params) 44 | 45 | def add_inference(self, cnn): 46 | cnn.reshape([-1, 32 * 32 * 3]) 47 | cnn.affine(1) 48 | cnn.affine(4096) 49 | 50 | 51 | class TrivialSSD300Model(model.CNNModel): 52 | """Trivial SSD300 model configuration.""" 53 | 54 | def __init__(self, params=None): 55 | super(TrivialSSD300Model, self).__init__( 56 | 'trivial', 300, params.batch_size, 0.005, params=params) 57 | 58 | def add_inference(self, cnn): 59 | cnn.reshape([-1, 300 * 300 * 3]) 60 | cnn.affine(1) 61 | cnn.affine(4096) 62 | 63 | def get_input_shapes(self, subset): 64 | return [[self.batch_size, 300, 300, 3], 65 | [self.batch_size, 8732, 4], 66 | [self.batch_size, 8732, 1], 67 | [self.batch_size]] 68 | 69 | def loss_function(self, inputs, build_network_result): 70 | images, _, _, labels = inputs 71 | labels = tf.cast(labels, tf.int32) 72 | return super(TrivialSSD300Model, self).loss_function( 73 | (images, labels), build_network_result) 74 | -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/models/vgg_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Vgg model configuration. 16 | 17 | Includes multiple models: vgg11, vgg16, vgg19, corresponding to 18 | model A, D, and E in Table 1 of [1]. 
19 | 20 | References: 21 | [1] Simonyan, Karen, Andrew Zisserman 22 | Very Deep Convolutional Networks for Large-Scale Image Recognition 23 | arXiv:1409.1556 (2014) 24 | """ 25 | 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | 30 | from six.moves import xrange # pylint: disable=redefined-builtin 31 | from models import model 32 | 33 | 34 | def _construct_vgg(cnn, num_conv_layers): 35 | """Build vgg architecture from blocks.""" 36 | assert len(num_conv_layers) == 5 37 | for _ in xrange(num_conv_layers[0]): 38 | cnn.conv(64, 3, 3) 39 | cnn.mpool(2, 2) 40 | for _ in xrange(num_conv_layers[1]): 41 | cnn.conv(128, 3, 3) 42 | cnn.mpool(2, 2) 43 | for _ in xrange(num_conv_layers[2]): 44 | cnn.conv(256, 3, 3) 45 | cnn.mpool(2, 2) 46 | for _ in xrange(num_conv_layers[3]): 47 | cnn.conv(512, 3, 3) 48 | cnn.mpool(2, 2) 49 | for _ in xrange(num_conv_layers[4]): 50 | cnn.conv(512, 3, 3) 51 | cnn.mpool(2, 2) 52 | cnn.reshape([-1, 512 * 7 * 7]) 53 | cnn.affine(4096) 54 | cnn.dropout() 55 | cnn.affine(4096) 56 | cnn.dropout() 57 | 58 | 59 | class Vgg11Model(model.CNNModel): 60 | 61 | def __init__(self, params=None): 62 | super(Vgg11Model, self).__init__('vgg11', 224, 64, 0.005, params=params) 63 | 64 | def add_inference(self, cnn): 65 | _construct_vgg(cnn, [1, 1, 2, 2, 2]) 66 | 67 | 68 | class Vgg16Model(model.CNNModel): 69 | 70 | def __init__(self, params=None): 71 | super(Vgg16Model, self).__init__('vgg16', 224, 64, 0.005, params=params) 72 | 73 | def add_inference(self, cnn): 74 | _construct_vgg(cnn, [2, 2, 3, 3, 3]) 75 | 76 | 77 | class Vgg19Model(model.CNNModel): 78 | 79 | def __init__(self, params=None): 80 | super(Vgg19Model, self).__init__('vgg19', 224, 64, 0.005, params=params) 81 | 82 | def add_inference(self, cnn): 83 | _construct_vgg(cnn, [2, 2, 4, 4, 4]) 84 | -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/platforms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/image_classification/scripts/tf_cnn_benchmarks/platforms/__init__.py -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/platforms/default/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/image_classification/scripts/tf_cnn_benchmarks/platforms/default/__init__.py -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/platforms/default/util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
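The block specification passed to _construct_vgg above lines up with the usual VGG naming: for Vgg16Model the counts [2, 2, 3, 3, 3] give the 13 convolutional layers of configuration D (the two affine(4096) layers, plus the classifier layer the framework adds outside add_inference, make up the fully connected part), and the five 2x2 pools reduce a 224x224 input to 7x7, which is why the reshape is [-1, 512 * 7 * 7]. A quick check:

```python
vgg16_blocks = [2, 2, 3, 3, 3]
print(sum(vgg16_blocks))      # 13 conv layers (VGG configuration D)
print(224 // 2 ** 5)          # 7: five 2x2 pools applied to a 224x224 input
```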
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Utility code for the default platform.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import os 23 | import sys 24 | import tempfile 25 | 26 | import cnn_util 27 | from models import model_config 28 | 29 | 30 | _ROOT_PROJECT_DIR = os.path.dirname(cnn_util.__file__) 31 | 32 | 33 | def define_platform_params(): 34 | """Defines platform-specific parameters. 35 | 36 | Currently there are no platform-specific parameters to be defined. 37 | """ 38 | pass 39 | 40 | 41 | def get_cluster_manager(params, config_proto): 42 | """Returns the cluster manager to be used.""" 43 | return cnn_util.GrpcClusterManager(params, config_proto) 44 | 45 | 46 | def get_command_to_run_python_module(module): 47 | """Returns a command to run a Python module.""" 48 | python_interpretter = sys.executable 49 | if not python_interpretter: 50 | raise ValueError('Could not find Python interpreter') 51 | return [python_interpretter, 52 | os.path.join(_ROOT_PROJECT_DIR, module + '.py')] 53 | 54 | 55 | def get_test_output_dir(): 56 | """Returns a directory where test outputs should be placed.""" 57 | base_dir = os.environ.get('TEST_OUTPUTS_DIR', 58 | '/tmp/tf_cnn_benchmarks_test_outputs') 59 | if not os.path.exists(base_dir): 60 | os.mkdir(base_dir) 61 | return tempfile.mkdtemp(dir=base_dir) 62 | 63 | 64 | def get_test_data_dir(): 65 | """Returns the path to the test_data directory.""" 66 | return os.path.join(_ROOT_PROJECT_DIR, 'test_data') 67 | 68 | 69 | def get_ssd_backborn_model_file(): 70 | raise NotImplementedError 71 | 72 | 73 | def get_ssd_backboard_data_dir(): 74 | raise NotImplementedError 75 | 76 | 77 | def _initialize(params, config_proto): 78 | del params, config_proto 79 | model_config.register_tf1_models() 80 | 81 | 82 | _is_initalized = False 83 | 84 | 85 | def initialize(params, config_proto): 86 | global _is_initalized 87 | if _is_initalized: 88 | return 89 | _is_initalized = True 90 | _initialize(params, config_proto) 91 | -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/platforms/util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Utility code for a certain platform. 17 | 18 | This file simply imports everything from the default platform. To switch to a 19 | different platform, the import statement can be changed to point to a new 20 | platform. 
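get_command_to_run_python_module above returns an argv list rather than a shell string. A sketch of how a caller (for example, a test that needs to relaunch a benchmark module under the same interpreter) might use it; the extra flags are illustrative:

```python
import subprocess

from platforms import util as platforms_util

cmd = platforms_util.get_command_to_run_python_module('tf_cnn_benchmarks')
cmd += ['--model=resnet50', '--num_batches=10']   # illustrative flags
subprocess.check_call(cmd)
```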
21 | 22 | Creating a custom platform can be useful to, e.g., run some initialization code 23 | required by the platform or register a platform-specific model. 24 | """ 25 | 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | 30 | from platforms.default.util import * # pylint: disable=unused-import,wildcard-import 31 | -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/run_tests.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Runs the tf_cnn_benchmarks tests.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import sys 22 | import unittest 23 | 24 | from absl import app 25 | from absl import flags as absl_flags 26 | import tensorflow.compat.v1 as tf 27 | 28 | import all_reduce_benchmark_test 29 | import allreduce_test 30 | import benchmark_cnn_distributed_test 31 | import benchmark_cnn_test 32 | import cnn_util_test 33 | import variable_mgr_util_test 34 | from models import model_config 35 | 36 | # Ideally, we wouldn't need this option, and run both distributed tests and non- 37 | # distributed tests. But, TensorFlow allocates all the GPU memory by default, so 38 | # the non-distributed tests allocate all the GPU memory. The distributed tests 39 | # spawn processes that run TensorFlow, and cannot run if all the GPU memory is 40 | # already allocated. If a non-distributed test is run, then a distributed test 41 | # is run in the same process, the distributed test will fail because there is no 42 | # more GPU memory for the spawned processes to allocate. 43 | absl_flags.DEFINE_boolean('run_distributed_tests', False, 44 | 'If True, run the distributed tests. If False, the' 45 | 'non-distributed tests.') 46 | 47 | absl_flags.DEFINE_boolean('full_tests', False, 48 | 'If True, all distributed or non-distributed tests ' 49 | 'are run, which can take hours. If False, only a ' 50 | 'subset of tests will be run. 
This subset runs much ' 51 | 'faster and tests almost all the functionality as ' 52 | 'the full set of tests, so it is recommended to keep ' 53 | 'this option set to False.') 54 | 55 | FLAGS = absl_flags.FLAGS 56 | 57 | 58 | def main(_): 59 | loader = unittest.defaultTestLoader 60 | if FLAGS.full_tests: 61 | suite = unittest.TestSuite([ 62 | loader.loadTestsFromModule(allreduce_test), 63 | loader.loadTestsFromModule(cnn_util_test), 64 | loader.loadTestsFromModule(variable_mgr_util_test), 65 | loader.loadTestsFromModule(benchmark_cnn_test), 66 | loader.loadTestsFromModule(all_reduce_benchmark_test), 67 | ]) 68 | if model_config.can_import_contrib: 69 | from models.tf1_only import nasnet_test # pylint: disable=g-import-not-at-top 70 | suite.addTest(loader.loadTestsFromModule(nasnet_test)) 71 | dist_suite = unittest.TestSuite([ 72 | loader.loadTestsFromModule(benchmark_cnn_distributed_test), 73 | ]) 74 | else: 75 | suite = unittest.TestSuite([ 76 | loader.loadTestsFromModule(allreduce_test), 77 | loader.loadTestsFromModule(cnn_util_test), 78 | loader.loadTestsFromModule(all_reduce_benchmark_test), 79 | loader.loadTestsFromModule(variable_mgr_util_test), 80 | loader.loadTestsFromTestCase(benchmark_cnn_test.TestAlexnetModel), 81 | loader.loadTestsFromTestCase(benchmark_cnn_test.TfCnnBenchmarksTest), 82 | loader.loadTestsFromTestCase(benchmark_cnn_test.VariableUpdateTest), 83 | loader.loadTestsFromTestCase( 84 | benchmark_cnn_test.VariableMgrLocalReplicatedTest), 85 | ]) 86 | dist_suite = unittest.TestSuite([ 87 | loader.loadTestsFromNames([ 88 | 'benchmark_cnn_distributed_test.DistributedVariableUpdateTest' 89 | '.testVarUpdateDefault', 90 | 91 | 'benchmark_cnn_distributed_test.TfCnnBenchmarksDistributedTest' 92 | '.testParameterServer', 93 | ]), 94 | ]) 95 | 96 | if FLAGS.run_distributed_tests: 97 | print('Running distributed tests') 98 | result = unittest.TextTestRunner(verbosity=2).run(dist_suite) 99 | else: 100 | print('Running non-distributed tests') 101 | result = unittest.TextTestRunner(verbosity=2).run(suite) 102 | sys.exit(not result.wasSuccessful()) 103 | 104 | 105 | if __name__ == '__main__': 106 | tf.disable_v2_behavior() 107 | app.run(main) 108 | -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/ssd_constants.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
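As the comment above explains, the non-distributed and distributed suites cannot share a process because the first TensorFlow session claims all GPU memory. A sketch of running both groups separately, assuming the current directory is scripts/tf_cnn_benchmarks:

```python
import subprocess
import sys

# Fast non-distributed subset first, then the distributed tests in a
# fresh process so the spawned workers can still allocate GPU memory.
subprocess.check_call([sys.executable, 'run_tests.py'])
subprocess.check_call([sys.executable, 'run_tests.py', '--run_distributed_tests'])
```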
14 | # ============================================================================== 15 | """Central location for all constants related to MLPerf SSD.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | # ============================================================================== 22 | # == Model ===================================================================== 23 | # ============================================================================== 24 | IMAGE_SIZE = 300 25 | 26 | # TODO(taylorrobie): MLPerf uses 80, but COCO documents 90. (RetinaNet uses 90) 27 | # Update(taylorrobie): Labels > 81 show up in the pipeline. This will need to 28 | # be resolved. 29 | NUM_CLASSES = 81 # Including "no class". Not all COCO classes are used. 30 | 31 | # Note: Zero is special. (Background class) CLASS_INV_MAP[0] must be zero. 32 | CLASS_INV_MAP = ( 33 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 34 | 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 35 | 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 36 | 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 37 | 88, 89, 90) 38 | _MAP = {j: i for i, j in enumerate(CLASS_INV_MAP)} 39 | CLASS_MAP = tuple(_MAP.get(i, -1) for i in range(max(CLASS_INV_MAP) + 1)) 40 | 41 | NUM_SSD_BOXES = 8732 42 | 43 | RESNET_DEPTH = 34 44 | 45 | """SSD specific""" 46 | MIN_LEVEL = 3 47 | MAX_LEVEL = 8 48 | 49 | FEATURE_SIZES = (38, 19, 10, 5, 3, 1) 50 | STEPS = (8, 16, 32, 64, 100, 300) 51 | 52 | # https://github.com/amdegroot/ssd.pytorch/blob/master/data/config.py 53 | SCALES = (21, 45, 99, 153, 207, 261, 315) 54 | ASPECT_RATIOS = ((2,), (2, 3), (2, 3), (2, 3), (2,), (2,)) 55 | NUM_DEFAULTS = (4, 6, 6, 6, 4, 4) 56 | NUM_DEFAULTS_BY_LEVEL = {3: 4, 4: 6, 5: 6, 6: 6, 7: 4, 8: 4} 57 | SCALE_XY = 0.1 58 | SCALE_HW = 0.2 59 | BOX_CODER_SCALES = (1 / SCALE_XY, 1 / SCALE_XY, 1 / SCALE_HW, 1 / SCALE_HW) 60 | MATCH_THRESHOLD = 0.5 61 | 62 | # https://discuss.pytorch.org/t/how-to-preprocess-input-for-pre-trained-networks/683 63 | NORMALIZATION_MEAN = (0.485, 0.456, 0.406) 64 | NORMALIZATION_STD = (0.229, 0.224, 0.225) 65 | 66 | # SSD Cropping 67 | NUM_CROP_PASSES = 50 68 | CROP_MIN_IOU_CHOICES = (0, 0.1, 0.3, 0.5, 0.7, 0.9) 69 | P_NO_CROP_PER_PASS = 1 / (len(CROP_MIN_IOU_CHOICES) + 1) 70 | 71 | # Hard example mining 72 | NEGS_PER_POSITIVE = 3 73 | 74 | # Batch normalization 75 | BATCH_NORM_DECAY = 0.997 76 | BATCH_NORM_EPSILON = 1e-4 77 | 78 | 79 | # ============================================================================== 80 | # == Optimizer ================================================================= 81 | # ============================================================================== 82 | LEARNING_RATE_SCHEDULE = ( 83 | (0, 1e-3), 84 | (160000, 1e-4), 85 | (200000, 1e-5), 86 | ) 87 | MOMENTUM = 0.9 88 | WEIGHT_DECAY = 5e-4 89 | 90 | 91 | # ============================================================================== 92 | # == Keys ====================================================================== 93 | # ============================================================================== 94 | BOXES = "boxes" 95 | CLASSES = "classes" 96 | NUM_MATCHED_BOXES = "num_matched_boxes" 97 | IMAGE = "image" 98 | SOURCE_ID = "source_id" 99 | RAW_SHAPE = "raw_shape" 100 | PRED_BOXES = "pred_boxes" 101 | PRED_SCORES = "pred_scores" 102 | 103 | 104 | # 
============================================================================== 105 | # == Evaluation ================================================================ 106 | # ============================================================================== 107 | 108 | # Note: This is based on a batch size of 32 109 | # https://github.com/mlperf/reference/blob/master/single_stage_detector/ssd/train.py#L21-L37 110 | CHECKPOINT_FREQUENCY = 20000 111 | MAX_NUM_EVAL_BOXES = 200 112 | OVERLAP_CRITERIA = 0.5 # Used for nonmax supression 113 | MIN_SCORE = 0.05 # Minimum score to be considered during evaluation. 114 | DUMMY_SCORE = -1e5 # If no boxes are matched. 115 | 116 | ANNOTATION_FILE = "annotations/instances_val2017.json" 117 | COCO_NUM_TRAIN_IMAGES = 118287 118 | COCO_NUM_VAL_IMAGES = 4952 119 | -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/__init__.py -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00000-of-00008: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00000-of-00008 -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00001-of-00008: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00001-of-00008 -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00002-of-00008: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00002-of-00008 -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00003-of-00008: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00003-of-00008 -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00004-of-00008: -------------------------------------------------------------------------------- 
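Two of the SSD constants above can be sanity-checked in plain Python: NUM_SSD_BOXES = 8732 is the total number of default boxes over the six feature maps, and the CLASS_MAP / CLASS_INV_MAP pair squeezes the sparse COCO category ids into contiguous labels 0..80 (the tuple below is truncated for brevity):

```python
FEATURE_SIZES = (38, 19, 10, 5, 3, 1)
NUM_DEFAULTS = (4, 6, 6, 6, 4, 4)
print(sum(s * s * n for s, n in zip(FEATURE_SIZES, NUM_DEFAULTS)))  # 8732

CLASS_INV_MAP = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14)  # truncated
_MAP = {j: i for i, j in enumerate(CLASS_INV_MAP)}
CLASS_MAP = tuple(_MAP.get(i, -1) for i in range(max(CLASS_INV_MAP) + 1))
print(CLASS_MAP[13], CLASS_MAP[12])   # 12 -1  (raw id 12 is unused in COCO)
```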
https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00004-of-00008 -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00005-of-00008: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00005-of-00008 -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00006-of-00008: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00006-of-00008 -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00007-of-00008: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00007-of-00008 -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/validation-00000-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/validation-00000-of-00002 -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/validation-00001-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/validation-00001-of-00002 -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/images/black_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/images/black_image.jpg -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/images/white_image.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aliyun/alibabacloud-aiacc-demo/2e49deeb38d12d4af4c5e50bb15d731c4bbf4cf1/tensorflow/image_classification/scripts/tf_cnn_benchmarks/test_data/images/white_image.jpg -------------------------------------------------------------------------------- /tensorflow/image_classification/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Benchmark script for TensorFlow. 17 | 18 | See the README for more information. 19 | """ 20 | 21 | from __future__ import absolute_import 22 | from __future__ import division 23 | from __future__ import print_function 24 | 25 | from absl import app 26 | from absl import flags as absl_flags 27 | import tensorflow.compat.v1 as tf 28 | 29 | import benchmark_cnn 30 | import cnn_util 31 | import flags 32 | import mlperf 33 | from cnn_util import log_fn 34 | 35 | 36 | flags.define_flags() 37 | for name in flags.param_specs.keys(): 38 | absl_flags.declare_key_flag(name) 39 | 40 | absl_flags.DEFINE_boolean( 41 | 'ml_perf_compliance_logging', False, 42 | 'Print logs required to be compliant with MLPerf. If set, must clone the ' 43 | 'MLPerf training repo https://github.com/mlperf/training and add ' 44 | 'https://github.com/mlperf/training/tree/master/compliance to the ' 45 | 'PYTHONPATH') 46 | 47 | 48 | def main(positional_arguments): 49 | # Command-line arguments like '--distortions False' are equivalent to 50 | # '--distortions=True False', where False is a positional argument. To prevent 51 | # this from silently running with distortions, we do not allow positional 52 | # arguments. 53 | assert len(positional_arguments) >= 1 54 | if len(positional_arguments) > 1: 55 | raise ValueError('Received unknown positional arguments: %s' 56 | % positional_arguments[1:]) 57 | 58 | params = benchmark_cnn.make_params_from_flags() 59 | with mlperf.mlperf_logger(absl_flags.FLAGS.ml_perf_compliance_logging, 60 | params.model): 61 | params = benchmark_cnn.setup(params) 62 | bench = benchmark_cnn.BenchmarkCNN(params) 63 | 64 | tfversion = cnn_util.tensorflow_version_tuple() 65 | log_fn('TensorFlow: %i.%i' % (tfversion[0], tfversion[1])) 66 | 67 | bench.print_info() 68 | bench.run() 69 | 70 | 71 | if __name__ == '__main__': 72 | tf.disable_v2_behavior() 73 | app.run(main) # Raises error on invalid flags, unlike tf.app.run() 74 | -------------------------------------------------------------------------------- /tensorflow/image_classification/vgg16_bs64_command.sh: -------------------------------------------------------------------------------- 1 | python fastgpu_script.py --name=tfbenchmark --model=vgg16 --batch_size=64 --machines=2 --gpus=8 --------------------------------------------------------------------------------
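The command script above launches the VGG-16 batch-size-64 benchmark on two 8-GPU machines through fastgpu_script.py. For a local, single-machine run, the same workload can presumably be exercised directly through the tf_cnn_benchmarks.py entry point shown earlier, using its standard flags; a sketch, assuming it is run from the tensorflow/image_classification directory and with illustrative flag values:

```python
import subprocess
import sys

subprocess.check_call([
    sys.executable, 'scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py',
    '--model=vgg16',
    '--batch_size=64',              # per-GPU batch size
    '--num_gpus=8',
    '--variable_update=replicated',
    '--num_batches=100',
])
```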