├── official ├── vision │ ├── gan │ │ ├── megengine_mimicry │ │ │ ├── nets │ │ │ │ ├── __init__.py │ │ │ │ ├── dcgan │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── dcgan_base.py │ │ │ │ └── wgan │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── wgan_base.py │ │ │ ├── metrics │ │ │ │ ├── fid │ │ │ │ │ └── __init__.py │ │ │ │ ├── kid │ │ │ │ │ └── __init__.py │ │ │ │ ├── inception_model │ │ │ │ │ └── __init__.py │ │ │ │ ├── inception_score │ │ │ │ │ └── __init__.py │ │ │ │ ├── __init__.py │ │ │ │ ├── utils.py │ │ │ │ └── compute_is.py │ │ │ ├── datasets │ │ │ │ ├── __init__.py │ │ │ │ ├── data_utils.py │ │ │ │ └── image_loader.py │ │ │ ├── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── common.py │ │ │ │ └── vis.py │ │ │ ├── training │ │ │ │ ├── __init__.py │ │ │ │ └── metric_log.py │ │ │ └── __init__.py │ │ ├── requirements.txt │ │ ├── README.md │ │ ├── train_dcgan.py │ │ └── train_wgan.py │ ├── detection │ │ ├── __init__.py │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── basic │ │ │ │ ├── __init__.py │ │ │ │ ├── norm.py │ │ │ │ └── nn.py │ │ │ └── det │ │ │ │ ├── __init__.py │ │ │ │ ├── sampling.py │ │ │ │ ├── matcher.py │ │ │ │ ├── pooler.py │ │ │ │ ├── box_head.py │ │ │ │ └── point_head.py │ │ ├── tools │ │ │ ├── data_mapper.py │ │ │ ├── nms.py │ │ │ ├── inference.py │ │ │ └── test_in_table.py │ │ ├── models │ │ │ └── __init__.py │ │ └── configs │ │ │ ├── atss_res50_coco_3x_800size.py │ │ │ ├── fcos_res50_coco_3x_800size.py │ │ │ ├── retinanet_res50_coco_3x_800size.py │ │ │ ├── freeanchor_res50_coco_3x_800size.py │ │ │ ├── faster_rcnn_res50_coco_3x_800size.py │ │ │ ├── atss_res101_coco_3x_800size.py │ │ │ ├── fcos_res101_coco_3x_800size.py │ │ │ ├── atss_res18_coco_3x_800size.py │ │ │ ├── atss_res34_coco_3x_800size.py │ │ │ ├── fcos_res18_coco_3x_800size.py │ │ │ ├── fcos_res34_coco_3x_800size.py │ │ │ ├── retinanet_res101_coco_3x_800size.py │ │ │ ├── freeanchor_res101_coco_3x_800size.py │ │ │ ├── faster_rcnn_res101_coco_3x_800size.py │ │ │ ├── atss_resx101_coco_2x_800size.py │ │ │ ├── fcos_resx101_coco_2x_800size.py │ │ │ ├── freeanchor_res18_coco_3x_800size.py │ │ │ ├── freeanchor_res34_coco_3x_800size.py │ │ │ ├── faster_rcnn_res18_coco_3x_800size.py │ │ │ ├── faster_rcnn_res34_coco_3x_800size.py │ │ │ ├── retinanet_res18_coco_3x_800size.py │ │ │ ├── retinanet_res34_coco_3x_800size.py │ │ │ ├── retinanet_resx101_coco_2x_800size.py │ │ │ ├── freeanchor_resx101_coco_2x_800size.py │ │ │ ├── faster_rcnn_resx101_coco_2x_800size.py │ │ │ └── __init__.py │ ├── segmentation │ │ ├── configs │ │ │ ├── __init__.py │ │ │ ├── deeplabv3plus_res101_voc_512size.py │ │ │ └── deeplabv3plus_res101_cityscapes_768size.py │ │ ├── models │ │ │ └── __init__.py │ │ ├── tools │ │ │ ├── inference.py │ │ │ └── utils.py │ │ └── README.md │ ├── __init__.py │ ├── classification │ │ ├── __init__.py │ │ ├── resnet │ │ │ ├── __init__.py │ │ │ └── inference.py │ │ ├── shufflenet │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ └── README.md │ │ ├── README.md │ │ └── dump.py │ └── keypoints │ │ ├── models │ │ └── __init__.py │ │ ├── config.py │ │ └── README.md ├── multimodal │ ├── __init__.py │ ├── big_sleep │ │ ├── __init__.py │ │ ├── README.md │ │ ├── ema.py │ │ └── resample.py │ ├── clip │ │ ├── bpe_simple_vocab_16e6.txt.gz │ │ ├── __init__.py │ │ └── README.md │ ├── taming_transformer │ │ ├── data │ │ │ └── drin_images │ │ │ │ ├── n01795545 │ │ │ │ └── ILSVRC2012_val_00023344.JPEG │ │ │ │ ├── n01819313 │ │ │ │ └── ILSVRC2012_val_00003068.JPEG │ │ │ │ ├── n01820546 │ │ │ │ ├── ILSVRC2012_val_00034784.JPEG │ │ │ │ └── 
ILSVRC2012_val_00047491.JPEG │ │ │ │ ├── n01828970 │ │ │ │ ├── ILSVRC2012_val_00001336.JPEG │ │ │ │ ├── ILSVRC2012_val_00008236.JPEG │ │ │ │ └── ILSVRC2012_val_00046802.JPEG │ │ │ │ ├── n01843065 │ │ │ │ └── ILSVRC2012_val_00022439.JPEG │ │ │ │ ├── n01847000 │ │ │ │ └── ILSVRC2012_val_00022364.JPEG │ │ │ │ ├── n02085782 │ │ │ │ └── ILSVRC2012_val_00012298.JPEG │ │ │ │ ├── n02086646 │ │ │ │ └── ILSVRC2012_val_00011473.JPEG │ │ │ │ ├── n02088466 │ │ │ │ └── ILSVRC2012_val_00013651.JPEG │ │ │ │ ├── n02089973 │ │ │ │ └── ILSVRC2012_val_00000028.JPEG │ │ │ │ ├── n02093256 │ │ │ │ └── ILSVRC2012_val_00046547.JPEG │ │ │ │ ├── n02096294 │ │ │ │ └── ILSVRC2012_val_00042133.JPEG │ │ │ │ ├── n02099601 │ │ │ │ └── ILSVRC2012_val_00005697.JPEG │ │ │ │ ├── n02099712 │ │ │ │ └── ILSVRC2012_val_00023471.JPEG │ │ │ │ ├── n02100877 │ │ │ │ └── ILSVRC2012_val_00039863.JPEG │ │ │ │ ├── n02101006 │ │ │ │ ├── ILSVRC2012_val_00032333.JPEG │ │ │ │ └── ILSVRC2012_val_00047325.JPEG │ │ │ │ ├── n02101556 │ │ │ │ └── ILSVRC2012_val_00030540.JPEG │ │ │ │ ├── n02102318 │ │ │ │ └── ILSVRC2012_val_00024691.JPEG │ │ │ │ ├── n02105505 │ │ │ │ └── ILSVRC2012_val_00031252.JPEG │ │ │ │ ├── n02110627 │ │ │ │ └── ILSVRC2012_val_00008310.JPEG │ │ │ │ └── n02111889 │ │ │ │ └── ILSVRC2012_val_00042625.JPEG │ │ ├── __init__.py │ │ ├── functional.py │ │ └── README.md │ └── dalle │ │ ├── vae │ │ ├── openaidvae │ │ │ ├── __init__.py │ │ │ ├── utils.py │ │ │ ├── encoder.py │ │ │ └── decoder.py │ │ ├── __init__.py │ │ ├── base_vae.py │ │ ├── openai_dvae.py │ │ └── vqgan_vae.py │ │ ├── __init__.py │ │ ├── pretrained.py │ │ ├── README.md │ │ └── tokenizer.py ├── assets │ ├── cat.jpg │ ├── dcgan.png │ ├── total.png │ ├── test_depth.png │ ├── cat_det_out.jpg │ ├── cat_seg_out.jpg │ ├── test_000009.png │ ├── test_000010.png │ ├── test_sampling.mp4 │ ├── norway_sampling.mp4 │ ├── test_sample_255.png │ ├── norway_sample_2687.png │ └── norway_segmentation.png ├── nlp │ ├── __init__.py │ └── bert │ │ ├── __init__.py │ │ ├── config_args.py │ │ └── test.py └── quantization │ ├── __init__.py │ ├── models │ └── __init__.py │ ├── param_config.py │ ├── README.md │ └── inference.py ├── requires-style.txt ├── requirements.txt ├── .github ├── ISSUE_TEMPLATE │ ├── feature-bug-issue.md │ └── Help-wanted Issue.md └── workflows │ └── ci.yml ├── .gitignore ├── run_format_check.sh ├── setup.cfg ├── README.md ├── CODE_OF_CONDUCT.md └── hubconf.py /official/vision/gan/megengine_mimicry/nets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /official/vision/gan/megengine_mimicry/nets/dcgan/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /official/vision/gan/megengine_mimicry/nets/wgan/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /official/multimodal/__init__.py: -------------------------------------------------------------------------------- 1 | from .dalle.dalle import DALLE 2 | -------------------------------------------------------------------------------- /official/vision/gan/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow>=2.0 2 | tensorboardX 3 | 
-------------------------------------------------------------------------------- /requires-style.txt: -------------------------------------------------------------------------------- 1 | flake8==3.7.9 2 | isort==4.3.21 3 | pylint==2.5.2 4 | -------------------------------------------------------------------------------- /official/vision/detection/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | -------------------------------------------------------------------------------- /official/vision/gan/megengine_mimicry/metrics/fid/__init__.py: -------------------------------------------------------------------------------- 1 | from .fid_utils import * 2 | -------------------------------------------------------------------------------- /official/vision/gan/megengine_mimicry/metrics/kid/__init__.py: -------------------------------------------------------------------------------- 1 | from .kid_utils import * 2 | -------------------------------------------------------------------------------- /official/assets/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/assets/cat.jpg -------------------------------------------------------------------------------- /official/assets/dcgan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/assets/dcgan.png -------------------------------------------------------------------------------- /official/assets/total.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/assets/total.png -------------------------------------------------------------------------------- /official/vision/gan/megengine_mimicry/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_utils import load_dataset 2 | -------------------------------------------------------------------------------- /official/vision/gan/megengine_mimicry/metrics/inception_model/__init__.py: -------------------------------------------------------------------------------- 1 | from .inception_utils import * 2 | -------------------------------------------------------------------------------- /official/assets/test_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/assets/test_depth.png -------------------------------------------------------------------------------- /official/assets/cat_det_out.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/assets/cat_det_out.jpg -------------------------------------------------------------------------------- /official/assets/cat_seg_out.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/assets/cat_seg_out.jpg -------------------------------------------------------------------------------- /official/assets/test_000009.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/assets/test_000009.png 
-------------------------------------------------------------------------------- /official/assets/test_000010.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/assets/test_000010.png -------------------------------------------------------------------------------- /official/assets/test_sampling.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/assets/test_sampling.mp4 -------------------------------------------------------------------------------- /official/vision/gan/megengine_mimicry/metrics/inception_score/__init__.py: -------------------------------------------------------------------------------- 1 | from .inception_score_utils import * 2 | -------------------------------------------------------------------------------- /official/assets/norway_sampling.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/assets/norway_sampling.mp4 -------------------------------------------------------------------------------- /official/assets/test_sample_255.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/assets/test_sample_255.png -------------------------------------------------------------------------------- /official/assets/norway_sample_2687.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/assets/norway_sample_2687.png -------------------------------------------------------------------------------- /official/assets/norway_segmentation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/assets/norway_segmentation.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | megengine 2 | numpy 3 | opencv-python 4 | tqdm 5 | tabulate 6 | ftfy 7 | imageio 8 | youtokentome 9 | regex==2020.10.15 -------------------------------------------------------------------------------- /official/multimodal/big_sleep/__init__.py: -------------------------------------------------------------------------------- 1 | from .big_sleep import Imagine 2 | from .biggan import BigGAN, biggan_128, biggan_256, biggan_512 3 | -------------------------------------------------------------------------------- /official/multimodal/clip/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/clip/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /official/multimodal/clip/__init__.py: -------------------------------------------------------------------------------- 1 | from .inference_utils import ClipInferenceUtils 2 | from .models import CLIP 3 | from .simple_tokenizer import SimpleTokenizer, tokenize 4 | -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n01795545/ILSVRC2012_val_00023344.JPEG: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n01795545/ILSVRC2012_val_00023344.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n01819313/ILSVRC2012_val_00003068.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n01819313/ILSVRC2012_val_00003068.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n01820546/ILSVRC2012_val_00034784.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n01820546/ILSVRC2012_val_00034784.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n01820546/ILSVRC2012_val_00047491.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n01820546/ILSVRC2012_val_00047491.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n01828970/ILSVRC2012_val_00001336.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n01828970/ILSVRC2012_val_00001336.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n01828970/ILSVRC2012_val_00008236.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n01828970/ILSVRC2012_val_00008236.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n01828970/ILSVRC2012_val_00046802.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n01828970/ILSVRC2012_val_00046802.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n01843065/ILSVRC2012_val_00022439.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n01843065/ILSVRC2012_val_00022439.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n01847000/ILSVRC2012_val_00022364.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n01847000/ILSVRC2012_val_00022364.JPEG 
-------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n02085782/ILSVRC2012_val_00012298.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n02085782/ILSVRC2012_val_00012298.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n02086646/ILSVRC2012_val_00011473.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n02086646/ILSVRC2012_val_00011473.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n02088466/ILSVRC2012_val_00013651.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n02088466/ILSVRC2012_val_00013651.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n02089973/ILSVRC2012_val_00000028.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n02089973/ILSVRC2012_val_00000028.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n02093256/ILSVRC2012_val_00046547.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n02093256/ILSVRC2012_val_00046547.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n02096294/ILSVRC2012_val_00042133.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n02096294/ILSVRC2012_val_00042133.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n02099601/ILSVRC2012_val_00005697.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n02099601/ILSVRC2012_val_00005697.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n02099712/ILSVRC2012_val_00023471.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n02099712/ILSVRC2012_val_00023471.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n02100877/ILSVRC2012_val_00039863.JPEG: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n02100877/ILSVRC2012_val_00039863.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n02101006/ILSVRC2012_val_00032333.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n02101006/ILSVRC2012_val_00032333.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n02101006/ILSVRC2012_val_00047325.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n02101006/ILSVRC2012_val_00047325.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n02101556/ILSVRC2012_val_00030540.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n02101556/ILSVRC2012_val_00030540.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n02102318/ILSVRC2012_val_00024691.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n02102318/ILSVRC2012_val_00024691.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n02105505/ILSVRC2012_val_00031252.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n02105505/ILSVRC2012_val_00031252.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n02110627/ILSVRC2012_val_00008310.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n02110627/ILSVRC2012_val_00008310.JPEG -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/data/drin_images/n02111889/ILSVRC2012_val_00042625.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/HEAD/official/multimodal/taming_transformer/data/drin_images/n02111889/ILSVRC2012_val_00042625.JPEG -------------------------------------------------------------------------------- /official/multimodal/dalle/vae/openaidvae/__init__.py: -------------------------------------------------------------------------------- 1 | from .decoder import Decoder as OpenAIDiscreteVAEDecoder 2 | from .decoder import openai_discrete_VAE_decoder 3 | from .encoder import Encoder as OpenAIDiscreteVAEEncoder 4 | from .encoder import openai_discrete_VAE_encoder 5 | from .utils import map_pixels, unmap_pixels 6 | 
-------------------------------------------------------------------------------- /official/vision/segmentation/configs/__init__.py: -------------------------------------------------------------------------------- 1 | from .deeplabv3plus_res101_cityscapes_768size import deeplabv3plus_res101_cityscapes_768size 2 | from .deeplabv3plus_res101_voc_512size import deeplabv3plus_res101_voc_512size 3 | 4 | _EXCLUDE = {} 5 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] 6 | -------------------------------------------------------------------------------- /official/multimodal/dalle/vae/__init__.py: -------------------------------------------------------------------------------- 1 | from .openai_dvae import DiscreteVAE as OpenAIDiscreteVAE 2 | from .openaidvae import ( 3 | OpenAIDiscreteVAEDecoder, 4 | OpenAIDiscreteVAEEncoder, 5 | map_pixels, 6 | openai_discrete_VAE_decoder, 7 | openai_discrete_VAE_encoder, 8 | unmap_pixels 9 | ) 10 | from .vqgan_vae import VQGanVAE 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-bug-issue.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature/Bug Issue 3 | about: Please use this template to submit your suggestions/issues 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | ## Environment 12 | 1. System environment: 13 | 2. MegEngine version: 14 | 3. Python version: 15 | 4. Model name: 16 | 17 | ## Steps to reproduce 18 | 1. 19 | 2. 20 | 3. 21 | 22 | ## Please provide the key code snippets to help track down the issue 23 | 24 | 25 | 26 | ## Please provide the complete logs and error messages 27 | -------------------------------------------------------------------------------- /official/multimodal/dalle/__init__.py: -------------------------------------------------------------------------------- 1 | from .dalle import DALLE 2 | from .generate import Generator 3 | from .pretrained import coco_512_16_16d_16h_80tsl 4 | from .vae import ( 5 | OpenAIDiscreteVAE, 6 | OpenAIDiscreteVAEDecoder, 7 | OpenAIDiscreteVAEEncoder, 8 | VQGanVAE, 9 | openai_discrete_VAE_decoder, 10 | openai_discrete_VAE_encoder 11 | ) 12 | from .vae.vqgan_vae import vqgan_vae_1024 13 | -------------------------------------------------------------------------------- /official/nlp/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | -------------------------------------------------------------------------------- /official/nlp/bert/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | -------------------------------------------------------------------------------- /official/vision/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | -------------------------------------------------------------------------------- /official/quantization/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | -------------------------------------------------------------------------------- /official/vision/classification/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | -------------------------------------------------------------------------------- /official/vision/classification/resnet/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | -------------------------------------------------------------------------------- /official/vision/classification/shufflenet/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *log*/ 2 | *.jpg 3 | *.png 4 | 5 | # compilation and distribution 6 | __pycache__ 7 | _ext 8 | *.pyc 9 | *.so 10 | build/ 11 | dist/ 12 | wheels/ 13 | 14 | # pytorch/python/numpy formats 15 | *.pth 16 | *.pkl 17 | *.npy 18 | 19 | # ipython/jupyter notebooks 20 | *.ipynb 21 | **/.ipynb_checkpoints/ 22 | 23 | # Editor temporaries 24 | *.swn 25 | *.swo 26 | *.swp 27 | *~ 28 | 29 | # pycharm editor settings 30 | .idea 31 | 32 | # vscode editor settings 33 | .vscode 34 | 35 | -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/__init__.py: -------------------------------------------------------------------------------- 1 | from .cond_transformer import ( 2 | Net2NetTransformer, 3 | celebahq_transformer, 4 | drin_transformer, 5 | s_flckr_transformer 6 | ) 7 | from .inference_utils import ( 8 | ConditionalSampler, 9 | FastSampler, 10 | Reconstruction, 11 | convert_tensor_to_image, 12 | preprocess_depth, 13 | preprocess_segmetation 14 | ) 15 | from .vqgan import vqgan_gumbel_f8, vqgan_imagenet_f16_1024, vqgan_imagenet_f16_16384 16 | -------------------------------------------------------------------------------- /official/quantization/models/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from .mobilenet_v2 import * 10 | from .resnet import * 11 | from .shufflenet import * 12 | -------------------------------------------------------------------------------- /official/vision/keypoints/models/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from .simplebaseline import simplebaseline_res50, simplebaseline_res101, simplebaseline_res152 10 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/Help-wanted Issue.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Help-wanted Issue 3 | about: Please use this template to propose help-wanted tasks 4 | title: Help-wanted Issue 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Background 11 | 12 | 13 | 14 | ## Task description 15 | 16 | 17 | 18 | ## Goals 19 | 20 | 21 | -------------------------------------------------------------------------------- /official/vision/segmentation/models/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from .deeplabv3plus import * 10 | 11 | _EXCLUDE = {} 12 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] 13 | -------------------------------------------------------------------------------- /official/vision/detection/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from .basic import * 10 | from .det import * 11 | 12 | _EXCLUDE = {} 13 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] 14 | -------------------------------------------------------------------------------- /official/vision/detection/tools/data_mapper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine.data.dataset import COCO, Objects365, PascalVOC 10 | 11 | data_mapper = dict( 12 | coco=COCO, 13 | objects365=Objects365, 14 | voc=PascalVOC, 15 | ) 16 | -------------------------------------------------------------------------------- /official/vision/detection/layers/basic/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from .functional import * 10 | from .nn import * 11 | from .norm import * 12 | 13 | _EXCLUDE = {} 14 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] 15 | -------------------------------------------------------------------------------- /run_format_check.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | set -e 4 | 5 | export PYTHONPATH=$PWD:$PYTHONPATH 6 | pip install -q -r requires-style.txt 7 | CHECK_DIR="official/vision official/quantization official/nlp official/multimodal" 8 | pylint $CHECK_DIR --rcfile=.pylintrc || pylint_ret=$? 9 | if [ "$pylint_ret" ]; then 10 | exit $pylint_ret 11 | fi 12 | echo "All lint check passed!" 13 | flake8 official || flake8_ret=$? 14 | if [ "$flake8_ret" ]; then 15 | exit $flake8_ret 16 | fi 17 | echo "All flake check passed!" 18 | isort --check-only -rc official || isort_ret=$?
19 | if [ "$isort_ret" ]; then 20 | exit $isort_ret 21 | fi 22 | echo "All isort check passed!" 23 | -------------------------------------------------------------------------------- /official/vision/detection/models/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from .atss import * 10 | from .faster_rcnn import * 11 | from .fcos import * 12 | from .freeanchor import * 13 | from .retinanet import * 14 | 15 | _EXCLUDE = {} 16 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] 17 | -------------------------------------------------------------------------------- /official/multimodal/dalle/pretrained.py: -------------------------------------------------------------------------------- 1 | from megengine import hub 2 | 3 | from .dalle import DALLE 4 | from .vae.vqgan_vae import vqgan_vae_1024 5 | 6 | 7 | @hub.pretrained( 8 | "https://data.megengine.org.cn/research/multimodality/dalle_coco_512_16_16d_16h_80tsl.pkl" 9 | ) 10 | def coco_512_16_16d_16h_80tsl(): 11 | vae = vqgan_vae_1024(False) 12 | model = DALLE( 13 | num_text_tokens=8192, 14 | text_seq_len=80, 15 | embed_dim=512, 16 | vae=vae, 17 | num_heads=16, 18 | head_dim=64, 19 | stable=False, 20 | depths=16, 21 | attention_types=['row', 'row', 'column', 'row', 'row', 'row', 'column', 'full'] 22 | ) 23 | return model 24 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 100 3 | skip = official/vision/gan 4 | multi_line_output = 3 5 | balanced_wrapping = True 6 | known_standard_library = setuptools 7 | known_myself = official 8 | known_data_processing = cv2,numpy,scipy,PIL,matplotlib 9 | known_datasets = pycocotools 10 | known_deeplearning = megengine 11 | sections = FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,data_processing,datasets,deeplearning,myself,LOCALFOLDER 12 | no_lines_before = STDLIB,THIRDPARTY,datasets 13 | default_section = FIRSTPARTY 14 | 15 | [flake8] 16 | ignore = W503 17 | max-line-length = 100 18 | max-complexity = 18 19 | select = B,C,E,F,W,T4,B9 20 | exclude = official/vision/gan 21 | per-file-ignores = 22 | **/__init__.py:F401,F403 23 | -------------------------------------------------------------------------------- /official/vision/classification/README.md: -------------------------------------------------------------------------------- 1 | # MegEngine classification models 2 | 3 | Image classification is a fundamental task in computer vision, and many other computer vision tasks (e.g. object detection) build on models pretrained on image classification. 4 | We therefore provide a variety of classification models pretrained on ImageNet, including the [ResNet](./resnet) and [Shufflenet](./shufflenet) families; their results on the **ImageNet validation set** are listed in the table below: 5 | 6 | | Model | top1 acc | top5 acc | 7 | | --- | :---: | :---: | 8 | | ResNet18 | 70.312 | 89.430 | 9 | | ResNet34 | 73.960 | 91.630 | 10 | | ResNet50 | 76.254 | 93.056 | 11 | | ResNet101 | 77.944 | 93.844 | 12 | | ResNet152 | 78.582 | 94.130 | 13 | | ResNeXt50 32x4d | 77.592 | 93.644 | 14 | | ResNeXt101 32x8d | 79.520 | 94.586 | 15 | | ShuffleNetV2 x0.5 | 60.696 | 82.190 | 16 | | ShuffleNetV2 x1.0 | 69.372 | 88.764 | 17 | | ShuffleNetV2 x1.5 | 72.806 | 90.792 | 18 | | ShuffleNetV2 x2.0 | 75.074 | 92.278 | 19 |
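These pretrained entries are exposed through the repository's top-level `hubconf.py`, so a model can be pulled in directly with `megengine.hub` instead of cloning the code. A minimal sketch (assuming the `resnet50` entry name matches `hubconf.py` and that the MegEngine model zoo is reachable):

```python
import megengine.hub as hub

# Fetch ResNet-50 with ImageNet-pretrained weights from the official model zoo.
model = hub.load("megengine/models", "resnet50", pretrained=True)
model.eval()  # switch to inference behavior before evaluating
```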
-------------------------------------------------------------------------------- /official/multimodal/dalle/vae/base_vae.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | 3 | import megengine.module as M 4 | 5 | 6 | class BaseVAE(M.Module): 7 | def __init__( 8 | self, 9 | num_layers: int, 10 | num_tokens: int, 11 | image_size: int, 12 | channels: int = 3, 13 | ): 14 | super(BaseVAE, self).__init__() 15 | 16 | self.channels = channels 17 | self.num_layers = num_layers 18 | self.num_tokens = num_tokens 19 | self.image_size = image_size 20 | 21 | @abstractmethod 22 | def get_codebook_indices(self, inputs): 23 | pass 24 | 25 | @abstractmethod 26 | def decode(self, inputs): 27 | pass 28 | 29 | def forward(self, inputs): 30 | raise NotImplementedError() 31 | -------------------------------------------------------------------------------- /official/vision/detection/layers/det/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from .anchor import * 10 | from .box_head import * 11 | from .box_utils import * 12 | from .fpn import * 13 | from .loss import * 14 | from .matcher import * 15 | from .point_head import * 16 | from .pooler import * 17 | from .rcnn import * 18 | from .rpn import * 19 | from .sampling import * 20 | 21 | _EXCLUDE = {} 22 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] 23 | -------------------------------------------------------------------------------- /official/multimodal/dalle/vae/openaidvae/utils.py: -------------------------------------------------------------------------------- 1 | import megengine.functional as F 2 | import megengine.module as M 3 | 4 | logit_laplace_eps: float = 0.1 5 | 6 | 7 | def map_pixels(x): 8 | if x.ndim != 4: 9 | raise ValueError('input must be 4D') 10 | return (1 - 2 * logit_laplace_eps) * x + logit_laplace_eps 11 | 12 | 13 | def unmap_pixels(x): 14 | if x.ndim != 4: 15 | raise ValueError('input must be 4D') 16 | return F.clip((x - logit_laplace_eps) / (1 - 2 * logit_laplace_eps), 0, 1) 17 | 18 | 19 | class Upsample(M.Module): 20 | def __init__(self, scale_factor, mode): 21 | super().__init__() 22 | self.scale_factor = scale_factor 23 | self.mode = mode 24 | 25 | def forward(self, inputs): 26 | return F.nn.interpolate(inputs, scale_factor=self.scale_factor, mode=self.mode) 27 | -------------------------------------------------------------------------------- /official/vision/gan/megengine_mimicry/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Kwot Sin Lee 2 | # This code is licensed under MIT license 3 | # (https://github.com/kwotsin/mimicry/blob/master/LICENSE) 4 | # ------------------------------------------------------------------------------ 5 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 6 | # 7 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # 13 | # This file has been modified by Megvii ("Megvii Modifications"). 14 | # All Megvii Modifications are Copyright (C) 2014-2019 Megvii Inc. All rights reserved. 15 | # ------------------------------------------------------------------------------ 16 | from .common import * 17 | -------------------------------------------------------------------------------- /official/vision/gan/megengine_mimicry/training/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Kwot Sin Lee 2 | # This code is licensed under MIT license 3 | # (https://github.com/kwotsin/mimicry/blob/master/LICENSE) 4 | # ------------------------------------------------------------------------------ 5 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 6 | # 7 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # 13 | # This file has been modified by Megvii ("Megvii Modifications"). 14 | # All Megvii Modifications are Copyright (C) 2014-2019 Megvii Inc. All rights reserved. 15 | # ------------------------------------------------------------------------------ 16 | from .trainer import Trainer 17 | -------------------------------------------------------------------------------- /official/vision/gan/megengine_mimicry/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Kwot Sin Lee 2 | # This code is licensed under MIT license 3 | # (https://github.com/kwotsin/mimicry/blob/master/LICENSE) 4 | # ------------------------------------------------------------------------------ 5 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 6 | # 7 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # 13 | # This file has been modified by Megvii ("Megvii Modifications"). 14 | # All Megvii Modifications are Copyright (C) 2014-2019 Megvii Inc. All rights reserved. 15 | # ------------------------------------------------------------------------------ 16 | from . import nets, training, datasets, metrics 17 | -------------------------------------------------------------------------------- /official/vision/gan/megengine_mimicry/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Kwot Sin Lee 2 | # This code is licensed under MIT license 3 | # (https://github.com/kwotsin/mimicry/blob/master/LICENSE) 4 | # ------------------------------------------------------------------------------ 5 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 6 | # 7 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # 13 | # This file has been modified by Megvii ("Megvii Modifications"). 14 | # All Megvii Modifications are Copyright (C) 2014-2019 Megvii Inc. All rights reserved. 15 | # ------------------------------------------------------------------------------ 16 | from . import fid, kid, inception_score, inception_model 17 | from .compute_fid import * 18 | from .compute_is import * 19 | from .compute_kid import * 20 | from .compute_metrics import * 21 | -------------------------------------------------------------------------------- /official/vision/detection/configs/atss_res50_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | from official.vision.detection import models 12 | 13 | 14 | @hub.pretrained( 15 | "https://data.megengine.org.cn/models/weights/" 16 | "atss_res50_coco_3x_800size_42dot6_9a92ed8c.pkl" 17 | ) 18 | def atss_res50_coco_3x_800size(**kwargs): 19 | r""" 20 | ATSS trained from COCO dataset. 21 | `"ATSS" `_ 22 | `"FPN" `_ 23 | `"COCO" `_ 24 | """ 25 | cfg = models.ATSSConfig() 26 | cfg.backbone_pretrained = False 27 | return models.ATSS(cfg, **kwargs) 28 | 29 | 30 | Net = models.ATSS 31 | Cfg = models.ATSSConfig 32 | -------------------------------------------------------------------------------- /official/vision/detection/configs/fcos_res50_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | from official.vision.detection import models 12 | 13 | 14 | @hub.pretrained( 15 | "https://data.megengine.org.cn/models/weights/" 16 | "fcos_res50_coco_3x_800size_42dot2_b16f9c8b.pkl" 17 | ) 18 | def fcos_res50_coco_3x_800size(**kwargs): 19 | r""" 20 | FCOS trained from COCO dataset. 21 | `"FCOS" `_ 22 | `"FPN" `_ 23 | `"COCO" `_ 24 | """ 25 | cfg = models.FCOSConfig() 26 | cfg.backbone_pretrained = False 27 | return models.FCOS(cfg, **kwargs) 28 | 29 | 30 | Net = models.FCOS 31 | Cfg = models.FCOSConfig 32 | -------------------------------------------------------------------------------- /official/vision/detection/configs/retinanet_res50_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | from official.vision.detection import models 12 | 13 | 14 | @hub.pretrained( 15 | "https://data.megengine.org.cn/models/weights/" 16 | "retinanet_res50_coco_3x_800size_39dot3_8eaec532.pkl" 17 | ) 18 | def retinanet_res50_coco_3x_800size(**kwargs): 19 | r""" 20 | RetinaNet trained from COCO dataset. 21 | `"RetinaNet" `_ 22 | `"FPN" `_ 23 | `"COCO" `_ 24 | """ 25 | cfg = models.RetinaNetConfig() 26 | cfg.backbone_pretrained = False 27 | return models.RetinaNet(cfg, **kwargs) 28 | 29 | 30 | Net = models.RetinaNet 31 | Cfg = models.RetinaNetConfig 32 | -------------------------------------------------------------------------------- /official/vision/detection/configs/freeanchor_res50_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | from official.vision.detection import models 12 | 13 | 14 | @hub.pretrained( 15 | "https://data.megengine.org.cn/models/weights/" 16 | "freeanchor_res50_coco_3x_800size_42dot1_5c567f14.pkl" 17 | ) 18 | def freeanchor_res50_coco_3x_800size(**kwargs): 19 | r""" 20 | FreeAnchor trained from COCO dataset. 21 | `"FreeAnchor" `_ 22 | `"FPN" `_ 23 | `"COCO" `_ 24 | """ 25 | cfg = models.FreeAnchorConfig() 26 | cfg.backbone_pretrained = False 27 | return models.FreeAnchor(cfg, **kwargs) 28 | 29 | 30 | Net = models.FreeAnchor 31 | Cfg = models.FreeAnchorConfig 32 | -------------------------------------------------------------------------------- /official/vision/detection/configs/faster_rcnn_res50_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | from official.vision.detection import models 12 | 13 | 14 | @hub.pretrained( 15 | "https://data.megengine.org.cn/models/weights/" 16 | "faster_rcnn_res50_coco_3x_800size_40dot1_8682ff1a.pkl" 17 | ) 18 | def faster_rcnn_res50_coco_3x_800size(**kwargs): 19 | r""" 20 | Faster-RCNN FPN trained from COCO dataset.
21 | `"Faster-RCNN" `_ 22 | `"FPN" `_ 23 | `"COCO" `_ 24 | """ 25 | cfg = models.FasterRCNNConfig() 26 | cfg.backbone_pretrained = False 27 | return models.FasterRCNN(cfg, **kwargs) 28 | 29 | 30 | Net = models.FasterRCNN 31 | Cfg = models.FasterRCNNConfig 32 | -------------------------------------------------------------------------------- /official/vision/detection/configs/atss_res101_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | from official.vision.detection import models 12 | 13 | 14 | class CustomATSSConfig(models.ATSSConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet101" 19 | 20 | 21 | @hub.pretrained( 22 | "https://data.megengine.org.cn/models/weights/" 23 | "atss_res101_coco_3x_800size_44dot7_9181687e.pkl" 24 | ) 25 | def atss_res101_coco_3x_800size(**kwargs): 26 | r""" 27 | ATSS trained from COCO dataset. 28 | `"ATSS" `_ 29 | `"FPN" `_ 30 | `"COCO" `_ 31 | """ 32 | cfg = CustomATSSConfig() 33 | cfg.backbone_pretrained = False 34 | return models.ATSS(cfg, **kwargs) 35 | 36 | 37 | Net = models.ATSS 38 | Cfg = CustomATSSConfig 39 | -------------------------------------------------------------------------------- /official/vision/detection/configs/fcos_res101_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | from official.vision.detection import models 12 | 13 | 14 | class CustomFCOSConfig(models.FCOSConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet101" 19 | 20 | 21 | @hub.pretrained( 22 | "https://data.megengine.org.cn/models/weights/" 23 | "fcos_res101_coco_3x_800size_44dot3_f38e8df1.pkl" 24 | ) 25 | def fcos_res101_coco_3x_800size(**kwargs): 26 | r""" 27 | FCOS trained from COCO dataset. 28 | `"FCOS" `_ 29 | `"FPN" `_ 30 | `"COCO" `_ 31 | """ 32 | cfg = CustomFCOSConfig() 33 | cfg.backbone_pretrained = False 34 | return models.FCOS(cfg, **kwargs) 35 | 36 | 37 | Net = models.FCOS 38 | Cfg = CustomFCOSConfig 39 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: CI 4 | 5 | # Controls when the action will run. 
# events.
on:
  push:
  pull_request:

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [3.6, 3.7, 3.8]

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - uses: actions/checkout@v2

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v1
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      # Runs a set of commands using the runner's shell
      - name: Format check
        run: ./run_format_check.sh
--------------------------------------------------------------------------------
/official/vision/detection/layers/det/sampling.py:
--------------------------------------------------------------------------------
# -*- coding:utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import megengine.functional as F
from megengine.random import uniform


def sample_labels(labels, num_samples, label_value, ignore_label=-1):
    """Keep at most ``num_samples`` labels equal to ``label_value``; the
    surplus ones are randomly reset to ``ignore_label``.

    Args:
        labels (Tensor): label tensor of shape (N,).
        num_samples (int): maximum number of labels to keep with ``label_value``.
        label_value (int): the label value to subsample.
        ignore_label (int): value assigned to the dropped labels. Default: -1

    Returns:
        Tensor: labels after sampling.
    """
    assert labels.ndim == 1, "Only tensor of dim 1 is supported."
    mask = (labels == label_value)
    num_valid = mask.sum()
    if num_valid <= num_samples:
        return labels

    # give every selected label a random score, then mark the surplus as ignored
    random_tensor = F.zeros_like(labels).astype("float32")
    random_tensor[mask] = uniform(size=num_valid)
    _, invalid_inds = F.topk(random_tensor, k=num_samples - num_valid)

    labels[invalid_inds] = ignore_label
    return labels
--------------------------------------------------------------------------------
/official/vision/detection/configs/atss_res18_coco_3x_800size.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
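# NOTE: an illustrative sketch of how config modules like this one are consumed
# by the tools under official/vision/detection/tools (cf. inference.py later in
# this listing); the weight path is a placeholder, not a shipped file:
#
#     current_network = import_from_file("atss_res18_coco_3x_800size.py")
#     cfg = current_network.Cfg()
#     model = current_network.Net(cfg)
#     model.load_state_dict(mge.load("/path/to/weights.pkl")["state_dict"])
#     model.eval()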
from megengine import hub

from official.vision.detection import models


class CustomATSSConfig(models.ATSSConfig):
    def __init__(self):
        super().__init__()

        self.backbone = "resnet18"
        self.fpn_in_channels = [128, 256, 512]


@hub.pretrained(
    "https://data.megengine.org.cn/models/weights/"
    "atss_res18_coco_3x_800size_38dot3_58e249d5.pkl"
)
def atss_res18_coco_3x_800size(**kwargs):
    r"""
    ATSS trained on the COCO dataset.
    `"ATSS" <https://arxiv.org/abs/1912.02424>`_
    `"FPN" <https://arxiv.org/abs/1612.03144>`_
    `"COCO" <https://arxiv.org/abs/1405.0312>`_
    """
    cfg = CustomATSSConfig()
    cfg.backbone_pretrained = False
    return models.ATSS(cfg, **kwargs)


Net = models.ATSS
Cfg = CustomATSSConfig
--------------------------------------------------------------------------------
/official/vision/detection/configs/atss_res34_coco_3x_800size.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from megengine import hub

from official.vision.detection import models


class CustomATSSConfig(models.ATSSConfig):
    def __init__(self):
        super().__init__()

        self.backbone = "resnet34"
        self.fpn_in_channels = [128, 256, 512]


@hub.pretrained(
    "https://data.megengine.org.cn/models/weights/"
    "atss_res34_coco_3x_800size_41dot5_ec16a67b.pkl"
)
def atss_res34_coco_3x_800size(**kwargs):
    r"""
    ATSS trained on the COCO dataset.
    `"ATSS" <https://arxiv.org/abs/1912.02424>`_
    `"FPN" <https://arxiv.org/abs/1612.03144>`_
    `"COCO" <https://arxiv.org/abs/1405.0312>`_
    """
    cfg = CustomATSSConfig()
    cfg.backbone_pretrained = False
    return models.ATSS(cfg, **kwargs)


Net = models.ATSS
Cfg = CustomATSSConfig
--------------------------------------------------------------------------------
/official/vision/detection/configs/fcos_res18_coco_3x_800size.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from megengine import hub

from official.vision.detection import models


class CustomFCOSConfig(models.FCOSConfig):
    def __init__(self):
        super().__init__()

        self.backbone = "resnet18"
        self.fpn_in_channels = [128, 256, 512]


@hub.pretrained(
    "https://data.megengine.org.cn/models/weights/"
    "fcos_res18_coco_3x_800size_37dot6_adab0136.pkl"
)
def fcos_res18_coco_3x_800size(**kwargs):
    r"""
    FCOS trained on the COCO dataset.
    `"FCOS" <https://arxiv.org/abs/1904.01355>`_
    `"FPN" <https://arxiv.org/abs/1612.03144>`_
    `"COCO" <https://arxiv.org/abs/1405.0312>`_
    """
    cfg = CustomFCOSConfig()
    cfg.backbone_pretrained = False
    return models.FCOS(cfg, **kwargs)


Net = models.FCOS
Cfg = CustomFCOSConfig
--------------------------------------------------------------------------------
/official/vision/detection/configs/fcos_res34_coco_3x_800size.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from megengine import hub

from official.vision.detection import models


class CustomFCOSConfig(models.FCOSConfig):
    def __init__(self):
        super().__init__()

        self.backbone = "resnet34"
        self.fpn_in_channels = [128, 256, 512]


@hub.pretrained(
    "https://data.megengine.org.cn/models/weights/"
    "fcos_res34_coco_3x_800size_41dot0_8ba4633f.pkl"
)
def fcos_res34_coco_3x_800size(**kwargs):
    r"""
    FCOS trained on the COCO dataset.
    `"FCOS" <https://arxiv.org/abs/1904.01355>`_
    `"FPN" <https://arxiv.org/abs/1612.03144>`_
    `"COCO" <https://arxiv.org/abs/1405.0312>`_
    """
    cfg = CustomFCOSConfig()
    cfg.backbone_pretrained = False
    return models.FCOS(cfg, **kwargs)


Net = models.FCOS
Cfg = CustomFCOSConfig
--------------------------------------------------------------------------------
/official/vision/detection/configs/retinanet_res101_coco_3x_800size.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from megengine import hub

from official.vision.detection import models


class CustomRetinaNetConfig(models.RetinaNetConfig):
    def __init__(self):
        super().__init__()

        self.backbone = "resnet101"


@hub.pretrained(
    "https://data.megengine.org.cn/models/weights/"
    "retinanet_res101_coco_3x_800size_41dot4_73b01887.pkl"
)
def retinanet_res101_coco_3x_800size(**kwargs):
    r"""
    RetinaNet trained on the COCO dataset.
    `"RetinaNet" <https://arxiv.org/abs/1708.02002>`_
    `"FPN" <https://arxiv.org/abs/1612.03144>`_
    `"COCO" <https://arxiv.org/abs/1405.0312>`_
    """
    cfg = CustomRetinaNetConfig()
    cfg.backbone_pretrained = False
    return models.RetinaNet(cfg, **kwargs)


Net = models.RetinaNet
Cfg = CustomRetinaNetConfig
--------------------------------------------------------------------------------
/official/vision/detection/configs/freeanchor_res101_coco_3x_800size.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from megengine import hub

from official.vision.detection import models


class CustomFreeAnchorConfig(models.FreeAnchorConfig):
    def __init__(self):
        super().__init__()

        self.backbone = "resnet101"


@hub.pretrained(
    "https://data.megengine.org.cn/models/weights/"
    "freeanchor_res101_coco_3x_800size_43dot9_8c707d7d.pkl"
)
def freeanchor_res101_coco_3x_800size(**kwargs):
    r"""
    FreeAnchor trained on the COCO dataset.
    `"FreeAnchor" <https://arxiv.org/abs/1909.02466>`_
    `"FPN" <https://arxiv.org/abs/1612.03144>`_
    `"COCO" <https://arxiv.org/abs/1405.0312>`_
    """
    cfg = CustomFreeAnchorConfig()
    cfg.backbone_pretrained = False
    return models.FreeAnchor(cfg, **kwargs)


Net = models.FreeAnchor
Cfg = CustomFreeAnchorConfig
--------------------------------------------------------------------------------
/official/vision/detection/configs/faster_rcnn_res101_coco_3x_800size.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from megengine import hub

from official.vision.detection import models


class CustomFasterRCNNConfig(models.FasterRCNNConfig):
    def __init__(self):
        super().__init__()

        self.backbone = "resnet101"


@hub.pretrained(
    "https://data.megengine.org.cn/models/weights/"
    "faster_rcnn_res101_coco_3x_800size_42dot6_2538b0ff.pkl"
)
def faster_rcnn_res101_coco_3x_800size(**kwargs):
    r"""
    Faster-RCNN FPN trained on the COCO dataset.
    `"Faster-RCNN" <https://arxiv.org/abs/1506.01497>`_
    `"FPN" <https://arxiv.org/abs/1612.03144>`_
    `"COCO" <https://arxiv.org/abs/1405.0312>`_
    """
    cfg = CustomFasterRCNNConfig()
    cfg.backbone_pretrained = False
    return models.FasterRCNN(cfg, **kwargs)


Net = models.FasterRCNN
Cfg = CustomFasterRCNNConfig
--------------------------------------------------------------------------------
/official/vision/detection/tools/nms.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
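# Usage sketch for ``py_cpu_nms`` below (illustrative): ``dets`` is an (N, 5)
# float array of [x1, y1, x2, y2, score] rows; the returned list indexes the
# kept boxes in descending score order.
#
#     dets = np.array([[10., 10., 50., 50., 0.9],
#                      [12., 12., 52., 52., 0.8],
#                      [100., 100., 150., 150., 0.7]])
#     keep = py_cpu_nms(dets, thresh=0.5)  # -> [0, 2]; box 1 overlaps box 0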
import numpy as np


def py_cpu_nms(dets, thresh):
    """Pure-NumPy NMS: greedily keep the highest-scoring boxes and drop any
    remaining box whose IoU with an already-kept box exceeds ``thresh``."""
    x1 = np.ascontiguousarray(dets[:, 0])
    y1 = np.ascontiguousarray(dets[:, 1])
    x2 = np.ascontiguousarray(dets[:, 2])
    y2 = np.ascontiguousarray(dets[:, 3])

    areas = (x2 - x1) * (y2 - y1)
    order = dets[:, 4].argsort()[::-1]
    keep = list()

    while order.size > 0:
        pick_idx = order[0]
        keep.append(pick_idx)
        order = order[1:]

        xx1 = np.maximum(x1[pick_idx], x1[order])
        yy1 = np.maximum(y1[pick_idx], y1[order])
        xx2 = np.minimum(x2[pick_idx], x2[order])
        yy2 = np.minimum(y2[pick_idx], y2[order])

        inter = np.maximum(xx2 - xx1, 0) * np.maximum(yy2 - yy1, 0)
        iou = inter / np.maximum(areas[pick_idx] + areas[order] - inter, 1e-5)

        order = order[iou <= thresh]

    return keep
--------------------------------------------------------------------------------
/official/vision/detection/configs/atss_resx101_coco_2x_800size.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from megengine import hub

from official.vision.detection import models


class CustomATSSConfig(models.ATSSConfig):
    def __init__(self):
        super().__init__()

        self.backbone = "resnext101_32x8d"
        self.max_epoch = 36
        self.lr_decay_stages = [24, 32]


@hub.pretrained(
    "https://data.megengine.org.cn/models/weights/"
    "atss_resx101_coco_2x_800size_45dot6_b3a91b36.pkl"
)
def atss_resx101_coco_2x_800size(**kwargs):
    r"""
    ATSS trained on the COCO dataset.
    `"ATSS" <https://arxiv.org/abs/1912.02424>`_
    `"FPN" <https://arxiv.org/abs/1612.03144>`_
    `"COCO" <https://arxiv.org/abs/1405.0312>`_
    """
    cfg = CustomATSSConfig()
    cfg.backbone_pretrained = False
    return models.ATSS(cfg, **kwargs)


Net = models.ATSS
Cfg = CustomATSSConfig
--------------------------------------------------------------------------------
/official/vision/detection/configs/fcos_resx101_coco_2x_800size.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from megengine import hub

from official.vision.detection import models


class CustomFCOSConfig(models.FCOSConfig):
    def __init__(self):
        super().__init__()

        self.backbone = "resnext101_32x8d"
        self.max_epoch = 36
        self.lr_decay_stages = [24, 32]


@hub.pretrained(
    "https://data.megengine.org.cn/models/weights/"
    "fcos_resx101_coco_2x_800size_44dot8_42ac8e82.pkl"
)
def fcos_resx101_coco_2x_800size(**kwargs):
    r"""
    FCOS trained on the COCO dataset.
    `"FCOS" <https://arxiv.org/abs/1904.01355>`_
    `"FPN" <https://arxiv.org/abs/1612.03144>`_
    `"COCO" <https://arxiv.org/abs/1405.0312>`_
    """
    cfg = CustomFCOSConfig()
    cfg.backbone_pretrained = False
    return models.FCOS(cfg, **kwargs)


Net = models.FCOS
Cfg = CustomFCOSConfig
--------------------------------------------------------------------------------
/official/multimodal/dalle/README.md:
--------------------------------------------------------------------------------
# DALLE

This directory contains the MegEngine implementation of the multimodal model DALLE together with text-to-image generation code; training code is not included.

## Image reconstruction

Given a normalized 4-D input of size 256x256, an image can be reconstructed as follows:

```python
from official.multimodal.dalle.vae import OpenAIDiscreteVAE
from official.multimodal.big_sleep.big_sleep import save_images


vae = OpenAIDiscreteVAE(True)

img_seq = vae.get_codebook_indices(input)

reconstructed_image = vae.decode(img_seq)

save_images(reconstructed_image, './image.png')

```


## Text-to-image generation

You can try text-to-image generation with the following code; download the [dalle_new_variety.bpe](https://data.megengine.org.cn/research/multimodality/dalle_new_variety.bpe) file first.

```python
from official.multimodal.dalle import coco_512_16_16d_16h_80tsl
from official.multimodal.dalle import Generator

dalle = coco_512_16_16d_16h_80tsl()

generator = Generator(
    dalle,
    texts = ['A tower has a clock on it on a day with a blue sky'],
    num_images=64,
    batch_size=4,
    bpe_path = './dalle_new_variety.bpe',
    root='./dalle'
)

generator()
```

The generated results look like this:

![res](../../assets/total.png)


## References

[DALLE-pytorch](https://github.com/lucidrains/DALLE-pytorch)

[DALLE-pytorch-discussions](https://github.com/lucidrains/DALLE-pytorch/discussions/335)
--------------------------------------------------------------------------------
/official/vision/detection/configs/freeanchor_res18_coco_3x_800size.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from megengine import hub

from official.vision.detection import models


class CustomFreeAnchorConfig(models.FreeAnchorConfig):
    def __init__(self):
        super().__init__()

        self.backbone = "resnet18"
        self.fpn_in_channels = [128, 256, 512]


@hub.pretrained(
    "https://data.megengine.org.cn/models/weights/"
    "freeanchor_res18_coco_3x_800size_38dot1_3d0559a8.pkl"
)
def freeanchor_res18_coco_3x_800size(**kwargs):
    r"""
    FreeAnchor trained on the COCO dataset.
    `"FreeAnchor" <https://arxiv.org/abs/1909.02466>`_
    `"FPN" <https://arxiv.org/abs/1612.03144>`_
    `"COCO" <https://arxiv.org/abs/1405.0312>`_
    """
    cfg = CustomFreeAnchorConfig()
    cfg.backbone_pretrained = False
    return models.FreeAnchor(cfg, **kwargs)


Net = models.FreeAnchor
Cfg = CustomFreeAnchorConfig
--------------------------------------------------------------------------------
/official/vision/detection/configs/freeanchor_res34_coco_3x_800size.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from megengine import hub

from official.vision.detection import models


class CustomFreeAnchorConfig(models.FreeAnchorConfig):
    def __init__(self):
        super().__init__()

        self.backbone = "resnet34"
        self.fpn_in_channels = [128, 256, 512]


@hub.pretrained(
    "https://data.megengine.org.cn/models/weights/"
    "freeanchor_res34_coco_3x_800size_41dot1_3b03734e.pkl"
)
def freeanchor_res34_coco_3x_800size(**kwargs):
    r"""
    FreeAnchor trained on the COCO dataset.
    `"FreeAnchor" <https://arxiv.org/abs/1909.02466>`_
    `"FPN" <https://arxiv.org/abs/1612.03144>`_
    `"COCO" <https://arxiv.org/abs/1405.0312>`_
    """
    cfg = CustomFreeAnchorConfig()
    cfg.backbone_pretrained = False
    return models.FreeAnchor(cfg, **kwargs)


Net = models.FreeAnchor
Cfg = CustomFreeAnchorConfig
--------------------------------------------------------------------------------
/official/vision/detection/configs/faster_rcnn_res18_coco_3x_800size.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from megengine import hub

from official.vision.detection import models


class CustomFasterRCNNConfig(models.FasterRCNNConfig):
    def __init__(self):
        super().__init__()

        self.backbone = "resnet18"
        self.fpn_in_channels = [64, 128, 256, 512]


@hub.pretrained(
    "https://data.megengine.org.cn/models/weights/"
    "faster_rcnn_res18_coco_3x_800size_35dot7_a33835ca.pkl"
)
def faster_rcnn_res18_coco_3x_800size(**kwargs):
    r"""
    Faster-RCNN FPN trained on the COCO dataset.
    `"Faster-RCNN" <https://arxiv.org/abs/1506.01497>`_
    `"FPN" <https://arxiv.org/abs/1612.03144>`_
    `"COCO" <https://arxiv.org/abs/1405.0312>`_
    """
    cfg = CustomFasterRCNNConfig()
    cfg.backbone_pretrained = False
    return models.FasterRCNN(cfg, **kwargs)


Net = models.FasterRCNN
Cfg = CustomFasterRCNNConfig
--------------------------------------------------------------------------------
/official/vision/detection/configs/faster_rcnn_res34_coco_3x_800size.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from megengine import hub

from official.vision.detection import models


class CustomFasterRCNNConfig(models.FasterRCNNConfig):
    def __init__(self):
        super().__init__()

        self.backbone = "resnet34"
        self.fpn_in_channels = [64, 128, 256, 512]


@hub.pretrained(
    "https://data.megengine.org.cn/models/weights/"
    "faster_rcnn_res34_coco_3x_800size_39dot6_11fca4d4.pkl"
)
def faster_rcnn_res34_coco_3x_800size(**kwargs):
    r"""
    Faster-RCNN FPN trained on the COCO dataset.
    `"Faster-RCNN" <https://arxiv.org/abs/1506.01497>`_
    `"FPN" <https://arxiv.org/abs/1612.03144>`_
    `"COCO" <https://arxiv.org/abs/1405.0312>`_
    """
    cfg = CustomFasterRCNNConfig()
    cfg.backbone_pretrained = False
    return models.FasterRCNN(cfg, **kwargs)


Net = models.FasterRCNN
Cfg = CustomFasterRCNNConfig
--------------------------------------------------------------------------------
/official/vision/detection/configs/retinanet_res18_coco_3x_800size.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from megengine import hub

from official.vision.detection import models


class CustomRetinaNetConfig(models.RetinaNetConfig):
    def __init__(self):
        super().__init__()

        self.backbone = "resnet18"
        self.fpn_in_channels = [128, 256, 512]
        self.fpn_top_in_channel = 512


@hub.pretrained(
    "https://data.megengine.org.cn/models/weights/"
    "retinanet_res18_coco_3x_800size_35dot3_0c4956c8.pkl"
)
def retinanet_res18_coco_3x_800size(**kwargs):
    r"""
    RetinaNet trained on the COCO dataset.
    `"RetinaNet" <https://arxiv.org/abs/1708.02002>`_
    `"FPN" <https://arxiv.org/abs/1612.03144>`_
    `"COCO" <https://arxiv.org/abs/1405.0312>`_
    """
    cfg = CustomRetinaNetConfig()
    cfg.backbone_pretrained = False
    return models.RetinaNet(cfg, **kwargs)


Net = models.RetinaNet
Cfg = CustomRetinaNetConfig
--------------------------------------------------------------------------------
/official/vision/detection/configs/retinanet_res34_coco_3x_800size.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
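# NOTE: entries like the one below are also exposed through hubconf.py, so a
# model with its pretrained weights can be fetched by name via megengine.hub
# (an illustrative sketch, mirroring the classification example in README.md):
#
#     from megengine import hub
#     model = hub.load(
#         "megengine/models", "retinanet_res34_coco_3x_800size", pretrained=True
#     )
#     model.eval()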
from megengine import hub

from official.vision.detection import models


class CustomRetinaNetConfig(models.RetinaNetConfig):
    def __init__(self):
        super().__init__()

        self.backbone = "resnet34"
        self.fpn_in_channels = [128, 256, 512]
        self.fpn_top_in_channel = 512


@hub.pretrained(
    "https://data.megengine.org.cn/models/weights/"
    "retinanet_res34_coco_3x_800size_38dot4_3485f9ec.pkl"
)
def retinanet_res34_coco_3x_800size(**kwargs):
    r"""
    RetinaNet trained on the COCO dataset.
    `"RetinaNet" <https://arxiv.org/abs/1708.02002>`_
    `"FPN" <https://arxiv.org/abs/1612.03144>`_
    `"COCO" <https://arxiv.org/abs/1405.0312>`_
    """
    cfg = CustomRetinaNetConfig()
    cfg.backbone_pretrained = False
    return models.RetinaNet(cfg, **kwargs)


Net = models.RetinaNet
Cfg = CustomRetinaNetConfig
--------------------------------------------------------------------------------
/official/vision/detection/configs/retinanet_resx101_coco_2x_800size.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from megengine import hub

from official.vision.detection import models


class CustomRetinaNetConfig(models.RetinaNetConfig):
    def __init__(self):
        super().__init__()

        self.backbone = "resnext101_32x8d"
        self.max_epoch = 36
        self.lr_decay_stages = [24, 32]


@hub.pretrained(
    "https://data.megengine.org.cn/models/weights/"
    "retinanet_resx101_coco_2x_800size_42dot3_1502eace.pkl"
)
def retinanet_resx101_coco_2x_800size(**kwargs):
    r"""
    RetinaNet trained on the COCO dataset.
    `"RetinaNet" <https://arxiv.org/abs/1708.02002>`_
    `"FPN" <https://arxiv.org/abs/1612.03144>`_
    `"COCO" <https://arxiv.org/abs/1405.0312>`_
    """
    cfg = CustomRetinaNetConfig()
    cfg.backbone_pretrained = False
    return models.RetinaNet(cfg, **kwargs)


Net = models.RetinaNet
Cfg = CustomRetinaNetConfig
--------------------------------------------------------------------------------
/official/vision/detection/configs/freeanchor_resx101_coco_2x_800size.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from megengine import hub

from official.vision.detection import models


class CustomFreeAnchorConfig(models.FreeAnchorConfig):
    def __init__(self):
        super().__init__()

        self.backbone = "resnext101_32x8d"
        self.max_epoch = 36
        self.lr_decay_stages = [24, 32]


@hub.pretrained(
    "https://data.megengine.org.cn/models/weights/"
    "freeanchor_resx101_coco_2x_800size_44dot9_5a23fca7.pkl"
)
def freeanchor_resx101_coco_2x_800size(**kwargs):
    r"""
    FreeAnchor trained on the COCO dataset.
    `"FreeAnchor" <https://arxiv.org/abs/1909.02466>`_
    `"FPN" <https://arxiv.org/abs/1612.03144>`_
    `"COCO" <https://arxiv.org/abs/1405.0312>`_
    """
    cfg = CustomFreeAnchorConfig()
    cfg.backbone_pretrained = False
    return models.FreeAnchor(cfg, **kwargs)


Net = models.FreeAnchor
Cfg = CustomFreeAnchorConfig
--------------------------------------------------------------------------------
/official/vision/detection/configs/faster_rcnn_resx101_coco_2x_800size.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from megengine import hub

from official.vision.detection import models


class CustomFasterRCNNConfig(models.FasterRCNNConfig):
    def __init__(self):
        super().__init__()

        self.backbone = "resnext101_32x8d"
        self.max_epoch = 36
        self.lr_decay_stages = [24, 32]


@hub.pretrained(
    "https://data.megengine.org.cn/models/weights/"
    "faster_rcnn_resx101_coco_2x_800size_44dot1_e5e0060b.pkl"
)
def faster_rcnn_resx101_coco_2x_800size(**kwargs):
    r"""
    Faster-RCNN FPN trained on the COCO dataset.
    `"Faster-RCNN" <https://arxiv.org/abs/1506.01497>`_
    `"FPN" <https://arxiv.org/abs/1612.03144>`_
    `"COCO" <https://arxiv.org/abs/1405.0312>`_
    """
    cfg = CustomFasterRCNNConfig()
    cfg.backbone_pretrained = False
    return models.FasterRCNN(cfg, **kwargs)


Net = models.FasterRCNN
Cfg = CustomFasterRCNNConfig
--------------------------------------------------------------------------------
/official/multimodal/dalle/vae/openai_dvae.py:
--------------------------------------------------------------------------------
import math

import megengine.functional as F

from .base_vae import BaseVAE
from .openaidvae import openai_discrete_VAE_decoder, openai_discrete_VAE_encoder
from .openaidvae.utils import map_pixels, unmap_pixels


class DiscreteVAE(BaseVAE):
    def __init__(
        self,
        pretrained: bool = True
    ):
        super(DiscreteVAE, self).__init__(
            num_layers=3,
            num_tokens=8192,
            image_size=256,
        )

        self.encoder = openai_discrete_VAE_encoder(pretrained=pretrained)
        self.decoder = openai_discrete_VAE_decoder(pretrained=pretrained)

    def get_codebook_indices(self, img):
        img = map_pixels(img)
        z_logits = self.encoder.blocks(img)
        z = F.argmax(z_logits, axis=1)
        z = F.flatten(z, 1)
        return z

    def decode(self, img_seq):
        # reshape the flat token sequence back to a square latent grid
        b, n = img_seq.shape
        L = int(math.sqrt(n))
        img_seq = img_seq.reshape(b, L, L)

        z = F.one_hot(img_seq, num_classes=self.num_tokens)

        z = z.transpose(0, 3, 1, 2).astype('float32')
        x_stats = self.decoder(z).astype('float32')
        x_rec = unmap_pixels(F.sigmoid(x_stats[:, :3]))
        return x_rec

    def forward(self, inputs):
        raise NotImplementedError("Do not call forward method!")
--------------------------------------------------------------------------------
/official/multimodal/dalle/tokenizer.py:
--------------------------------------------------------------------------------
import os

import youtokentome as yttm

import megengine.functional as F
from megengine import Tensor

from ..clip.simple_tokenizer import SimpleTokenizer  # pylint: disable=unused-import # noqa: F401


class YttmTokenizer:
    def __init__(self, bpe_path: str):
        if not os.path.exists(bpe_path):
            raise ValueError(f'BPE model path {bpe_path} does not exist')

        tokenizer = yttm.BPE(model=bpe_path)
        self.tokenizer = tokenizer
        self.vocab_size = tokenizer.vocab_size()

    def decode(self, tokens, pad_tokens=(0, )):
        if isinstance(tokens, Tensor):
            tokens = tokens.tolist()

        return self.tokenizer.decode(tokens, ignore_ids=pad_tokens)

    def encode(self, texts):
        encoded = self.tokenizer.encode(texts, output_type=yttm.OutputType.ID)
        return list(map(Tensor, encoded))

    def tokenize(self, texts, context_length=256, truncate_text=False):
        if isinstance(texts, str):
            texts = [texts]

        all_tokens = self.encode(texts)

        result = F.zeros((len(all_tokens), context_length), dtype='int32')
        for i, tokens in enumerate(all_tokens):
            if len(tokens) > context_length:
                if truncate_text:
                    tokens = tokens[:context_length]
                else:
                    raise RuntimeError(
                        f"Input {texts[i]} is too long for context length {context_length}")
            result[i, :len(tokens)] = Tensor(tokens)

        return result
--------------------------------------------------------------------------------
/official/vision/gan/megengine_mimicry/utils/common.py:
--------------------------------------------------------------------------------
# Copyright (c) 2020 Kwot Sin Lee
# This code is licensed under MIT license
# (https://github.com/kwotsin/mimicry/blob/master/LICENSE)
# ------------------------------------------------------------------------------
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#
# This file has been modified by Megvii ("Megvii Modifications").
# All Megvii Modifications are Copyright (C) 2014-2019 Megvii Inc. All rights reserved.
# ------------------------------------------------------------------------------
"""
Script for common utility functions.
"""
import json
import os

import numpy as np


def write_to_json(dict_to_write, output_file):
    """
    Outputs a given dictionary as a JSON file with indents.

    Args:
        dict_to_write (dict): Input dictionary to output.
        output_file (str): File path to write the dictionary.

    Returns:
        None
    """
    with open(output_file, 'w') as file:
        json.dump(dict_to_write, file, indent=4)


def load_from_json(json_file):
    """
    Loads a JSON file as a dictionary and returns it.

    Args:
        json_file (str): Input JSON file to read.

    Returns:
        dict: Dictionary loaded from the JSON file.
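
    Example (illustrative round trip with ``write_to_json`` above):

        >>> write_to_json({"fid": 12.3}, "metrics.json")
        >>> load_from_json("metrics.json")
        {'fid': 12.3}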
    """
    with open(json_file, 'r') as file:
        return json.load(file)
--------------------------------------------------------------------------------
/official/multimodal/dalle/vae/vqgan_vae.py:
--------------------------------------------------------------------------------
from math import log, sqrt
from typing import Union

import megengine.functional as F

from ...taming_transformer.vqgan import GumbelVQ, VQModel, vqgan_imagenet_f16_1024
from .base_vae import BaseVAE


class VQGanVAE(BaseVAE):
    def __init__(self, model: Union[VQModel, GumbelVQ]):
        image_size = model.in_resolution
        num_layers = int(log(image_size / model.attn_resolution[0]) / log(2))
        channels = model.in_channel
        num_tokens = model.quantize.num_embeddings

        super(VQGanVAE, self).__init__(
            num_layers,
            num_tokens,
            image_size,
            channels
        )
        self.model = model

        self.is_gumbel = isinstance(model, GumbelVQ)

    def get_codebook_indices(self, img):
        b = img.shape[0]
        img = (2 * img) - 1
        _, _, [_, _, indices] = self.model.encode(img)
        if self.is_gumbel:
            return F.flatten(indices, 1)
        return indices.reshape(b, -1)

    def decode(self, img_seq):
        b, n = img_seq.shape
        one_hot_indices = F.one_hot(img_seq, num_classes=self.num_tokens).astype('float32')
        z = one_hot_indices @ self.model.quantize.embedding.weight

        c = z.shape[-1]
        z = z.reshape(b, int(sqrt(n)), -1, c).transpose(0, 3, 1, 2)
        img = self.model.decode(z)

        img = (F.clip(img, -1., 1.) + 1) * 0.5
        return img

    def forward(self):
        raise NotImplementedError()


def vqgan_vae_1024(pretrained=True):
    vae = vqgan_imagenet_f16_1024(pretrained=pretrained)
    model = VQGanVAE(vae)
    return model
--------------------------------------------------------------------------------
/official/vision/detection/layers/det/matcher.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import megengine.functional as F


class Matcher:

    def __init__(self, thresholds, labels, allow_low_quality_matches=False):
        assert len(thresholds) + 1 == len(labels), "thresholds and labels are not matched"
        assert all(low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:]))
        thresholds.append(float("inf"))
        thresholds.insert(0, -float("inf"))

        self.thresholds = thresholds
        self.labels = labels
        self.allow_low_quality_matches = allow_low_quality_matches

    def __call__(self, matrix):
        """
        Args:
            matrix (Tensor): a two-dim tensor of shape (N, M), where N is the
                number of GT boxes and M is the number of anchors.
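
        Example:
            With ``thresholds=[0.3, 0.7]`` and ``labels=[0, -1, 1]``, an anchor
            whose best IoU is below 0.3 is labeled 0 (background), one in
            [0.3, 0.7) is labeled -1 (ignored), and one at or above 0.7 is
            labeled 1 (foreground).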
        """
        assert len(matrix.shape) == 2
        max_scores = matrix.max(axis=0)
        match_indices = F.argmax(matrix, axis=0)

        # default ignore label: -1
        labels = F.full_like(match_indices, -1)

        for label, low, high in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]):
            mask = (max_scores >= low) & (max_scores < high)
            labels[mask] = label

        if self.allow_low_quality_matches:
            # also mark as foreground any anchor that is the best match of some GT box
            mask = (matrix == F.max(matrix, axis=1, keepdims=True)).sum(axis=0) > 0
            labels[mask] = 1

        return match_indices, labels
--------------------------------------------------------------------------------
/official/multimodal/big_sleep/README.md:
--------------------------------------------------------------------------------
# Big Sleep

This directory contains the MegEngine implementation of the multimodal model `Big Sleep`, which combines `CLIP` with the `BigGAN` generator so that users can dream up an image from a single line of text!

## Usage

Please use a GPU device; otherwise generation may take very long.

Loading with `hub`:

```python
from megengine import hub
modelhub = hub.import_module(repo_info='megengine/models', git_host='github.com')

dream = modelhub.Imagine(
    # the text to visualize
    text = "fire in the sky",
    # an optional reference image to lightly guide generation
    img = None,
    # size of the generated image
    image_size = 512,
    # learning rate of the iterative process
    lr = 5e-2,
    # interval (in iterations) between saved images
    save_every = 25,
    # whether to save every intermediate image; otherwise the image is overwritten in place
    save_progress = True,
    # penalized keywords
    text_min = None,
    # number of gradient accumulation steps
    gradient_accumulate_every = 1,
    epochs = 20,
    iterations = 1050,
    # whether to save all intermediate images as an mp4 video file
    animate = False,
    # frame rate of the saved mp4
    fps = 15,
    # sampling mode used in BigSleep
    bilinear = False,
    # fix the random seed
    seed = None,
    # cap the maximum number of classes
    max_classes = None,
    # used by the differentiable topk
    class_temperature = 2.,
    # whether to prefix saved files with the date and time
    save_date_time = False,
    # whether to save the best-scoring image
    save_best = True,
    # experimental resampling
    experimental_resample = False,
    ema_decay = 0.99,
    num_cutouts = 128,
    center_bias = False,
    clip_type = 'RN50',
    root = 'BigSleep',
)

# start iteratively generating the image
dream()
```

Loading locally:

```python
from official.multimodal.big_sleep import Imagine

dream = Imagine(
    text = "fire in the sky",
    lr = 5e-2,
    save_every = 25,
    save_progress = True,
    image_size = 512
)

# start iteratively generating the image
dream()
```

### References

[lucidrains/big-sleep](https://github.com/lucidrains/big-sleep)
--------------------------------------------------------------------------------
/official/vision/gan/megengine_mimicry/metrics/utils.py:
--------------------------------------------------------------------------------
# Copyright (c) 2020 Kwot Sin Lee
# This code is licensed under MIT license
# (https://github.com/kwotsin/mimicry/blob/master/LICENSE)
# ------------------------------------------------------------------------------
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#
# This file has been modified by Megvii ("Megvii Modifications").
# All Megvii Modifications are Copyright (C) 2014-2019 Megvii Inc. All rights reserved.
# ------------------------------------------------------------------------------
import numpy as np


def _normalize_images(images):
    """
    Given a tensor of (megengine BGR) images, uses the torchvision
    normalization method to convert floating point data to integers. See reference
    at: https://pytorch.org/docs/stable/_modules/torchvision/utils.html#save_image

    The function uses the normalization from make_grid and save_image functions.

    Args:
        images (Tensor): Batch of images of shape (N, 3, H, W).

    Returns:
        ndarray: Batch of normalized (0-255) RGB images of shape (N, H, W, 3).
    """
    # Shift the image from [-1, 1] range to [0, 1] range.
    min_val = float(images.min())
    max_val = float(images.max())

    images = (images - min_val) / (max_val - min_val + 1e-5)

    images = np.clip(images * 255 + 0.5, 0, 255).astype("uint8")

    images = np.transpose(images, [0, 2, 3, 1])

    # NOTE: megengine(opencv) uses BGR, while TF uses RGB. Needs conversion.
    images = images[:, :, :, ::-1]

    return images
--------------------------------------------------------------------------------
/official/vision/segmentation/configs/deeplabv3plus_res101_voc_512size.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from megengine import hub

from official.vision.segmentation import models


class VOCConfig:
    def __init__(self):
        self.dataset = "VOC2012"
        self.data_type = "trainaug"

        self.backbone = "resnet101"
        self.backbone_pretrained = True

        self.batch_size = 8
        self.learning_rate = 0.02
        self.momentum = 0.9
        self.weight_decay = 0.0001
        self.max_epoch = 40
        self.nr_images_epoch = 64000

        self.ignore_label = 255
        self.num_classes = 21
        self.img_height = 512
        self.img_width = 512
        self.img_mean = [103.530, 116.280, 123.675]  # BGR
        self.img_std = [57.375, 57.120, 58.395]

        self.val_height = 512
        self.val_width = 512
        self.val_multiscale = [1.0]  # [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
        self.val_flip = False
        self.val_slip = False
        self.val_save_path = None

        self.log_interval = 20


@hub.pretrained(
    "https://data.megengine.org.cn/models/weights/"
    "deeplabv3plus_res101_voc_512size_79dot5_7856dc84.pkl"
)
def deeplabv3plus_res101_voc_512size(**kwargs):
    r"""DeepLab v3+ model from
    `"Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation"
    <https://arxiv.org/abs/1802.02611>`_
    """
    cfg = VOCConfig()
    cfg.backbone_pretrained = False
    return models.DeepLabV3Plus(cfg, **kwargs)


Net = models.DeepLabV3Plus
Cfg = VOCConfig
--------------------------------------------------------------------------------
/official/vision/segmentation/configs/deeplabv3plus_res101_cityscapes_768size.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from megengine import hub

from official.vision.segmentation import models


class CityscapesConfig:
    def __init__(self):
        self.dataset = "Cityscapes"

        self.backbone = "resnet101"
        self.backbone_pretrained = True

        self.batch_size = 4
        self.learning_rate = 0.01
        self.momentum = 0.9
        self.weight_decay = 0.0001
        self.max_epoch = 40
        self.nr_images_epoch = 32000

        self.ignore_label = 255
        self.num_classes = 19
        self.img_height = 768
        self.img_width = 768
        self.img_mean = [103.530, 116.280, 123.675]  # BGR
        self.img_std = [57.375, 57.120, 58.395]

        self.val_height = 1024
        self.val_width = 2048
        self.val_multiscale = [1.0]  # [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
        self.val_flip = False
        self.val_slip = False
        self.val_save_path = None

        self.log_interval = 20


@hub.pretrained(
    "https://data.megengine.org.cn/models/weights/"
    "deeplabv3plus_res101_cityscapes_768size_78dot5_c45e0cb9.pkl"
)
def deeplabv3plus_res101_cityscapes_768size(**kwargs):
    r"""DeepLab v3+ model from
    `"Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation"
    <https://arxiv.org/abs/1802.02611>`_
    """
    cfg = CityscapesConfig()
    cfg.backbone_pretrained = False
    return models.DeepLabV3Plus(cfg, **kwargs)


Net = models.DeepLabV3Plus
Cfg = CityscapesConfig
--------------------------------------------------------------------------------
/official/vision/detection/configs/__init__.py:
--------------------------------------------------------------------------------
from .atss_res18_coco_3x_800size import atss_res18_coco_3x_800size
from .atss_res34_coco_3x_800size import atss_res34_coco_3x_800size
from .atss_res50_coco_3x_800size import atss_res50_coco_3x_800size
from .atss_res101_coco_3x_800size import atss_res101_coco_3x_800size
from .atss_resx101_coco_2x_800size import atss_resx101_coco_2x_800size
from .faster_rcnn_res18_coco_3x_800size import faster_rcnn_res18_coco_3x_800size
from .faster_rcnn_res34_coco_3x_800size import faster_rcnn_res34_coco_3x_800size
from .faster_rcnn_res50_coco_3x_800size import faster_rcnn_res50_coco_3x_800size
from .faster_rcnn_res101_coco_3x_800size import faster_rcnn_res101_coco_3x_800size
from .faster_rcnn_resx101_coco_2x_800size import faster_rcnn_resx101_coco_2x_800size
from .fcos_res18_coco_3x_800size import fcos_res18_coco_3x_800size
from .fcos_res34_coco_3x_800size import fcos_res34_coco_3x_800size
from .fcos_res50_coco_3x_800size import fcos_res50_coco_3x_800size
from .fcos_res101_coco_3x_800size import fcos_res101_coco_3x_800size
from .fcos_resx101_coco_2x_800size import fcos_resx101_coco_2x_800size
from .freeanchor_res18_coco_3x_800size import freeanchor_res18_coco_3x_800size
from .freeanchor_res34_coco_3x_800size import freeanchor_res34_coco_3x_800size
from .freeanchor_res50_coco_3x_800size import freeanchor_res50_coco_3x_800size
from .freeanchor_res101_coco_3x_800size import freeanchor_res101_coco_3x_800size
from .freeanchor_resx101_coco_2x_800size import freeanchor_resx101_coco_2x_800size
from .retinanet_res18_coco_3x_800size import retinanet_res18_coco_3x_800size
from .retinanet_res34_coco_3x_800size import retinanet_res34_coco_3x_800size
from .retinanet_res50_coco_3x_800size import retinanet_res50_coco_3x_800size
from .retinanet_res101_coco_3x_800size import retinanet_res101_coco_3x_800size
from .retinanet_resx101_coco_2x_800size import retinanet_resx101_coco_2x_800size

_EXCLUDE = {}
__all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")]
--------------------------------------------------------------------------------
/official/vision/gan/megengine_mimicry/nets/dcgan/dcgan_base.py:
--------------------------------------------------------------------------------
# Copyright (c) 2020 Kwot Sin Lee
# This code is licensed under MIT license
# (https://github.com/kwotsin/mimicry/blob/master/LICENSE)
# ------------------------------------------------------------------------------
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#
# This file has been modified by Megvii ("Megvii Modifications").
# All Megvii Modifications are Copyright (C) 2014-2019 Megvii Inc. All rights reserved.
# ------------------------------------------------------------------------------
from .. import gan


class DCGANBaseGenerator(gan.BaseGenerator):
    r"""
    ResNet backbone generator for ResNet DCGAN.

    Attributes:
        nz (int): Noise dimension for upsampling.
        ngf (int): Variable controlling generator feature map sizes.
        bottom_width (int): Starting width for upsampling generator output to an image.
        loss_type (str): Name of loss to use for GAN loss.
    """
    def __init__(self, nz, ngf, bottom_width, loss_type='ns', **kwargs):
        super().__init__(nz=nz,
                         ngf=ngf,
                         bottom_width=bottom_width,
                         loss_type=loss_type,
                         **kwargs)


class DCGANBaseDiscriminator(gan.BaseDiscriminator):
    r"""
    ResNet backbone discriminator for ResNet DCGAN.

    Attributes:
        ndf (int): Variable controlling discriminator feature map sizes.
        loss_type (str): Name of loss to use for GAN loss.
    """
    def __init__(self, ndf, loss_type='ns', **kwargs):
        super().__init__(ndf=ndf, loss_type=loss_type, **kwargs)
--------------------------------------------------------------------------------
/official/vision/detection/layers/basic/norm.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright 2019 - present, Facebook, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
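# Illustrative usage of ``get_norm`` defined below (the GroupNorm signature is
# assumed to be (num_groups, num_channels)):
#
#     norm_cls = get_norm("GN")   # -> megengine GroupNorm class
#     gn = norm_cls(32, 256)      # 32 groups over 256 channels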
# ---------------------------------------------------------------------
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#
# This file has been modified by Megvii ("Megvii Modifications").
# All Megvii Modifications are Copyright (C) 2014-2021 Megvii Inc. All rights reserved.
# ---------------------------------------------------------------------
from functools import partial

import megengine.module as M
from megengine.module.normalization import GroupNorm, InstanceNorm, LayerNorm


def get_norm(norm):
    """
    Args:
        norm (str): currently supports "BN", "SyncBN", "FrozenBN", "GN", "LN" and "IN"

    Returns:
        M.Module or None: the normalization layer
    """
    if norm is None:
        return None
    norm = {
        "BN": M.BatchNorm2d,
        "SyncBN": M.SyncBatchNorm,
        "FrozenBN": partial(M.BatchNorm2d, freeze=True),
        "GN": GroupNorm,
        "LN": LayerNorm,
        "IN": InstanceNorm,
    }[norm]
    return norm
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# MegEngine Models

![](https://github.com/MegEngine/Models/workflows/CI/badge.svg)

This repository contains various mainstream deep learning models implemented with [MegEngine](https://github.com/megengine/megengine).

The [official](./official) directory provides official implementations of classic image classification, object detection, image segmentation, and natural language models. Each model comes with code for model definition, inference, and training.

The code under [official](./official) is actively maintained: it is kept compatible with the latest MegEngine APIs and aims to provide the best model implementations, along with high-quality documentation that helps newcomers learn how to train their own models with MegEngine.

## Overview

For each model we provide at least four script files: model definition (`model.py`), inference (`inference.py`), training (`train.py`), and testing (`test.py`).

Each model directory contains a `README` that introduces the model in detail and walks through training and testing, e.g. the [ResNet README](./official/vision/classification/resnet/README.md).

In addition, the models defined under `official` can be loaded directly through `megengine.hub`, for example:

```python
import megengine.hub

# load the network structure only
resnet18 = megengine.hub.load("megengine/models", "resnet18")
# load the network structure together with pretrained weights
resnet18 = megengine.hub.load("megengine/models", "resnet18", pretrained=True)
```

See [hubconf.py](./hubconf.py) for more models that can be loaded through the `megengine.hub` interface.

## Installation and environment setup

Before running the code in this repository, set up your local environment as follows:

1. Clone the repository

```bash
git clone https://github.com/MegEngine/Models.git
```

2. Install the dependencies

```bash
pip3 install --user -r requirements.txt
```

3. Add the directory to the PYTHONPATH environment variable

```bash
export PYTHONPATH=/path/to/models:$PYTHONPATH
```


## Official models

### Image classification

Image classification is a fundamental task in computer vision, and many other vision tasks (such as object detection) build on classification-pretrained models. We therefore provide various classification models pretrained on ImageNet; see [here](./official/vision/classification) for the implementations.

### Object detection

Object detection is another common computer vision task. We provide several classic object detection models; see [here](./official/vision/detection) for the implementations.

### Image segmentation

Semantic segmentation is likewise a fundamental computer vision task, for which we also provide classic semantic segmentation models; see [here](./official/vision/segmentation/).

### Human keypoint detection

We provide classic and high-accuracy models for human keypoint detection; see [here](./official/vision/keypoints) for the implementations.

### Natural language processing

We also support some common natural language processing models. The weights come from Google's pretrained models, and users can easily load the pretrained BERT models directly through `megengine.hub`.

In addition, [bert](./official/nlp/bert) provides convenience scripts that fetch the matching vocabulary, configuration, and pretrained model directly by task name.

### Multimodal

Multimodal learning is fascinating and has rich, fun real-world applications. We support several classic multimodal models whose weights come from the official pretrained models; following the tutorials in this repository, users can easily experience the magic of multimodality.
--------------------------------------------------------------------------------
/official/vision/detection/tools/inference.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import argparse

import cv2

import megengine as mge

from official.vision.detection.tools.data_mapper import data_mapper
from official.vision.detection.tools.utils import DetEvaluator, import_from_file

logger = mge.get_logger(__name__)
logger.setLevel("INFO")


def make_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-f", "--file", default="net.py", type=str, help="net description file"
    )
    parser.add_argument(
        "-w", "--weight_file", default=None, type=str, help="weights file",
    )
    parser.add_argument("-i", "--image", type=str)
    return parser


def main():
    parser = make_parser()
    args = parser.parse_args()

    current_network = import_from_file(args.file)
    cfg = current_network.Cfg()
    cfg.backbone_pretrained = False
    model = current_network.Net(cfg)
    model.eval()

    state_dict = mge.load(args.weight_file)
    if "state_dict" in state_dict:
        state_dict = state_dict["state_dict"]
    model.load_state_dict(state_dict)

    evaluator = DetEvaluator(model)

    ori_img = cv2.imread(args.image)
    image, im_info = DetEvaluator.process_inputs(
        ori_img.copy(), model.cfg.test_image_short_size, model.cfg.test_image_max_size,
    )
    pred_res = evaluator.predict(
        image=mge.tensor(image),
        im_info=mge.tensor(im_info)
    )
    res_img = DetEvaluator.vis_det(
        ori_img,
        pred_res,
        is_show_label=True,
        classes=data_mapper[cfg.test_dataset["name"]].class_names,
    )
    cv2.imwrite("results.jpg", res_img)


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/official/vision/detection/tools/test_in_table.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding:utf-8 -*-

import argparse
import os
from tabulate import tabulate

import megengine as mge

from official.vision.detection.tools.utils import import_from_file

logger = mge.get_logger(__name__)
logger.setLevel("INFO")


def make_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-f", "--files", nargs="+", default=None, help="all config files"
    )
    parser.add_argument(
        "-j", "--jsons", nargs="+", default=None, help="all json files"
    )
    parser.add_argument(
        "-d", "--dataset_dir", default="/data/Datasets", type=str,
    )
return parser 28 | 29 | 30 | def main(): 31 | # pylint: disable=import-outside-toplevel,too-many-branches,too-many-statements 32 | from pycocotools.coco import COCO 33 | from pycocotools.cocoeval import COCOeval 34 | 35 | parser = make_parser() 36 | args = parser.parse_args() 37 | assert len(args.files) == len(args.jsons), "length of config and json mismatch" 38 | table_content = [] 39 | 40 | for cfg_file, json_path in zip(args.files, args.jsons): 41 | current_network = import_from_file(cfg_file) 42 | cfg = current_network.Cfg() 43 | 44 | logger.info(f"load json from {json_path}, start evaluation!") 45 | 46 | eval_gt = COCO( 47 | os.path.join( 48 | args.dataset_dir, cfg.test_dataset["name"], cfg.test_dataset["ann_file"] 49 | ) 50 | ) 51 | eval_dt = eval_gt.loadRes(json_path) 52 | cocoEval = COCOeval(eval_gt, eval_dt, iouType="bbox") 53 | cocoEval.evaluate() 54 | cocoEval.accumulate() 55 | cocoEval.summarize() 56 | cfg_name = cfg_file.split(".")[0] 57 | table_content.append([cfg_name, *["{:.3f}".format(v) for v in cocoEval.stats]]) 58 | 59 | headers = [ 60 | "name", "AP", "AP@0.5", "AP@0.75", "APs", "APm", "APl", 61 | "AR@1", "AR@10", "AR@100", "ARs", "ARm", "ARl", 62 | ] 63 | table = tabulate(table_content, headers=headers, tablefmt="pipe") 64 | logger.info("\n" + table) 65 | 66 | 67 | if __name__ == "__main__": 68 | main() 69 | -------------------------------------------------------------------------------- /official/quantization/param_config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
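# Illustrative usage of the helpers defined below (not part of the original file):
#   cfg = get_config("resnet18")              # full-precision / QAT training schedule
#   ft_cfg = get_finetune_config("resnet18")  # shorter QAT finetuning schedule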
9 | """ 10 | Configurations to train/finetune quantized classification models 11 | """ 12 | import megengine.data.transform as T 13 | 14 | 15 | class ShufflenetConfig: 16 | BATCH_SIZE = 128 17 | LEARNING_RATE = 0.0625 18 | MOMENTUM = 0.9 19 | WEIGHT_DECAY = ( 20 | lambda self, n, p: 4e-5 if n.find("weight") >= 0 and len(p.shape) > 1 else 0 21 | ) 22 | EPOCHS = 240 23 | 24 | SCHEDULER = "Linear" 25 | COLOR_JITTOR = T.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4) 26 | 27 | 28 | class ResnetConfig: 29 | BATCH_SIZE = 32 30 | LEARNING_RATE = 0.0125 31 | MOMENTUM = 0.9 32 | WEIGHT_DECAY = 1e-4 33 | EPOCHS = 90 34 | 35 | SCHEDULER = "Multistep" 36 | SCHEDULER_STEPS = [30, 60, 80] 37 | SCHEDULER_GAMMA = 0.1 38 | COLOR_JITTOR = T.PseudoTransform() # disable colorjittor 39 | 40 | 41 | def get_config(arch: str): 42 | if "resne" in arch: # both resnet and resnext 43 | return ResnetConfig() 44 | elif "shufflenet" in arch or "mobilenet" in arch: 45 | return ShufflenetConfig() 46 | else: 47 | raise ValueError("config for {} not exists".format(arch)) 48 | 49 | 50 | class ShufflenetFinetuneConfig(ShufflenetConfig): 51 | BATCH_SIZE = 64 // 2 52 | LEARNING_RATE = 0.003125 / 2 53 | EPOCHS = 30 54 | 55 | 56 | class ResnetFinetuneConfig(ResnetConfig): 57 | BATCH_SIZE = 32 58 | LEARNING_RATE = 0.000125 59 | EPOCHS = 12 60 | 61 | SCHEDULER = "Multistep" 62 | SCHEDULER_STEPS = [ 63 | 6, 64 | ] 65 | SCHEDULER_GAMMA = 0.1 66 | 67 | 68 | def get_finetune_config(arch: str): 69 | if "resne" in arch: # both resnet and resnext 70 | return ResnetFinetuneConfig() 71 | elif "shufflenet" in arch or "mobilenet" in arch: 72 | return ShufflenetFinetuneConfig() 73 | else: 74 | raise ValueError("config for {} not exists".format(arch)) 75 | -------------------------------------------------------------------------------- /official/vision/classification/dump.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
9 | 10 | import argparse 11 | import sys 12 | 13 | # pylint: disable=import-error 14 | import resnet.model as resnet_model 15 | # pylint: disable=import-error 16 | import shufflenet.model as snet_model 17 | 18 | import numpy as np 19 | 20 | import megengine as mge 21 | from megengine import jit 22 | 23 | 24 | def dump_static_graph(model, graph_name, shape): 25 | model.eval() 26 | 27 | data = mge.Tensor(np.random.random(shape)) 28 | 29 | @jit.trace(capture_as_const=True) 30 | def pred_func(data): 31 | outputs = model(data) 32 | return outputs 33 | 34 | pred_func(data) 35 | pred_func.dump( 36 | graph_name, 37 | arg_names=["data"], 38 | optimize_for_inference=True, 39 | enable_fuse_conv_bias_nonlinearity=True, 40 | ) 41 | 42 | 43 | def main(): 44 | parser = argparse.ArgumentParser(description="MegEngine Classification Dump .mge") 45 | parser.add_argument( 46 | "-a", 47 | "--arch", 48 | default="resnet18", 49 | help="model architecture (default: resnet18)", 50 | ) 51 | parser.add_argument( 52 | "-s", 53 | "--shape", 54 | type=int, 55 | nargs=4, 56 | default=(1, 3, 224, 224), 57 | help="input shape (default: 1 3 224 224)" 58 | ) 59 | parser.add_argument( 60 | "-o", 61 | "--output", 62 | type=str, 63 | default="model.mge", 64 | help="output filename" 65 | ) 66 | 67 | args = parser.parse_args() 68 | 69 | if 'resnet' in args.arch: 70 | model = getattr(resnet_model, args.arch)(pretrained=True) 71 | elif 'shufflenet' in args.arch: 72 | model = getattr(snet_model, args.arch)(pretrained=True) 73 | else: 74 | print('unavailable arch {}'.format(args.arch)) 75 | sys.exit() 76 | dump_static_graph(model, args.output, tuple(args.shape)) 77 | 78 | 79 | if __name__ == "__main__": 80 | main() 81 | -------------------------------------------------------------------------------- /official/vision/gan/megengine_mimicry/utils/vis.py: -------------------------------------------------------------------------------- 1 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 2 | # 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | # 5 | # Unless required by applicable law or agreed to in writing, 6 | # software distributed under the License is distributed on an 7 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
8 | import math 9 | 10 | import cv2 11 | import megengine 12 | 13 | 14 | def normalize_image(tensor: megengine.Tensor, scale=255): 15 | """normalize image tensors of any range to [0, scale=255]""" 16 | mi = tensor.min() 17 | ma = tensor.max() 18 | tensor = scale * (tensor - mi) / (ma - mi + 1e-9) 19 | return tensor 20 | 21 | 22 | def make_grid( 23 | tensor: megengine.Tensor, # [N,C,H,W] 24 | nrow: int = 8, 25 | padding: int = 2, 26 | background: float = 0, 27 | normalize: bool = False, 28 | ) -> megengine.Tensor: 29 | """align [N, C, H, W] image tensor to [H, W, 3] image grids, for visualization""" 30 | if normalize: 31 | tensor = normalize_image(tensor, scale=255) # normalize to 0-255 scale 32 | 33 | c = tensor.shape[1] 34 | assert c in (1, 3), "only support color/grayscale images, got channel = {}".format(c) 35 | nmaps = tensor.shape[0] 36 | xmaps = min(nrow, nmaps) 37 | ymaps = int(math.ceil(float(nmaps) / xmaps)) 38 | height, width = int(tensor.shape[2] + padding), int(tensor.shape[3] + padding) 39 | num_channels = tensor.shape[1] 40 | grid = megengine.ones((num_channels, height * ymaps + padding, width * xmaps + padding), "float32") * background 41 | k = 0 42 | for y in range(ymaps): 43 | for x in range(xmaps): 44 | if k >= nmaps: 45 | break 46 | grid = grid.set_subtensor(tensor[k])[:, 47 | y * height + padding: (y + 1) * height, 48 | x * width + padding: (x + 1) * width] 49 | k = k + 1 50 | c, h, w = grid.shape 51 | grid = grid.dimshuffle(1, 2, 0) # [C,H,W] -> [H,W,C] 52 | grid = grid.broadcast(h, w, 3) # [H,W,C] -> [H,W,3] 53 | return grid 54 | 55 | 56 | def save_image(image, path): 57 | if isinstance(image, megengine.Tensor): 58 | image = image.numpy() 59 | cv2.imwrite(path, image) 60 | -------------------------------------------------------------------------------- /official/vision/segmentation/tools/inference.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
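# Illustrative usage (not part of the original file), with a config from
# official/vision/segmentation/configs:
#   python3 inference.py -f deeplabv3plus_res101_voc_512size.py \
#       -w /path/to/weights.pkl -i demo.jpg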
9 | import argparse 10 | 11 | import cv2 12 | import numpy as np 13 | 14 | import megengine as mge 15 | 16 | from official.vision.segmentation.tools.utils import class_colors, import_from_file 17 | 18 | logger = mge.get_logger(__name__) 19 | logger.setLevel("INFO") 20 | 21 | 22 | def main(): 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument( 25 | "-f", "--file", default="net.py", type=str, help="net description file" 26 | ) 27 | parser.add_argument( 28 | "-w", "--weight_file", default=None, type=str, help="weights file", 29 | ) 30 | parser.add_argument("-i", "--image", type=str) 31 | args = parser.parse_args() 32 | 33 | current_network = import_from_file(args.file) 34 | cfg = current_network.Cfg() 35 | cfg.backbone_pretrained = False 36 | model = current_network.Net(cfg) 37 | model.eval() 38 | 39 | state_dict = mge.load(args.weight_file) 40 | if "state_dict" in state_dict: 41 | state_dict = state_dict["state_dict"] 42 | model.load_state_dict(state_dict) 43 | 44 | img = cv2.imread(args.image) 45 | pred = inference(img, model) 46 | cv2.imwrite("results.jpg", pred) 47 | 48 | 49 | def inference(img, model): 50 | def pred_func(data): 51 | pred = model(data) 52 | return pred 53 | 54 | img = ( 55 | img.astype("float32") - np.array(model.cfg.img_mean) 56 | ) / np.array(model.cfg.img_std) 57 | ori_h, ori_w = img.shape[:2] 58 | img = cv2.resize(img, (model.cfg.val_height, model.cfg.val_width)) 59 | img = img.transpose(2, 0, 1)[np.newaxis] 60 | 61 | pred = pred_func(mge.tensor(img)) 62 | pred = pred.numpy().squeeze().argmax(0) 63 | pred = cv2.resize( 64 | pred.astype("uint8"), (ori_w, ori_h), interpolation=cv2.INTER_NEAREST 65 | ) 66 | 67 | out = np.zeros((ori_h, ori_w, 3)) 68 | nids = np.unique(pred) 69 | for t in nids: 70 | out[pred == t] = class_colors[t] 71 | return out 72 | 73 | 74 | if __name__ == "__main__": 75 | main() 76 | -------------------------------------------------------------------------------- /official/vision/gan/README.md: -------------------------------------------------------------------------------- 1 | Generative Adversarial Networks 2 | --- 3 | 4 | This directory provides code to build, train and evaluate popular GAN models including DCGAN and WGAN. Most of the code is modified from a well-written and reproducible GAN benchmark [pytorch_mimicry](https://github.com/kwotsin/mimicry). 5 | 6 | We provide DCGAN and WGAN models pretrained on CIFAR-10. They use a similar ResNet backbone and share the same training settings. 7 | 8 | ![images generated by DCGAN](../../assets/dcgan.png) 9 | 10 | #### Training Parameters 11 | | Resolution | Batch Size | Learning Rate | β1 | β2 | Decay Policy | ndis | niter | 12 | |:----------:|:----------:|:-------------:|:-------------:|:-------------:|:------------:|:---------------:|------------------| 13 | | 32 x 32 | 64 | 2e-4 | 0.0 | 0.9 | Linear | 5 | 100K | 14 | 15 | Their FID and Inception Score (IS) are listed below. 
16 | 17 | #### Metrics 18 | | Metric | Method | 19 | |:--------------------------------:|:---------------------------------------:| 20 | | [Inception Score (IS)](https://arxiv.org/abs/1606.03498) | 50K samples at 10 splits| 21 | | [Fréchet Inception Distance (FID)](https://arxiv.org/abs/1706.08500) | 50K real/generated samples | 22 | | [Kernel Inception Distance (KID)](https://arxiv.org/abs/1801.01401) | 50K real/generated samples, averaged over 10 splits.| 23 | 24 | 25 | #### CIFAR-10 Results 26 | | Method | FID Score | IS Score | KID Score | 27 | | :-: | :-: | :-: | :-: | 28 | | DCGAN | 27.2 | 7.0 | 0.0242 | 29 | | WGAN-WC | 30.5 | 6.7 | 0.0249 | 30 | 31 | ### Generate images with pretrained weights 32 | 33 | ```python 34 | import megengine.hub as hub 35 | import megengine_mimicry.nets.dcgan.dcgan_cifar as dcgan 36 | import megengine_mimicry.utils.vis as vis 37 | 38 | netG = dcgan.DCGANGeneratorCIFAR() 39 | netG.load_state_dict(hub.load_serialized_obj_from_url("https://data.megengine.org.cn/models/weights/dcgan_cifar.pkl")) 40 | images = netG.generate_images(num_images=64) # in NCHW format with normalized pixel values in [0, 1] 41 | grid = vis.make_grid(images) # in HW3 format with [0, 255] BGR images for visualization 42 | vis.save_image(grid, "visual.png") 43 | ``` 44 | 45 | ### Train and evaluate a DCGAN or WGAN 46 | 47 | ```bash 48 | # train and evaluate a DCGAN 49 | python3 train_dcgan.py 50 | # train and evaluate a WGAN 51 | python3 train_wgan.py 52 | ``` 53 | 54 | #### TensorBoard visualization 55 | ```bash 56 | tensorboard --logdir ./log --bind_all 57 | ``` 58 | -------------------------------------------------------------------------------- /official/multimodal/big_sleep/ema.py: -------------------------------------------------------------------------------- 1 | # Exponential Moving Average (from https://gist.github.com/crowsonkb/76b94d5238272722290734bf4725d204) # noqa: E501 2 | from copy import deepcopy 3 | 4 | import megengine as mge 5 | import megengine.functional as F 6 | import megengine.module as M 7 | 8 | 9 | class EMA(M.Module): 10 | def __init__(self, model: M.Module, decay: float): 11 | super(EMA, self).__init__() 12 | self.model = model 13 | self.decay = decay 14 | self.accum = mge.tensor(1.)
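        # NOTE: `accum` accumulates decay^t across update() calls; update()
        # divides the biased running average by (1 - accum), i.e. Adam-style
        # bias correction of the exponential moving average.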
15 | 16 | self._biased = deepcopy(model) 17 | self.average = deepcopy(model) 18 | for param in self._biased.parameters(): 19 | param.set_value(param.detach() * 0) 20 | for param in self.average.parameters(): 21 | param.set_value(param.detach() * 0) 22 | self.update() 23 | 24 | def update(self): 25 | if not self.training: 26 | raise RuntimeError('Update should only be called during training') 27 | 28 | self.accum *= self.decay 29 | 30 | model_params = dict(self.model.named_parameters()) 31 | biased_params = dict(self._biased.named_parameters()) 32 | average_params = dict(self.average.named_parameters()) 33 | assert model_params.keys() == biased_params.keys() == average_params.keys( 34 | ), 'Model parameter keys incompatible with EMA stored parameter keys' 35 | 36 | for name, param in model_params.items(): 37 | biased_params[name].set_value( 38 | F.mul(biased_params[name], self.decay)) 39 | biased_params[name].set_value( 40 | F.add(biased_params[name], (1 - self.decay) * param)) 41 | average_params[name].set_value(biased_params[name]) 42 | average_params[name].set_value( 43 | F.div(average_params[name], 1 - self.accum)) 44 | 45 | model_buffers = dict(self.model.named_buffers()) 46 | biased_buffers = dict(self._biased.named_buffers()) 47 | average_buffers = dict(self.average.named_buffers()) 48 | assert model_buffers.keys() == biased_buffers.keys() == average_buffers.keys() 49 | 50 | for name, buffer in model_buffers.items(): 51 | biased_buffers[name].set_value(buffer) 52 | average_buffers[name].set_value(buffer) 53 | 54 | def forward(self, *args, **kwargs): 55 | if self.training: 56 | return self.model(*args, **kwargs) 57 | return self.average(*args, **kwargs) 58 | -------------------------------------------------------------------------------- /official/vision/detection/layers/det/pooler.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
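# NOTE: `roi_pool` below assigns each RoI to an FPN level using the heuristic
# from the FPN paper: level = floor(canonical_level + log2(sqrt(box_area) / canonical_box_size)),
# with canonical_level = 4 and canonical_box_size = 224, clamped to the levels
# actually present in `rpn_fms`.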
9 | import math 10 | 11 | import megengine.functional as F 12 | 13 | 14 | def roi_pool( 15 | rpn_fms, rois, stride, pool_shape, pooler_type="roi_align", 16 | ): 17 | rois = rois.detach() 18 | assert len(stride) == len(rpn_fms) 19 | canonical_level = 4 20 | canonical_box_size = 224 21 | min_level = int(math.log2(stride[0])) 22 | max_level = int(math.log2(stride[-1])) 23 | 24 | num_fms = len(rpn_fms) 25 | box_area = (rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2]) 26 | assigned_level = F.floor( 27 | canonical_level + F.log(F.sqrt(box_area) / canonical_box_size) / math.log(2) 28 | ).astype("int32") 29 | assigned_level = F.minimum(assigned_level, max_level) 30 | assigned_level = F.maximum(assigned_level, min_level) 31 | assigned_level = assigned_level - min_level 32 | 33 | # avoid empty assignment 34 | assigned_level = F.concat( 35 | [assigned_level, F.arange(num_fms, dtype="int32", device=assigned_level.device)], 36 | ) 37 | rois = F.concat([rois, F.zeros((num_fms, rois.shape[-1]))]) 38 | 39 | pool_list, inds_list = [], [] 40 | for i in range(num_fms): 41 | _, inds = F.cond_take(assigned_level == i, assigned_level) 42 | level_rois = rois[inds] 43 | 44 | if pooler_type == "roi_pool": 45 | pool_fm = F.nn.roi_pooling( 46 | rpn_fms[i], level_rois, pool_shape, mode="max", scale=1.0 / stride[i] 47 | ) 48 | elif pooler_type == "roi_align": 49 | pool_fm = F.nn.roi_align( 50 | rpn_fms[i], 51 | level_rois, 52 | pool_shape, 53 | mode="average", 54 | spatial_scale=1.0 / stride[i], 55 | sample_points=2, 56 | aligned=True, 57 | ) 58 | pool_list.append(pool_fm) 59 | inds_list.append(inds) 60 | 61 | fm_order = F.argsort(F.concat(inds_list, axis=0)) 62 | pool_feature = F.concat(pool_list, axis=0) 63 | pool_feature = pool_feature[fm_order][:-num_fms] 64 | 65 | return pool_feature 66 | -------------------------------------------------------------------------------- /official/nlp/bert/config_args.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | import argparse 10 | 11 | 12 | def get_args(): 13 | parser = argparse.ArgumentParser() 14 | 15 | # parameters 16 | parser.add_argument( 17 | "--data_dir", 18 | default=None, 19 | type=str, 20 | required=True, 21 | help="The input data dir. Should contain the .tsv files (or other data files)" 22 | " for the task.", 23 | ) 24 | 25 | parser.add_argument( 26 | "--pretrained_bert", required=True, type=str, help="pretrained bert name" 27 | ) 28 | 29 | parser.add_argument( 30 | "--max_seq_length", 31 | default=128, 32 | type=int, 33 | help="The maximum total input sequence length after WordPiece tokenization. 
\n" 34 | "Sequences longer than this will be truncated, and sequences shorter \n" 35 | "than this will be padded.", 36 | ) 37 | parser.add_argument( 38 | "--do_lower_case", 39 | default=False, 40 | action="store_true", 41 | help="Set this flag if you are using an uncased model.", 42 | ) 43 | 44 | parser.add_argument( 45 | "--train_batch_size", 46 | default=16, 47 | type=int, 48 | help="Total batch size for training.", 49 | ) 50 | parser.add_argument( 51 | "--learning_rate", 52 | default=5e-5, 53 | type=float, 54 | help="The initial learning rate for Adam.", 55 | ) 56 | parser.add_argument( 57 | "--num_train_epochs", 58 | default=3, 59 | type=int, 60 | help="Total number of training epochs to perform.", 61 | ) 62 | 63 | parser.add_argument( 64 | "--eval_batch_size", default=16, type=int, help="Total batch size for eval." 65 | ) 66 | parser.add_argument( 67 | "--load_model_path", 68 | default="./check_point_last.pkl", 69 | type=str, 70 | help="the initial model", 71 | ) 72 | 73 | parser.add_argument( 74 | "--save_model_path", 75 | default="./check_point_last.pkl", 76 | type=str, 77 | help="the path to save model", 78 | ) 79 | 80 | return parser.parse_args() 81 | -------------------------------------------------------------------------------- /official/nlp/bert/test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from tqdm import tqdm 10 | 11 | import megengine as mge 12 | import megengine.functional as F 13 | 14 | from official.nlp.bert.config_args import get_args 15 | from official.nlp.bert.model import BertForSequenceClassification, create_hub_bert 16 | from official.nlp.bert.mrpc_dataset import MRPCDataset 17 | 18 | args = get_args() 19 | logger = mge.get_logger(__name__) 20 | 21 | 22 | def net_eval(input_ids, segment_ids, input_mask, label_ids, net=None): 23 | net.eval() 24 | results = net(input_ids, segment_ids, input_mask, label_ids) 25 | logits, loss = results 26 | return loss, logits, label_ids 27 | 28 | 29 | def eval(dataloader, net): 30 | logger.info("***** Running evaluation *****") 31 | logger.info("batch size = %d", args.eval_batch_size) 32 | 33 | sum_loss, sum_accuracy, total_steps, total_examples = 0, 0, 0, 0 34 | 35 | for _, batch in enumerate(tqdm(dataloader, desc="Iteration")): 36 | input_ids, input_mask, segment_ids, label_ids = tuple( 37 | mge.tensor(t) for t in batch 38 | ) 39 | batch_size = input_ids.shape[0] 40 | loss, logits, label_ids = net_eval( 41 | input_ids, segment_ids, input_mask, label_ids, net=net 42 | ) 43 | sum_loss += loss.mean().item() 44 | sum_accuracy += F.topk_accuracy(logits, label_ids) * batch_size 45 | total_examples += batch_size 46 | total_steps += 1 47 | 48 | result = { 49 | "eval_loss": sum_loss / total_steps, 50 | "eval_accuracy": sum_accuracy / total_examples, 51 | } 52 | 53 | logger.info("***** Eval results *****") 54 | for key in sorted(result.keys()): 55 | logger.info("%s = %s", key, str(result[key])) 56 | 57 | 58 | if __name__ == "__main__": 59 | bert, config, vocab_file = create_hub_bert(args.pretrained_bert, pretrained=False) 60 | args.vocab_file = vocab_file 61 | model = 
BertForSequenceClassification(config, num_labels=2, bert=bert) 62 | mrpc_dataset = MRPCDataset(args) 63 | model.load_state_dict(mge.load(args.load_model_path)) 64 | mrpc_dataset = MRPCDataset(args) 65 | eval_dataloader, eval_size = mrpc_dataset.get_eval_dataloader() 66 | eval(eval_dataloader, model) 67 | -------------------------------------------------------------------------------- /official/multimodal/big_sleep/resample.py: -------------------------------------------------------------------------------- 1 | import math 2 | from functools import update_wrapper 3 | 4 | import numpy as np 5 | 6 | import megengine as mge 7 | import megengine.functional as F 8 | 9 | 10 | def sinc(x): 11 | return F.where(x != 0, F.sin(math.pi * x) / (math.pi * x), F.ones_like(x)) 12 | 13 | 14 | def lanczos(x, a): 15 | cond = F.logical_and(-a < x, x < a) 16 | out = F.where(cond, sinc(x) * sinc(x / a), F.zeros_like(x)) 17 | return out / F.sum(out) 18 | 19 | 20 | def ramp(ratio, width): 21 | n = math.ceil(width / ratio + 1) 22 | out = np.zeros(n) 23 | cur = 0 24 | for i in range(out.shape[0]): 25 | out[i] = cur 26 | cur += ratio 27 | out = np.concatenate([np.flip(-out[1:], axis=0), out])[1:-1] 28 | return mge.tensor(out, dtype='float32') 29 | 30 | 31 | def odd(fn): 32 | return update_wrapper(lambda x: F.sin(x) * fn(F.abs(x)), fn) 33 | 34 | 35 | def _to_linear_srgb(input): 36 | cond = input <= 0.04045 37 | a = input / 12.92 38 | b = ((input + 0.055) / 1.055)**2.4 39 | return F.where(cond, a, b) 40 | 41 | 42 | def _to_nonlinear_srgb(input): 43 | cond = input <= 0.0031308 44 | a = 12.92 * input 45 | b = 1.055 * input**(1 / 2.4) - 0.055 46 | return F.where(cond, a, b) 47 | 48 | 49 | to_linear_srgb = odd(_to_linear_srgb) 50 | to_nonlinear_srgb = odd(_to_nonlinear_srgb) 51 | 52 | 53 | def resample(input, size, align_corners=True, is_srgb=False): # pylint: disable=unused-argument 54 | n, c, h, w = input.shape 55 | dh, dw = size 56 | 57 | if is_srgb: 58 | input = to_linear_srgb(input) 59 | 60 | input = input.reshape(n * c, 1, h, w) 61 | 62 | if dh < h: 63 | kernel_h = lanczos( 64 | ramp(dh / h, 3), 3).to(input.device).astype(input.dtype) 65 | pad_h = (kernel_h.shape[0] - 1) // 2 66 | input = F.pad( 67 | input, [(0, 0), (0, 0), (pad_h, pad_h), (0, 0)], 'reflect') 68 | input = F.conv2d(input, kernel_h[None, None, :, None]) 69 | 70 | if dw < w: 71 | kernel_w = lanczos( 72 | ramp(dw / w, 3), 3).to(input.device).astype(input.dtype) 73 | pad_w = (kernel_w.shape[0] - 1) // 2 74 | input = F.pad(input, [(0, 0), (0, 0), (0, 0), 75 | (pad_w, pad_w)], 'reflect') 76 | input = F.conv2d(input, kernel_w[None, None, None, :]) 77 | 78 | input = input.reshape(n, c, h, w) 79 | # NOTE: can not set align_corners when specify mode with `bicubic` in megengine 80 | input = F.nn.interpolate(input, size, mode='bicubic', 81 | align_corners=None) 82 | 83 | if is_srgb: 84 | input = to_nonlinear_srgb(input) 85 | 86 | return input 87 | -------------------------------------------------------------------------------- /official/vision/gan/train_dcgan.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Kwot Sin Lee 2 | # This code is licensed under MIT license 3 | # (https://github.com/kwotsin/mimicry/blob/master/LICENSE) 4 | # ------------------------------------------------------------------------------ 5 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 6 | # 7 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # 13 | # This file has been modified by Megvii ("Megvii Modifications"). 14 | # All Megvii Modifications are Copyright (C) 2014-2019 Megvii Inc. All rights reserved. 15 | # ------------------------------------------------------------------------------ 16 | import megengine.data as data 17 | import megengine.data.transform as T 18 | import megengine.optimizer as optim 19 | 20 | import megengine_mimicry as mmc 21 | import megengine_mimicry.nets.dcgan.dcgan_cifar as dcgan 22 | 23 | dataset = mmc.datasets.load_dataset(root=None, name='cifar10') 24 | dataloader = data.DataLoader( 25 | dataset, 26 | sampler=data.Infinite(data.RandomSampler(dataset, batch_size=64, drop_last=True)), 27 | transform=T.Compose([T.Normalize(std=255), T.ToMode("CHW")]), 28 | num_workers=4 29 | ) 30 | 31 | netG = dcgan.DCGANGeneratorCIFAR() 32 | netD = dcgan.DCGANDiscriminatorCIFAR() 33 | optD = optim.Adam(netD.parameters(), 2e-4, betas=(0.0, 0.9)) 34 | optG = optim.Adam(netG.parameters(), 2e-4, betas=(0.0, 0.9)) 35 | 36 | LOG_DIR = "./log/dcgan_example" 37 | 38 | trainer = mmc.training.Trainer( 39 | netD=netD, 40 | netG=netG, 41 | optD=optD, 42 | optG=optG, 43 | n_dis=5, 44 | num_steps=100000, 45 | lr_decay="linear", 46 | dataloader=dataloader, 47 | log_dir=LOG_DIR, 48 | device=0) 49 | 50 | trainer.train() 51 | 52 | mmc.metrics.compute_metrics.evaluate( 53 | metric="fid", 54 | netG=netG, 55 | log_dir=LOG_DIR, 56 | evaluate_step=100000, 57 | num_runs=1, 58 | device=0, 59 | num_real_samples=50000, 60 | num_fake_samples=50000, 61 | dataset_name="cifar10", 62 | ) 63 | 64 | mmc.metrics.compute_metrics.evaluate( 65 | metric="inception_score", 66 | netG=netG, 67 | log_dir=LOG_DIR, 68 | evaluate_step=100000, 69 | num_runs=1, 70 | device=0, 71 | num_samples=50000, 72 | ) 73 | 74 | mmc.metrics.compute_metrics.evaluate( 75 | metric="kid", 76 | netG=netG, 77 | log_dir=LOG_DIR, 78 | evaluate_step=100000, 79 | num_runs=1, 80 | device=0, 81 | num_subsets=50, 82 | subset_size=1000, 83 | dataset_name="cifar10", 84 | ) 85 | 86 | -------------------------------------------------------------------------------- /official/vision/gan/train_wgan.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Kwot Sin Lee 2 | # This code is licensed under MIT license 3 | # (https://github.com/kwotsin/mimicry/blob/master/LICENSE) 4 | # ------------------------------------------------------------------------------ 5 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 6 | # 7 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # 13 | # This file has been modified by Megvii ("Megvii Modifications"). 14 | # All Megvii Modifications are Copyright (C) 2014-2019 Megvii Inc. All rights reserved. 
15 | # ------------------------------------------------------------------------------ 16 | import megengine.data as data 17 | import megengine.data.transform as T 18 | import megengine.optimizer as optim 19 | 20 | import megengine_mimicry as mmc 21 | import megengine_mimicry.nets.wgan.wgan_cifar as wgan 22 | 23 | dataset = mmc.datasets.load_dataset(root=None, name='cifar10') 24 | dataloader = data.DataLoader( 25 | dataset, 26 | sampler=data.Infinite(data.RandomSampler(dataset, batch_size=64, drop_last=True)), 27 | transform=T.Compose([T.Normalize(mean=127, std=127), T.ToMode("CHW")]), 28 | num_workers=4 29 | ) 30 | 31 | netG = wgan.WGANGeneratorCIFAR() 32 | netD = wgan.WGANDiscriminatorCIFAR() 33 | optD = optim.Adam(netD.parameters(), 2e-4, betas=(0.0, 0.9)) 34 | optG = optim.Adam(netG.parameters(), 2e-4, betas=(0.0, 0.9)) 35 | 36 | LOG_DIR = "./log/wgan_example" 37 | 38 | trainer = mmc.training.Trainer( 39 | netD=netD, 40 | netG=netG, 41 | optD=optD, 42 | optG=optG, 43 | n_dis=5, 44 | num_steps=100000, 45 | lr_decay="linear", 46 | dataloader=dataloader, 47 | log_dir=LOG_DIR, 48 | device=0) 49 | 50 | trainer.train() 51 | 52 | mmc.metrics.compute_metrics.evaluate( 53 | metric="fid", 54 | netG=netG, 55 | log_dir=LOG_DIR, 56 | evaluate_step=100000, 57 | num_runs=1, 58 | device=0, 59 | num_real_samples=50000, 60 | num_fake_samples=50000, 61 | dataset_name="cifar10", 62 | ) 63 | 64 | mmc.metrics.compute_metrics.evaluate( 65 | metric="inception_score", 66 | netG=netG, 67 | log_dir=LOG_DIR, 68 | evaluate_step=100000, 69 | num_runs=1, 70 | device=0, 71 | num_samples=50000, 72 | ) 73 | 74 | mmc.metrics.compute_metrics.evaluate( 75 | metric="kid", 76 | netG=netG, 77 | log_dir=LOG_DIR, 78 | evaluate_step=100000, 79 | num_runs=1, 80 | device=0, 81 | num_subsets=50, 82 | subset_size=1000, 83 | dataset_name="cifar10", 84 | ) 85 | 86 | -------------------------------------------------------------------------------- /official/vision/detection/layers/basic/nn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2019 - present, Facebook, Inc 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # --------------------------------------------------------------------- 16 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 17 | # 18 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 19 | # 20 | # Unless required by applicable law or agreed to in writing, 21 | # software distributed under the License is distributed on an 22 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 23 | # 24 | # This file has been modified by Megvii ("Megvii Modifications"). 25 | # All Megvii Modifications are Copyright (C) 2014-2021 Megvii Inc. All rights reserved. 
26 | # --------------------------------------------------------------------- 27 | from collections import namedtuple 28 | 29 | import megengine.module as M 30 | 31 | 32 | class Conv2d(M.Conv2d): 33 | """ 34 | A wrapper around :class:`megengine.module.Conv2d`. 35 | """ 36 | 37 | def __init__(self, *args, **kwargs): 38 | """ 39 | Extra keyword arguments supported in addition to 40 | `megengine.module.Conv2d`. 41 | 42 | Args: 43 | norm (M.Module, optional): a normalization layer 44 | activation (callable(Tensor) -> Tensor): a callable activation 45 | function 46 | """ 47 | norm = kwargs.pop("norm", None) 48 | activation = kwargs.pop("activation", None) 49 | super().__init__(*args, **kwargs) 50 | 51 | self.norm = norm 52 | self.activation = activation 53 | 54 | def forward(self, x): 55 | x = super().forward(x) 56 | if self.norm is not None: 57 | x = self.norm(x) 58 | if self.activation is not None: 59 | x = self.activation(x) 60 | return x 61 | 62 | 63 | class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): 64 | """ 65 | A simple structure that contains basic shape specification about a tensor. 66 | Useful for getting the modules output channels when building the graph. 67 | """ 68 | 69 | def __new__(cls, channels=None, height=None, width=None, stride=None): 70 | return super().__new__(cls, channels, height, width, stride) 71 | -------------------------------------------------------------------------------- /official/vision/classification/resnet/inference.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
9 | import argparse 10 | import json 11 | 12 | # pylint: disable=import-error 13 | import model as resnet_model 14 | 15 | import cv2 16 | import numpy as np 17 | 18 | import megengine 19 | import megengine.data.transform as T 20 | import megengine.functional as F 21 | 22 | logging = megengine.logger.get_logger() 23 | 24 | 25 | def main(): 26 | parser = argparse.ArgumentParser() 27 | parser.add_argument("-a", "--arch", default="resnet18", type=str) 28 | parser.add_argument("-m", "--model", default=None, type=str) 29 | parser.add_argument("-i", "--image", default=None, type=str) 30 | args = parser.parse_args() 31 | 32 | model = resnet_model.__dict__[args.arch](pretrained=(args.model is None)) 33 | if args.model is not None: 34 | logging.info("load from checkpoint %s", args.model) 35 | checkpoint = megengine.load(args.model) 36 | if "state_dict" in checkpoint: 37 | state_dict = checkpoint["state_dict"] 38 | model.load_state_dict(state_dict) 39 | 40 | if args.image is None: 41 | path = "../../../assets/cat.jpg" 42 | else: 43 | path = args.image 44 | image = cv2.imread(path, cv2.IMREAD_COLOR) 45 | 46 | transform = T.Compose( 47 | [ 48 | T.Resize(256), 49 | T.CenterCrop(224), 50 | T.Normalize( 51 | mean=[103.530, 116.280, 123.675], std=[57.375, 57.120, 58.395] 52 | ), # BGR 53 | T.ToMode("CHW"), 54 | ] 55 | ) 56 | 57 | def infer_func(processed_img): 58 | model.eval() 59 | logits = model(processed_img) 60 | probs = F.softmax(logits) 61 | return probs 62 | 63 | processed_img = transform.apply(image)[np.newaxis, :] 64 | processed_img = megengine.tensor(processed_img, dtype="float32") 65 | probs = infer_func(processed_img) 66 | 67 | top_probs, classes = F.topk(probs, k=5, descending=True) 68 | 69 | with open("../../../assets/imagenet_class_info.json") as fp: 70 | imagenet_class_index = json.load(fp) 71 | 72 | for rank, (prob, classid) in enumerate( 73 | zip(top_probs.numpy().reshape(-1), classes.numpy().reshape(-1)) 74 | ): 75 | print( 76 | "{}: class = {:20s} with probability = {:4.1f} %".format( 77 | rank, imagenet_class_index[str(classid)][1], 100 * prob 78 | ) 79 | ) 80 | 81 | 82 | if __name__ == "__main__": 83 | main() 84 | -------------------------------------------------------------------------------- /official/vision/classification/shufflenet/inference.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
9 | import argparse 10 | import json 11 | 12 | # pylint: disable=import-error 13 | import model as snet_model 14 | 15 | import cv2 16 | import numpy as np 17 | 18 | import megengine 19 | import megengine.data.transform as T 20 | import megengine.functional as F 21 | 22 | logging = megengine.logger.get_logger() 23 | 24 | 25 | def main(): 26 | parser = argparse.ArgumentParser() 27 | parser.add_argument("-a", "--arch", default="shufflenet_v2_x1_0", type=str) 28 | parser.add_argument("-m", "--model", default=None, type=str) 29 | parser.add_argument("-i", "--image", default=None, type=str) 30 | args = parser.parse_args() 31 | 32 | model = snet_model.__dict__[args.arch](pretrained=(args.model is None)) 33 | if args.model is not None: 34 | logging.info("load from checkpoint %s", args.model) 35 | checkpoint = megengine.load(args.model) 36 | if "state_dict" in checkpoint: 37 | state_dict = checkpoint["state_dict"] 38 | model.load_state_dict(state_dict) 39 | 40 | if args.image is None: 41 | path = "../../../assets/cat.jpg" 42 | else: 43 | path = args.image 44 | image = cv2.imread(path, cv2.IMREAD_COLOR) 45 | 46 | transform = T.Compose( 47 | [ 48 | T.Resize(256), 49 | T.CenterCrop(224), 50 | T.Normalize( 51 | mean=[103.530, 116.280, 123.675], std=[57.375, 57.120, 58.395] 52 | ), # BGR 53 | T.ToMode("CHW"), 54 | ] 55 | ) 56 | 57 | def infer_func(processed_img): 58 | model.eval() 59 | logits = model(processed_img) 60 | probs = F.softmax(logits) 61 | return probs 62 | 63 | processed_img = transform.apply(image)[np.newaxis, :] 64 | processed_img = megengine.tensor(processed_img, dtype="float32") 65 | probs = infer_func(processed_img) 66 | 67 | top_probs, classes = F.topk(probs, k=5, descending=True) 68 | 69 | with open("../../../assets/imagenet_class_info.json") as fp: 70 | imagenet_class_index = json.load(fp) 71 | 72 | for rank, (prob, classid) in enumerate( 73 | zip(top_probs.numpy().reshape(-1), classes.numpy().reshape(-1)) 74 | ): 75 | print( 76 | "{}: class = {:20s} with probability = {:4.1f} %".format( 77 | rank, imagenet_class_index[str(classid)][1], 100 * prob 78 | ) 79 | ) 80 | 81 | 82 | if __name__ == "__main__": 83 | main() 84 | -------------------------------------------------------------------------------- /official/vision/gan/megengine_mimicry/datasets/data_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Kwot Sin Lee 2 | # This code is licensed under MIT license 3 | # (https://github.com/kwotsin/mimicry/blob/master/LICENSE) 4 | # ------------------------------------------------------------------------------ 5 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 6 | # 7 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # 13 | # This file has been modified by Megvii ("Megvii Modifications"). 14 | # All Megvii Modifications are Copyright (C) 2014-2019 Megvii Inc. All rights reserved. 15 | # ------------------------------------------------------------------------------ 16 | """ 17 | Script for loading datasets. 18 | """ 19 | import os 20 | 21 | import megengine.data as data 22 | import megengine.data.transform as T 23 | 24 | 25 | def load_dataset(root, name, **kwargs): 26 | """ 27 | Loads different datasets specifically for GAN training. 
28 | By default, all images are normalized to values in the range [-1, 1]. 29 | 30 | Args: 31 | root (str): Path to where datasets are stored. 32 | name (str): Name of dataset to load. 33 | 34 | Returns: 35 | Dataset: MegEngine Dataset object for a specific dataset. 36 | """ 37 | if name == "cifar10": 38 | return load_cifar10_dataset(root, **kwargs) 39 | 40 | else: 41 | raise ValueError("Invalid dataset name {} selected.".format(name)) 42 | 43 | 44 | def load_cifar10_dataset(root=None, 45 | split='train', 46 | download=True, 47 | **kwargs): 48 | """ 49 | Loads the CIFAR-10 dataset. 50 | 51 | Args: 52 | root (str): Path to where datasets are stored. 53 | split (str): The split of data to use. 54 | download (bool): If True, downloads the dataset. 55 | 56 | Returns: 57 | Dataset: MegEngine Dataset object. 58 | """ 59 | dataset_dir = root 60 | if dataset_dir and not os.path.exists(dataset_dir): 61 | os.makedirs(dataset_dir) 62 | 63 | # Build datasets 64 | if split == "train": 65 | dataset = data.dataset.CIFAR10(root=dataset_dir, 66 | train=True, 67 | download=download, 68 | **kwargs) 69 | elif split == "test": 70 | dataset = data.dataset.CIFAR10(root=dataset_dir, 71 | train=False, 72 | download=download, 73 | **kwargs) 74 | else: 75 | raise ValueError("split argument must be one of ['train', 'test']") 76 | 77 | return dataset 78 | -------------------------------------------------------------------------------- /official/vision/detection/layers/det/box_head.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | import math 10 | from typing import List 11 | 12 | import megengine.module as M 13 | from megengine import Tensor 14 | 15 | from official.vision.detection import layers 16 | 17 | 18 | class BoxHead(M.Module): 19 | """ 20 | The head used when anchor boxes are adopted for object classification and box regression. 
21 | """ 22 | 23 | def __init__(self, cfg, input_shape: List[layers.ShapeSpec]): 24 | super().__init__() 25 | 26 | in_channels = input_shape[0].channels 27 | num_classes = cfg.num_classes 28 | num_convs = 4 29 | prior_prob = cfg.cls_prior_prob 30 | num_anchors = [ 31 | len(cfg.anchor_scales[i]) * len(cfg.anchor_ratios[i]) 32 | for i in range(len(input_shape)) 33 | ] 34 | 35 | assert ( 36 | len(set(num_anchors)) == 1 37 | ), "not support different number of anchors between levels" 38 | num_anchors = num_anchors[0] 39 | 40 | cls_subnet = [] 41 | bbox_subnet = [] 42 | for _ in range(num_convs): 43 | cls_subnet.append( 44 | M.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1) 45 | ) 46 | cls_subnet.append(M.ReLU()) 47 | bbox_subnet.append( 48 | M.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1) 49 | ) 50 | bbox_subnet.append(M.ReLU()) 51 | 52 | self.cls_subnet = M.Sequential(*cls_subnet) 53 | self.bbox_subnet = M.Sequential(*bbox_subnet) 54 | self.cls_score = M.Conv2d( 55 | in_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1 56 | ) 57 | self.bbox_pred = M.Conv2d( 58 | in_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1 59 | ) 60 | 61 | # Initialization 62 | for modules in [ 63 | self.cls_subnet, self.bbox_subnet, self.cls_score, self.bbox_pred 64 | ]: 65 | for layer in modules.modules(): 66 | if isinstance(layer, M.Conv2d): 67 | M.init.normal_(layer.weight, mean=0, std=0.01) 68 | M.init.fill_(layer.bias, 0) 69 | 70 | # Use prior in model initialization to improve stability 71 | bias_value = -math.log((1 - prior_prob) / prior_prob) 72 | M.init.fill_(self.cls_score.bias, bias_value) 73 | 74 | def forward(self, features: List[Tensor]): 75 | logits, offsets = [], [] 76 | for feature in features: 77 | logits.append(self.cls_score(self.cls_subnet(feature))) 78 | offsets.append(self.bbox_pred(self.bbox_subnet(feature))) 79 | return logits, offsets 80 | -------------------------------------------------------------------------------- /official/vision/keypoints/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
9 | 10 | 11 | class Config: 12 | # model 13 | model_choices = [ 14 | "simplebaseline_res50", 15 | "simplebaseline_res101", 16 | "simplebaseline_res152", 17 | ] 18 | 19 | # train 20 | initial_lr = 3e-4 21 | lr_ratio = 0.1 22 | 23 | batch_size = 32 24 | epochs = 200 25 | warm_epochs = 0 26 | weight_decay = 0 27 | 28 | report_freq = 10 29 | save_freq = 1 30 | 31 | # data 32 | # path 33 | data_root = "/data/coco_data/" 34 | 35 | # normalize 36 | img_mean = [103.530, 116.280, 123.675] 37 | img_std = [57.375, 57.120, 58.395] 38 | 39 | # shape 40 | input_shape = (256, 192) 41 | output_shape = (64, 48) 42 | 43 | # heat maps 44 | keypoint_num = 17 45 | heat_kernels = [k * 4 for k in [2.6, 2.0, 1.7, 1.4]] 46 | heat_thr = 1e-2 47 | heat_range = 255 48 | 49 | # augmentation 50 | half_body_transform = True 51 | extend_boxes = True 52 | 53 | # extend 54 | x_ext = 0.6 55 | y_ext = 0.6 56 | 57 | # half body 58 | num_keypoints_half_body = 3 59 | prob_half_body = 0.3 60 | upper_body_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] 61 | lower_body_ids = [11, 12, 13, 14, 15, 16] 62 | 63 | keypoint_flip_order = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] 64 | 65 | # scale 66 | scale_prob = 1 67 | scale_range = 0.3 68 | 69 | # rorate 70 | rotation_prob = 0.6 71 | rotate_range = 40 72 | 73 | # test settings 74 | test_aug_border = 10 75 | test_x_ext = 0.10 76 | test_y_ext = 0.10 77 | test_gaussian_kernel = 17 78 | second_value_aug = True 79 | 80 | # inference settings 81 | nms_thr = 0.7 82 | vis_colors = [ 83 | [255, 0, 0], 84 | [255, 85, 0], 85 | [255, 170, 0], 86 | [255, 255, 0], 87 | [170, 255, 0], 88 | [85, 255, 0], 89 | [0, 255, 0], 90 | [0, 255, 85], 91 | [0, 255, 170], 92 | [0, 255, 255], 93 | [0, 170, 255], 94 | [0, 85, 255], 95 | [0, 0, 255], 96 | [85, 0, 255], 97 | [170, 0, 255], 98 | [255, 0, 255], 99 | [255, 0, 170], 100 | [255, 0, 85], 101 | [255, 85, 85], 102 | [255, 170, 85], 103 | [255, 170, 170], 104 | ] 105 | 106 | vis_skeletons = [ 107 | [0, 1], 108 | [0, 2], 109 | [1, 3], 110 | [2, 4], 111 | [3, 5], 112 | [4, 6], 113 | [5, 6], 114 | [5, 7], 115 | [7, 9], 116 | [6, 8], 117 | [8, 10], 118 | [5, 11], 119 | [6, 12], 120 | [11, 12], 121 | [11, 13], 122 | [13, 15], 123 | [12, 14], 124 | [14, 16], 125 | ] 126 | -------------------------------------------------------------------------------- /official/vision/segmentation/tools/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
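# NOTE: `InferenceSampler` below splits the dataset into contiguous,
# non-overlapping index ranges across ranks and iterates them in order,
# so each sample is evaluated exactly once during distributed testing.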
9 | import functools 10 | import importlib 11 | import math 12 | from tabulate import tabulate 13 | 14 | import numpy as np 15 | 16 | from megengine.data import MapSampler 17 | 18 | 19 | class AverageMeter: 20 | """Computes and stores the average and current value""" 21 | 22 | def __init__(self, record_len=1): 23 | self.record_len = record_len 24 | self.reset() 25 | 26 | def reset(self): 27 | self.sum = [0 for i in range(self.record_len)] 28 | self.cnt = 0 29 | 30 | def update(self, val): 31 | self.sum = [s + v for s, v in zip(self.sum, val)] 32 | self.cnt += 1 33 | 34 | def average(self): 35 | return [s / self.cnt for s in self.sum] 36 | 37 | 38 | def import_from_file(cfg_file): 39 | spec = importlib.util.spec_from_file_location("config", cfg_file) 40 | cfg_module = importlib.util.module_from_spec(spec) 41 | spec.loader.exec_module(cfg_module) 42 | return cfg_module 43 | 44 | 45 | def get_config_info(config): 46 | config_table = [] 47 | for c, v in config.__dict__.items(): 48 | if not isinstance(v, (int, float, str, list, tuple, dict, np.ndarray)): 49 | if hasattr(v, "__name__"): 50 | v = v.__name__ 51 | elif hasattr(v, "__class__"): 52 | v = v.__class__ 53 | elif isinstance(v, functools.partial): 54 | v = v.func.__name__ 55 | config_table.append((str(c), str(v))) 56 | config_table = tabulate(config_table) 57 | return config_table 58 | 59 | 60 | class InferenceSampler(MapSampler): 61 | def __init__(self, dataset, batch_size=1, world_size=None, rank=None): 62 | super().__init__(dataset, batch_size, False, None, world_size, rank) 63 | begin = self.num_samples * self.rank 64 | end = min(self.num_samples * (self.rank + 1), len(self.dataset)) 65 | self.indices = list(range(begin, end)) 66 | 67 | def sample(self): 68 | pass 69 | 70 | def batch(self): 71 | step, length = self.batch_size, len(self.indices) 72 | batch_index = [self.indices[i: i + step] for i in range(0, length, step)] 73 | return iter(batch_index) 74 | 75 | def __len__(self): 76 | return int(math.ceil(len(self.indices) / self.batch_size)) 77 | 78 | 79 | # pre-defined colors for at most 20 categories 80 | class_colors = [ 81 | [0, 0, 0], # background 82 | [0, 0, 128], 83 | [0, 128, 0], 84 | [0, 128, 128], 85 | [128, 0, 0], 86 | [128, 0, 128], 87 | [128, 128, 0], 88 | [128, 128, 128], 89 | [0, 0, 64], 90 | [0, 0, 192], 91 | [0, 128, 64], 92 | [0, 128, 192], 93 | [128, 0, 64], 94 | [128, 0, 192], 95 | [128, 128, 64], 96 | [128, 128, 192], 97 | [0, 64, 0], 98 | [0, 64, 128], 99 | [0, 192, 0], 100 | [0, 192, 128], 101 | [128, 64, 0], 102 | ] 103 | -------------------------------------------------------------------------------- /official/quantization/README.md: -------------------------------------------------------------------------------- 1 | 模型量化 Model Quantization 2 | --- 3 | 4 | 本目录包含了采用MegEngine实现的量化训练和部署的代码,包括常用的ResNet、ShuffleNet和MobileNet,其量化模型的ImageNet Top 1 准确率如下: 5 | 6 | | Model | top1 acc (float32) | FPS* (float32) | top1 acc (int8) | FPS* (int8) | 7 | | --- | --- | --- | --- | --- | 8 | | ResNet18 | 69.796 | 10.5 | 69.814 | 16.3 | 9 | | ShufflenetV1 (1.5x) | 71.948 | 17.3 | 70.806 | 25.3 | 10 | | MobilenetV2 | 72.808 | 13.1 | 71.228 | 17.4 | 11 | 12 | **: FPS is measured on Intel(R) Xeon(R) Gold 6130 CPU @ 2.10GHz, single 224x224 image* 13 | 14 | *We finetune mobile models with QAT for 30 epochs, training longer may yield better accuracy* 15 | 16 | 量化模型使用时,统一读取0-255的uint8图片,减去128的均值,转化为int8,输入网络。 17 | 18 | 19 | #### (Optional) Download Pretrained Models 20 | ``` 21 | wget 
https://data.megengine.org.cn/models/weights/mobilenet_v2_normal_72808.pkl 22 | wget https://data.megengine.org.cn/models/weights/mobilenet_v2_qat_71228.pkl 23 | wget https://data.megengine.org.cn/models/weights/resnet18_normal_69796.pkl 24 | wget https://data.megengine.org.cn/models/weights/resnet18_qat_69814.pkl 25 | wget https://data.megengine.org.cn/models/weights/shufflenet_v1_x1_5_g3_normal_71948.pkl 26 | wget https://data.megengine.org.cn/models/weights/shufflenet_v1_x1_5_g3_qat_70806.pkl 27 | ``` 28 | 29 | ## Quantization Aware Training (QAT) 30 | 31 | ```python 32 | import megengine.quantization as Q 33 | 34 | model = ... 35 | 36 | # Quantization Aware Training 37 | Q.quantize_qat(model, qconfig=Q.ema_fakequant_qconfig) 38 | 39 | for _ in range(...): 40 | train(model) 41 | ``` 42 | 43 | ## Deploying Quantized Model 44 | 45 | ```python 46 | import megengine.quantization as Q 47 | from megengine.jit import trace 48 | 49 | model = ... 50 | 51 | Q.quantize_qat(model, qconfig=Q.ema_fakequant_qconfig) 52 | 53 | # real quant 54 | Q.quantize(model) 55 | 56 | @trace(symbolic=True, capture_as_const=True) 57 | def inference_func(x): 58 | return model(x) 59 | 60 | inference_func(x) 61 | inference_func.dump(...) 62 | ``` 63 | 64 | # HOWTO use this codebase 65 | 66 | ## Step 1. Train a fp32 model 67 | 68 | ``` 69 | python3 train.py -a resnet18 -d /path/to/imagenet --mode normal 70 | ``` 71 | 72 | ## Step 2. Finetune fp32 model with quantization aware training (QAT) 73 | 74 | ``` 75 | python3 finetune.py -a resnet18 -d /path/to/imagenet --checkpoint /path/to/resnet18.normal/checkpoint.pkl --mode qat 76 | ``` 77 | 78 | ## Step 2 (alternative). Calibration 79 | ``` 80 | python3 calibration.py -a resnet18 -d /path/to/imagenet --checkpoint /path/to/resnet18.normal/checkpoint.pkl 81 | ``` 82 | 83 | ## Step 3. Test QAT model on the ImageNet test set 84 | 85 | ``` 86 | python3 test.py -a resnet18 -d /path/to/imagenet --checkpoint /path/to/resnet18.qat/checkpoint.pkl --mode qat 87 | ``` 88 | 89 | or testing in quantized mode 90 | 91 | ``` 92 | python3 test.py -a resnet18 -d /path/to/imagenet --checkpoint /path/to/resnet18.qat/checkpoint.pkl --mode quantized -n 1 93 | ``` 94 | 95 | ## Step 4. Inference and dump 96 | 97 | ``` 98 | python3 inference.py -a resnet18 --checkpoint /path/to/resnet18.qat/checkpoint.pkl --mode quantized --dump 99 | ``` 100 | 101 | will feed a cat image to the network and output the classification probabilities with the quantized network. 102 | 103 | Also, setting `--dump` will dump the quantized network to a `resnet18.quantized.megengine` binary file. 104 | 105 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to a positive environment for our community include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior include: 18 | 19 | * The use of sexualized language or imagery, and sexual attention or advances of any kind 20 | * Trolling, insulting or derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others’ private information, such as a physical or email address, without their explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | All MegEngine forums and spaces are meant for professional interactions, and any behavior which could reasonably be considered inappropriate in a professional setting is unacceptable. 26 | 27 | ## Our Responsibilities 28 | 29 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 30 | 31 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 32 | 33 | ## Scope 34 | 35 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 36 | 37 | 38 | ## Enforcement 39 | 40 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at megengine@megvii.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 41 | 42 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 43 | 44 | ## Attribution 45 | 46 | This Code of Conduct is updated from the Contributor Covenant, version 2.0, available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 
--------------------------------------------------------------------------------
/official/vision/gan/megengine_mimicry/training/metric_log.py:
--------------------------------------------------------------------------------
# Copyright (c) 2020 Kwot Sin Lee
# This code is licensed under MIT license
# (https://github.com/kwotsin/mimicry/blob/master/LICENSE)
# ------------------------------------------------------------------------------
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#
# This file has been modified by Megvii ("Megvii Modifications").
# All Megvii Modifications are Copyright (C) 2014-2019 Megvii Inc. All rights reserved.
# ------------------------------------------------------------------------------
"""
MetricLog object for intelligently logging data so it can be displayed more intuitively.
"""


class MetricLog:
    """
    A dictionary-like object that logs data, with an extra dict mapping each metric
    to its group name (if any) and the precision to use when printing its value.

    Attributes:
        metrics_dict (dict): A dictionary mapping each metric name to another dict
            containing the corresponding value, precision, and the group the metric
            belongs to.
    """
    def __init__(self, **kwargs):
        self.metrics_dict = {}

    def add_metric(self, name, value, group=None, precision=4):
        """
        Logs a metric to the internal dict, with an additional option
        of grouping certain metrics together.

        Args:
            name (str): Name of the metric to log.
            value (Tensor/Float): Value of the metric to log.
            group (str): Name of the group used to classify different metrics together.
            precision (int): Number of decimal places used to represent the value.

        Returns:
            None
        """
        # Grab the scalar value from tensors; plain floats pass through unchanged.
        try:
            value = value.item()
        except AttributeError:
            pass

        self.metrics_dict[name] = dict(value=value,
                                       group=group,
                                       precision=precision)

    def __getitem__(self, key):
        return round(self.metrics_dict[key]['value'],
                     self.metrics_dict[key]['precision'])

    def get_group_name(self, name):
        """
        Obtains the group name of a particular metric. For example, errD and errG,
        which represent the discriminator/generator losses, could fall under a
        group named "loss".

        Args:
            name (str): Name of the metric whose group name is retrieved.

        Returns:
            str: A string representing the group name of the metric.
        """
        return self.metrics_dict[name]['group']

    def keys(self):
        """
        Dict-like functionality for retrieving keys.
        """
        return self.metrics_dict.keys()

    def items(self):
        """
        Dict-like functionality for retrieving items.
        """
        return self.metrics_dict.items()
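# --- Illustrative usage (editor's sketch; not part of the original file) ---
# log = MetricLog()
# log.add_metric("errD", 0.1234567, group="loss")               # default precision: 4
# log.add_metric("errG", 0.7654321, group="loss", precision=2)
# log["errD"]                 # -> 0.1235 (rounded on access)
# log.get_group_name("errG")  # -> "loss"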
--------------------------------------------------------------------------------
/official/multimodal/taming_transformer/functional.py:
--------------------------------------------------------------------------------
import random
from typing import Sequence

import megengine as mge
import megengine.functional as F
from megengine import Tensor


def sample_exponential(size: Sequence[int], lambd: float = 1., eps: float = 1e-10):
    """
    Generate random numbers from an exponential distribution.
    """
    random_tensor = mge.random.uniform(0, 1, size=size)
    return -(1 / lambd) * F.log(random_tensor + eps)


def gumbel_softmax(
    logits: Tensor,
    tau: float = 1.,
    hard: bool = False,
    eps: float = 1e-10,
    axis: int = -1,
) -> Tensor:
    r"""
    Generate Gumbel noise, G_i = -log(-log(U_i)), U_i \in U(0, 1).
    See https://arxiv.org/pdf/1611.00712.pdf for details.
    """
    gumbel_noise = -F.log(sample_exponential(logits.shape, eps=eps) + eps)

    gumbels = (logits + gumbel_noise) / tau
    y_soft = F.softmax(gumbels, axis=axis)

    if hard:
        # Straight-through estimator: hard one-hot in the forward pass,
        # soft gradients in the backward pass.
        index = F.argmax(y_soft, axis=axis, keepdims=True)
        y_hard = F.scatter(F.zeros_like(logits), axis=axis,
                           index=index, source=F.ones(index.shape, dtype='float32'))
        ret = y_hard - y_soft.detach() + y_soft
    else:
        ret = y_soft
    return ret


def top_k_top_p_filtering(
    logits: Tensor,
    top_k: int = 0,
    top_p: float = 1.0,
    filter_value: float = -float("Inf"),
    min_tokens_to_keep: int = 1,
) -> Tensor:
    """
    Taken and adapted from huggingface/transformers.
    """
    if top_k > 0:
        # Remove all tokens whose logit is below the k-th largest one.
        top_k = min(max(top_k, min_tokens_to_keep), logits.shape[-1])
        filter_indices = logits < F.topk(logits, top_k, descending=True)[0][..., -1, None]
        logits[filter_indices] = filter_value

    if 0.0 <= top_p <= 1.0:
        # Keep the smallest set of tokens whose cumulative probability
        # exceeds top_p (logits are sorted in ascending order here).
        sorted_logits, sorted_indices = F.sort(logits, descending=False)

        cumulative_probs = F.cumsum(F.softmax(sorted_logits, axis=-1), axis=-1)
        sorted_indices_to_filter = cumulative_probs <= 1 - top_p

        if min_tokens_to_keep > 1:
            # Never filter the `min_tokens_to_keep` highest-probability tokens.
            sorted_indices_to_filter[..., -min_tokens_to_keep:] = 0

        filter_indices = F.scatter(
            sorted_indices_to_filter, axis=1, index=sorted_indices, source=sorted_indices_to_filter)

        logits[filter_indices] = filter_value

    return logits
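# --- Illustrative usage (editor's sketch; not part of the original file) ---
# First keep at most the 5 most likely tokens per row (top-k), then drop tokens
# outside the top 90% of probability mass (top-p), and finally draw one token id
# per row with `multinomial` (defined below):
#
#   logits = mge.random.normal(size=(2, 100))
#   filtered = top_k_top_p_filtering(logits, top_k=5, top_p=0.9)
#   probs = F.softmax(filtered, axis=-1)
#   token_ids = multinomial(probs, num_samples=1)  # int32 tensor of shape (2, 1)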
def multinomial(x, num_samples, replacement=None):
    """
    Multinomial sampling, implemented in pure Python.
    """
    if x.ndim != 2:
        raise ValueError(f"expected input to have 2 dimensions, but got {x.ndim}")
    if replacement is not None:
        raise ValueError("`replacement` is currently not supported")
    _, num_col = x.shape
    x = F.cumsum(x, axis=1)
    choices = []
    for t in x:
        t = t.numpy()
        ch = []
        for _ in range(num_samples):
            prob = random.random()
            # Fall back to the last column in case rounding puts `prob`
            # above every cumulative value.
            idx = num_col - 1
            for col in range(num_col):
                if t[col] > prob:
                    idx = col
                    break
            ch.append(idx)
        choices.append(ch)
    return mge.tensor(choices, dtype='int32')
--------------------------------------------------------------------------------
/official/vision/segmentation/README.md:
--------------------------------------------------------------------------------
# MegEngine Semantic Segmentation Models

## Introduction

This directory contains the classic [DeepLabV3+](https://arxiv.org/abs/1802.02611.pdf) architecture implemented in MegEngine, together with complete training and testing code for the Pascal VOC2012 and Cityscapes datasets.

Performance on the Pascal VOC2012 validation set:

| Model | mIoU |
| --- | :--: |
| deeplabv3plus-res101-voc-512size | 79.5 |

Performance on the Cityscapes validation set:

| Model | mIoU |
| --- | :--: |
| deeplabv3plus-res101-cityscapes-768size | 78.5 |

## Installation and Environment Setup

The code in this directory is based on MegEngine v1.2. Before running it, please make sure your environment is configured correctly as described in the [README](../../../README.md).

## How to Use

Taking DeepLabV3+ as an example, once the model is trained you can test a single image with the following command:

```bash
python3 tools/inference.py -f configs/deeplabv3plus_res101_voc_512size.py \
    -w /path/to/model_weights.pkl \
    -i ../../assets/cat.jpg
```

Command-line options of `tools/inference.py`:

- `-f`, config file describing the network to test.
- `-w`, model weights to test.
- `-i`, sample image to test.

The result of testing the default image with the default model is shown below:

![demo image](../../assets/cat_seg_out.jpg)

## How to Train

Taking DeepLabV3+ trained on Pascal VOC2012 as an example:

1. Before training, download the [Pascal VOC2012 dataset](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/#data) and extract it to a suitable directory. To reproduce the same training setup, you also need to download [SegmentationClassAug](https://www.dropbox.com/s/oeu149j8qtbs1x0/SegmentationClassAug.zip?dl=0&file_subpath=%2FSegmentationClassAug); see this [walkthrough](https://www.sun11.me/blog/2018/how-to-use-10582-trainaug-images-on-DeeplabV3-code/) for details.

   The prepared VOC data directory should look like this:

   ```
   /path/to/
   |->VOC2012
   | |Annotations
   | |ImageSets
   | |JPEGImages
   | |SegmentationClass
   | |SegmentationClass_aug
   ```

   where ImageSets/Segmentation contains [trainaug.txt](https://gist.githubusercontent.com/sun11/2dbda6b31acc7c6292d14a872d0c90b7/raw/5f5a5270089239ef2f6b65b1cc55208355b5acca/trainaug.txt).

   Note: the data formats in SegmentationClass_aug and SegmentationClass differ.

2. Prepare the pretrained `backbone` weights: you can use megengine.hub to download the official `megengine` model trained on ImageNet and store it at `/path/to/pretrain.pkl`, e.g. with the sketch below.
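   A minimal sketch of this step (illustrative only; it assumes the hub entry name `resnet101` matches the backbone used by the config, so adjust accordingly):

   ```python
   import megengine
   import megengine.hub as hub

   # Download the official ImageNet-pretrained backbone and save it locally
   backbone = hub.load("megengine/models", "resnet101", pretrained=True)
   megengine.save(backbone.state_dict(), "/path/to/pretrain.pkl")
   ```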
3. Start training:

   ```bash
   python3 tools/train.py -f configs/deeplabv3plus_res101_voc_512size.py -n 8 \
       -d /path/to/VOC2012
   ```

Command-line options of `tools/train.py`:

- `-f`, config file describing the network to train.
- `-n`, number of devices (GPUs) used for training.
- `-w`, pretrained backbone weights.
- `-d`, parent directory of the dataset, defaults to `/data/datasets`.
- `-r`, resume training from a previously saved model, defaults to `None`.

By default, the model is saved under the `log-of-<model name>` directory.

## How to Test

Taking DeepLabV3+ on Pascal VOC2012 as an example:

After training finishes, you can evaluate the saved model on the validation set with `tools/test.py`:

```bash
python3 tools/test.py -f configs/deeplabv3plus_res101_voc_512size.py -n 8 \
    -w /path/to/model_weights.pkl \
    -d /path/to/VOC2012
```

Command-line options of `tools/test.py`:

- `-f`, config file describing the network to test.
- `-n`, number of devices (GPUs) used for testing.
- `-w`, model weights to test.
- `-d`, parent directory of the dataset, defaults to `/data/datasets`.

## References

- [Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1802.02611) Liang-Chieh Chen, Yukun Zhu, George Papandreou, Florian Schroff, and Hartwig Adam. European Conference on Computer Vision (ECCV), 2018.
--------------------------------------------------------------------------------
/official/multimodal/dalle/vae/openaidvae/encoder.py:
--------------------------------------------------------------------------------
from collections import OrderedDict
from functools import partial

import megengine.module as M
from megengine import hub


class EncoderBlock(M.Module):
    def __init__(self, n_in, n_out, layers):
        super(EncoderBlock, self).__init__()
        n_hid = n_out // 4
        self.pre_gain = 1 / (layers ** 2)
        self.id_path = M.Conv2d(n_in, n_out, 1) if n_in != n_out else M.Identity()
        self.res_path = M.Sequential(OrderedDict([
            ("relu1", M.ReLU()),
            ('conv_1', M.Conv2d(n_in, n_hid, 3, padding=1)),
            ("relu2", M.ReLU()),
            ('conv_2', M.Conv2d(n_hid, n_hid, 3, padding=1)),
            ("relu3", M.ReLU()),
            ('conv_3', M.Conv2d(n_hid, n_hid, 3, padding=1)),
            ("relu4", M.ReLU()),
            ('conv_4', M.Conv2d(n_hid, n_out, 1)),
        ]))

    def forward(self, x):
        return self.id_path(x) + self.pre_gain * self.res_path(x)


class Encoder(M.Module):
    def __init__(self, input_channel=3, n_hid=256, n_blk_per_group=2, vocab_size=8192):
        super(Encoder, self).__init__()
        group_count = 4
        n_layers = group_count * n_blk_per_group
        blk_range = range(n_blk_per_group)
        make_blk = partial(EncoderBlock, layers=n_layers)
        self.input_channel = input_channel
        self.vocab_size = vocab_size
        self.blocks = M.Sequential(OrderedDict([
            ('input', M.Conv2d(input_channel, n_hid, 7, padding=3)),
            ('group_1', M.Sequential(OrderedDict([
                *[(f'block_{i + 1}', make_blk(n_hid, n_hid)) for i in blk_range],
                ('pool', M.MaxPool2d(kernel_size=2, stride=2)),
            ]))),
            ('group_2', M.Sequential(OrderedDict([
                *[(f'block_{i + 1}', make_blk(n_hid if i == 0 else 2 * n_hid, 2 * n_hid))
                  for i in blk_range],
                ('pool', M.MaxPool2d(kernel_size=2, stride=2)),
            ]))),
            ('group_3', M.Sequential(OrderedDict([
                *[(f'block_{i + 1}', make_blk(2 * n_hid if i == 0 else 4 * n_hid, 4 * n_hid))
                  for i in blk_range],
                ('pool', M.MaxPool2d(kernel_size=2, stride=2)),
            ]))),
            ('group_4', M.Sequential(OrderedDict([
                *[(f'block_{i + 1}', make_blk(4 * n_hid if i == 0 else 8 * n_hid, 8 * n_hid))
                  for i in blk_range],
            ]))),
            ('output', M.Sequential(OrderedDict([
                ('relu', M.ReLU()),
                ('conv', M.Conv2d(8 * n_hid, self.vocab_size, 1)),
            ]))),
        ]))

    def forward(self, x):
        if x.ndim != 4:
            raise ValueError("Input must be a 4D tensor")
        if x.shape[1] != self.input_channel:
            raise ValueError(f"Input channel must be {self.input_channel}")
        return self.blocks(x)


@hub.pretrained(
    "https://data.megengine.org.cn/research/multimodality/dalle_openai_dvae_encoder.pkl"
)
def openai_discrete_VAE_encoder(**kwargs):
    return Encoder(**kwargs)
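# --- Illustrative usage (editor's sketch; not part of the original file) ---
# The encoder maps an RGB image to per-position logits over the dVAE vocabulary;
# argmax over the channel axis yields the discrete image tokens. Loading weights
# through the `pretrained` flag is assumed to follow megengine.hub conventions.
#
#   import megengine.functional as F
#   enc = openai_discrete_VAE_encoder(pretrained=True)
#   x = ...  # tensor of shape (1, 3, 256, 256), values in [0, 1]
#   tokens = F.argmax(enc(x), axis=1)  # -> (1, 32, 32): three stride-2 pools on 256x256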
--------------------------------------------------------------------------------
/hubconf.py:
--------------------------------------------------------------------------------
from official.multimodal.big_sleep import BigGAN, Imagine, biggan_128, biggan_256, biggan_512
from official.multimodal.clip.inference_utils import ClipInferenceUtils
from official.multimodal.clip.models import (
    rn50,
    rn50x4,
    rn50x16,
    rn50x64,
    rn101,
    vit_b_16,
    vit_b_32,
    vit_l_14,
    vit_l_14_336px,
)
from official.multimodal.dalle import (
    Generator,
    OpenAIDiscreteVAE,
    OpenAIDiscreteVAEDecoder,
    OpenAIDiscreteVAEEncoder,
    VQGanVAE,
    coco_512_16_16d_16h_80tsl,
    openai_discrete_VAE_decoder,
    openai_discrete_VAE_encoder,
    vqgan_vae_1024,
)
from official.multimodal.taming_transformer import (
    ConditionalSampler,
    FastSampler,
    Reconstruction,
    celebahq_transformer,
    drin_transformer,
    s_flckr_transformer,
    vqgan_gumbel_f8,
    vqgan_imagenet_f16_1024,
    vqgan_imagenet_f16_16384,
)
from official.nlp.bert.model import (
    cased_L_12_H_768_A_12,
    cased_L_24_H_1024_A_16,
    chinese_L_12_H_768_A_12,
    multi_cased_L_12_H_768_A_12,
    uncased_L_12_H_768_A_12,
    uncased_L_24_H_1024_A_16,
    wwm_cased_L_24_H_1024_A_16,
    wwm_uncased_L_24_H_1024_A_16,
)
from official.quantization.models import quantized_resnet18
from official.vision.classification.resnet.model import (
    BasicBlock,
    Bottleneck,
    ResNet,
    resnet18,
    resnet34,
    resnet50,
    resnet101,
    resnet152,
    resnext50_32x4d,
    resnext101_32x8d,
)
from official.vision.classification.shufflenet.model import (
    shufflenet_v2_x0_5,
    shufflenet_v2_x1_0,
    shufflenet_v2_x1_5,
    shufflenet_v2_x2_0,
)
from official.vision.detection.configs import (
    atss_res18_coco_3x_800size,
    atss_res34_coco_3x_800size,
    atss_res50_coco_3x_800size,
    atss_res101_coco_3x_800size,
    atss_resx101_coco_2x_800size,
    faster_rcnn_res18_coco_3x_800size,
    faster_rcnn_res34_coco_3x_800size,
    faster_rcnn_res50_coco_3x_800size,
    faster_rcnn_res101_coco_3x_800size,
    faster_rcnn_resx101_coco_2x_800size,
    fcos_res18_coco_3x_800size,
    fcos_res34_coco_3x_800size,
    fcos_res50_coco_3x_800size,
    fcos_res101_coco_3x_800size,
    fcos_resx101_coco_2x_800size,
    freeanchor_res18_coco_3x_800size,
    freeanchor_res34_coco_3x_800size,
    freeanchor_res50_coco_3x_800size,
    freeanchor_res101_coco_3x_800size,
    freeanchor_resx101_coco_2x_800size,
    retinanet_res18_coco_3x_800size,
    retinanet_res34_coco_3x_800size,
    retinanet_res50_coco_3x_800size,
    retinanet_res101_coco_3x_800size,
    retinanet_resx101_coco_2x_800size,
)
from official.vision.detection.models import ATSS, FCOS, FasterRCNN, FreeAnchor, RetinaNet
from official.vision.detection.tools.utils import DetEvaluator
from official.vision.keypoints.inference import KeypointEvaluator
from official.vision.keypoints.models import (
    simplebaseline_res50,
    simplebaseline_res101,
    simplebaseline_res152,
)
from official.vision.segmentation.configs import (
    deeplabv3plus_res101_cityscapes_768size,
    deeplabv3plus_res101_voc_512size,
)
from official.vision.segmentation.models import DeepLabV3Plus
--------------------------------------------------------------------------------
/official/multimodal/taming_transformer/README.md:
--------------------------------------------------------------------------------
# Taming Transformer

This directory contains the MegEngine implementation of the `taming_transformer` model and its inference code; training code is not included. Through `VQGAN`, `taming_transformer` combines the efficiency of convolutions with the expressive power of the `Transformer`, giving it strong image reconstruction and high-resolution image synthesis capabilities.

## Image Reconstruction

We can use `VQGAN` to test image reconstruction. The `VQGAN` architecture borrows from diffusion models and is trained in a GAN fashion. It comes in two main variants, `VQModel` and `GumbelVQ`, which differ mainly in the quantization step: `VQModel` uses the discretization method from `VQVAE`, while `GumbelVQ` discretizes with `Gumbel Softmax`.

Image reconstruction takes only a few lines of code:

```python
from official.multimodal.taming_transformer import Reconstruction, vqgan_imagenet_f16_16384

# Load the model and its weights
model = vqgan_imagenet_f16_16384(pretrained=True)

# Wrap the model
rec = Reconstruction(model)

image_path: str = ...
# Pass in the image path and the output file name
reconstructed_image = rec(image_path, file_name='reconstructed_image.png')
```

## Sampling from a Segmentation Map

`taming_transformer` can use a segmentation map as guidance to progressively sample an image from noise:

```python
from official.multimodal.taming_transformer import ConditionalSampler, s_flckr_transformer

# Load the model and its weights
model = s_flckr_transformer(pretrained=True)

sampler = ConditionalSampler(
    model,
    temperature=1.0,
    top_k=100,
    update_every=50,        # save an image every this many sampling steps
    scale_factor=1.0,       # scaling applied to the input image
    animate=True,           # save the sampling process as an mp4
    root='test',            # root directory for files and videos produced during sampling
    seed=2022,              # fix the random seed
    kernal_size=16,         # window size of each sampling step; larger gives better results
    fps=15,                 # frame rate of the saved video
    segmentation_save=True  # use a dedicated save mode so segmentation colors stay consistent across runs
)

# More images can be found under official/multimodal/taming_transformer/data
segmentation_path: str = r"official/multimodal/taming_transformer/data/sflckr_segmentations/norway/25735082181_999927fe5a_b.png"
# Pass in the path of the segmentation map
sampler.sample_segmentation(segmentation_path, name='norway')
```

The segmentation map is shown below:
![segmentation](../../assets/norway_segmentation.png)

The sampling result is shown below:
![result](../../assets/norway_sample_2687.png)

Run the sampler multiple times to obtain more varied results.

Sampling process: