├── .github ├── ISSUE_TEMPLATE │ ├── Help-wanted Issue.md │ └── feature-bug-issue.md └── workflows │ └── ci.yml ├── .gitignore ├── .pylintrc ├── ACKNOWLEDGMENTS ├── CODE_OF_CONDUCT.md ├── CONTRIBUTOR_LICENSE_AGREEMENT.md ├── LICENSE ├── README.md ├── hubconf.py ├── official ├── assets │ ├── cat.jpg │ ├── cat_det_out.jpg │ ├── cat_seg_out.jpg │ ├── dcgan.png │ ├── imagenet_class_info.json │ ├── norway_sample_2687.png │ ├── norway_sampling.mp4 │ ├── norway_segmentation.png │ ├── test_000009.png │ ├── test_000010.png │ ├── test_depth.png │ ├── test_sample_255.png │ ├── test_sampling.mp4 │ └── total.png ├── multimodal │ ├── __init__.py │ ├── big_sleep │ │ ├── README.md │ │ ├── __init__.py │ │ ├── big_sleep.py │ │ ├── biggan.py │ │ ├── ema.py │ │ ├── resample.py │ │ └── spectral_norm.py │ ├── clip │ │ ├── README.md │ │ ├── __init__.py │ │ ├── bpe_simple_vocab_16e6.txt.gz │ │ ├── functional.py │ │ ├── inference_utils.py │ │ ├── models.py │ │ └── simple_tokenizer.py │ ├── dalle │ │ ├── README.md │ │ ├── __init__.py │ │ ├── attention.py │ │ ├── dalle.py │ │ ├── functional.py │ │ ├── generate.py │ │ ├── pretrained.py │ │ ├── tokenizer.py │ │ ├── transformer.py │ │ └── vae │ │ │ ├── __init__.py │ │ │ ├── base_vae.py │ │ │ ├── openai_dvae.py │ │ │ ├── openaidvae │ │ │ ├── __init__.py │ │ │ ├── decoder.py │ │ │ ├── encoder.py │ │ │ └── utils.py │ │ │ └── vqgan_vae.py │ └── taming_transformer │ │ ├── README.md │ │ ├── __init__.py │ │ ├── cond_transformer.py │ │ ├── data │ │ └── drin_images │ │ │ ├── n01795545 │ │ │ └── ILSVRC2012_val_00023344.JPEG │ │ │ ├── n01819313 │ │ │ └── ILSVRC2012_val_00003068.JPEG │ │ │ ├── n01820546 │ │ │ ├── ILSVRC2012_val_00034784.JPEG │ │ │ └── ILSVRC2012_val_00047491.JPEG │ │ │ ├── n01828970 │ │ │ ├── ILSVRC2012_val_00001336.JPEG │ │ │ ├── ILSVRC2012_val_00008236.JPEG │ │ │ └── ILSVRC2012_val_00046802.JPEG │ │ │ ├── n01843065 │ │ │ └── ILSVRC2012_val_00022439.JPEG │ │ │ ├── n01847000 │ │ │ └── ILSVRC2012_val_00022364.JPEG │ │ │ ├── n02085782 │ │ │ └── ILSVRC2012_val_00012298.JPEG │ │ │ ├── n02086646 │ │ │ └── ILSVRC2012_val_00011473.JPEG │ │ │ ├── n02088466 │ │ │ └── ILSVRC2012_val_00013651.JPEG │ │ │ ├── n02089973 │ │ │ └── ILSVRC2012_val_00000028.JPEG │ │ │ ├── n02093256 │ │ │ └── ILSVRC2012_val_00046547.JPEG │ │ │ ├── n02096294 │ │ │ └── ILSVRC2012_val_00042133.JPEG │ │ │ ├── n02099601 │ │ │ └── ILSVRC2012_val_00005697.JPEG │ │ │ ├── n02099712 │ │ │ └── ILSVRC2012_val_00023471.JPEG │ │ │ ├── n02100877 │ │ │ └── ILSVRC2012_val_00039863.JPEG │ │ │ ├── n02101006 │ │ │ ├── ILSVRC2012_val_00032333.JPEG │ │ │ └── ILSVRC2012_val_00047325.JPEG │ │ │ ├── n02101556 │ │ │ └── ILSVRC2012_val_00030540.JPEG │ │ │ ├── n02102318 │ │ │ └── ILSVRC2012_val_00024691.JPEG │ │ │ ├── n02105505 │ │ │ └── ILSVRC2012_val_00031252.JPEG │ │ │ ├── n02110627 │ │ │ └── ILSVRC2012_val_00008310.JPEG │ │ │ └── n02111889 │ │ │ └── ILSVRC2012_val_00042625.JPEG │ │ ├── diffusion_modules.py │ │ ├── functional.py │ │ ├── inference_utils.py │ │ ├── mingpt.py │ │ ├── quantize.py │ │ └── vqgan.py ├── nlp │ ├── __init__.py │ └── bert │ │ ├── README.md │ │ ├── __init__.py │ │ ├── config_args.py │ │ ├── glue_data │ │ └── MRPC │ │ │ ├── dev.tsv │ │ │ ├── dev_ids.tsv │ │ │ ├── msr_paraphrase_test.txt │ │ │ ├── msr_paraphrase_train.txt │ │ │ ├── test.tsv │ │ │ └── train.tsv │ │ ├── model.py │ │ ├── mrpc_dataset.py │ │ ├── test.py │ │ ├── tokenization.py │ │ └── train.py ├── quantization │ ├── README.md │ ├── __init__.py │ ├── calibration.py │ ├── finetune.py │ ├── inference.py │ ├── models │ │ ├── __init__.py │ │ ├── 
mobilenet_v2.py │ │ ├── resnet.py │ │ └── shufflenet.py │ ├── param_config.py │ ├── test.py │ └── train.py └── vision │ ├── __init__.py │ ├── classification │ ├── README.md │ ├── __init__.py │ ├── dump.py │ ├── resnet │ │ ├── README.md │ │ ├── __init__.py │ │ ├── inference.py │ │ ├── model.py │ │ ├── test.py │ │ └── train.py │ └── shufflenet │ │ ├── README.md │ │ ├── __init__.py │ │ ├── inference.py │ │ ├── model.py │ │ ├── test.py │ │ └── train.py │ ├── detection │ ├── README.md │ ├── __init__.py │ ├── configs │ │ ├── __init__.py │ │ ├── atss_res101_coco_3x_800size.py │ │ ├── atss_res18_coco_3x_800size.py │ │ ├── atss_res34_coco_3x_800size.py │ │ ├── atss_res50_coco_3x_800size.py │ │ ├── atss_resx101_coco_2x_800size.py │ │ ├── faster_rcnn_res101_coco_3x_800size.py │ │ ├── faster_rcnn_res18_coco_3x_800size.py │ │ ├── faster_rcnn_res34_coco_3x_800size.py │ │ ├── faster_rcnn_res50_coco_3x_800size.py │ │ ├── faster_rcnn_resx101_coco_2x_800size.py │ │ ├── fcos_res101_coco_3x_800size.py │ │ ├── fcos_res18_coco_3x_800size.py │ │ ├── fcos_res34_coco_3x_800size.py │ │ ├── fcos_res50_coco_3x_800size.py │ │ ├── fcos_resx101_coco_2x_800size.py │ │ ├── freeanchor_res101_coco_3x_800size.py │ │ ├── freeanchor_res18_coco_3x_800size.py │ │ ├── freeanchor_res34_coco_3x_800size.py │ │ ├── freeanchor_res50_coco_3x_800size.py │ │ ├── freeanchor_resx101_coco_2x_800size.py │ │ ├── retinanet_res101_coco_3x_800size.py │ │ ├── retinanet_res18_coco_3x_800size.py │ │ ├── retinanet_res34_coco_3x_800size.py │ │ ├── retinanet_res50_coco_3x_800size.py │ │ └── retinanet_resx101_coco_2x_800size.py │ ├── layers │ │ ├── __init__.py │ │ ├── basic │ │ │ ├── __init__.py │ │ │ ├── functional.py │ │ │ ├── nn.py │ │ │ └── norm.py │ │ └── det │ │ │ ├── __init__.py │ │ │ ├── anchor.py │ │ │ ├── box_head.py │ │ │ ├── box_utils.py │ │ │ ├── fpn.py │ │ │ ├── loss.py │ │ │ ├── matcher.py │ │ │ ├── point_head.py │ │ │ ├── pooler.py │ │ │ ├── rcnn.py │ │ │ ├── rpn.py │ │ │ └── sampling.py │ ├── models │ │ ├── __init__.py │ │ ├── atss.py │ │ ├── faster_rcnn.py │ │ ├── fcos.py │ │ ├── freeanchor.py │ │ └── retinanet.py │ └── tools │ │ ├── data_mapper.py │ │ ├── inference.py │ │ ├── nms.py │ │ ├── test.py │ │ ├── test_in_table.py │ │ ├── test_random.py │ │ ├── train.py │ │ ├── train_random.py │ │ └── utils.py │ ├── gan │ ├── README.md │ ├── megengine_mimicry │ │ ├── __init__.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── data_utils.py │ │ │ └── image_loader.py │ │ ├── metrics │ │ │ ├── __init__.py │ │ │ ├── compute_fid.py │ │ │ ├── compute_is.py │ │ │ ├── compute_kid.py │ │ │ ├── compute_metrics.py │ │ │ ├── fid │ │ │ │ ├── __init__.py │ │ │ │ └── fid_utils.py │ │ │ ├── inception_model │ │ │ │ ├── __init__.py │ │ │ │ └── inception_utils.py │ │ │ ├── inception_score │ │ │ │ ├── __init__.py │ │ │ │ └── inception_score_utils.py │ │ │ ├── kid │ │ │ │ ├── __init__.py │ │ │ │ └── kid_utils.py │ │ │ └── utils.py │ │ ├── nets │ │ │ ├── __init__.py │ │ │ ├── basemodel.py │ │ │ ├── blocks.py │ │ │ ├── dcgan │ │ │ │ ├── __init__.py │ │ │ │ ├── dcgan_base.py │ │ │ │ └── dcgan_cifar.py │ │ │ ├── gan.py │ │ │ ├── losses.py │ │ │ └── wgan │ │ │ │ ├── __init__.py │ │ │ │ ├── wgan_base.py │ │ │ │ └── wgan_cifar.py │ │ ├── training │ │ │ ├── __init__.py │ │ │ ├── logger.py │ │ │ ├── metric_log.py │ │ │ ├── scheduler.py │ │ │ └── trainer.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── common.py │ │ │ └── vis.py │ ├── requirements.txt │ ├── train_dcgan.py │ └── train_wgan.py │ ├── keypoints │ ├── README.md │ ├── config.py │ ├── dataset.py │ ├── 
inference.py │ ├── models │ │ ├── __init__.py │ │ └── simplebaseline.py │ ├── test.py │ ├── train.py │ └── transforms.py │ └── segmentation │ ├── README.md │ ├── configs │ ├── __init__.py │ ├── deeplabv3plus_res101_cityscapes_768size.py │ └── deeplabv3plus_res101_voc_512size.py │ ├── models │ ├── __init__.py │ └── deeplabv3plus.py │ └── tools │ ├── inference.py │ ├── test.py │ ├── train.py │ └── utils.py ├── requirements.txt ├── requires-style.txt ├── run_format_check.sh └── setup.cfg /.github/ISSUE_TEMPLATE/Help-wanted Issue.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Help-wanted Issue 3 | about: 请使用此模板提出help-wanted任务 4 | title: Help-wanted Issue 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## 背景 11 | 12 | 13 | 14 | ## 任务描述 15 | 16 | 17 | 18 | ## 目标 19 | 20 | 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-bug-issue.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature/Bug Issue 3 | about: 请使用此模型提出您的建议/问题 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | ## 环境 12 | 1.系统环境: 13 | 2.MegEngine版本: 14 | 3.python版本: 15 | 4.模型名称: 16 | 17 | ## 复现步骤 18 | 1. 19 | 2. 20 | 3. 21 | 22 | ## 请提供关键的代码片段便于追查问题 23 | 24 | 25 | 26 | ## 请提供完整的日志及报错信息 27 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: CI 4 | 5 | # Controls when the action will run. Triggers the workflow on push or pull request 6 | # events but only for the master branch 7 | on: 8 | push: 9 | pull_request: 10 | 11 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 12 | jobs: 13 | # This workflow contains a single job called "build" 14 | build: 15 | # The type of runner that the job will run on 16 | runs-on: ubuntu-latest 17 | strategy: 18 | matrix: 19 | python-version: [3.6, 3.7, 3.8] 20 | 21 | # Steps represent a sequence of tasks that will be executed as part of the job 22 | steps: 23 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 24 | - uses: actions/checkout@v2 25 | 26 | - name: Set up Python ${{ matrix.python-version }} 27 | uses: actions/setup-python@v1 28 | with: 29 | python-version: ${{ matrix.python-version }} 30 | 31 | - name: Install dependencies 32 | run: | 33 | python -m pip install --upgrade pip 34 | pip install -r requirements.txt 35 | 36 | # Runs a set of commands using the runners shell 37 | - name: Format check 38 | run: ./run_format_check.sh 39 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *log*/ 2 | *.jpg 3 | *.png 4 | 5 | # compilation and distribution 6 | __pycache__ 7 | _ext 8 | *.pyc 9 | *.so 10 | build/ 11 | dist/ 12 | wheels/ 13 | 14 | # pytorch/python/numpy formats 15 | *.pth 16 | *.pkl 17 | *.npy 18 | 19 | # ipython/jupyter notebooks 20 | *.ipynb 21 | **/.ipynb_checkpoints/ 22 | 23 | # Editor temporaries 24 | *.swn 25 | *.swo 26 | *.swp 27 | *~ 28 | 29 | # pycharm editor settings 30 | .idea 31 | 32 | # vscode editor settings 33 | .vscode 34 | 35 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: 
-------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to a positive environment for our community include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior include: 18 | 19 | * The use of sexualized language or imagery, and sexual attention or advances of any kind 20 | * Trolling, insulting or derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others’ private information, such as a physical or email address, without their explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | All MegEngine forums and spaces are meant for professional interactions, and any behavior which could reasonably be considered inappropriate in a professional setting is unacceptable. 26 | 27 | ## Our Responsibilities 28 | 29 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 30 | 31 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 32 | 33 | ## Scope 34 | 35 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 36 | 37 | 38 | ## Enforcement 39 | 40 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at megengine@megvii.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 41 | 42 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 
43 | 44 | ## Attribution 45 | 46 | This Code of Conduct is updated from the Contributor Covenant, version 2.0, available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 47 | 48 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MegEngine Models 2 | 3 | ![](https://github.com/MegEngine/Models/workflows/CI/badge.svg) 4 | 5 | 本仓库包含了采用[MegEngine](https://github.com/megengine/megengine)实现的各种主流深度学习模型。 6 | 7 | [official](./official)目录下提供了各种经典的图像分类、目标检测、图像分割以及自然语言模型的官方实现。每个模型同时提供了模型定义、推理以及训练的代码。 8 | 9 | 官方会一直维护[official](./official)下的代码,保持适配MegEngine的最新API,提供最优的模型实现。同时,提供高质量的学习文档,帮助新手学习如何在MegEngine下训练自己的模型。 10 | 11 | ## 综述 12 | 13 | 对于每个模型,我们提供了至少四个脚本文件:模型定义(`model.py`)、模型推理(`inference.py`)、模型训练(`train.py`)、模型测试(`test.py`)。 14 | 15 | 每个模型目录下都对应有一个`README`,介绍了模型的详细信息,并详细描述了训练和测试的流程。例如 [ResNet README](./official/vision/classification/resnet/README.md)。 16 | 17 | 另外,`official`下定义的模型可以通过`megengine.hub`来直接加载,例如: 18 | 19 | ```bash 20 | import megengine.hub 21 | 22 | # 只加载网络结构 23 | resnet18 = megengine.hub.load("megengine/models", "resnet18") 24 | # 加载网络结构和预训练权重 25 | resnet18 = megengine.hub.load("megengine/models", "resnet18", pretrained=True) 26 | ``` 27 | 28 | 更多可以通过`megengine.hub`接口加载的模型见[hubconf.py](./hubconf.py)。 29 | 30 | ## 安装和环境配置 31 | 32 | 在开始运行本仓库下的代码之前,用户需要通过以下步骤来配置本地环境: 33 | 34 | 1. 克隆仓库 35 | 36 | ```bash 37 | git clone https://github.com/MegEngine/Models.git 38 | ``` 39 | 40 | 2. 安装依赖包 41 | 42 | ```bash 43 | pip3 install --user -r requirements.txt 44 | ``` 45 | 46 | 3. 添加目录到python环境变量中 47 | 48 | ```bash 49 | export PYTHONPATH=/path/to/models:$PYTHONPATH 50 | ``` 51 | 52 | 53 | ## 官方模型介绍 54 | 55 | ### 图像分类 56 | 57 | 图像分类是计算机视觉的基础任务。许多计算机视觉的其它任务(例如物体检测)都使用了基于图像分类的预训练模型。因此,我们提供了各种在ImageNet上预训练好的分类模型, 58 | 具体实现模型参考[这里](./official/vision/classification). 59 | 60 | ### 目标检测 61 | 62 | 目标检测同样是计算机视觉中的常见任务,我们提供了多个经典的目标检测模型,具体模型的实现可以参考[这里](./official/vision/detection). 63 | 64 | ### 图像分割 65 | 66 | 语意分割也是计算机视觉中的一项基础任务,为此我们也提供了经典的语义分割模型,具体可以参考[这里](./official/vision/segmentation/). 67 | 68 | ### 人体关节点检测 69 | 70 | 我们提供了人体关节点检测的经典模型和高精度模型,具体的实现可以参考[这里](./official/vision/keypoints). 
71 | 72 | ### 自然语言处理 73 | 74 | 我们同样支持一些常见的自然语言处理模型,模型的权重来自Google的pre-trained models, 用户可以直接使用`megengine.hub`轻松的调用预训练的bert模型。 75 | 76 | 另外,我们在[bert](./official/nlp/bert)中还提供了更加方便的脚本, 可以通过任务名直接获取到对应字典, 配置, 与预训练模型。 77 | 78 | ### 多模态 79 | 80 | 多模态学习拥有令人着迷的魅力,其有着丰富有趣的现实应用。我们支持了一些经典的多模态模型,模型的权重来源于官方预训练模型,用户可以参考仓库下的教程轻松体验多模态的奇妙。 81 | -------------------------------------------------------------------------------- /hubconf.py: -------------------------------------------------------------------------------- 1 | from official.multimodal.big_sleep import BigGAN, Imagine, biggan_128, biggan_256, biggan_512 2 | from official.multimodal.clip.inference_utils import ClipInferenceUtils 3 | from official.multimodal.clip.models import ( 4 | rn50, 5 | rn50x4, 6 | rn50x16, 7 | rn50x64, 8 | rn101, 9 | vit_b_16, 10 | vit_b_32, 11 | vit_l_14, 12 | vit_l_14_336px, 13 | ) 14 | from official.multimodal.dalle import ( 15 | Generator, 16 | OpenAIDiscreteVAE, 17 | OpenAIDiscreteVAEDecoder, 18 | OpenAIDiscreteVAEEncoder, 19 | VQGanVAE, 20 | coco_512_16_16d_16h_80tsl, 21 | openai_discrete_VAE_decoder, 22 | openai_discrete_VAE_encoder, 23 | vqgan_vae_1024, 24 | ) 25 | from official.multimodal.taming_transformer import ( 26 | ConditionalSampler, 27 | FastSampler, 28 | Reconstruction, 29 | celebahq_transformer, 30 | drin_transformer, 31 | s_flckr_transformer, 32 | vqgan_gumbel_f8, 33 | vqgan_imagenet_f16_1024, 34 | vqgan_imagenet_f16_16384, 35 | ) 36 | from official.nlp.bert.model import ( 37 | cased_L_12_H_768_A_12, 38 | cased_L_24_H_1024_A_16, 39 | chinese_L_12_H_768_A_12, 40 | multi_cased_L_12_H_768_A_12, 41 | uncased_L_12_H_768_A_12, 42 | uncased_L_24_H_1024_A_16, 43 | wwm_cased_L_24_H_1024_A_16, 44 | wwm_uncased_L_24_H_1024_A_16, 45 | ) 46 | from official.quantization.models import quantized_resnet18 47 | from official.vision.classification.resnet.model import ( 48 | BasicBlock, 49 | Bottleneck, 50 | ResNet, 51 | resnet18, 52 | resnet34, 53 | resnet50, 54 | resnet101, 55 | resnet152, 56 | resnext50_32x4d, 57 | resnext101_32x8d, 58 | ) 59 | from official.vision.classification.shufflenet.model import ( 60 | shufflenet_v2_x0_5, 61 | shufflenet_v2_x1_0, 62 | shufflenet_v2_x1_5, 63 | shufflenet_v2_x2_0, 64 | ) 65 | from official.vision.detection.configs import ( 66 | atss_res18_coco_3x_800size, 67 | atss_res34_coco_3x_800size, 68 | atss_res50_coco_3x_800size, 69 | atss_res101_coco_3x_800size, 70 | atss_resx101_coco_2x_800size, 71 | faster_rcnn_res18_coco_3x_800size, 72 | faster_rcnn_res34_coco_3x_800size, 73 | faster_rcnn_res50_coco_3x_800size, 74 | faster_rcnn_res101_coco_3x_800size, 75 | faster_rcnn_resx101_coco_2x_800size, 76 | fcos_res18_coco_3x_800size, 77 | fcos_res34_coco_3x_800size, 78 | fcos_res50_coco_3x_800size, 79 | fcos_res101_coco_3x_800size, 80 | fcos_resx101_coco_2x_800size, 81 | freeanchor_res18_coco_3x_800size, 82 | freeanchor_res34_coco_3x_800size, 83 | freeanchor_res50_coco_3x_800size, 84 | freeanchor_res101_coco_3x_800size, 85 | freeanchor_resx101_coco_2x_800size, 86 | retinanet_res18_coco_3x_800size, 87 | retinanet_res34_coco_3x_800size, 88 | retinanet_res50_coco_3x_800size, 89 | retinanet_res101_coco_3x_800size, 90 | retinanet_resx101_coco_2x_800size, 91 | ) 92 | from official.vision.detection.models import ATSS, FCOS, FasterRCNN, FreeAnchor, RetinaNet 93 | from official.vision.detection.tools.utils import DetEvaluator 94 | from official.vision.keypoints.inference import KeypointEvaluator 95 | from official.vision.keypoints.models import ( 96 | simplebaseline_res50, 97 | simplebaseline_res101, 98 | 
simplebaseline_res152, 99 | ) 100 | from official.vision.segmentation.configs import ( 101 | deeplabv3plus_res101_cityscapes_768size, 102 | deeplabv3plus_res101_voc_512size, 103 | ) 104 | from official.vision.segmentation.models import DeepLabV3Plus 105 | -------------------------------------------------------------------------------- /official/assets/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/78882f9cbaa037ad701f47d47bb80b66ad95ce87/official/assets/cat.jpg -------------------------------------------------------------------------------- /official/assets/cat_det_out.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/78882f9cbaa037ad701f47d47bb80b66ad95ce87/official/assets/cat_det_out.jpg -------------------------------------------------------------------------------- /official/assets/cat_seg_out.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/78882f9cbaa037ad701f47d47bb80b66ad95ce87/official/assets/cat_seg_out.jpg -------------------------------------------------------------------------------- /official/assets/dcgan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/78882f9cbaa037ad701f47d47bb80b66ad95ce87/official/assets/dcgan.png -------------------------------------------------------------------------------- /official/assets/norway_sample_2687.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/78882f9cbaa037ad701f47d47bb80b66ad95ce87/official/assets/norway_sample_2687.png -------------------------------------------------------------------------------- /official/assets/norway_sampling.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/78882f9cbaa037ad701f47d47bb80b66ad95ce87/official/assets/norway_sampling.mp4 -------------------------------------------------------------------------------- /official/assets/norway_segmentation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/78882f9cbaa037ad701f47d47bb80b66ad95ce87/official/assets/norway_segmentation.png -------------------------------------------------------------------------------- /official/assets/test_000009.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/78882f9cbaa037ad701f47d47bb80b66ad95ce87/official/assets/test_000009.png -------------------------------------------------------------------------------- /official/assets/test_000010.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/78882f9cbaa037ad701f47d47bb80b66ad95ce87/official/assets/test_000010.png -------------------------------------------------------------------------------- /official/assets/test_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/78882f9cbaa037ad701f47d47bb80b66ad95ce87/official/assets/test_depth.png -------------------------------------------------------------------------------- 
/official/assets/test_sample_255.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/78882f9cbaa037ad701f47d47bb80b66ad95ce87/official/assets/test_sample_255.png -------------------------------------------------------------------------------- /official/assets/test_sampling.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/78882f9cbaa037ad701f47d47bb80b66ad95ce87/official/assets/test_sampling.mp4 -------------------------------------------------------------------------------- /official/assets/total.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/78882f9cbaa037ad701f47d47bb80b66ad95ce87/official/assets/total.png -------------------------------------------------------------------------------- /official/multimodal/__init__.py: -------------------------------------------------------------------------------- 1 | from .dalle.dalle import DALLE 2 | -------------------------------------------------------------------------------- /official/multimodal/big_sleep/README.md: -------------------------------------------------------------------------------- 1 | # Big Sleep 2 | 3 | 此仓库包含MegEngine实现的多模态模型`Big Sleep`,其将`CLIP`与`BigGAN`的生成器相结合,用户可以轻松使用一行文本构想图像! 4 | 5 | ## 使用方法 6 | 7 | 请使用GPU设备,否则生成过程可能会过长。 8 | 9 | 使用`hub`加载 10 | 11 | ```python 12 | from megengine import hub 13 | modelhub = hub.import_module(repo_info='megengine/models', git_host='github.com') 14 | 15 | dream = modelhub.Imagine( 16 | # 需要进行构想的文本 17 | text = "fire in the sky", 18 | # 传入参考图像用于稍微引导生成 19 | img = None, 20 | # 生成图像尺寸大小 21 | image_size=512, 22 | # 迭代过程的学习率 23 | lr = 5e-2, 24 | # 保存图像的间隔 25 | save_every = 25, 26 | # 是否保存迭代过程中的所有图像,否则图像将会重写到一张图片上 27 | save_progress = True, 28 | # 惩罚关键词 29 | text_min = None, 30 | # 梯度累积的步数 31 | gradient_accumulate_every: int = 1, 32 | epochs: int = 20, 33 | iterations: int = 1050, 34 | # 是否将迭代过程中的所有图像保存为mp4视频文件 35 | animate: bool = False, 36 | # 保存mp4的帧率 37 | fps: int = 15, 38 | # BIgSleep中采样方式 39 | bilinear: bool = False, 40 | # 固定随机种子 41 | seed: Optional[int] = None, 42 | # 限制最大类别数量 43 | max_classes: Optional[int] = None, 44 | # 用于可微topk 45 | class_temperature: float = 2., 46 | # 保存文件时是否加上日期前缀 47 | save_date_time: bool = False, 48 | # 是否保存得分最高的图像 49 | save_best: bool = True, 50 | # 实验性采样 51 | experimental_resample: bool = False, 52 | ema_decay: float = 0.99, 53 | num_cutouts: int = 128, 54 | center_bias: bool = False, 55 | clip_type: str = 'RN50', 56 | root: str = 'BigSleep', 57 | ) 58 | 59 | # 开始迭代生成图像 60 | dream() 61 | ``` 62 | 63 | 本地加载 64 | 65 | ```python 66 | from official.multimodal.big_sleep import Imagine 67 | 68 | dream = Imagine( 69 | text = "fire in the sky", 70 | lr = 5e-2, 71 | save_every = 25, 72 | save_progress = True, 73 | image_size=512 74 | ) 75 | 76 | # 开始迭代生成图像 77 | dream() 78 | ``` 79 | 80 | ### 参考 81 | 82 | [lucidrains/big-sleep](https://github.com/lucidrains/big-sleep) 83 | -------------------------------------------------------------------------------- /official/multimodal/big_sleep/__init__.py: -------------------------------------------------------------------------------- 1 | from .big_sleep import Imagine 2 | from .biggan import BigGAN, biggan_128, biggan_256, biggan_512 3 | -------------------------------------------------------------------------------- /official/multimodal/big_sleep/ema.py: 
-------------------------------------------------------------------------------- 1 | # Exponential Moving Average (from https://gist.github.com/crowsonkb/76b94d5238272722290734bf4725d204) # noqa: E501 2 | from copy import deepcopy 3 | 4 | import megengine as mge 5 | import megengine.functional as F 6 | import megengine.module as M 7 | 8 | 9 | class EMA(M.Module): 10 | def __init__(self, model: M.Module, decay: float): 11 | super(EMA, self).__init__() 12 | self.model = model 13 | self.decay = decay 14 | self.accum = mge.tensor(1.) 15 | 16 | self._biased = deepcopy(model) 17 | self.average = deepcopy(model) 18 | for param in self._biased.parameters(): 19 | param.set_value(param.detach() * 0) 20 | for param in self.average.parameters(): 21 | param.set_value(param.detach() * 0) 22 | self.update() 23 | 24 | def update(self): 25 | if not self.training: 26 | raise RuntimeError('Update should only be called during training') 27 | 28 | self.accum *= self.decay 29 | 30 | model_params = dict(self.model.named_parameters()) 31 | biased_params = dict(self._biased.named_parameters()) 32 | average_params = dict(self.average.named_parameters()) 33 | assert model_params.keys() == biased_params.keys() == average_params.keys( 34 | ), 'Model parameter keys incompatible with EMA stored parameter keys' 35 | 36 | for name, param in model_params.items(): 37 | biased_params[name].set_value( 38 | F.mul(biased_params[name], self.decay)) 39 | biased_params[name].set_value( 40 | F.add(biased_params[name], (1 - self.decay) * param)) 41 | average_params[name].set_value(biased_params[name]) 42 | average_params[name].set_value( 43 | F.div(average_params[name], 1 - self.accum)) 44 | 45 | model_buffers = dict(self.model.named_buffers()) 46 | biased_buffers = dict(self._biased.named_buffers()) 47 | average_buffers = dict(self.average.named_buffers()) 48 | assert model_buffers.keys() == biased_buffers.keys() == average_buffers.keys() 49 | 50 | for name, buffer in model_buffers.items(): 51 | biased_buffers[name].set_value(buffer) 52 | average_buffers[name].set_value(buffer) 53 | 54 | def forward(self, *args, **kwargs): 55 | if self.training: 56 | return self.model(*args, **kwargs) 57 | return self.average(*args, **kwargs) 58 | -------------------------------------------------------------------------------- /official/multimodal/big_sleep/resample.py: -------------------------------------------------------------------------------- 1 | import math 2 | from functools import update_wrapper 3 | 4 | import numpy as np 5 | 6 | import megengine as mge 7 | import megengine.functional as F 8 | 9 | 10 | def sinc(x): 11 | return F.where(x != 0, F.sin(math.pi * x) / (math.pi * x), F.ones_like(x)) 12 | 13 | 14 | def lanczos(x, a): 15 | cond = F.logical_and(-a < x, x < a) 16 | out = F.where(cond, sinc(x) * sinc(x / a), F.zeros_like(x)) 17 | return out / F.sum(out) 18 | 19 | 20 | def ramp(ratio, width): 21 | n = math.ceil(width / ratio + 1) 22 | out = np.zeros(n) 23 | cur = 0 24 | for i in range(out.shape[0]): 25 | out[i] = cur 26 | cur += ratio 27 | out = np.concatenate([np.flip(-out[1:], axis=0), out])[1:-1] 28 | return mge.tensor(out, dtype='float32') 29 | 30 | 31 | def odd(fn): 32 | return update_wrapper(lambda x: F.sin(x) * fn(F.abs(x)), fn) 33 | 34 | 35 | def _to_linear_srgb(input): 36 | cond = input <= 0.04045 37 | a = input / 12.92 38 | b = ((input + 0.055) / 1.055)**2.4 39 | return F.where(cond, a, b) 40 | 41 | 42 | def _to_nonlinear_srgb(input): 43 | cond = input <= 0.0031308 44 | a = 12.92 * input 45 | b = 1.055 * input**(1 / 2.4) - 
0.055 46 | return F.where(cond, a, b) 47 | 48 | 49 | to_linear_srgb = odd(_to_linear_srgb) 50 | to_nonlinear_srgb = odd(_to_nonlinear_srgb) 51 | 52 | 53 | def resample(input, size, align_corners=True, is_srgb=False): # pylint: disable=unused-argument 54 | n, c, h, w = input.shape 55 | dh, dw = size 56 | 57 | if is_srgb: 58 | input = to_linear_srgb(input) 59 | 60 | input = input.reshape(n * c, 1, h, w) 61 | 62 | if dh < h: 63 | kernel_h = lanczos( 64 | ramp(dh / h, 3), 3).to(input.device).astype(input.dtype) 65 | pad_h = (kernel_h.shape[0] - 1) // 2 66 | input = F.pad( 67 | input, [(0, 0), (0, 0), (pad_h, pad_h), (0, 0)], 'reflect') 68 | input = F.conv2d(input, kernel_h[None, None, :, None]) 69 | 70 | if dw < w: 71 | kernel_w = lanczos( 72 | ramp(dw / w, 3), 3).to(input.device).astype(input.dtype) 73 | pad_w = (kernel_w.shape[0] - 1) // 2 74 | input = F.pad(input, [(0, 0), (0, 0), (0, 0), 75 | (pad_w, pad_w)], 'reflect') 76 | input = F.conv2d(input, kernel_w[None, None, None, :]) 77 | 78 | input = input.reshape(n, c, h, w) 79 | # NOTE: can not set align_corners when specify mode with `bicubic` in megengine 80 | input = F.nn.interpolate(input, size, mode='bicubic', 81 | align_corners=None) 82 | 83 | if is_srgb: 84 | input = to_nonlinear_srgb(input) 85 | 86 | return input 87 | -------------------------------------------------------------------------------- /official/multimodal/clip/README.md: -------------------------------------------------------------------------------- 1 | # CLIP 2 | 3 | 此仓库包含MegEngine实现的多模态模型`CLIP`,但不包含训练及测试代码。 4 | 5 | `models.py`中实现了CLIP的不同配置:`RN50`, `RN101`, `RN50x4`, `RN50x16`, `RN50x64`, `ViT-B-32`, `ViT-B-16`, `ViT-L-14`和`ViT-L-14-336px`。 6 | 7 | 在ImageNet V2 matched-frequency数据集上,以float16的精度达成了一下的零样本分类准确度 8 | 9 | | 模型 | TOP-1 |TOP-5 | 10 | | -------------- | -------|------| 11 | | RN50 | 53.55% |81.53%| 12 | | RN101 | 56.21% |83.77%| 13 | | RN50x4 | 59.77% |85.90%| 14 | | RN50x16 | 64.14% |88.39%| 15 | | RN50x64 | 66.90% |90.46%| 16 | | ViT-B-32 | 56.48% |83.57%| 17 | | ViT-B-16 | 62.24% |87.72%| 18 | | ViT-L-14 | 69.72% |90.89%| 19 | | ViT-L-14-336px | 70.72% |91.68%| 20 | 21 | ## 零样本(zero-shot)分类 22 | 23 | 用户可以使用以下模板使用`CLIP`进行零样本图像分类。 24 | 25 | ### 加载网络 26 | 27 | ```python 28 | import megengine as mge 29 | from megengine import hub 30 | modelhub = hub.import_module(repo_info='megengine/models', git_host='github.com') 31 | 32 | # 加载网络结构及预训练模型 33 | # 方式一 34 | clip = hub.load("megengine/models", "rn50", pretrained=True) 35 | clip.eval() 36 | 37 | # 将网络部分权重转换为float16, 仅限GPU 38 | clip.convert_weights('float16') 39 | 40 | # 方式二 41 | # 查看所有可用模型 42 | print(CLIP.available_models()) 43 | 44 | # 直接使用 from_pretrained 方法加载模型即可 45 | clip = CLIP.from_pretrained(model_name='RN50', dtype='float16') 46 | 47 | # 查看网络配置信息 48 | clip.model_config() 49 | 50 | # 使用float32的精度推理 51 | clip.convert_weigths('float32') 52 | ``` 53 | 54 | ### 数据处理 55 | 56 | ```python 57 | import cv2 58 | from megengine.data.transform import CenterCrop, Compose, Normalize, Resize 59 | 60 | #数据处理 61 | image_resolution = clip.image_resolution # clip需要固定输入图片的大小 62 | transfroms = Compose([ 63 | Resize(image_resolution, interpolation=cv2.INTER_CUBIC), 64 | CenterCrop(image_resolution), 65 | Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)), 66 | ]) 67 | 68 | ``` 69 | 70 | 数据处理构建完毕后需要用户手动构建`Dataloader`。 71 | 72 | ### 构建文本模板和类别 73 | 74 | `CLIP`需要一些文本模板/提示来描述某一张图片,比如:`a photo of {}.`,`a photo of many {}.`等,大括号中可以填入各种类别名称。这样为每一个类别都生成n句话,再使用文本编码器和图片编码器的输出向量做相似度计算,得分高者则认为其为该类的概率更高。 75 | 
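下面是一个示意性的最小草图,展示上述"用模板为每个类别生成 n 句描述 → 文本编码 → 与图像特征做相似度"的核心流程;其中 `encode_text`、`encode_image` 的接口名以及 `tokenize` 的调用方式均为示意假设,实际请以仓库内 `ClipInferenceUtils` 的实现为准。

```python
import megengine.functional as F
from official.multimodal.clip import tokenize

# 示意草图:encode_text / encode_image 为假设的接口名,实际以仓库实现为准
def zeroshot_weight_for_class(clip, class_name, templates):
    texts = [t.format(class_name) for t in templates]          # 为该类别生成 n 句描述
    text_feat = clip.encode_text(tokenize(texts))               # 文本编码器输出, 形状 (n, dim)
    text_feat = text_feat / F.sqrt(F.sum(text_feat ** 2, axis=-1, keepdims=True))
    class_weight = F.mean(text_feat, axis=0)                    # 对 n 个模板取平均
    return class_weight / F.sqrt(F.sum(class_weight ** 2))      # 归一化得到该类别的权重向量

def zeroshot_logits(clip, image, class_weights):
    img_feat = clip.encode_image(image)                         # 图片编码器输出, 形状 (batch, dim)
    img_feat = img_feat / F.sqrt(F.sum(img_feat ** 2, axis=-1, keepdims=True))
    # 与每个类别的权重向量做点积, 得分最高者即为预测类别
    return 100. * (img_feat @ F.stack(class_weights, axis=1))   # (batch, num_classes)
```

实际使用时无需手写上述流程,直接调用下文介绍的内置推理工具即可。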
76 | `CLIP`中内置了imagenet的80个文本模板,这里使用内置的CLIP推理工具,使用方法如下。 77 | 78 | ```python 79 | utils = modelhub.ClipInferenceUtils 80 | ``` 81 | 82 | 随后调用如下方法即可得到对应的文本模板。 83 | 84 | ```python 85 | imagenet_templates = utils.generate_imagenet_templates() 86 | ``` 87 | 88 | 对于不同的数据集可以采用不同的文本模板,其格式如下: 89 | 90 | ```python 91 | templates: List[str] = [ 92 | 'a bad photo of a {}.', 93 | 'a photo of many {}.', 94 | ... 95 | ] 96 | ``` 97 | 98 | 同时我们需要各个类别的名称,可通过调用以下代码得到imagenet的1000个类别。 99 | 100 | ```python 101 | imagenet_classes = utils.generate_imagenet_classes() 102 | ``` 103 | 104 | 对于不同的数据集需要使用对应的类别名称,其格式如下: 105 | 106 | ```python 107 | classes:List[str] = [ 108 | 'tench', 109 | 'goldfish', 110 | ... 111 | ] 112 | ``` 113 | 114 | ### 生成零样本分类权重 115 | 116 | 使用下列代码生成权重。 117 | 118 | ```python 119 | zeroshot_wieghts = utils.generate_zeroshot_classifier_weight(clip, imagenet_classes, imagenet_templates) 120 | ``` 121 | 122 | ### 预测 123 | 124 | 传入模型、dataloader和零样本权重即可进行预测 125 | 126 | ```python 127 | top1, top5 = utils.predict(clip, loader, zeroshot_wieghts, logit_scale=100.) 128 | print(f"Top-1 accuracy: {top1:.2f}") 129 | print(f"Top-5 accuracy: {top5:.2f}") 130 | ``` 131 | 132 | 如果你只想预测一张图片,使用`predict_once`方法即可 133 | 134 | ```python 135 | logits = utils.predict_once(clip, image, zeroshot_wieghts, logit_scale=100.) 136 | ``` 137 | 138 | ## 参考 139 | 140 | [Learning Transferable Visual Models From Natural Language Supervision](https://arxiv.org/abs/2103.00020) 141 | 142 | [openai/CLIP](https://github.com/openai/CLIP) 143 | -------------------------------------------------------------------------------- /official/multimodal/clip/__init__.py: -------------------------------------------------------------------------------- 1 | from .inference_utils import ClipInferenceUtils 2 | from .models import CLIP 3 | from .simple_tokenizer import SimpleTokenizer, tokenize 4 | -------------------------------------------------------------------------------- /official/multimodal/clip/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/Models/78882f9cbaa037ad701f47d47bb80b66ad95ce87/official/multimodal/clip/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /official/multimodal/dalle/README.md: -------------------------------------------------------------------------------- 1 | # DALLE 2 | 3 | 此仓库包含MegEngine实现的多模态模型DALLE以及文生图代码,但不包含训练代码。 4 | 5 | ## 图像重建 6 | 7 | 对于给定的大小为256x256的归一化四维输入,可以使用如下方式进行重建: 8 | 9 | ```python 10 | from official.multimodal.dalle.vae import OpenAIDiscreteVAE 11 | from official.multimodal.big_sleep.big_sleep import save_images 12 | 13 | 14 | vae = OpenAIDiscreteVAE(True) 15 | 16 | img_seq = vae.get_codebook_indices(input) 17 | 18 | reconstructed_image = vae.decode(img_seq) 19 | 20 | save_images(reconstructed_image, './image.png') 21 | 22 | ``` 23 | 24 | 25 | 26 | ## 文生图 27 | 28 | 可以使用以下代码体验文生图的功能,需要先下载[dalle_new_variety.bpe](https://data.megengine.org.cn/research/multimodality/dalle_new_variety.bpe)文件 29 | 30 | ```python 31 | from official.multimodal.dalle import coco_512_16_16d_16h_80tsl 32 | from official.multimodal.dalle import Generator 33 | 34 | dalle = coco_512_16_16d_16h_80tsl() 35 | 36 | generator = Generator( 37 | dalle, 38 | texts = ['A tower has a clock on it on a day with a blue sky'], 39 | num_images=64, 40 | batch_size=4, 41 | bpe_path = './dalle_new_variety.bpe', 42 | root='./dalle' 43 | ) 44 | 45 | generator() 46 | ``` 47 | 48 | 
生成结果如下所示: 49 | 50 | ![res](../../assets/total.png) 51 | 52 | 53 | ## 参考 54 | 55 | [DALLE-pytorch](https://github.com/lucidrains/DALLE-pytorch) 56 | 57 | [DALLE-pytorch-discussions](https://github.com/lucidrains/DALLE-pytorch/discussions/335) 58 | -------------------------------------------------------------------------------- /official/multimodal/dalle/__init__.py: -------------------------------------------------------------------------------- 1 | from .dalle import DALLE 2 | from .generate import Generator 3 | from .pretrained import coco_512_16_16d_16h_80tsl 4 | from .vae import ( 5 | OpenAIDiscreteVAE, 6 | OpenAIDiscreteVAEDecoder, 7 | OpenAIDiscreteVAEEncoder, 8 | VQGanVAE, 9 | openai_discrete_VAE_decoder, 10 | openai_discrete_VAE_encoder 11 | ) 12 | from .vae.vqgan_vae import vqgan_vae_1024 13 | -------------------------------------------------------------------------------- /official/multimodal/dalle/pretrained.py: -------------------------------------------------------------------------------- 1 | from megengine import hub 2 | 3 | from .dalle import DALLE 4 | from .vae.vqgan_vae import vqgan_vae_1024 5 | 6 | 7 | @hub.pretrained( 8 | "https://data.megengine.org.cn/research/multimodality/dalle_coco_512_16_16d_16h_80tsl.pkl" 9 | ) 10 | def coco_512_16_16d_16h_80tsl(): 11 | vae = vqgan_vae_1024(False) 12 | model = DALLE( 13 | num_text_tokens=8192, 14 | text_seq_len=80, 15 | embed_dim=512, 16 | vae=vae, 17 | num_heads=16, 18 | head_dim=64, 19 | stable=False, 20 | depths=16, 21 | attention_types=['row', 'row', 'column', 'row', 'row', 'row', 'column', 'full'] 22 | ) 23 | return model 24 | -------------------------------------------------------------------------------- /official/multimodal/dalle/tokenizer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import youtokentome as yttm 3 | 4 | import megengine.functional as F 5 | from megengine import Tensor 6 | 7 | from ..clip.simple_tokenizer import SimpleTokenizer # pylint: disable=unused-import # noqa: F401 8 | 9 | 10 | class YttmTokenizer: 11 | def __init__(self, bpe_path: str): 12 | if not os.path.exists(bpe_path): 13 | raise ValueError(f'BPE json path {bpe_path} does not exist') 14 | 15 | tokenizer = yttm.BPE(model=bpe_path) 16 | self.tokenizer = tokenizer 17 | self.vocab_size = tokenizer.vocab_size() 18 | 19 | def decode(self, tokens, pad_tokens=(0, )): 20 | if isinstance(tokens, Tensor): 21 | tokens = tokens.tolist() 22 | 23 | return self.tokenizer.decode(tokens, ignore_ids=pad_tokens) 24 | 25 | def encode(self, texts): 26 | encoded = self.tokenizer.encode(texts, output_type=yttm.OutputType.ID) 27 | return list(map(Tensor, encoded)) 28 | 29 | def tokenize(self, texts, context_length=256, truncate_text=False): 30 | if isinstance(texts, str): 31 | texts = [texts] 32 | 33 | all_tokens = self.encode(texts) 34 | 35 | result = F.zeros((len(all_tokens), context_length), dtype='int32') 36 | for i, tokens in enumerate(all_tokens): 37 | if len(tokens) > context_length: 38 | if truncate_text: 39 | tokens = tokens[:context_length] 40 | else: 41 | raise RuntimeError( 42 | f"Input {texts[i]} is too long for context length {context_length}") 43 | result[i, :len(tokens)] = Tensor(tokens) 44 | 45 | return result 46 | -------------------------------------------------------------------------------- /official/multimodal/dalle/vae/__init__.py: -------------------------------------------------------------------------------- 1 | from .openai_dvae import DiscreteVAE as OpenAIDiscreteVAE 2 | from 
.openaidvae import ( 3 | OpenAIDiscreteVAEDecoder, 4 | OpenAIDiscreteVAEEncoder, 5 | map_pixels, 6 | openai_discrete_VAE_decoder, 7 | openai_discrete_VAE_encoder, 8 | unmap_pixels 9 | ) 10 | from .vqgan_vae import VQGanVAE 11 | -------------------------------------------------------------------------------- /official/multimodal/dalle/vae/base_vae.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | 3 | import megengine.module as M 4 | 5 | 6 | class BaseVAE(M.Module): 7 | def __init__( 8 | self, 9 | num_layers: int, 10 | num_tokens: int, 11 | image_size: int, 12 | channels: int = 3, 13 | ): 14 | super(BaseVAE, self).__init__() 15 | 16 | self.channels = channels 17 | self.num_layers = num_layers 18 | self.num_tokens = num_tokens 19 | self.image_size = image_size 20 | 21 | @abstractmethod 22 | def get_codebook_indices(self, inputs): 23 | pass 24 | 25 | @abstractmethod 26 | def decode(self, inputs): 27 | pass 28 | 29 | def forward(self, inputs): 30 | raise NotImplementedError() 31 | -------------------------------------------------------------------------------- /official/multimodal/dalle/vae/openai_dvae.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import megengine.functional as F 4 | 5 | from .base_vae import BaseVAE 6 | from .openaidvae import openai_discrete_VAE_decoder, openai_discrete_VAE_encoder 7 | from .openaidvae.utils import map_pixels, unmap_pixels 8 | 9 | 10 | class DiscreteVAE(BaseVAE): 11 | def __init__( 12 | self, 13 | pretrained: bool = True 14 | ): 15 | super(DiscreteVAE, self).__init__( 16 | num_layers=3, 17 | num_tokens=8192, 18 | image_size=256, 19 | ) 20 | 21 | self.encoder = openai_discrete_VAE_encoder(pretrained=pretrained) 22 | self.decoder = openai_discrete_VAE_decoder(pretrained=pretrained) 23 | 24 | def get_codebook_indices(self, img): 25 | img = map_pixels(img) 26 | z_logits = self.encoder.blocks(img) 27 | z = F.argmax(z_logits, axis=1) 28 | z = F.flatten(z, 1) 29 | return z 30 | 31 | def decode(self, img_seq): 32 | b, n, = img_seq.shape 33 | L = int(math.sqrt(n)) 34 | img_seq = img_seq.reshape(b, L, L) 35 | 36 | z = F.one_hot(img_seq, num_classes=self.num_tokens) 37 | 38 | z = z.transpose(0, 3, 1, 2).astype('float32') 39 | x_stats = self.decoder(z).astype('float32') 40 | x_rec = unmap_pixels(F.sigmoid(x_stats[:, :3])) 41 | return x_rec 42 | 43 | def forward(self, inputs): 44 | raise NotImplementedError("Do not call forward method!") 45 | -------------------------------------------------------------------------------- /official/multimodal/dalle/vae/openaidvae/__init__.py: -------------------------------------------------------------------------------- 1 | from .decoder import Decoder as OpenAIDiscreteVAEDecoder 2 | from .decoder import openai_discrete_VAE_decoder 3 | from .encoder import Encoder as OpenAIDiscreteVAEEncoder 4 | from .encoder import openai_discrete_VAE_encoder 5 | from .utils import map_pixels, unmap_pixels 6 | -------------------------------------------------------------------------------- /official/multimodal/dalle/vae/openaidvae/decoder.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from functools import partial 3 | 4 | import megengine.module as M 5 | from megengine import hub 6 | 7 | from .utils import Upsample 8 | 9 | 10 | class DecoderBlock(M.Module): 11 | def __init__( 12 | self, 13 | in_channels, 14 | out_channels, 15 | layers, 
16 | ) -> None: 17 | super(DecoderBlock, self).__init__() 18 | assert out_channels % 4 == 0, "The output channel must be devided into 4" 19 | self.post_gain = 1 / (layers ** 2) 20 | hid_ch = out_channels // 4 21 | self.id_path = M.Conv2d( 22 | in_channels, out_channels, 1) if in_channels != out_channels else M.Identity() 23 | self.res_path = M.Sequential(OrderedDict([ 24 | ("relu1", M.ReLU()), 25 | ('conv_1', M.Conv2d(in_channels, hid_ch, 1)), 26 | ("relu2", M.ReLU()), 27 | ('conv_2', M.Conv2d(hid_ch, hid_ch, 3, padding=1)), 28 | ("relu3", M.ReLU()), 29 | ('conv_3', M.Conv2d(hid_ch, hid_ch, 3, padding=1)), 30 | ("relu4", M.ReLU()), 31 | ('conv_4', M.Conv2d(hid_ch, out_channels, 3, padding=1)), 32 | ])) 33 | 34 | def forward(self, x): 35 | return self.id_path(x) + self.post_gain * self.res_path(x) 36 | 37 | 38 | class Decoder(M.Module): 39 | def __init__(self, n_init=128, n_hid=256, n_blk_per_group=2, out_ch=3, vocab_size=8192): 40 | super(Decoder, self).__init__() 41 | group_count = 4 42 | n_layers = group_count * n_blk_per_group 43 | blk_range = range(n_blk_per_group) 44 | make_blk = partial(DecoderBlock, layers=n_layers) 45 | self.vocab_size = vocab_size 46 | self.blocks = M.Sequential(OrderedDict([ 47 | ('input', M.Conv2d(vocab_size, n_init, 1)), 48 | ('group_1', M.Sequential(OrderedDict([ 49 | *[(f'block_{i + 1}', make_blk(n_init if i == 0 else 8 50 | * n_hid, 8 * n_hid)) for i in blk_range], 51 | ('upsample', Upsample(scale_factor=2, mode='nearest')), 52 | ]))), 53 | ('group_2', M.Sequential(OrderedDict([ 54 | *[(f'block_{i + 1}', make_blk(8 * n_hid if i 55 | == 0 else 4 * n_hid, 4 * n_hid)) for i in blk_range], 56 | ('upsample', Upsample(scale_factor=2, mode='nearest')), 57 | ]))), 58 | ('group_3', M.Sequential(OrderedDict([ 59 | *[(f'block_{i + 1}', make_blk(4 * n_hid if i 60 | == 0 else 2 * n_hid, 2 * n_hid)) for i in blk_range], 61 | ('upsample', Upsample(scale_factor=2, mode='nearest')), 62 | ]))), 63 | ('group_4', M.Sequential(OrderedDict([ 64 | *[(f'block_{i + 1}', make_blk(2 * n_hid if i 65 | == 0 else 1 * n_hid, 1 * n_hid)) for i in blk_range], 66 | ]))), 67 | ('output', M.Sequential(OrderedDict([ 68 | ('relu', M.ReLU()), 69 | ('conv', M.Conv2d(1 * n_hid, 2 * out_ch, 1)), 70 | ]))), 71 | ])) 72 | 73 | def forward(self, x): 74 | if x.ndim != 4: 75 | raise ValueError("The input must be 4-dim") 76 | if x.shape[1] != self.vocab_size: 77 | raise ValueError( 78 | "The input must be the same shape as the vocab") 79 | # if x.dtype != "float32": 80 | # raise ValueError("The input must be float32") 81 | return self.blocks(x) 82 | 83 | 84 | @hub.pretrained( 85 | "https://data.megengine.org.cn/research/multimodality/dalle_openai_dvae_decoder.pkl" 86 | ) 87 | def openai_discrete_VAE_decoder(**kwargs): 88 | return Decoder(**kwargs) 89 | -------------------------------------------------------------------------------- /official/multimodal/dalle/vae/openaidvae/encoder.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from functools import partial 3 | 4 | import megengine.module as M 5 | from megengine import hub 6 | 7 | 8 | class EncoderBlock(M.Module): 9 | def __init__(self, n_in, n_out, layers): 10 | super(EncoderBlock, self).__init__() 11 | n_hid = n_out // 4 12 | self.pre_gain = 1 / (layers ** 2) 13 | self.id_path = M.Conv2d( 14 | n_in, n_out, 1) if n_in != n_out else M.Identity() 15 | self.res_path = M.Sequential(OrderedDict([ 16 | ("relu1", M.ReLU()), 17 | ('conv_1', M.Conv2d(n_in, n_hid, 3, padding=1)), 18 | 
("relu2", M.ReLU()), 19 | ('conv_2', M.Conv2d(n_hid, n_hid, 3, padding=1)), 20 | ("relu3", M.ReLU()), 21 | ('conv_3', M.Conv2d(n_hid, n_hid, 3, padding=1)), 22 | ("relu4", M.ReLU()), 23 | ('conv_4', M.Conv2d(n_hid, n_out, 1)), 24 | ])) 25 | 26 | def forward(self, x): 27 | return self.id_path(x) + self.pre_gain * self.res_path(x) 28 | 29 | 30 | class Encoder(M.Module): 31 | def __init__(self, input_channel=3, n_hid=256, n_blk_per_group=2, vocab_size=8192): 32 | super(Encoder, self).__init__() 33 | group_count = 4 34 | n_layers = group_count * n_blk_per_group 35 | blk_range = range(n_blk_per_group) 36 | make_blk = partial(EncoderBlock, layers=n_layers) 37 | self.input_channel = input_channel 38 | self.vocab_size = vocab_size 39 | self.blocks = M.Sequential(OrderedDict([ 40 | ('input', M.Conv2d(input_channel, n_hid, 7, padding=3)), 41 | ('group_1', M.Sequential(OrderedDict([ 42 | *[(f'block_{i + 1}', make_blk(n_hid, n_hid)) 43 | for i in blk_range], 44 | ('pool', M.MaxPool2d(kernel_size=2, stride=2)), 45 | ]))), 46 | ('group_2', M.Sequential(OrderedDict([ 47 | *[(f'block_{i + 1}', make_blk(n_hid if i 48 | == 0 else 2 * n_hid, 2 * n_hid)) for i in blk_range], 49 | ('pool', M.MaxPool2d(kernel_size=2, stride=2)), 50 | ]))), 51 | ('group_3', M.Sequential(OrderedDict([ 52 | *[(f'block_{i + 1}', make_blk(2 * n_hid if i 53 | == 0 else 4 * n_hid, 4 * n_hid)) for i in blk_range], 54 | ('pool', M.MaxPool2d(kernel_size=2, stride=2)), 55 | ]))), 56 | ('group_4', M.Sequential(OrderedDict([ 57 | *[(f'block_{i + 1}', make_blk(4 * n_hid if i 58 | == 0 else 8 * n_hid, 8 * n_hid)) for i in blk_range], 59 | ]))), 60 | ('output', M.Sequential(OrderedDict([ 61 | ('relu', M.ReLU()), 62 | ('conv', M.Conv2d(8 * n_hid, self.vocab_size, 1)), 63 | ]))), 64 | ])) 65 | 66 | def forward(self, x): 67 | if x.ndim != 4: 68 | raise ValueError("Input must be 4D tensor") 69 | if x.shape[1] != self.input_channel: 70 | raise ValueError( 71 | f"Input channel must be {self.input_channel}") 72 | return self.blocks(x) 73 | 74 | 75 | @hub.pretrained( 76 | "https://data.megengine.org.cn/research/multimodality/dalle_openai_dvae_encoder.pkl" 77 | ) 78 | def openai_discrete_VAE_encoder(**kwargs): 79 | return Encoder(**kwargs) 80 | -------------------------------------------------------------------------------- /official/multimodal/dalle/vae/openaidvae/utils.py: -------------------------------------------------------------------------------- 1 | import megengine.functional as F 2 | import megengine.module as M 3 | 4 | logit_laplace_eps: float = 0.1 5 | 6 | 7 | def map_pixels(x): 8 | if x.ndim != 4: 9 | raise ValueError('input must be 4D') 10 | return (1 - 2 * logit_laplace_eps) * x + logit_laplace_eps 11 | 12 | 13 | def unmap_pixels(x): 14 | if x.ndim != 4: 15 | raise ValueError('input must be 4D') 16 | return F.clip((x - logit_laplace_eps) / (1 - 2 * logit_laplace_eps), 0, 1) 17 | 18 | 19 | class Upsample(M.Module): 20 | def __init__(self, scale_factor, mode): 21 | super().__init__() 22 | self.scale_factor = scale_factor 23 | self.mode = mode 24 | 25 | def forward(self, inputs): 26 | return F.nn.interpolate(inputs, scale_factor=self.scale_factor, mode=self.mode) 27 | -------------------------------------------------------------------------------- /official/multimodal/dalle/vae/vqgan_vae.py: -------------------------------------------------------------------------------- 1 | from math import log, sqrt 2 | from typing import Union 3 | 4 | import megengine.functional as F 5 | 6 | from ...taming_transformer.vqgan import GumbelVQ, VQModel, 
vqgan_imagenet_f16_1024 7 | from .base_vae import BaseVAE 8 | 9 | 10 | class VQGanVAE(BaseVAE): 11 | def __init__(self, model: Union[VQModel, GumbelVQ]): 12 | image_size = model.in_resolution 13 | num_layers = int(log(image_size / model.attn_resolution[0]) / log(2)) 14 | channels = model.in_channel 15 | num_tokens = model.quantize.num_embeddings 16 | 17 | super(VQGanVAE, self).__init__( 18 | num_layers, 19 | num_tokens, 20 | image_size, 21 | channels 22 | ) 23 | self.model = model 24 | 25 | self.is_gumbel = isinstance(model, GumbelVQ) 26 | 27 | def get_codebook_indices(self, img): 28 | b = img.shape[0] 29 | img = (2 * img) - 1 30 | _, _, [_, _, indices] = self.model.encode(img) 31 | if self.is_gumbel: 32 | return F.flatten(indices, 1) 33 | return indices.reshape(b, -1) 34 | 35 | def decode(self, img_seq): 36 | b, n = img_seq.shape 37 | one_hot_indices = F.one_hot(img_seq, num_classes=self.num_tokens).astype('float32') 38 | z = one_hot_indices @ self.model.quantize.embedding.weight 39 | 40 | c = z.shape[-1] 41 | z = z.reshape(b, int(sqrt(n)), -1, c).transpose(0, 3, 1, 2) 42 | img = self.model.decode(z) 43 | 44 | img = (F.clip(img, -1., 1.) + 1) * 0.5 45 | return img 46 | 47 | def forward(self): 48 | raise NotImplementedError() 49 | 50 | 51 | def vqgan_vae_1024(pretrained=True): 52 | vae = vqgan_imagenet_f16_1024(pretrained=pretrained) 53 | model = VQGanVAE(vae) 54 | return model 55 | -------------------------------------------------------------------------------- /official/multimodal/taming_transformer/README.md: -------------------------------------------------------------------------------- 1 | # Taming Transformer 2 | 3 | 此仓库包含MegEngine实现的`taming_transformer`模型代码及推理代码,但不包含训练代码。`taming_transformer`通过`VQGAN`将卷积的高效性和`Transformer`极强的表达能力相结合,拥有强大的图像重建和高分辨率图像合成能力。 4 | 5 | ## 图像重建 6 | 7 | 我们可以使用`VQGAN`来测试图像重建,`VQGAN`的结构参考与`Diffusion Model`,并且使用GAN的方式进行训练。其主要拥有两种不同的模型——`VQModel`和`GumbelVQ`,主要区别在于模型中的`quantize离散化`部分,`VQModel`使用`VQVAE`中的离散化方法,`GumbelVQ`则使用`Gumbel Softmax`进行离散化。 8 | 9 | 我们可以很方便的使用如下代码进行图像重建。 10 | 11 | ```python 12 | from official.multimodal.taming_transformer import Reconstruction 13 | 14 | # 加载模型及权重 15 | model = vqgan_imagenet_f16_16384(pretrained=True) 16 | 17 | # 传入模型 18 | rec = Reconstruction(model) 19 | 20 | image_path: str = ... 21 | # 传入图片路径和保存路径 22 | reconstructed_image = rec(image_path, file_name='reconstructed_image.png') 23 | ``` 24 | 25 | ## 从分割图采样 26 | 27 | `taming_transformer`可以利用分割图作为引导,逐步的从噪声中进行采样。可以使用如下代码进行采样。 28 | 29 | ```python 30 | from official.multimodal.taming_transformer import s_flckr_transformer 31 | # 加载模型及权重 32 | model = s_flckr_transformer(pretrained=True) 33 | 34 | sampler = ConditionalSampler( 35 | model, 36 | temperature=1.0, 37 | top_k=100, 38 | update_every=50, # 多少次采样保存一次图片 39 | scale_factor=1.0, # 对输入图片进行缩放 40 | animate=True, # 保存采样过程为mp4 41 | root='test', # 根目录,用于保存采样过程中的文件和视频 42 | seed=2022, # 固定随机种子 43 | kernal_size=16, # 每次采样的窗口大小,越大效果越好 44 | fps=15, # 保存视频的帧率 45 | segmentation_save=True # 为分割图使用专门的保存方式,保证每次推理保存的分割图色彩一致 46 | ) 47 | 48 | # 可以在official/multimodal/taming_transformer/data目录下找到更多图片 49 | segmentation_path: str = r"official/multimodal/taming_transformer/data/sflckr_segmentations/norway/25735082181_999927fe5a_b.png" 50 | # 传入分割图地址 51 | sampler.sample_segmentation(segmentation_path, name='norway') 52 | ``` 53 | 54 | 分割图如下所示: 55 | ![segmentation](../../assets/norway_segmentation.png) 56 | 57 | 采样结果如下所示: 58 | ![result](../../assets/norway_sample_2687.png) 59 | 多次运行即可获得更多样的结果 60 | 61 | 采样过程: 62 | 63 |