├── .gitignore ├── .gitmodules ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── INSTALL.md ├── LICENSE ├── README.md ├── cog.yaml ├── cutler ├── __init__.py ├── config │ ├── __init__.py │ └── cutler_config.py ├── data │ ├── __init__.py │ ├── build.py │ ├── dataset_mapper.py │ ├── datasets │ │ ├── __init__.py │ │ ├── builtin.py │ │ ├── builtin_meta.py │ │ └── coco.py │ ├── detection_utils.py │ └── transforms │ │ ├── __init__.py │ │ ├── augmentation_impl.py │ │ └── transform.py ├── demo │ ├── __init__.py │ ├── demo.py │ ├── imgs │ │ ├── demo1.jpg │ │ ├── demo2.jpg │ │ ├── demo3.jpg │ │ ├── demo4.jpg │ │ ├── demo5.jpg │ │ ├── demo6.jpg │ │ ├── demo7.jpg │ │ └── demo8.jpg │ └── predictor.py ├── engine │ ├── __init__.py │ ├── defaults.py │ └── train_loop.py ├── evaluation │ ├── __init__.py │ └── coco_evaluation.py ├── model_zoo │ └── configs │ │ ├── Base-RCNN-FPN.yaml │ │ ├── COCO-Semisupervised │ │ ├── cascade_mask_rcnn_R_50_FPN_100perc.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_10perc.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_1perc.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_20perc.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_2perc.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_30perc.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_40perc.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_50perc.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_5perc.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_60perc.yaml │ │ └── cascade_mask_rcnn_R_50_FPN_80perc.yaml │ │ └── CutLER-ImageNet │ │ ├── cascade_mask_rcnn_R_50_FPN.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_demo.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_self_train.yaml │ │ └── mask_rcnn_R_50_FPN.yaml ├── modeling │ ├── __init__.py │ ├── meta_arch │ │ ├── __init__.py │ │ ├── build.py │ │ └── rcnn.py │ └── roi_heads │ │ ├── __init__.py │ │ ├── custom_cascade_rcnn.py │ │ ├── fast_rcnn.py │ │ └── roi_heads.py ├── solver │ ├── __init__.py │ └── build.py ├── structures │ ├── __init__.py │ └── boxes.py ├── tools │ ├── eval.sh │ ├── get_self_training_ann.py │ ├── run_with_submitit.sh │ ├── run_with_submitit_ssl.sh │ ├── single-node_run.sh │ └── train-1node.sh └── train_net.py ├── datasets └── README.md ├── docs ├── cutler-demo.jpg ├── demos_videocutler.gif ├── maskcut-demo.jpg ├── maskcut.gif ├── pipeline.jpg └── teaser_img.jpg ├── maskcut ├── colormap.py ├── crf.py ├── demo.py ├── dino.py ├── imgs │ ├── demo1.jpg │ ├── demo2.jpg │ ├── demo3.jpg │ ├── demo4.jpg │ ├── demo5.jpg │ ├── demo6.jpg │ ├── demo7.jpg │ └── demo8.jpg ├── maskcut.py ├── maskcut_with_submitit.py ├── merge_jsons.py ├── predict.py ├── run_maskcut_with_submitit.sh └── run_with_submitit_maskcut_array.py ├── requirements.txt └── videocutler ├── INSTALL.md ├── README.md ├── configs ├── imagenet │ └── instance-segmentation │ │ ├── Base-COCO-InstanceSegmentation.yaml │ │ ├── Base-imagenet-InstanceSegmentation.yaml │ │ └── mask2former_R50_imagenet.yaml └── imagenet_video │ ├── Base-YouTubeVIS-VideoInstanceSegmentation.yaml │ ├── video_mask2former_R50_cls_agnostic.yaml │ ├── videocutler_eval_ytvis2019.yaml │ └── videocutler_eval_ytvis2021.yaml ├── datasets ├── README.md ├── ade20k_instance_catid_mapping.txt ├── ade20k_instance_imgCatIds.json ├── prepare_ade20k_ins_seg.py ├── prepare_ade20k_pan_seg.py ├── prepare_ade20k_sem_seg.py └── prepare_coco_semantic_annos_from_panoptic_annos.py ├── demo.sh ├── demo ├── README.md ├── demo.py └── predictor.py ├── demo_video ├── colormap.py ├── demo.py ├── predictor.py └── visualizer.py ├── docs ├── demo-videos │ ├── 09773e4062 │ │ ├── 00100.jpg │ │ ├── 00105.jpg │ │ ├── 00110.jpg │ │ ├── 00115.jpg │ │ ├── 00120.jpg │ 
│ ├── 00125.jpg │ │ ├── 00130.jpg │ │ ├── 00135.jpg │ │ ├── 00140.jpg │ │ ├── 00145.jpg │ │ ├── 00150.jpg │ │ ├── 00155.jpg │ │ ├── 00160.jpg │ │ ├── 00165.jpg │ │ ├── 00170.jpg │ │ ├── 00175.jpg │ │ ├── 00180.jpg │ │ ├── 00185.jpg │ │ ├── 00190.jpg │ │ ├── 00195.jpg │ │ ├── 00200.jpg │ │ ├── 00205.jpg │ │ ├── 00210.jpg │ │ ├── 00215.jpg │ │ └── 00220.jpg │ ├── 4c7710908f │ │ ├── 00000.jpg │ │ ├── 00010.jpg │ │ ├── 00020.jpg │ │ ├── 00030.jpg │ │ ├── 00040.jpg │ │ ├── 00050.jpg │ │ ├── 00060.jpg │ │ ├── 00070.jpg │ │ ├── 00080.jpg │ │ ├── 00090.jpg │ │ ├── 00100.jpg │ │ ├── 00110.jpg │ │ ├── 00120.jpg │ │ ├── 00130.jpg │ │ ├── 00140.jpg │ │ ├── 00150.jpg │ │ ├── 00160.jpg │ │ └── 00170.jpg │ ├── 8b4f6d1186 │ │ ├── 00000.jpg │ │ ├── 00010.jpg │ │ ├── 00020.jpg │ │ ├── 00030.jpg │ │ ├── 00040.jpg │ │ ├── 00050.jpg │ │ ├── 00060.jpg │ │ ├── 00070.jpg │ │ ├── 00080.jpg │ │ ├── 00090.jpg │ │ ├── 00100.jpg │ │ ├── 00110.jpg │ │ ├── 00120.jpg │ │ ├── 00130.jpg │ │ ├── 00140.jpg │ │ ├── 00150.jpg │ │ ├── 00160.jpg │ │ └── 00170.jpg │ ├── 99c6b1acf2 │ │ ├── 00075.jpg │ │ ├── 00080.jpg │ │ ├── 00085.jpg │ │ ├── 00090.jpg │ │ ├── 00095.jpg │ │ ├── 00100.jpg │ │ ├── 00105.jpg │ │ ├── 00110.jpg │ │ ├── 00115.jpg │ │ ├── 00120.jpg │ │ ├── 00125.jpg │ │ ├── 00130.jpg │ │ ├── 00135.jpg │ │ ├── 00140.jpg │ │ ├── 00145.jpg │ │ ├── 00150.jpg │ │ ├── 00155.jpg │ │ ├── 00160.jpg │ │ ├── 00165.jpg │ │ └── 00170.jpg │ └── eea827bdda │ │ ├── 00000.jpg │ │ ├── 00005.jpg │ │ ├── 00010.jpg │ │ ├── 00015.jpg │ │ ├── 00020.jpg │ │ ├── 00025.jpg │ │ ├── 00030.jpg │ │ ├── 00035.jpg │ │ ├── 00040.jpg │ │ ├── 00045.jpg │ │ ├── 00050.jpg │ │ ├── 00055.jpg │ │ ├── 00060.jpg │ │ ├── 00065.jpg │ │ ├── 00070.jpg │ │ ├── 00075.jpg │ │ ├── 00080.jpg │ │ ├── 00085.jpg │ │ ├── 00090.jpg │ │ ├── 00095.jpg │ │ ├── 00100.jpg │ │ ├── 00105.jpg │ │ ├── 00110.jpg │ │ ├── 00115.jpg │ │ ├── 00120.jpg │ │ ├── 00125.jpg │ │ ├── 00130.jpg │ │ ├── 00135.jpg │ │ ├── 00140.jpg │ │ ├── 00145.jpg │ │ ├── 00150.jpg │ │ ├── 00155.jpg │ │ ├── 00160.jpg │ │ ├── 00165.jpg │ │ ├── 00170.jpg │ │ ├── 00175.jpg │ │ ├── 00180.jpg │ │ ├── 00185.jpg │ │ ├── 00190.jpg │ │ ├── 00195.jpg │ │ ├── 00200.jpg │ │ ├── 00205.jpg │ │ ├── 00210.jpg │ │ ├── 00215.jpg │ │ ├── 00220.jpg │ │ ├── 00225.jpg │ │ ├── 00230.jpg │ │ ├── 00235.jpg │ │ ├── 00240.jpg │ │ ├── 00245.jpg │ │ ├── 00250.jpg │ │ ├── 00255.jpg │ │ ├── 00260.jpg │ │ ├── 00265.jpg │ │ ├── 00270.jpg │ │ ├── 00275.jpg │ │ ├── 00280.jpg │ │ ├── 00285.jpg │ │ ├── 00290.jpg │ │ └── 00295.jpg ├── videocutler_demos.gif └── videocutler_pipeline.png ├── eval.sh ├── eval_ytvis.py ├── mask2former ├── __init__.py ├── config.py ├── data │ ├── __init__.py │ ├── dataset_mappers │ │ ├── __init__.py │ │ ├── coco_instance_new_baseline_dataset_mapper.py │ │ ├── coco_panoptic_new_baseline_dataset_mapper.py │ │ ├── mask_former_instance_dataset_mapper.py │ │ ├── mask_former_panoptic_dataset_mapper.py │ │ └── mask_former_semantic_dataset_mapper.py │ └── datasets │ │ ├── __init__.py │ │ ├── register_ade20k_full.py │ │ ├── register_ade20k_instance.py │ │ ├── register_ade20k_panoptic.py │ │ ├── register_coco_panoptic_annos_semseg.py │ │ ├── register_coco_stuff_10k.py │ │ ├── register_mapillary_vistas.py │ │ └── register_mapillary_vistas_panoptic.py ├── evaluation │ ├── __init__.py │ └── instance_evaluation.py ├── maskformer_model.py ├── modeling │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ └── swin.py │ ├── criterion.py │ ├── matcher.py │ ├── meta_arch │ │ ├── __init__.py │ │ ├── mask_former_head.py │ │ └── 
per_pixel_baseline.py │ ├── pixel_decoder │ │ ├── __init__.py │ │ ├── fpn.py │ │ ├── msdeformattn.py │ │ └── ops │ │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── ms_deform_attn_func.py │ │ │ ├── make.sh │ │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── ms_deform_attn.py │ │ │ ├── setup.py │ │ │ ├── src │ │ │ ├── cpu │ │ │ │ ├── ms_deform_attn_cpu.cpp │ │ │ │ └── ms_deform_attn_cpu.h │ │ │ ├── cuda │ │ │ │ ├── ms_deform_attn_cuda.cu │ │ │ │ ├── ms_deform_attn_cuda.h │ │ │ │ └── ms_deform_im2col_cuda.cuh │ │ │ ├── ms_deform_attn.h │ │ │ └── vision.cpp │ │ │ └── test.py │ └── transformer_decoder │ │ ├── __init__.py │ │ ├── mask2former_transformer_decoder.py │ │ ├── maskformer_transformer_decoder.py │ │ ├── position_encoding.py │ │ └── transformer.py ├── test_time_augmentation.py └── utils │ ├── __init__.py │ └── misc.py ├── mask2former_video ├── __init__.py ├── config.py ├── data_video │ ├── __init__.py │ ├── augmentation.py │ ├── build.py │ ├── dataset_mapper.py │ ├── datasets │ │ ├── __init__.py │ │ ├── builtin.py │ │ ├── ytvis.py │ │ └── ytvis_api │ │ │ ├── __init__.py │ │ │ ├── ytvos.py │ │ │ └── ytvoseval.py │ └── ytvis_eval.py ├── engine │ ├── __init__.py │ ├── defaults.py │ └── train_loop.py ├── modeling │ ├── __init__.py │ ├── criterion.py │ ├── matcher.py │ └── transformer_decoder │ │ ├── __init__.py │ │ ├── position_encoding.py │ │ └── video_mask2former_transformer_decoder.py ├── utils │ ├── __init__.py │ └── memory.py └── video_maskformer_model.py ├── predict.py ├── requirements.txt ├── single-node-video_run.sh ├── tools ├── README.md ├── analyze_model.py ├── convert-pretrained-swin-model-to-d2.py ├── convert-torchvision-to-d2.py ├── evaluate_coco_boundary_ap.py └── evaluate_pq_for_semantic_segmentation.py ├── train-1node.sh ├── train_net.py └── train_net_video.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | __MACOSX/ 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # dataset and wandb cache files 32 | */datasets/ 33 | */OUTPUT/ 34 | */wandb/ 35 | 36 | # local scripts 37 | */*.sh 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .nox/ 53 | .coverage 54 | .coverage.* 55 | .cache 56 | nosetests.xml 57 | coverage.xml 58 | *.cover 59 | *.py,cover 60 | .hypothesis/ 61 | .pytest_cache/ 62 | debug.ipynb 63 | */OUTPUT-DIR* 64 | */debug* 65 | 66 | # Translations 67 | *.mo 68 | *.pot 69 | 70 | # Django stuff: 71 | *.log 72 | local_settings.py 73 | db.sqlite3 74 | db.sqlite3-journal 75 | 76 | # Flask stuff: 77 | instance/ 78 | .webassets-cache 79 | 80 | # Scrapy stuff: 81 | .scrapy 82 | 83 | # Sphinx documentation 84 | docs/_build/ 85 | 86 | # PyBuilder 87 | target/ 88 | 89 | # Jupyter Notebook 90 | .ipynb_checkpoints 91 | 92 | # pretrained models 93 | videocutler/pretrain 94 | *.pth 95 | 96 | # demo results 97 | demos/ 98 | 99 | # IPython 100 | profile_default/ 101 | ipython_config.py 102 | 103 | # pyenv 104 | .python-version 105 | 106 | # pipenv 107 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 108 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 109 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 110 | # install all needed dependencies. 111 | #Pipfile.lock 112 | 113 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 114 | __pypackages__/ 115 | 116 | # Celery stuff 117 | celerybeat-schedule 118 | celerybeat.pid 119 | 120 | # SageMath parsed files 121 | *.sage.py 122 | 123 | # Environments 124 | .env 125 | .venv 126 | env/ 127 | venv/ 128 | ENV/ 129 | env.bak/ 130 | venv.bak/ 131 | 132 | # Spyder project settings 133 | .spyderproject 134 | .spyproject 135 | 136 | # Rope project settings 137 | .ropeproject 138 | 139 | # mkdocs documentation 140 | /site -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/TokenCut"] 2 | path = third_party/TokenCut 3 | url = https://github.com/YangtaoWANG95/TokenCut.git 4 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | This Code of Conduct also applies outside the project spaces when there is a 56 | reasonable belief that an individual's behavior may have a negative impact on 57 | the project or its community. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported by contacting the project team at opensource-conduct@fb.com. All 63 | complaints will be reviewed and investigated and will result in a response that 64 | is deemed necessary and appropriate to the circumstances. The project team is 65 | obligated to maintain confidentiality with regard to the reporter of an incident. 66 | Further details of specific enforcement policies may be posted separately. 67 | 68 | Project maintainers who do not follow or enforce the Code of Conduct in good 69 | faith may face temporary or permanent repercussions as determined by other 70 | members of the project's leadership. 
71 | 72 | ## Attribution 73 | 74 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 75 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 76 | 77 | [homepage]: https://www.contributor-covenant.org 78 | 79 | For answers to common questions about this code of conduct, see 80 | https://www.contributor-covenant.org/faq 81 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to CutLER 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `main`. 9 | 2. If you've added code that should be tested, add tests. 10 | 3. If you've changed APIs, update the documentation. 11 | 4. Ensure the test suite passes. 12 | 5. Make sure your code lints. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 14 | 15 | ## Contributor License Agreement ("CLA") 16 | In order to accept your pull request, we need you to submit a CLA. You only need 17 | to do this once to work on any of Facebook's open source projects. 18 | 19 | Complete your CLA here: https://code.facebook.com/cla 20 | 21 | ## Issues 22 | We use GitHub issues to track public bugs. Please ensure your description is 23 | clear and has sufficient instructions to be able to reproduce the issue. 24 | 25 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 26 | disclosure of security bugs. In those cases, please go through the process 27 | outlined on that page and do not file a public issue. 28 | 29 | ## License 30 | By contributing to CutLER, you agree that your contributions will be licensed 31 | under the LICENSE file in the root directory of this source tree. -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | 2 | # Installation 3 | 4 | ## Requirements 5 | - Linux or macOS with Python ≥ 3.8 6 | - PyTorch ≥ 1.8 and [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation. 7 | Install them together at [pytorch.org](https://pytorch.org) to make sure of this. 8 | Note: please check that your PyTorch version matches the one required by Detectron2. 9 | - Detectron2: follow the [Detectron2 installation instructions](https://detectron2.readthedocs.io/tutorials/install.html). 10 | - OpenCV ≥ 4.6 is needed for the demo and visualization. 11 | 12 | ## Example conda environment setup 13 | 14 | ```bash 15 | conda create --name cutler python=3.8 -y 16 | conda activate cutler 17 | conda install pytorch==1.8.1 torchvision==0.9.1 torchaudio==0.8.1 -c pytorch 18 | pip install git+https://github.com/lucasb-eyer/pydensecrf.git 19 | 20 | # under your working directory 21 | git clone git@github.com:facebookresearch/detectron2.git 22 | cd detectron2 23 | pip install -e . 24 | pip install git+https://github.com/cocodataset/panopticapi.git 25 | pip install git+https://github.com/mcordts/cityscapesScripts.git 26 | 27 | cd .. 28 | git clone --recursive git@github.com:facebookresearch/CutLER.git 29 | cd CutLER 30 | pip install -r requirements.txt 31 | ``` 32 | 33 | ## Datasets 34 | If you want to train/evaluate on the datasets, please see [datasets/README.md](datasets/README.md) for how we prepare the datasets used in this project. 
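35 | 
36 | ## Verifying the installation
37 | 
38 | As a quick sanity check (a minimal sketch, assuming the environment above; CutLER itself is run from the source tree rather than installed as a package, so only its dependencies are importable):
39 | 
40 | ```bash
41 | python -c "import torch, detectron2, cv2, pydensecrf.densecrf; print(torch.__version__, detectron2.__version__, cv2.__version__)"
42 | ```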
-------------------------------------------------------------------------------- /cog.yaml: -------------------------------------------------------------------------------- 1 | build: 2 | gpu: true 3 | cuda: "11.6" 4 | python_version: "3.8" 5 | python_packages: 6 | - "torch==1.11.0" 7 | - "torchvision==0.12.0" 8 | - "faiss-gpu==1.7.2" 9 | - "opencv-python==4.6.0.66" 10 | - "scikit-image==0.19.2" 11 | - "scikit-learn==1.1.1" 12 | - "shapely==1.8.2" 13 | - "timm==0.5.4" 14 | - "pyyaml==6.0" 15 | - "colored==1.4.4" 16 | - "fvcore==0.1.5.post20220512" 17 | - "gdown==4.5.4" 18 | - "pycocotools==2.0.6" 19 | - "numpy==1.20.0" 20 | 21 | run: 22 | - pip install git+https://github.com/lucasb-eyer/pydensecrf.git 23 | 24 | predict: "maskcut/predict.py:Predictor" 25 | -------------------------------------------------------------------------------- /cutler/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | import config 4 | import engine 5 | import modeling 6 | import structures 7 | import tools 8 | import demo 9 | 10 | # dataset loading 11 | from . import data # register all new datasets 12 | from data import datasets # register all new datasets 13 | from solver import * 14 | 15 | # from .data import register_all_imagenet -------------------------------------------------------------------------------- /cutler/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | from .cutler_config import add_cutler_config -------------------------------------------------------------------------------- /cutler/config/cutler_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | from detectron2.config import CfgNode as CN 4 | 5 | def add_cutler_config(cfg): 6 | cfg.DATALOADER.COPY_PASTE = False 7 | cfg.DATALOADER.COPY_PASTE_RATE = 0.0 8 | cfg.DATALOADER.COPY_PASTE_MIN_RATIO = 0.5 9 | cfg.DATALOADER.COPY_PASTE_MAX_RATIO = 1.0 10 | cfg.DATALOADER.COPY_PASTE_RANDOM_NUM = True 11 | cfg.DATALOADER.VISUALIZE_COPY_PASTE = False 12 | 13 | cfg.MODEL.ROI_HEADS.USE_DROPLOSS = False 14 | cfg.MODEL.ROI_HEADS.DROPLOSS_IOU_THRESH = 0.0 15 | 16 | cfg.SOLVER.BASE_LR_MULTIPLIER = 1 17 | cfg.SOLVER.BASE_LR_MULTIPLIER_NAMES = [] 18 | 19 | cfg.TEST.NO_SEGM = False -------------------------------------------------------------------------------- /cutler/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | from . import datasets # ensure the builtin datasets are registered 4 | from .detection_utils import * # isort:skip 5 | from .build import ( 6 | build_batch_data_loader, 7 | build_detection_train_loader, 8 | build_detection_test_loader, 9 | get_detection_dataset_dicts, 10 | load_proposals_into_dataset, 11 | print_instances_class_histogram, 12 | ) 13 | from detectron2.data.common import * 14 | 15 | __all__ = [k for k in globals().keys() if not k.startswith("_")] -------------------------------------------------------------------------------- /cutler/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 
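# Importing this package pulls in the COCO-style loaders below and the register_all_* helpers from builtin.py;
# cutler/data/__init__.py imports it so that all builtin benchmarks are registered with detectron2's DatasetCatalog.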
2 | from .coco import load_coco_json, load_sem_seg, register_coco_instances, convert_to_coco_json 3 | from .builtin import ( 4 | register_all_imagenet, 5 | register_all_uvo, 6 | register_all_coco_ca, 7 | register_all_coco_semi, 8 | register_all_lvis, 9 | register_all_voc, 10 | register_all_cross_domain, 11 | register_all_kitti, 12 | register_all_objects365, 13 | register_all_openimages, 14 | ) 15 | 16 | __all__ = [k for k in globals().keys() if not k.startswith("_")] -------------------------------------------------------------------------------- /cutler/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # Modified by XuDong Wang from https://github.com/facebookresearch/detectron2/blob/main/detectron2/data/transforms/__init__.py 3 | 4 | from fvcore.transforms.transform import * 5 | from .transform import * 6 | from detectron2.data.transforms.augmentation import * 7 | from .augmentation_impl import * 8 | 9 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 10 | 11 | 12 | from detectron2.utils.env import fixup_module_metadata 13 | 14 | fixup_module_metadata(__name__, globals(), __all__) 15 | del fixup_module_metadata -------------------------------------------------------------------------------- /cutler/demo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | from demo import * 3 | from predictor import * 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] -------------------------------------------------------------------------------- /cutler/demo/imgs/demo1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/cutler/demo/imgs/demo1.jpg -------------------------------------------------------------------------------- /cutler/demo/imgs/demo2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/cutler/demo/imgs/demo2.jpg -------------------------------------------------------------------------------- /cutler/demo/imgs/demo3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/cutler/demo/imgs/demo3.jpg -------------------------------------------------------------------------------- /cutler/demo/imgs/demo4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/cutler/demo/imgs/demo4.jpg -------------------------------------------------------------------------------- /cutler/demo/imgs/demo5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/cutler/demo/imgs/demo5.jpg -------------------------------------------------------------------------------- /cutler/demo/imgs/demo6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/cutler/demo/imgs/demo6.jpg 
-------------------------------------------------------------------------------- /cutler/demo/imgs/demo7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/cutler/demo/imgs/demo7.jpg -------------------------------------------------------------------------------- /cutler/demo/imgs/demo8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/cutler/demo/imgs/demo8.jpg -------------------------------------------------------------------------------- /cutler/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | from .train_loop import * 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | 7 | from .defaults import * -------------------------------------------------------------------------------- /cutler/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | from .coco_evaluation import COCOEvaluator -------------------------------------------------------------------------------- /cutler/model_zoo/configs/Base-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 
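# POST_NMS_TOPK_* below is the total number of proposals kept per image after NMS,
# pooled over all FPN levels (unlike PRE_NMS_TOPK_*, which is applied per level).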
19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | ROI_BOX_HEAD: 25 | NAME: "FastRCNNConvFCHead" 26 | NUM_FC: 2 27 | POOLER_RESOLUTION: 7 28 | ROI_MASK_HEAD: 29 | NAME: "MaskRCNNConvUpsampleHead" 30 | NUM_CONV: 4 31 | POOLER_RESOLUTION: 14 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.02 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | INPUT: 41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 42 | VERSION: 2 43 | -------------------------------------------------------------------------------- /cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_100perc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | MASK_ON: True 7 | BACKBONE: 8 | FREEZE_AT: 0 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | STRIDE_IN_1X1: False 13 | FPN: 14 | NORM: "SyncBN" 15 | ROI_BOX_HEAD: 16 | CLS_AGNOSTIC_BBOX_REG: True 17 | ROI_HEADS: 18 | NAME: CustomCascadeROIHeads 19 | RPN: 20 | POST_NMS_TOPK_TRAIN: 2000 21 | DATASETS: 22 | TRAIN: ("coco_2017_train",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | STEPS: (60000, 80000) 28 | MAX_ITER: 90000 29 | BASE_LR_MULTIPLIER: 2 30 | BASE_LR_MULTIPLIER_NAMES: ['roi_heads.mask_head.predictor', 'roi_heads.box_predictor.0.cls_score', 'roi_heads.box_predictor.0.bbox_pred', 'roi_heads.box_predictor.1.cls_score', 'roi_heads.box_predictor.1.bbox_pred', 'roi_heads.box_predictor.2.cls_score', 'roi_heads.box_predictor.2.bbox_pred'] 31 | INPUT: 32 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 33 | MAX_SIZE_TRAIN: 1333 34 | MASK_FORMAT: "bitmask" 35 | FORMAT: "RGB" 36 | TEST: 37 | PRECISE_BN: 38 | ENABLED: True 39 | EVAL_PERIOD: 5000 40 | OUTPUT_DIR: "output/100perc" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_10perc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | MASK_ON: True 7 | BACKBONE: 8 | FREEZE_AT: 0 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | STRIDE_IN_1X1: False 13 | FPN: 14 | NORM: "SyncBN" 15 | ROI_BOX_HEAD: 16 | CLS_AGNOSTIC_BBOX_REG: True 17 | ROI_HEADS: 18 | NAME: CustomCascadeROIHeads 19 | RPN: 20 | POST_NMS_TOPK_TRAIN: 2000 21 | DATASETS: 22 | TRAIN: ("coco_semi_10perc",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.04 27 | STEPS: (6000, 8000) 28 | MAX_ITER: 9000 29 | BASE_LR_MULTIPLIER: 4 30 | BASE_LR_MULTIPLIER_NAMES: ['roi_heads.mask_head.predictor', 'roi_heads.box_predictor.0.cls_score', 'roi_heads.box_predictor.0.bbox_pred', 'roi_heads.box_predictor.1.cls_score', 'roi_heads.box_predictor.1.bbox_pred', 'roi_heads.box_predictor.2.cls_score', 'roi_heads.box_predictor.2.bbox_pred'] 31 | INPUT: 32 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 33 | MAX_SIZE_TRAIN: 1333 34 | MASK_FORMAT: "bitmask" 35 | FORMAT: "RGB" 36 | TEST: 37 | PRECISE_BN: 
38 | ENABLED: True 39 | EVAL_PERIOD: 5000 40 | OUTPUT_DIR: "output/10perc" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_1perc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | MASK_ON: True 7 | BACKBONE: 8 | FREEZE_AT: 0 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | STRIDE_IN_1X1: False 13 | FPN: 14 | NORM: "SyncBN" 15 | ROI_BOX_HEAD: 16 | CLS_AGNOSTIC_BBOX_REG: True 17 | ROI_HEADS: 18 | NAME: CustomCascadeROIHeads 19 | RPN: 20 | POST_NMS_TOPK_TRAIN: 2000 21 | DATASETS: 22 | TRAIN: ("coco_semi_1perc",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.04 27 | STEPS: (2400, 3200) 28 | MAX_ITER: 3600 29 | WARMUP_FACTOR: 0.001 30 | WARMUP_ITERS: 1000 31 | BASE_LR_MULTIPLIER: 4 32 | BASE_LR_MULTIPLIER_NAMES: ['roi_heads.mask_head.predictor', 'roi_heads.box_predictor.0.cls_score', 'roi_heads.box_predictor.0.bbox_pred', 'roi_heads.box_predictor.1.cls_score', 'roi_heads.box_predictor.1.bbox_pred', 'roi_heads.box_predictor.2.cls_score', 'roi_heads.box_predictor.2.bbox_pred'] 33 | INPUT: 34 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 35 | MAX_SIZE_TRAIN: 1333 36 | MASK_FORMAT: "bitmask" 37 | FORMAT: "RGB" 38 | TEST: 39 | PRECISE_BN: 40 | ENABLED: True 41 | EVAL_PERIOD: 5000 42 | OUTPUT_DIR: "output/1perc" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_20perc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | MASK_ON: True 7 | BACKBONE: 8 | FREEZE_AT: 0 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | STRIDE_IN_1X1: False 13 | FPN: 14 | NORM: "SyncBN" 15 | ROI_BOX_HEAD: 16 | CLS_AGNOSTIC_BBOX_REG: True 17 | ROI_HEADS: 18 | NAME: CustomCascadeROIHeads 19 | RPN: 20 | POST_NMS_TOPK_TRAIN: 2000 21 | DATASETS: 22 | TRAIN: ("coco_semi_20perc",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.04 27 | STEPS: (12000, 16000) 28 | MAX_ITER: 18000 29 | BASE_LR_MULTIPLIER: 4 30 | BASE_LR_MULTIPLIER_NAMES: ['roi_heads.mask_head.predictor', 'roi_heads.box_predictor.0.cls_score', 'roi_heads.box_predictor.0.bbox_pred', 'roi_heads.box_predictor.1.cls_score', 'roi_heads.box_predictor.1.bbox_pred', 'roi_heads.box_predictor.2.cls_score', 'roi_heads.box_predictor.2.bbox_pred'] 31 | INPUT: 32 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 33 | MAX_SIZE_TRAIN: 1333 34 | MASK_FORMAT: "bitmask" 35 | FORMAT: "RGB" 36 | TEST: 37 | PRECISE_BN: 38 | ENABLED: True 39 | EVAL_PERIOD: 5000 40 | OUTPUT_DIR: "output/20perc" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_2perc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: 
"http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | MASK_ON: True 7 | BACKBONE: 8 | FREEZE_AT: 0 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | STRIDE_IN_1X1: False 13 | FPN: 14 | NORM: "SyncBN" 15 | ROI_BOX_HEAD: 16 | CLS_AGNOSTIC_BBOX_REG: True 17 | ROI_HEADS: 18 | NAME: CustomCascadeROIHeads 19 | RPN: 20 | POST_NMS_TOPK_TRAIN: 2000 21 | DATASETS: 22 | TRAIN: ("coco_semi_2perc",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.04 27 | STEPS: (2400, 3200) 28 | MAX_ITER: 3600 29 | WARMUP_FACTOR: 0.001 30 | WARMUP_ITERS: 1000 31 | BASE_LR_MULTIPLIER: 4 32 | BASE_LR_MULTIPLIER_NAMES: ['roi_heads.mask_head.predictor', 'roi_heads.box_predictor.0.cls_score', 'roi_heads.box_predictor.0.bbox_pred', 'roi_heads.box_predictor.1.cls_score', 'roi_heads.box_predictor.1.bbox_pred', 'roi_heads.box_predictor.2.cls_score', 'roi_heads.box_predictor.2.bbox_pred'] 33 | INPUT: 34 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 35 | MAX_SIZE_TRAIN: 1333 36 | MASK_FORMAT: "bitmask" 37 | FORMAT: "RGB" 38 | TEST: 39 | PRECISE_BN: 40 | ENABLED: True 41 | EVAL_PERIOD: 5000 42 | OUTPUT_DIR: "output/2perc" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_30perc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | MASK_ON: True 7 | BACKBONE: 8 | FREEZE_AT: 0 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | STRIDE_IN_1X1: False 13 | FPN: 14 | NORM: "SyncBN" 15 | ROI_BOX_HEAD: 16 | CLS_AGNOSTIC_BBOX_REG: True 17 | ROI_HEADS: 18 | NAME: CustomCascadeROIHeads 19 | RPN: 20 | POST_NMS_TOPK_TRAIN: 2000 21 | DATASETS: 22 | TRAIN: ("coco_semi_30perc",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.04 27 | STEPS: (18000, 24000) 28 | MAX_ITER: 27000 29 | BASE_LR_MULTIPLIER: 4 30 | BASE_LR_MULTIPLIER_NAMES: ['roi_heads.mask_head.predictor', 'roi_heads.box_predictor.0.cls_score', 'roi_heads.box_predictor.0.bbox_pred', 'roi_heads.box_predictor.1.cls_score', 'roi_heads.box_predictor.1.bbox_pred', 'roi_heads.box_predictor.2.cls_score', 'roi_heads.box_predictor.2.bbox_pred'] 31 | INPUT: 32 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 33 | MAX_SIZE_TRAIN: 1333 34 | MASK_FORMAT: "bitmask" 35 | FORMAT: "RGB" 36 | TEST: 37 | PRECISE_BN: 38 | ENABLED: True 39 | EVAL_PERIOD: 5000 40 | OUTPUT_DIR: "output/30perc" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_40perc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | MASK_ON: True 7 | BACKBONE: 8 | FREEZE_AT: 0 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | STRIDE_IN_1X1: False 13 | FPN: 14 | NORM: "SyncBN" 15 | ROI_BOX_HEAD: 16 | CLS_AGNOSTIC_BBOX_REG: True 17 | ROI_HEADS: 18 | NAME: CustomCascadeROIHeads 19 | RPN: 20 | POST_NMS_TOPK_TRAIN: 2000 21 | DATASETS: 22 | TRAIN: ("coco_semi_40perc",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 
26 | BASE_LR: 0.04 27 | STEPS: (24000, 32000) 28 | MAX_ITER: 36000 29 | BASE_LR_MULTIPLIER: 4 30 | BASE_LR_MULTIPLIER_NAMES: ['roi_heads.mask_head.predictor', 'roi_heads.box_predictor.0.cls_score', 'roi_heads.box_predictor.0.bbox_pred', 'roi_heads.box_predictor.1.cls_score', 'roi_heads.box_predictor.1.bbox_pred', 'roi_heads.box_predictor.2.cls_score', 'roi_heads.box_predictor.2.bbox_pred'] 31 | INPUT: 32 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 33 | MAX_SIZE_TRAIN: 1333 34 | MASK_FORMAT: "bitmask" 35 | FORMAT: "RGB" 36 | TEST: 37 | PRECISE_BN: 38 | ENABLED: True 39 | EVAL_PERIOD: 5000 40 | OUTPUT_DIR: "output/40perc" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_50perc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | MASK_ON: True 7 | BACKBONE: 8 | FREEZE_AT: 0 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | STRIDE_IN_1X1: False 13 | FPN: 14 | NORM: "SyncBN" 15 | ROI_BOX_HEAD: 16 | CLS_AGNOSTIC_BBOX_REG: True 17 | ROI_HEADS: 18 | NAME: CustomCascadeROIHeads 19 | RPN: 20 | POST_NMS_TOPK_TRAIN: 2000 21 | DATASETS: 22 | TRAIN: ("coco_semi_50perc",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | STEPS: (30000, 40000) 28 | MAX_ITER: 45000 29 | BASE_LR_MULTIPLIER: 2 30 | BASE_LR_MULTIPLIER_NAMES: ['roi_heads.mask_head.predictor', 'roi_heads.box_predictor.0.cls_score', 'roi_heads.box_predictor.0.bbox_pred', 'roi_heads.box_predictor.1.cls_score', 'roi_heads.box_predictor.1.bbox_pred', 'roi_heads.box_predictor.2.cls_score', 'roi_heads.box_predictor.2.bbox_pred'] 31 | INPUT: 32 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 33 | MAX_SIZE_TRAIN: 1333 34 | MASK_FORMAT: "bitmask" 35 | FORMAT: "RGB" 36 | TEST: 37 | PRECISE_BN: 38 | ENABLED: True 39 | EVAL_PERIOD: 5000 40 | OUTPUT_DIR: "output/50perc" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_5perc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | MASK_ON: True 7 | BACKBONE: 8 | FREEZE_AT: 0 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | STRIDE_IN_1X1: False 13 | FPN: 14 | NORM: "SyncBN" 15 | ROI_BOX_HEAD: 16 | CLS_AGNOSTIC_BBOX_REG: True 17 | ROI_HEADS: 18 | NAME: CustomCascadeROIHeads 19 | RPN: 20 | POST_NMS_TOPK_TRAIN: 2000 21 | DATASETS: 22 | TRAIN: ("coco_semi_5perc",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.04 27 | STEPS: (3000, 4000) 28 | MAX_ITER: 4500 29 | WARMUP_FACTOR: 0.001 30 | WARMUP_ITERS: 1000 31 | BASE_LR_MULTIPLIER: 4 32 | BASE_LR_MULTIPLIER_NAMES: ['roi_heads.mask_head.predictor', 'roi_heads.box_predictor.0.cls_score', 'roi_heads.box_predictor.0.bbox_pred', 'roi_heads.box_predictor.1.cls_score', 'roi_heads.box_predictor.1.bbox_pred', 'roi_heads.box_predictor.2.cls_score', 'roi_heads.box_predictor.2.bbox_pred'] 33 | INPUT: 34 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 35 | MAX_SIZE_TRAIN: 1333 36 | 
MASK_FORMAT: "bitmask" 37 | FORMAT: "RGB" 38 | TEST: 39 | PRECISE_BN: 40 | ENABLED: True 41 | EVAL_PERIOD: 5000 42 | OUTPUT_DIR: "output/5perc" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_60perc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | MASK_ON: True 7 | BACKBONE: 8 | FREEZE_AT: 0 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | STRIDE_IN_1X1: False 13 | FPN: 14 | NORM: "SyncBN" 15 | ROI_BOX_HEAD: 16 | CLS_AGNOSTIC_BBOX_REG: True 17 | ROI_HEADS: 18 | NAME: CustomCascadeROIHeads 19 | RPN: 20 | POST_NMS_TOPK_TRAIN: 2000 21 | DATASETS: 22 | TRAIN: ("coco_semi_60perc",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | STEPS: (36000, 48000) 28 | MAX_ITER: 54000 29 | BASE_LR_MULTIPLIER: 2 30 | BASE_LR_MULTIPLIER_NAMES: ['roi_heads.mask_head.predictor', 'roi_heads.box_predictor.0.cls_score', 'roi_heads.box_predictor.0.bbox_pred', 'roi_heads.box_predictor.1.cls_score', 'roi_heads.box_predictor.1.bbox_pred', 'roi_heads.box_predictor.2.cls_score', 'roi_heads.box_predictor.2.bbox_pred'] 31 | INPUT: 32 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 33 | MAX_SIZE_TRAIN: 1333 34 | MASK_FORMAT: "bitmask" 35 | FORMAT: "RGB" 36 | TEST: 37 | PRECISE_BN: 38 | ENABLED: True 39 | EVAL_PERIOD: 5000 40 | OUTPUT_DIR: "output/60perc" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_80perc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | MASK_ON: True 7 | BACKBONE: 8 | FREEZE_AT: 0 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | STRIDE_IN_1X1: False 13 | FPN: 14 | NORM: "SyncBN" 15 | ROI_BOX_HEAD: 16 | CLS_AGNOSTIC_BBOX_REG: True 17 | ROI_HEADS: 18 | NAME: CustomCascadeROIHeads 19 | RPN: 20 | POST_NMS_TOPK_TRAIN: 2000 21 | DATASETS: 22 | TRAIN: ("coco_semi_80perc",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | STEPS: (48000, 64000) 28 | MAX_ITER: 72000 29 | BASE_LR_MULTIPLIER: 2 30 | BASE_LR_MULTIPLIER_NAMES: ['roi_heads.mask_head.predictor', 'roi_heads.box_predictor.0.cls_score', 'roi_heads.box_predictor.0.bbox_pred', 'roi_heads.box_predictor.1.cls_score', 'roi_heads.box_predictor.1.bbox_pred', 'roi_heads.box_predictor.2.cls_score', 'roi_heads.box_predictor.2.bbox_pred'] 31 | INPUT: 32 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 33 | MAX_SIZE_TRAIN: 1333 34 | MASK_FORMAT: "bitmask" 35 | FORMAT: "RGB" 36 | TEST: 37 | PRECISE_BN: 38 | ENABLED: True 39 | EVAL_PERIOD: 5000 40 | OUTPUT_DIR: "output/80perc" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/CutLER-ImageNet/cascade_mask_rcnn_R_50_FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | DATALOADER: 3 | COPY_PASTE: True 4 | COPY_PASTE_RATE: 1.0 5 | VISUALIZE_COPY_PASTE: False 6 | 
COPY_PASTE_RANDOM_NUM: True 7 | COPY_PASTE_MIN_RATIO: 0.3 8 | COPY_PASTE_MAX_RATIO: 1.0 9 | NUM_WORKERS: 0 10 | MODEL: 11 | PIXEL_MEAN: [123.675, 116.280, 103.530] 12 | PIXEL_STD: [58.395, 57.120, 57.375] 13 | WEIGHTS: 'http://dl.fbaipublicfiles.com/cutler/checkpoints/dino_RN50_pretrain_d2_format.pkl' 14 | MASK_ON: True 15 | BACKBONE: 16 | FREEZE_AT: 0 17 | RESNETS: 18 | DEPTH: 50 19 | NORM: "SyncBN" 20 | STRIDE_IN_1X1: False 21 | FPN: 22 | NORM: "SyncBN" 23 | ROI_BOX_HEAD: 24 | CLS_AGNOSTIC_BBOX_REG: True 25 | ROI_HEADS: 26 | NAME: CustomCascadeROIHeads 27 | NUM_CLASSES: 1 28 | SCORE_THRESH_TEST: 0.0 29 | POSITIVE_FRACTION: 0.25 30 | USE_DROPLOSS: True 31 | DROPLOSS_IOU_THRESH: 0.01 32 | RPN: 33 | POST_NMS_TOPK_TRAIN: 4000 34 | NMS_THRESH: 0.65 35 | DATASETS: 36 | TRAIN: ("imagenet_train",) 37 | SOLVER: 38 | IMS_PER_BATCH: 16 39 | BASE_LR: 0.01 40 | WEIGHT_DECAY: 0.00005 41 | STEPS: (80000,) 42 | MAX_ITER: 160000 43 | GAMMA: 0.02 44 | CLIP_GRADIENTS: 45 | CLIP_TYPE: norm 46 | CLIP_VALUE: 1.0 47 | ENABLED: true 48 | NORM_TYPE: 2.0 49 | AMP: 50 | ENABLED: True 51 | INPUT: 52 | MIN_SIZE_TRAIN: (240, 320, 480, 640, 672, 704, 736, 768, 800, 1024) 53 | MAX_SIZE_TRAIN: 1333 54 | MASK_FORMAT: "bitmask" 55 | FORMAT: "RGB" 56 | TEST: 57 | PRECISE_BN: 58 | ENABLED: True 59 | NUM_ITER: 200 60 | DETECTIONS_PER_IMAGE: 100 61 | OUTPUT_DIR: "output/" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/CutLER-ImageNet/cascade_mask_rcnn_R_50_FPN_demo.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | DATALOADER: 3 | COPY_PASTE: True 4 | COPY_PASTE_RATE: 1.0 5 | VISUALIZE_COPY_PASTE: False 6 | COPY_PASTE_RANDOM_NUM: True 7 | COPY_PASTE_MIN_RATIO: 0.3 8 | COPY_PASTE_MAX_RATIO: 1.0 9 | NUM_WORKERS: 0 10 | MODEL: 11 | PIXEL_MEAN: [123.675, 116.280, 103.530] 12 | PIXEL_STD: [58.395, 57.120, 57.375] 13 | WEIGHTS: 'http://dl.fbaipublicfiles.com/cutler/checkpoints/dino_RN50_pretrain_d2_format.pkl' 14 | MASK_ON: True 15 | BACKBONE: 16 | FREEZE_AT: 0 17 | RESNETS: 18 | DEPTH: 50 19 | NORM: "SyncBN" 20 | STRIDE_IN_1X1: False 21 | FPN: 22 | NORM: "SyncBN" 23 | ROI_BOX_HEAD: 24 | CLS_AGNOSTIC_BBOX_REG: True 25 | ROI_HEADS: 26 | NAME: CustomCascadeROIHeads 27 | NUM_CLASSES: 1 28 | SCORE_THRESH_TEST: 0.0 29 | POSITIVE_FRACTION: 0.25 30 | USE_DROPLOSS: True 31 | DROPLOSS_IOU_THRESH: 0.01 32 | RPN: 33 | POST_NMS_TOPK_TRAIN: 4000 34 | NMS_THRESH: 0.65 35 | DATASETS: 36 | TRAIN: ("imagenet_train",) 37 | TEST: ("imagenet_train",) 38 | SOLVER: 39 | IMS_PER_BATCH: 16 40 | BASE_LR: 0.01 41 | WEIGHT_DECAY: 0.00005 42 | STEPS: (80000,) 43 | MAX_ITER: 160000 44 | GAMMA: 0.02 45 | CLIP_GRADIENTS: 46 | CLIP_TYPE: norm 47 | CLIP_VALUE: 1.0 48 | ENABLED: true 49 | NORM_TYPE: 2.0 50 | AMP: 51 | ENABLED: True 52 | INPUT: 53 | MIN_SIZE_TRAIN: (240, 320, 480, 640, 672, 704, 736, 768, 800, 1024) 54 | MAX_SIZE_TRAIN: 1333 55 | MASK_FORMAT: "bitmask" 56 | FORMAT: "RGB" 57 | TEST: 58 | PRECISE_BN: 59 | ENABLED: True 60 | NUM_ITER: 200 61 | DETECTIONS_PER_IMAGE: 100 62 | OUTPUT_DIR: "output/" 63 | -------------------------------------------------------------------------------- /cutler/model_zoo/configs/CutLER-ImageNet/cascade_mask_rcnn_R_50_FPN_self_train.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | DATALOADER: 3 | COPY_PASTE: True 4 | COPY_PASTE_RATE: 1.0 5 | VISUALIZE_COPY_PASTE: False 6 | COPY_PASTE_RANDOM_NUM: True 7 | 
COPY_PASTE_MIN_RATIO: 0.5 8 | COPY_PASTE_MAX_RATIO: 1.0 9 | NUM_WORKERS: 2 10 | MODEL: 11 | PIXEL_MEAN: [123.675, 116.280, 103.530] 12 | PIXEL_STD: [58.395, 57.120, 57.375] 13 | WEIGHTS: 'http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_r1.pth' # round 1 14 | # WEIGHTS: 'http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_r2.pth' # round 2 15 | MASK_ON: True 16 | BACKBONE: 17 | FREEZE_AT: 0 18 | RESNETS: 19 | DEPTH: 50 20 | NORM: "SyncBN" 21 | STRIDE_IN_1X1: False 22 | FPN: 23 | NORM: "SyncBN" 24 | ROI_BOX_HEAD: 25 | CLS_AGNOSTIC_BBOX_REG: True 26 | ROI_HEADS: 27 | NAME: CustomCascadeROIHeads 28 | NUM_CLASSES: 1 29 | SCORE_THRESH_TEST: 0.0 30 | POSITIVE_FRACTION: 0.25 31 | USE_DROPLOSS: False 32 | DROPLOSS_IOU_THRESH: 0.01 33 | DATASETS: 34 | TRAIN: ("imagenet_train_r1",) # round 1 35 | # TRAIN: ("imagenet_train_r2",) # round 2 36 | SOLVER: 37 | IMS_PER_BATCH: 16 38 | BASE_LR: 0.005 39 | STEPS: (79999,) 40 | MAX_ITER: 80000 41 | GAMMA: 1.0 42 | CLIP_GRADIENTS: 43 | CLIP_TYPE: norm 44 | CLIP_VALUE: 1.0 45 | ENABLED: true 46 | NORM_TYPE: 2.0 47 | AMP: 48 | ENABLED: True 49 | INPUT: 50 | MIN_SIZE_TRAIN: (240, 320, 480, 640, 672, 704, 736, 768, 800, 1024) 51 | MAX_SIZE_TRAIN: 1333 52 | MASK_FORMAT: "bitmask" 53 | FORMAT: "RGB" 54 | TEST: 55 | PRECISE_BN: 56 | ENABLED: True 57 | NUM_ITER: 200 58 | DETECTIONS_PER_IMAGE: 100 59 | OUTPUT_DIR: "output/self-train-r1/" # round 1 60 | # OUTPUT_DIR: "output/self-train-r2/" # round 2 -------------------------------------------------------------------------------- /cutler/model_zoo/configs/CutLER-ImageNet/mask_rcnn_R_50_FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | DATALOADER: 3 | COPY_PASTE: True 4 | COPY_PASTE_RATE: 1.0 5 | VISUALIZE_COPY_PASTE: False 6 | COPY_PASTE_RANDOM_NUM: True 7 | COPY_PASTE_MIN_RATIO: 0.3 8 | COPY_PASTE_MAX_RATIO: 1.0 9 | MODEL: 10 | PIXEL_MEAN: [123.675, 116.280, 103.530] 11 | PIXEL_STD: [58.395, 57.120, 57.375] 12 | WEIGHTS: 'http://dl.fbaipublicfiles.com/cutler/checkpoints/dino_RN50_pretrain_d2_format.pkl' 13 | MASK_ON: True 14 | BACKBONE: 15 | FREEZE_AT: 0 16 | RESNETS: 17 | DEPTH: 50 18 | NORM: "SyncBN" 19 | STRIDE_IN_1X1: False 20 | FPN: 21 | NORM: "SyncBN" 22 | ROI_HEADS: 23 | NAME: "CustomStandardROIHeads" 24 | NUM_CLASSES: 1 25 | SCORE_THRESH_TEST: 0.0 26 | USE_DROPLOSS: True 27 | DROPLOSS_IOU_THRESH: 0.01 28 | RPN: 29 | POST_NMS_TOPK_TRAIN: 4000 30 | NMS_THRESH: 0.65 31 | DATASETS: 32 | TRAIN: ("imagenet_train",) 33 | SOLVER: 34 | IMS_PER_BATCH: 16 35 | BASE_LR: 0.01 36 | WEIGHT_DECAY: 0.00005 37 | STEPS: (80000,) 38 | MAX_ITER: 160000 39 | CLIP_GRADIENTS: 40 | CLIP_TYPE: norm 41 | CLIP_VALUE: 1.0 42 | ENABLED: true 43 | NORM_TYPE: 2.0 44 | INPUT: 45 | MIN_SIZE_TRAIN: (240, 320, 480, 640, 672, 704, 736, 768, 800, 1024) 46 | MAX_SIZE_TRAIN: 1333 47 | MASK_FORMAT: "bitmask" 48 | FORMAT: "RGB" 49 | TEST: 50 | PRECISE_BN: 51 | ENABLED: True 52 | OUTPUT_DIR: "output/" -------------------------------------------------------------------------------- /cutler/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 
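# Importing these submodules also triggers their registry decorators, so configs can
# refer to CutLER's heads by name (e.g. ROI_HEADS.NAME: CustomCascadeROIHeads).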
2 | 3 | from .roi_heads import ( 4 | ROI_HEADS_REGISTRY, 5 | ROIHeads, 6 | CustomStandardROIHeads, 7 | FastRCNNOutputLayers, 8 | build_roi_heads, 9 | ) 10 | from .roi_heads.custom_cascade_rcnn import CustomCascadeROIHeads 11 | from .roi_heads.fast_rcnn import FastRCNNOutputLayers 12 | from .meta_arch.rcnn import GeneralizedRCNN, ProposalNetwork 13 | from .meta_arch.build import build_model 14 | 15 | _EXCLUDE = {"ShapeSpec"} 16 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] -------------------------------------------------------------------------------- /cutler/modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Meta Platforms, Inc. and affiliates. 3 | # Modified by XuDong Wang from https://github.com/facebookresearch/detectron2/blob/main/detectron2/modeling/meta_arch/__init__.py 4 | 5 | from .build import META_ARCH_REGISTRY, build_model # isort:skip 6 | 7 | __all__ = list(globals().keys()) 8 | -------------------------------------------------------------------------------- /cutler/modeling/meta_arch/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # Modified by XuDong Wang from https://github.com/facebookresearch/detectron2/blob/main/detectron2/modeling/meta_arch/build.py 3 | 4 | import torch 5 | 6 | from detectron2.utils.logger import _log_api_usage 7 | from detectron2.utils.registry import Registry 8 | 9 | META_ARCH_REGISTRY = Registry("META_ARCH") # noqa F401 isort:skip 10 | META_ARCH_REGISTRY.__doc__ = """ 11 | Registry for meta-architectures, i.e. the whole model. 12 | 13 | The registered object will be called with `obj(cfg)` 14 | and expected to return a `nn.Module` object. 15 | """ 16 | 17 | 18 | def build_model(cfg): 19 | """ 20 | Build the whole model architecture, defined by ``cfg.MODEL.META_ARCHITECTURE``. 21 | Note that it does not load any weights from ``cfg``. 22 | """ 23 | meta_arch = cfg.MODEL.META_ARCHITECTURE 24 | model = META_ARCH_REGISTRY.get(meta_arch)(cfg) 25 | model.to(torch.device(cfg.MODEL.DEVICE)) 26 | _log_api_usage("modeling.meta_arch." + meta_arch) 27 | return model 28 | -------------------------------------------------------------------------------- /cutler/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | from .roi_heads import ( 4 | ROI_HEADS_REGISTRY, 5 | ROIHeads, 6 | Res5ROIHeads, 7 | CustomStandardROIHeads, 8 | build_roi_heads, 9 | select_foreground_proposals, 10 | ) 11 | from .custom_cascade_rcnn import CustomCascadeROIHeads 12 | from .fast_rcnn import FastRCNNOutputLayers 13 | 14 | from . import custom_cascade_rcnn # isort:skip 15 | 16 | __all__ = list(globals().keys()) 17 | -------------------------------------------------------------------------------- /cutler/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 
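# build.py wraps detectron2's optimizer/scheduler construction; presumably this is where
# SOLVER.BASE_LR_MULTIPLIER and SOLVER.BASE_LR_MULTIPLIER_NAMES (added in
# cutler/config/cutler_config.py) are applied to the matching parameter groups.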
2 | 3 | from .build import build_lr_scheduler, build_optimizer, get_default_optimizer_params 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | -------------------------------------------------------------------------------- /cutler/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | from .boxes import pairwise_iou_max_scores 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | 7 | 8 | from detectron2.utils.env import fixup_module_metadata 9 | 10 | fixup_module_metadata(__name__, globals(), __all__) 11 | del fixup_module_metadata 12 | -------------------------------------------------------------------------------- /cutler/structures/boxes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # Modified by XuDong Wang from https://github.com/facebookresearch/detectron2/blob/main/detectron2/structures/boxes.py 3 | 4 | import torch 5 | 6 | def pairwise_iou_max_scores(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor: 7 | """ 8 | Given two lists of boxes of size N and M, compute the IoU (intersection 9 | over union) between all N x M pairs, then return, for each box in boxes1, 10 | the maximum IoU over boxes2. The box order must be (xmin, ymin, xmax, ymax). 11 | 12 | Args: 13 | boxes1, boxes2 (Tensor): box tensors of shape [N, 4] and [M, 4], respectively. 14 | 15 | Returns: 16 | Tensor: per-box maximum IoU, sized [N]. 17 | """ 18 | area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) # [N] 19 | area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) # [M] 20 | 21 | width_height = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) - torch.max( 22 | boxes1[:, None, :2], boxes2[:, :2] 23 | ) # [N,M,2] 24 | 25 | width_height.clamp_(min=0) # [N,M,2] 26 | inter = width_height.prod(dim=2) # [N,M] 27 | 28 | # handle empty boxes 29 | iou = torch.where( 30 | inter > 0, 31 | inter / (area1[:, None] + area2 - inter), 32 | torch.zeros(1, dtype=inter.dtype, device=inter.device), 33 | ) 34 | iou_max, _ = torch.max(iou, dim=1) 35 | return iou_max -------------------------------------------------------------------------------- /cutler/tools/eval.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # link to the dataset folder, model weights and the config file. 
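# DETECTRON2_DATASETS should point at the directory containing the prepared
# benchmark folders (see datasets/README.md for the expected layout).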
4 | export DETECTRON2_DATASETS=/path/to/DETECTRON2_DATASETS/ 5 | model_weights="http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | config_file="model_zoo/configs/CutLER-ImageNet/cascade_mask_rcnn_R_50_FPN.yaml" 7 | num_gpus=2 8 | 9 | echo "========== start evaluating the model on all 11 datasets ==========" 10 | 11 | test_dataset='cls_agnostic_clipart' 12 | echo "========== evaluating ${test_dataset} ==========" 13 | python train_net.py --num-gpus ${num_gpus} \ 14 | --config-file ${config_file} \ 15 | --test-dataset ${test_dataset} --no-segm \ 16 | --eval-only MODEL.WEIGHTS ${model_weights} 17 | 18 | test_dataset='cls_agnostic_watercolor' 19 | echo "========== evaluating ${test_dataset} ==========" 20 | python train_net.py --num-gpus ${num_gpus} \ 21 | --config-file ${config_file} \ 22 | --test-dataset ${test_dataset} --no-segm \ 23 | --eval-only MODEL.WEIGHTS ${model_weights} 24 | 25 | test_dataset='cls_agnostic_comic' 26 | echo "========== evaluating ${test_dataset} ==========" 27 | python train_net.py --num-gpus ${num_gpus} \ 28 | --config-file ${config_file} \ 29 | --test-dataset ${test_dataset} --no-segm \ 30 | --eval-only MODEL.WEIGHTS ${model_weights} 31 | 32 | test_dataset='cls_agnostic_voc' 33 | echo "========== evaluating ${test_dataset} ==========" 34 | python train_net.py --num-gpus ${num_gpus} \ 35 | --config-file ${config_file} \ 36 | --test-dataset ${test_dataset} --no-segm \ 37 | --eval-only MODEL.WEIGHTS ${model_weights} 38 | 39 | test_dataset='cls_agnostic_objects365' 40 | echo "========== evaluating ${test_dataset} ==========" 41 | python train_net.py --num-gpus ${num_gpus} \ 42 | --config-file ${config_file} \ 43 | --test-dataset ${test_dataset} --no-segm \ 44 | --eval-only MODEL.WEIGHTS ${model_weights} 45 | 46 | test_dataset='cls_agnostic_openimages' 47 | echo "========== evaluating ${test_dataset} ==========" 48 | python train_net.py --num-gpus ${num_gpus} \ 49 | --config-file ${config_file} \ 50 | --test-dataset ${test_dataset} --no-segm \ 51 | --eval-only MODEL.WEIGHTS ${model_weights} 52 | 53 | test_dataset='cls_agnostic_kitti' 54 | echo "========== evaluating ${test_dataset} ==========" 55 | python train_net.py --num-gpus ${num_gpus} \ 56 | --config-file ${config_file} \ 57 | --test-dataset ${test_dataset} --no-segm \ 58 | --eval-only MODEL.WEIGHTS ${model_weights} 59 | 60 | test_dataset='cls_agnostic_coco' 61 | echo "========== evaluating ${test_dataset} ==========" 62 | python train_net.py --num-gpus ${num_gpus} \ 63 | --config-file ${config_file} \ 64 | --test-dataset ${test_dataset} \ 65 | --eval-only MODEL.WEIGHTS ${model_weights} 66 | 67 | test_dataset='cls_agnostic_coco20k' 68 | echo "========== evaluating ${test_dataset} ==========" 69 | python train_net.py --num-gpus ${num_gpus} \ 70 | --config-file ${config_file} \ 71 | --test-dataset ${test_dataset} \ 72 | --eval-only MODEL.WEIGHTS ${model_weights} 73 | 74 | test_dataset='cls_agnostic_lvis' 75 | echo "========== evaluating ${test_dataset} ==========" 76 | # LVIS should set TEST.DETECTIONS_PER_IMAGE=300 77 | python train_net.py --num-gpus ${num_gpus} \ 78 | --config-file ${config_file} \ 79 | --test-dataset ${test_dataset} \ 80 | --eval-only MODEL.WEIGHTS ${model_weights} TEST.DETECTIONS_PER_IMAGE 300 81 | 82 | test_dataset='cls_agnostic_uvo' 83 | echo "========== evaluating ${test_dataset} ==========" 84 | python train_net.py --num-gpus ${num_gpus} \ 85 | --config-file ${config_file} \ 86 | --test-dataset ${test_dataset} \ 87 | --eval-only MODEL.WEIGHTS ${model_weights} 88 | 
89 | echo "========== evaluation is completed ==========" -------------------------------------------------------------------------------- /cutler/tools/run_with_submitit.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | sbatch tools/train-1node.sh \ 3 | --config-file model_zoo/configs/CutLER-ImageNet/cascade_mask_rcnn_R_50_FPN.yaml \ 4 | OUTPUT_DIR /path/to/output -------------------------------------------------------------------------------- /cutler/tools/run_with_submitit_ssl.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | sbatch tools/train-1node.sh \ 3 | --config-file /private/home/xudongw/cutler-code-release/CutLER/cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_50perc.yaml -------------------------------------------------------------------------------- /cutler/tools/single-node_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) Meta Platforms, Inc. and affiliates. 3 | export DETECTRON2_DATASETS=/path/to/DETECTRON2_DATASETS/ 4 | MASTER_NODE=$(scontrol show hostname "$SLURM_NODELIST" | head -n1) 5 | DIST_URL="tcp://$MASTER_NODE:12399" 6 | SOCKET_NAME=$(ip r | grep default | awk '{print $5}') 7 | export GLOO_SOCKET_IFNAME=$SOCKET_NAME 8 | 9 | python -u train_net.py --num-gpus 8 --num-machines 1 --machine-rank "$SLURM_NODEID" --dist-url "$DIST_URL" "$@" -------------------------------------------------------------------------------- /cutler/tools/train-1node.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash
2 | # Copyright (c) Meta Platforms, Inc. and affiliates. 3 | #SBATCH -p devlab 4 | #SBATCH --nodes=1 5 | #SBATCH --gres=gpu:8 6 | #SBATCH --gpus-per-node=8 7 | #SBATCH --cpus-per-task=80 8 | #SBATCH --mem=512G 9 | #SBATCH --time 2000 10 | #SBATCH -o "submitit/slurm-%j.out" 11 | 12 | srun tools/single-node_run.sh "$@" -------------------------------------------------------------------------------- /docs/cutler-demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/docs/cutler-demo.jpg -------------------------------------------------------------------------------- /docs/demos_videocutler.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/docs/demos_videocutler.gif -------------------------------------------------------------------------------- /docs/maskcut-demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/docs/maskcut-demo.jpg -------------------------------------------------------------------------------- /docs/maskcut.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/docs/maskcut.gif -------------------------------------------------------------------------------- /docs/pipeline.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/docs/pipeline.jpg -------------------------------------------------------------------------------- /docs/teaser_img.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/docs/teaser_img.jpg -------------------------------------------------------------------------------- /maskcut/colormap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # copied from https://github.com/facebookresearch/detectron2/blob/main/detectron2/utils/colormap.py 3 | 4 | """ 5 | An awesome colormap for really neat visualizations. 6 | Copied from Detectron, and removed gray colors.
7 | """ 8 | 9 | import numpy as np 10 | import random 11 | 12 | __all__ = ["colormap", "random_color", "random_colors"] 13 | 14 | # fmt: off 15 | # RGB: 16 | _COLORS = np.array( 17 | [ 18 | 0.000, 0.447, 0.741, 19 | 0.850, 0.325, 0.098, 20 | 0.929, 0.694, 0.125, 21 | 0.494, 0.184, 0.556, 22 | 0.466, 0.674, 0.188, 23 | 0.301, 0.745, 0.933, 24 | 0.635, 0.078, 0.184, 25 | 0.300, 0.300, 0.300, 26 | 0.600, 0.600, 0.600, 27 | 1.000, 0.000, 0.000, 28 | 1.000, 0.500, 0.000, 29 | 0.749, 0.749, 0.000, 30 | 0.000, 1.000, 0.000, 31 | 0.000, 0.000, 1.000, 32 | 0.667, 0.000, 1.000, 33 | 0.333, 0.333, 0.000, 34 | 0.333, 0.667, 0.000, 35 | 0.333, 1.000, 0.000, 36 | 0.667, 0.333, 0.000, 37 | 0.667, 0.667, 0.000, 38 | 0.667, 1.000, 0.000, 39 | 1.000, 0.333, 0.000, 40 | 1.000, 0.667, 0.000, 41 | 1.000, 1.000, 0.000, 42 | 0.000, 0.333, 0.500, 43 | 0.000, 0.667, 0.500, 44 | 0.000, 1.000, 0.500, 45 | 0.333, 0.000, 0.500, 46 | 0.333, 0.333, 0.500, 47 | 0.333, 0.667, 0.500, 48 | 0.333, 1.000, 0.500, 49 | 0.667, 0.000, 0.500, 50 | 0.667, 0.333, 0.500, 51 | 0.667, 0.667, 0.500, 52 | 0.667, 1.000, 0.500, 53 | 1.000, 0.000, 0.500, 54 | 1.000, 0.333, 0.500, 55 | 1.000, 0.667, 0.500, 56 | 1.000, 1.000, 0.500, 57 | 0.000, 0.333, 1.000, 58 | 0.000, 0.667, 1.000, 59 | 0.000, 1.000, 1.000, 60 | 0.333, 0.000, 1.000, 61 | 0.333, 0.333, 1.000, 62 | 0.333, 0.667, 1.000, 63 | 0.333, 1.000, 1.000, 64 | 0.667, 0.000, 1.000, 65 | 0.667, 0.333, 1.000, 66 | 0.667, 0.667, 1.000, 67 | 0.667, 1.000, 1.000, 68 | 1.000, 0.000, 1.000, 69 | 1.000, 0.333, 1.000, 70 | 1.000, 0.667, 1.000, 71 | 0.333, 0.000, 0.000, 72 | 0.500, 0.000, 0.000, 73 | 0.667, 0.000, 0.000, 74 | 0.833, 0.000, 0.000, 75 | 1.000, 0.000, 0.000, 76 | 0.000, 0.167, 0.000, 77 | 0.000, 0.333, 0.000, 78 | 0.000, 0.500, 0.000, 79 | 0.000, 0.667, 0.000, 80 | 0.000, 0.833, 0.000, 81 | 0.000, 1.000, 0.000, 82 | 0.000, 0.000, 0.167, 83 | 0.000, 0.000, 0.333, 84 | 0.000, 0.000, 0.500, 85 | 0.000, 0.000, 0.667, 86 | 0.000, 0.000, 0.833, 87 | 0.000, 0.000, 1.000, 88 | 0.000, 0.000, 0.000, 89 | 0.143, 0.143, 0.143, 90 | 0.857, 0.857, 0.857, 91 | 1.000, 1.000, 1.000 92 | ] 93 | ).astype(np.float32).reshape(-1, 3) 94 | # fmt: on 95 | 96 | 97 | def colormap(rgb=False, maximum=255): 98 | """ 99 | Args: 100 | rgb (bool): whether to return RGB colors or BGR colors. 101 | maximum (int): either 255 or 1 102 | Returns: 103 | ndarray: a float32 array of Nx3 colors, in range [0, 255] or [0, 1] 104 | """ 105 | assert maximum in [255, 1], maximum 106 | c = _COLORS * maximum 107 | if not rgb: 108 | c = c[:, ::-1] 109 | return c 110 | 111 | 112 | def random_color(rgb=False, maximum=255): 113 | """ 114 | Args: 115 | rgb (bool): whether to return RGB colors or BGR colors. 116 | maximum (int): either 255 or 1 117 | Returns: 118 | ndarray: a vector of 3 numbers 119 | """ 120 | idx = np.random.randint(0, len(_COLORS)) 121 | ret = _COLORS[idx] * maximum 122 | if not rgb: 123 | ret = ret[::-1] 124 | return ret 125 | 126 | 127 | def random_colors(N, rgb=False, maximum=255): 128 | """ 129 | Args: 130 | N (int): number of unique colors needed 131 | rgb (bool): whether to return RGB colors or BGR colors. 
132 | maximum (int): either 255 or 1 133 | Returns: 134 | ndarray: a list of random_color 135 | """ 136 | indices = random.sample(range(len(_COLORS)), N) 137 | ret = [_COLORS[i] * maximum for i in indices] 138 | if not rgb: 139 | ret = [x[::-1] for x in ret] 140 | return ret 141 | 142 | 143 | if __name__ == "__main__": 144 | import cv2 145 | 146 | size = 100 147 | H, W = 10, 10 148 | canvas = np.random.rand(H * size, W * size, 3).astype("float32") 149 | for h in range(H): 150 | for w in range(W): 151 | idx = h * W + w 152 | if idx >= len(_COLORS): 153 | break 154 | canvas[h * size : (h + 1) * size, w * size : (w + 1) * size] = _COLORS[idx] 155 | cv2.imshow("a", canvas) 156 | cv2.waitKey(0) -------------------------------------------------------------------------------- /maskcut/crf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # modified by Xudong Wang based on https://github.com/lucasb-eyer/pydensecrf/blob/master/pydensecrf/tests/test_dcrf.py and third_party/TokenCut 3 | 4 | import numpy as np 5 | import pydensecrf.densecrf as dcrf 6 | import pydensecrf.utils as utils 7 | import torch 8 | import torch.nn.functional as F 9 | import torchvision.transforms.functional as VF 10 | 11 | MAX_ITER = 10 12 | POS_W = 7 13 | POS_XY_STD = 3 14 | Bi_W = 10 15 | Bi_XY_STD = 50 16 | Bi_RGB_STD = 5 17 | 18 | def densecrf(image, mask): 19 | h, w = mask.shape 20 | mask = mask.reshape(1, h, w) 21 | fg = mask.astype(float) 22 | bg = 1 - fg 23 | output_logits = torch.from_numpy(np.concatenate((bg,fg), axis=0)) 24 | 25 | H, W = image.shape[:2] 26 | image = np.ascontiguousarray(image) 27 | 28 | output_logits = F.interpolate(output_logits.unsqueeze(0), size=(H, W), mode="bilinear").squeeze() 29 | output_probs = F.softmax(output_logits, dim=0).cpu().numpy() 30 | 31 | c = output_probs.shape[0] 32 | h = output_probs.shape[1] 33 | w = output_probs.shape[2] 34 | 35 | U = utils.unary_from_softmax(output_probs) 36 | U = np.ascontiguousarray(U) 37 | 38 | d = dcrf.DenseCRF2D(w, h, c) 39 | d.setUnaryEnergy(U) 40 | d.addPairwiseGaussian(sxy=POS_XY_STD, compat=POS_W) 41 | d.addPairwiseBilateral(sxy=Bi_XY_STD, srgb=Bi_RGB_STD, rgbim=image, compat=Bi_W) 42 | 43 | Q = d.inference(MAX_ITER) 44 | Q = np.array(Q).reshape((c, h, w)) 45 | MAP = np.argmax(Q, axis=0).reshape((h,w)).astype(np.float32) 46 | return MAP 47 | -------------------------------------------------------------------------------- /maskcut/imgs/demo1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/maskcut/imgs/demo1.jpg -------------------------------------------------------------------------------- /maskcut/imgs/demo2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/maskcut/imgs/demo2.jpg -------------------------------------------------------------------------------- /maskcut/imgs/demo3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/maskcut/imgs/demo3.jpg -------------------------------------------------------------------------------- /maskcut/imgs/demo4.jpg: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/maskcut/imgs/demo4.jpg -------------------------------------------------------------------------------- /maskcut/imgs/demo5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/maskcut/imgs/demo5.jpg -------------------------------------------------------------------------------- /maskcut/imgs/demo6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/maskcut/imgs/demo6.jpg -------------------------------------------------------------------------------- /maskcut/imgs/demo7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/maskcut/imgs/demo7.jpg -------------------------------------------------------------------------------- /maskcut/imgs/demo8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/maskcut/imgs/demo8.jpg -------------------------------------------------------------------------------- /maskcut/merge_jsons.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # Merge all ImageNet annotation files into a single one. 3 | 4 | import os 5 | import json 6 | import argparse 7 | 8 | if __name__ == "__main__": 9 | # load model arguments 10 | parser = argparse.ArgumentParser(description='Merge json files') 11 | parser.add_argument('--base-dir', type=str, 12 | default='annotations/', 13 | help='Dir to the generated annotation files with MaskCut') 14 | parser.add_argument('--save-path', type=str, default="imagenet_train_fixsize480_tau0.15_N3.json", 15 | help='Path to save the merged annotation file') 16 | # following arguments should be consistent with maskcut.py or maskcut_with_submitit.py (if using submitit) 17 | parser.add_argument('--num-folder-per-job', type=int, default=1, 18 | help='Number of folders per json file') 19 | parser.add_argument('--fixed-size', type=int, default=480, 20 | help='rescale the input images to a fixed size') 21 | parser.add_argument('--tau', type=float, default=0.15, help='threshold used for producing binary graph') 22 | parser.add_argument('--N', type=int, default=3, help='the maximum number of pseudo-masks per image') 23 | 24 | args = parser.parse_args() 25 | 26 | base_name = 'imagenet_train_fixsize{}_tau{}_N{}'.format(args.fixed_size, args.tau, args.N) 27 | 28 | start_idx = 0 29 | every_k = args.num_folder_per_job 30 | missed_folders = [] 31 | tobe_merged_ann_dicts = [] 32 | 33 | # check if pseudo-masks for all 1000 ImageNet-1K folders are available.
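# Each MaskCut job wrote either a grouped "{base_name}_{start}_{start+num_folder_per_job}.json"
# or, as a fallback, a per-folder "{base_name}_{start}_{start+1}.json"; the loop below tries the
# grouped file first, then the single-folder file, and records folders that have neither.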
34 | while start_idx < 1000: 35 | end_idx = start_idx + every_k 36 | filename = "{}_{}_{}.json".format(base_name, start_idx, end_idx) 37 | tobe_merged = os.path.join(args.base_dir, filename) 38 | if not os.path.isfile(tobe_merged): 39 | end_idx = start_idx + 1 40 | tobe_merged_ = "{}_{}_{}.json".format(base_name, start_idx, end_idx) 41 | if not os.path.isfile(tobe_merged_): 42 | missed_folders.append(start_idx) 43 | start_idx += 1 44 | continue 45 | else: 46 | tobe_merged = tobe_merged_ 47 | start_idx += 1 48 | else: 49 | start_idx += every_k 50 | tobe_merged_ann_dict = json.load(open(tobe_merged)) 51 | tobe_merged_ann_dicts.append(tobe_merged_ann_dict) 52 | 53 | print("Warning: these folders are not found: ", missed_folders) 54 | 55 | # filter out repeated image info 56 | for idx, ann_dict in enumerate(tobe_merged_ann_dicts): 57 | images = [] 58 | images_ids = [] 59 | for image in ann_dict['images']: 60 | if image['id'] in images_ids: 61 | continue 62 | else: 63 | images.append(image) 64 | images_ids.append(image['id']) 65 | ann_dict['images'] = images 66 | 67 | # re-generate image_id and segment_id, and combine annotation info and image info 68 | # from all annotation files 69 | base_ann_dict = tobe_merged_ann_dicts[0] 70 | image_id = base_ann_dict['images'][-1]['id'] + 1 71 | segment_id = base_ann_dict['annotations'][-1]['id'] + 1 72 | segment_id_list = [ann['id'] for ann in base_ann_dict['annotations']] 73 | for tobe_merged_ann_dict in tobe_merged_ann_dicts[1:]: 74 | file_name_and_id = {} 75 | for i, image in enumerate(tobe_merged_ann_dict['images']): 76 | file_name_and_id[str(image['id'])] = image_id 77 | image['id'] = image_id 78 | base_ann_dict['images'].append(image) 79 | image_id = image_id + 1 80 | 81 | for i, annotation_info in enumerate(tobe_merged_ann_dict['annotations']): 82 | annotation_info["image_id"] = file_name_and_id[str(annotation_info["image_id"])] 83 | annotation_info["id"] = segment_id 84 | annotation_info["iscrowd"] = 0 85 | segment_id_list.append(segment_id) 86 | base_ann_dict['annotations'].append(annotation_info) 87 | segment_id = segment_id + 1 88 | 89 | segment_id = 1 90 | for ann in base_ann_dict['annotations']: 91 | ann["id"] = segment_id 92 | segment_id += 1 93 | 94 | # save the final json file. 
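# (the two id lists below are collected only for debugging / sanity checks;
# they are not written into the saved annotation file)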
95 | anns = [ann['id'] for ann in base_ann_dict['annotations']] 96 | anns_image_id = [ann['image_id'] for ann in base_ann_dict['annotations']] 97 | json.dump(base_ann_dict, open(args.save_path, 'w')) 98 | print("Done: {} images; {} anns.".format(len(base_ann_dict['images']), len(base_ann_dict['annotations']))) 99 | -------------------------------------------------------------------------------- /maskcut/predict.py: -------------------------------------------------------------------------------- 1 | """ 2 | download pretrained weights to ./weights 3 | wget https://dl.fbaipublicfiles.com/dino/dino_vitbase8_pretrain/dino_vitbase8_pretrain.pth 4 | wget https://dl.fbaipublicfiles.com/dino/dino_deitsmall8_300ep_pretrain/dino_deitsmall8_300ep_pretrain.pth 5 | """ 6 | 7 | import sys 8 | 9 | sys.path.append("maskcut") 10 | import numpy as np 11 | import PIL.Image as Image 12 | import torch 13 | from scipy import ndimage 14 | from colormap import random_color 15 | 16 | import dino 17 | from third_party.TokenCut.unsupervised_saliency_detection import metric 18 | from crf import densecrf 19 | from maskcut import maskcut 20 | 21 | from cog import BasePredictor, Input, Path 22 | 23 | 24 | class Predictor(BasePredictor): 25 | def setup(self): 26 | """Load the model into memory to make running multiple predictions efficient""" 27 | 28 | # DINO pre-trained model 29 | vit_features = "k" 30 | self.patch_size = 8 31 | # adapted dino.ViTFeat to load from local pretrained_path 32 | self.backbone_base = dino.ViTFeat( 33 | "weights/dino_vitbase8_pretrain.pth", 34 | 768, 35 | "base", 36 | vit_features, 37 | self.patch_size, 38 | ) 39 | 40 | self.backbone_small = dino.ViTFeat( 41 | "weights/dino_deitsmall8_300ep_pretrain.pth", 42 | 384, 43 | "small", 44 | vit_features, 45 | self.patch_size, 46 | ) 47 | self.backbone_base.eval() 48 | self.backbone_base.cuda() 49 | self.backbone_small.eval() 50 | self.backbone_small.cuda() 51 | 52 | def predict( 53 | self, 54 | image: Path = Input( 55 | description="Input image", 56 | ), 57 | model: str = Input( 58 | description="Choose the model architecture", 59 | default="base", 60 | choices=["small", "base"] 61 | ), 62 | n_pseudo_masks: int = Input( 63 | description="The maximum number of pseudo-masks per image", 64 | default=3, 65 | ), 66 | tau: float = Input( 67 | description="Threshold used for producing binary graph", 68 | default=0.15, 69 | ), 70 | ) -> Path: 71 | """Run a single prediction on the model""" 72 | 73 | backbone = self.backbone_base if model == "base" else self.backbone_small 74 | 75 | # MaskCut hyperparameters 76 | fixed_size = 480 77 | 78 | # get pseudo-masks with MaskCut 79 | bipartitions, _, I_new = maskcut( 80 | str(image), 81 | backbone, 82 | self.patch_size, 83 | tau, 84 | N=n_pseudo_masks, 85 | fixed_size=fixed_size, 86 | cpu=False, 87 | ) 88 | 89 | I = Image.open(str(image)).convert("RGB") 90 | width, height = I.size 91 | pseudo_mask_list = [] 92 | for idx, bipartition in enumerate(bipartitions): 93 | # post-process pseudo-masks with CRF 94 | pseudo_mask = densecrf(np.array(I_new), bipartition) 95 | pseudo_mask = ndimage.binary_fill_holes(pseudo_mask >= 0.5) 96 | 97 | # filter out the masks that have a very different pseudo-mask after the CRF 98 | mask1 = torch.from_numpy(bipartition).cuda() 99 | mask2 = torch.from_numpy(pseudo_mask).cuda() 100 | 101 | if metric.IoU(mask1, mask2) < 0.5: 102 | pseudo_mask = pseudo_mask * -1 103 | 104 | # construct binary pseudo-masks 105 | pseudo_mask[pseudo_mask < 0] = 0 106 | pseudo_mask =
Image.fromarray(np.uint8(pseudo_mask * 255)) 107 | pseudo_mask = np.asarray(pseudo_mask.resize((width, height))) 108 | 109 | pseudo_mask = pseudo_mask.astype(np.uint8) 110 | upper = np.max(pseudo_mask) 111 | lower = np.min(pseudo_mask) 112 | thresh = upper / 2.0 113 | pseudo_mask[pseudo_mask > thresh] = upper 114 | pseudo_mask[pseudo_mask <= thresh] = lower 115 | pseudo_mask_list.append(pseudo_mask) 116 | 117 | out = np.array(I) 118 | for pseudo_mask in pseudo_mask_list: 119 | 120 | out = vis_mask(out, pseudo_mask, random_color(rgb=True)) 121 | 122 | output_path = "/tmp/out.png" 123 | 124 | out.save(str(output_path)) 125 | 126 | return Path(output_path) 127 | 128 | 129 | def vis_mask(input, mask, mask_color): 130 | fg = mask > 0.5 131 | rgb = np.copy(input) 132 | rgb[fg] = (rgb[fg] * 0.3 + np.array(mask_color) * 0.7).astype(np.uint8) 133 | return Image.fromarray(rgb) 134 | -------------------------------------------------------------------------------- /maskcut/run_maskcut_with_submitit.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | python run_with_submitit_maskcut_array.py \ 3 | --ngpus 1 \ 4 | --nodes 1 \ 5 | --timeout 1200 \ 6 | --partition learnfair \ 7 | --vit-arch base \ 8 | --patch-size 8 \ 9 | --dataset-path /path/to/imagenet/ \ 10 | --tau 0.15 \ 11 | --out-dir /path/to/save/annotations/ \ 12 | --num-folder-per-job 2 \ 13 | --job-index 0 \ 14 | --fixed_size 480 \ 15 | --N 3 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | submitit 2 | # torch==1.8.1 3 | # torchvision==0.9.1 4 | faiss-gpu==1.7.2 5 | opencv-python==4.6.0.66 6 | scikit-image==0.19.2 7 | scikit-learn==1.1.1 8 | shapely==1.8.2 9 | timm==0.5.4 10 | pyyaml==6.0 11 | colored 12 | fvcore==0.1.5.post20220512 13 | gdown==4.5.4 -------------------------------------------------------------------------------- /videocutler/INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 3 | ### Requirements 4 | - Linux or macOS with Python ≥ 3.6 5 | - PyTorch ≥ 1.9 and [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation. 6 | Install them together at [pytorch.org](https://pytorch.org) to make sure of this. Note: please check that 7 | the PyTorch version matches the one required by Detectron2. 8 | - Detectron2: follow [Detectron2 installation instructions](https://detectron2.readthedocs.io/tutorials/install.html). 9 | - OpenCV is optional but needed by the demo and visualization 10 | - `pip install -r requirements.txt` 11 | 12 | ### Example conda environment setup 13 | 14 | ```bash 15 | conda create --name videocutler python=3.8 -y 16 | conda activate videocutler 17 | conda install pytorch==1.9.0 torchvision==0.10.0 cudatoolkit=11.1 -c pytorch -c nvidia 18 | pip install -U opencv-python 19 | 20 | # under your working directory 21 | git clone git@github.com:facebookresearch/detectron2.git 22 | cd detectron2 23 | pip install -e . 24 | pip install git+https://github.com/cocodataset/panopticapi.git 25 | pip install git+https://github.com/mcordts/cityscapesScripts.git 26 | ``` 27 | 28 | ### CUDA kernel for MSDeformAttn 29 | After preparing the required environment, run the following command to compile the CUDA kernel for MSDeformAttn: 30 | 31 | `CUDA_HOME` must be defined and point to the directory of the installed CUDA toolkit.
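For example, assuming the toolkit is installed in the default location for CUDA 11.1 (the path below is an assumption; adjust it for your system):
```bash
export CUDA_HOME=/usr/local/cuda-11.1
```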
32 | ```bash 33 | pip install -r videocutler/requirements.txt 34 | cd videocutler/mask2former/modeling/pixel_decoder/ops 35 | sh make.sh 36 | ``` 37 | 38 | #### Building on another system 39 | To build on a system that does not have a GPU device but provides the drivers: 40 | ```bash 41 | TORCH_CUDA_ARCH_LIST='8.0' FORCE_CUDA=1 python setup.py build install 42 | ``` 43 | After preparing the required environment, run the following command to compile the CUDA kernel for MSDeformAttn: 44 | 45 | `CUDA_HOME` must be defined and point to the directory of the installed CUDA toolkit. 46 | ```bash 47 | cd videocutler/mask2former/modeling/pixel_decoder/ops 48 | sh make.sh 49 | ``` 50 | -------------------------------------------------------------------------------- /videocutler/configs/imagenet/instance-segmentation/Base-COCO-InstanceSegmentation.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | BACKBONE: 3 | FREEZE_AT: 0 4 | NAME: "build_resnet_backbone" 5 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 6 | PIXEL_MEAN: [123.675, 116.280, 103.530] 7 | PIXEL_STD: [58.395, 57.120, 57.375] 8 | RESNETS: 9 | DEPTH: 50 10 | STEM_TYPE: "basic" # not used 11 | STEM_OUT_CHANNELS: 64 12 | STRIDE_IN_1X1: False 13 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 14 | # NORM: "SyncBN" 15 | RES5_MULTI_GRID: [1, 1, 1] # not used 16 | DATASETS: 17 | TRAIN: ("coco_2017_train",) 18 | TEST: ("coco_2017_val",) 19 | SOLVER: 20 | IMS_PER_BATCH: 16 21 | BASE_LR: 0.0001 22 | STEPS: (327778, 355092) 23 | MAX_ITER: 368750 24 | WARMUP_FACTOR: 1.0 25 | WARMUP_ITERS: 10 26 | WEIGHT_DECAY: 0.05 27 | OPTIMIZER: "ADAMW" 28 | BACKBONE_MULTIPLIER: 0.1 29 | CLIP_GRADIENTS: 30 | ENABLED: True 31 | CLIP_TYPE: "full_model" 32 | CLIP_VALUE: 0.01 33 | NORM_TYPE: 2.0 34 | AMP: 35 | ENABLED: True 36 | INPUT: 37 | IMAGE_SIZE: 1024 38 | MIN_SCALE: 0.1 39 | MAX_SCALE: 2.0 40 | FORMAT: "RGB" 41 | DATASET_MAPPER_NAME: "coco_instance_lsj" 42 | TEST: 43 | EVAL_PERIOD: 5000 44 | DATALOADER: 45 | FILTER_EMPTY_ANNOTATIONS: True 46 | NUM_WORKERS: 4 47 | VERSION: 2 48 | -------------------------------------------------------------------------------- /videocutler/configs/imagenet/instance-segmentation/Base-imagenet-InstanceSegmentation.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | BACKBONE: 3 | FREEZE_AT: 0 4 | NAME: "build_resnet_backbone" 5 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 6 | PIXEL_MEAN: [123.675, 116.280, 103.530] 7 | PIXEL_STD: [58.395, 57.120, 57.375] 8 | RESNETS: 9 | DEPTH: 50 10 | STEM_TYPE: "basic" # not used 11 | STEM_OUT_CHANNELS: 64 12 | STRIDE_IN_1X1: False 13 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 14 | # NORM: "SyncBN" 15 | RES5_MULTI_GRID: [1, 1, 1] # not used 16 | DATASETS: 17 | TRAIN: ("imagenet_train_tau0.15_fixsize480_w_painting3Inst_crf_centerprior_polygon",) 18 | TEST: ("imagenet_val",) 19 | SOLVER: 20 | IMS_PER_BATCH: 16 21 | BASE_LR: 0.0001 22 | STEPS: (80000,) 23 | MAX_ITER: 160000 24 | WARMUP_FACTOR: 1.0 25 | WARMUP_ITERS: 10 26 | WEIGHT_DECAY: 0.05 27 | OPTIMIZER: "ADAMW" 28 | BACKBONE_MULTIPLIER: 0.1 29 | CLIP_GRADIENTS: 30 | ENABLED: True 31 | CLIP_TYPE: "full_model" 32 | CLIP_VALUE: 0.01 33 | NORM_TYPE: 2.0 34 | AMP: 35 | ENABLED: True 36 | INPUT: 37 | IMAGE_SIZE: 1024 38 | MIN_SCALE: 0.1 39 | MAX_SCALE: 2.0 40 | # MASK_FORMAT: "bitmask" 41 | FORMAT: "RGB" 42 | DATASET_MAPPER_NAME: "coco_instance_lsj" 43 | TEST: 44 | PRECISE_BN: 45 | ENABLED: True 46 |
EVAL_PERIOD: 5000 47 | DATALOADER: 48 | FILTER_EMPTY_ANNOTATIONS: True 49 | NUM_WORKERS: 4 50 | VERSION: 2 51 | -------------------------------------------------------------------------------- /videocutler/configs/imagenet/instance-segmentation/mask2former_R50_imagenet.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-imagenet-InstanceSegmentation.yaml 2 | DATALOADER: 3 | FILTER_EMPTY_ANNOTATIONS: True 4 | NUM_WORKERS: 8 5 | COPY_PASTE: True 6 | COPY_PASTE_RATE: 1.0 7 | VISUALIZE_COPY_PASTE: False 8 | COPY_PASTE_RANDOM_NUM: True 9 | COPY_PASTE_MIN_RATIO: 0.3 10 | COPY_PASTE_MAX_RATIO: 1.0 11 | MODEL: 12 | META_ARCHITECTURE: "MaskFormer" 13 | SEM_SEG_HEAD: 14 | NAME: "MaskFormerHead" 15 | IGNORE_VALUE: 255 16 | NUM_CLASSES: 1 17 | LOSS_WEIGHT: 1.0 18 | CONVS_DIM: 256 19 | MASK_DIM: 256 20 | NORM: "GN" 21 | # pixel decoder 22 | PIXEL_DECODER_NAME: "MSDeformAttnPixelDecoder" 23 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 24 | DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: ["res3", "res4", "res5"] 25 | COMMON_STRIDE: 4 26 | TRANSFORMER_ENC_LAYERS: 6 27 | WEIGHTS: 'http://dl.fbaipublicfiles.com/cutler/checkpoints/dino_RN50_pretrain_d2_format.pkl' 28 | MASK_FORMER: 29 | POSITIVE_BANK_IOU_THRESH: 0.0 30 | TRANSFORMER_DECODER_NAME: "MultiScaleMaskedTransformerDecoder" 31 | TRANSFORMER_IN_FEATURE: "multi_scale_pixel_decoder" 32 | DEEP_SUPERVISION: True 33 | NO_OBJECT_WEIGHT: 0.1 34 | CLASS_WEIGHT: 2.0 35 | MASK_WEIGHT: 5.0 36 | DICE_WEIGHT: 5.0 37 | HIDDEN_DIM: 256 38 | NUM_OBJECT_QUERIES: 100 39 | NHEADS: 8 40 | DROPOUT: 0.3 41 | DIM_FEEDFORWARD: 2048 42 | ENC_LAYERS: 0 43 | PRE_NORM: False 44 | ENFORCE_INPUT_PROJ: False 45 | SIZE_DIVISIBILITY: 32 46 | DEC_LAYERS: 10 # 9 decoder layers, add one for the loss on learnable query 47 | TRAIN_NUM_POINTS: 12544 48 | OVERSAMPLE_RATIO: 3.0 49 | IMPORTANCE_SAMPLE_RATIO: 0.75 50 | TEST: 51 | SEMANTIC_ON: False 52 | INSTANCE_ON: True 53 | PANOPTIC_ON: False 54 | OVERLAP_THRESHOLD: 0.8 55 | OBJECT_MASK_THRESHOLD: 0.8 56 | DATASETS: 57 | TRAIN: ("imagenet_train",) 58 | INPUT: 59 | IMAGE_SIZE: 896 60 | MIN_SCALE: 0.1 61 | MAX_SCALE: 2.0 62 | # MASK_FORMAT: "bitmask" 63 | FORMAT: "RGB" 64 | DATASET_MAPPER_NAME: "coco_instance_lsj" 65 | TEST: 66 | PRECISE_BN: 67 | ENABLED: True 68 | EVAL_PERIOD: 10000 69 | DETECTIONS_PER_IMAGE: 100 # Test MS-COCO: 100; Test LVIS: 300 70 | SOLVER: 71 | IMS_PER_BATCH: 16 72 | BASE_LR: 0.00002 73 | STEPS: (80000,) 74 | MAX_ITER: 160000 75 | WARMUP_FACTOR: 1.0 76 | WARMUP_ITERS: 10 77 | WEIGHT_DECAY: 0.05 78 | OPTIMIZER: "ADAMW" 79 | BACKBONE_MULTIPLIER: 0.1 80 | CLIP_GRADIENTS: 81 | ENABLED: True 82 | CLIP_TYPE: "full_model" 83 | CLIP_VALUE: 0.01 84 | NORM_TYPE: 2.0 85 | AMP: 86 | ENABLED: True 87 | OUTPUT_DIR: "OUTPUT" -------------------------------------------------------------------------------- /videocutler/configs/imagenet_video/Base-YouTubeVIS-VideoInstanceSegmentation.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | BACKBONE: 3 | FREEZE_AT: 0 4 | NAME: "build_resnet_backbone" 5 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 6 | PIXEL_MEAN: [123.675, 116.280, 103.530] 7 | PIXEL_STD: [58.395, 57.120, 57.375] 8 | MASK_ON: True 9 | RESNETS: 10 | DEPTH: 50 11 | STEM_TYPE: "basic" # not used 12 | STEM_OUT_CHANNELS: 64 13 | STRIDE_IN_1X1: False 14 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 15 | # NORM: "SyncBN" 16 | RES5_MULTI_GRID: [1, 1, 1] # not used 17 | DATASETS: 18 | TRAIN: ("ytvis_2019_train",) 
19 | TEST: ("ytvis_2019_val",) 20 | SOLVER: 21 | IMS_PER_BATCH: 16 22 | BASE_LR: 0.0001 23 | STEPS: (4000,) 24 | MAX_ITER: 6000 25 | WARMUP_FACTOR: 1.0 26 | WARMUP_ITERS: 10 27 | WEIGHT_DECAY: 0.05 28 | OPTIMIZER: "ADAMW" 29 | BACKBONE_MULTIPLIER: 0.1 30 | CLIP_GRADIENTS: 31 | ENABLED: True 32 | CLIP_TYPE: "full_model" 33 | CLIP_VALUE: 0.01 34 | NORM_TYPE: 2.0 35 | AMP: 36 | ENABLED: True 37 | INPUT: 38 | MIN_SIZE_TRAIN_SAMPLING: "choice_by_clip" 39 | RANDOM_FLIP: "flip_by_clip" 40 | AUGMENTATIONS: [] 41 | MIN_SIZE_TRAIN: (360, 480) 42 | MIN_SIZE_TEST: 360 43 | CROP: 44 | ENABLED: False 45 | TYPE: "absolute_range" 46 | SIZE: (600, 720) 47 | FORMAT: "RGB" 48 | TEST: 49 | EVAL_PERIOD: 0 50 | DATALOADER: 51 | FILTER_EMPTY_ANNOTATIONS: False 52 | NUM_WORKERS: 4 53 | VERSION: 2 54 | -------------------------------------------------------------------------------- /videocutler/configs/imagenet_video/video_mask2former_R50_cls_agnostic.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-YouTubeVIS-VideoInstanceSegmentation.yaml 2 | MODEL: 3 | WEIGHTS: "pretrain/cutler_m2f_rn50.pth" 4 | META_ARCHITECTURE: "VideoMaskFormer" 5 | SEM_SEG_HEAD: 6 | NAME: "MaskFormerHead" 7 | IGNORE_VALUE: 255 8 | NUM_CLASSES: 1 # class-agnostic 9 | LOSS_WEIGHT: 1.0 10 | CONVS_DIM: 256 11 | MASK_DIM: 256 12 | NORM: "GN" 13 | # pixel decoder 14 | PIXEL_DECODER_NAME: "MSDeformAttnPixelDecoder" 15 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 16 | DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: ["res3", "res4", "res5"] 17 | COMMON_STRIDE: 4 18 | TRANSFORMER_ENC_LAYERS: 6 19 | MASK_FORMER: 20 | TRANSFORMER_DECODER_NAME: "VideoMultiScaleMaskedTransformerDecoder" 21 | TRANSFORMER_IN_FEATURE: "multi_scale_pixel_decoder" 22 | DEEP_SUPERVISION: True 23 | NO_OBJECT_WEIGHT: 0.1 24 | CLASS_WEIGHT: 2.0 25 | MASK_WEIGHT: 5.0 26 | DICE_WEIGHT: 5.0 27 | HIDDEN_DIM: 256 28 | NUM_OBJECT_QUERIES: 100 29 | NHEADS: 8 30 | DROPOUT: 0.3 31 | DIM_FEEDFORWARD: 2048 32 | ENC_LAYERS: 0 33 | PRE_NORM: False 34 | ENFORCE_INPUT_PROJ: False 35 | SIZE_DIVISIBILITY: 32 36 | DEC_LAYERS: 10 # 9 decoder layers, add one for the loss on learnable query 37 | TRAIN_NUM_POINTS: 12544 38 | OVERSAMPLE_RATIO: 3.0 39 | IMPORTANCE_SAMPLE_RATIO: 0.75 40 | TEST: 41 | SEMANTIC_ON: False 42 | INSTANCE_ON: True 43 | PANOPTIC_ON: False 44 | OVERLAP_THRESHOLD: 0.8 45 | OBJECT_MASK_THRESHOLD: 0.8 46 | DATASETS: 47 | TRAIN: ("imagenet_video_train_cls_agnostic",) 48 | TEST: ("ytvis_2019_train",) 49 | SOLVER: 50 | IMS_PER_BATCH: 16 51 | BASE_LR: 0.00002 52 | STEPS: (79999,) 53 | MAX_ITER: 80000 54 | WARMUP_FACTOR: 1.0 55 | WARMUP_ITERS: 10 56 | WEIGHT_DECAY: 0.05 57 | OPTIMIZER: "ADAMW" 58 | BACKBONE_MULTIPLIER: 0.1 59 | CLIP_GRADIENTS: 60 | ENABLED: True 61 | CLIP_TYPE: "full_model" 62 | CLIP_VALUE: 0.01 63 | NORM_TYPE: 2.0 64 | AMP: 65 | ENABLED: True 66 | DATALOADER: 67 | FILTER_EMPTY_ANNOTATIONS: False 68 | NUM_WORKERS: 0 69 | COPY_PASTE: True 70 | COPY_PASTE_RATE: 1.0 71 | VISUALIZE_COPY_PASTE: False 72 | COPY_PASTE_RANDOM_NUM: False 73 | COPY_PASTE_MIN_RATIO: 0.8 74 | COPY_PASTE_MAX_RATIO: 1.0 75 | INPUT: 76 | SAMPLING_FRAME_NUM: 3 77 | MIN_SIZE_TRAIN_SAMPLING: "choice_by_clip" 78 | RANDOM_FLIP: "flip_by_clip" 79 | AUGMENTATIONS: ['brightness', 'contrast', 'rotation'] 80 | MIN_SIZE_TRAIN: (360, 480) 81 | MIN_SIZE_TEST: 360 82 | CROP: 83 | ENABLED: True 84 | TYPE: "absolute_range" 85 | SIZE: (600, 720) 86 | OUTPUT_DIR: "OUTPUT/" -------------------------------------------------------------------------------- 
/videocutler/configs/imagenet_video/videocutler_eval_ytvis2019.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: video_mask2former_R50_cls_agnostic.yaml 2 | DATASETS: 3 | TEST: ("ytvis_2019_train",) -------------------------------------------------------------------------------- /videocutler/configs/imagenet_video/videocutler_eval_ytvis2021.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: video_mask2former_R50_cls_agnostic.yaml 2 | DATASETS: 3 | TEST: ("ytvis_2021_train",) -------------------------------------------------------------------------------- /videocutler/datasets/README.md: -------------------------------------------------------------------------------- 1 | # Prepare Datasets for VideoCutLER 2 | 3 | A dataset can be used by accessing [DatasetCatalog](https://detectron2.readthedocs.io/modules/data.html#detectron2.data.DatasetCatalog) 4 | for its data, or [MetadataCatalog](https://detectron2.readthedocs.io/modules/data.html#detectron2.data.MetadataCatalog) for its metadata (class names, etc). 5 | This document explains how to set up the builtin datasets so they can be used by the above APIs. 6 | [Use Custom Datasets](https://detectron2.readthedocs.io/tutorials/datasets.html) gives a deeper dive on how to use `DatasetCatalog` and `MetadataCatalog`, 7 | and how to add new datasets to them. 8 | 9 | VideoCutLER has builtin support for a few datasets. 10 | The datasets are assumed to exist in a directory specified by the environment variable 11 | `DETECTRON2_DATASETS`. 12 | Under this directory, detectron2 will look for datasets in the structure described below, if needed. 13 | ``` 14 | $DETECTRON2_DATASETS/ 15 | imagenet/ 16 | ytvis_2019/ 17 | ytvis_2021/ 18 | ``` 19 | 20 | You can set the location of the builtin datasets with `export DETECTRON2_DATASETS=/path/to/datasets`. 21 | If left unset, the default is `./datasets` relative to your current working directory. 22 | 23 | Please check the expected dataset structure for ImageNet-1K [here](../../datasets/README.md). You can directly [download](https://drive.google.com/file/d/1gllHvrZQNVXphnk-IQxMcXh87Qs86ofT/view?usp=sharing) the pre-processed ImageNet-1K annotations produced by MaskCut in YouTubeVIS format and place them under the "imagenet/annotations/" directory. 24 | 25 | Alternatively, you can refer to the instructions on generating pseudo-masks with MaskCut [here](../../README.md#generating-annotations-for-imagenet-1k-with-maskcut). You'll need to convert these annotations into the [YouTubeVIS](https://competitions.codalab.org/competitions/20128) format (MaskCut produces MSCOCO-format annotations); a minimal conversion sketch is given below. This format conversion is a necessary step to ensure compatibility with the training process of VideoCutLER.
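As a reference, a minimal conversion sketch could look like the following, treating each image as a one-frame video. `coco_to_ytvis` is a hypothetical helper (not part of this repo), and the field names should be verified against the official YouTubeVIS jsons:

```python
import json

def coco_to_ytvis(coco_json_path, out_json_path):
    """Wrap each COCO image as a single-frame 'video' in YouTubeVIS style."""
    coco = json.load(open(coco_json_path))
    videos = [
        {
            "id": img["id"],
            "width": img["width"],
            "height": img["height"],
            "length": 1,  # one frame per pseudo-video
            "file_names": [img["file_name"]],
        }
        for img in coco["images"]
    ]
    annotations = [
        {
            "id": ann["id"],
            "video_id": ann["image_id"],
            "category_id": ann["category_id"],
            "iscrowd": ann.get("iscrowd", 0),
            # YouTubeVIS stores per-frame lists; a single entry for the single frame
            "segmentations": [ann["segmentation"]],
            "bboxes": [ann["bbox"]],
            "areas": [ann["area"]],
        }
        for ann in coco["annotations"]
    ]
    ytvis = {"videos": videos, "annotations": annotations, "categories": coco["categories"]}
    json.dump(ytvis, open(out_json_path, "w"))
```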
26 | 27 | 28 | ## Expected dataset structure for [YouTubeVIS 2019](https://competitions.codalab.org/competitions/20128): 29 | 30 | ``` 31 | ytvis_2019/ 32 | {train,valid,test}.json 33 | {train,valid,test}/ 34 | Annotations/ 35 | JPEGImages/ 36 | ``` 37 | 38 | ## Expected dataset structure for [YouTubeVIS 2021](https://competitions.codalab.org/competitions/28988): 39 | 40 | ``` 41 | ytvis_2021/ 42 | {train,valid,test}.json 43 | {train,valid,test}/ 44 | Annotations/ 45 | JPEGImages/ 46 | ``` 47 | -------------------------------------------------------------------------------- /videocutler/datasets/ade20k_instance_catid_mapping.txt: -------------------------------------------------------------------------------- 1 | Instacne100 SceneParse150 FullADE20K 2 | 1 8 165 3 | 2 9 3055 4 | 3 11 350 5 | 4 13 1831 6 | 5 15 774 7 | 5 15 783 8 | 6 16 2684 9 | 7 19 687 10 | 8 20 471 11 | 9 21 401 12 | 10 23 1735 13 | 11 24 2473 14 | 12 25 2329 15 | 13 28 1564 16 | 14 31 57 17 | 15 32 2272 18 | 16 33 907 19 | 17 34 724 20 | 18 36 2985 21 | 18 36 533 22 | 19 37 1395 23 | 20 38 155 24 | 21 39 2053 25 | 22 40 689 26 | 23 42 266 27 | 24 43 581 28 | 25 44 2380 29 | 26 45 491 30 | 27 46 627 31 | 28 48 2388 32 | 29 50 943 33 | 30 51 2096 34 | 31 54 2530 35 | 32 56 420 36 | 33 57 1948 37 | 34 58 1869 38 | 35 59 2251 39 | 36 63 239 40 | 37 65 571 41 | 38 66 2793 42 | 39 67 978 43 | 40 68 236 44 | 41 70 181 45 | 42 71 629 46 | 43 72 2598 47 | 44 73 1744 48 | 45 74 1374 49 | 46 75 591 50 | 47 76 2679 51 | 48 77 223 52 | 49 79 47 53 | 50 81 327 54 | 51 82 2821 55 | 52 83 1451 56 | 53 84 2880 57 | 54 86 480 58 | 55 87 77 59 | 56 88 2616 60 | 57 89 246 61 | 57 89 247 62 | 58 90 2733 63 | 59 91 14 64 | 60 93 38 65 | 61 94 1936 66 | 62 96 120 67 | 63 98 1702 68 | 64 99 249 69 | 65 103 2928 70 | 66 104 2337 71 | 67 105 1023 72 | 68 108 2989 73 | 69 109 1930 74 | 70 111 2586 75 | 71 112 131 76 | 72 113 146 77 | 73 116 95 78 | 74 117 1563 79 | 75 119 1708 80 | 76 120 103 81 | 77 121 1002 82 | 78 122 2569 83 | 79 124 2833 84 | 80 125 1551 85 | 81 126 1981 86 | 82 127 29 87 | 83 128 187 88 | 84 130 747 89 | 85 131 2254 90 | 86 133 2262 91 | 87 134 1260 92 | 88 135 2243 93 | 89 136 2932 94 | 90 137 2836 95 | 91 138 2850 96 | 92 139 64 97 | 93 140 894 98 | 94 143 1919 99 | 95 144 1583 100 | 96 145 318 101 | 97 147 2046 102 | 98 148 1098 103 | 99 149 530 104 | 100 150 954 105 | -------------------------------------------------------------------------------- /videocutler/datasets/prepare_ade20k_ins_seg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) Facebook, Inc. and its affiliates. 
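# Converts the ADE20K per-pixel instance annotations (channel 0 of each annotation
# PNG holds the category id, channel 1 the instance id) into a COCO-format
# instance-segmentation json with RLE-encoded masks.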
4 | import glob 5 | import json 6 | import os 7 | from collections import Counter 8 | 9 | import numpy as np 10 | import tqdm 11 | from panopticapi.utils import IdGenerator, save_json 12 | from PIL import Image 13 | import pycocotools.mask as mask_util 14 | 15 | 16 | if __name__ == "__main__": 17 | dataset_dir = os.getenv("DETECTRON2_DATASETS", "datasets") 18 | 19 | for name, dirname in [("train", "training"), ("val", "validation")]: 20 | image_dir = os.path.join(dataset_dir, f"ADEChallengeData2016/images/{dirname}/") 21 | instance_dir = os.path.join( 22 | dataset_dir, f"ADEChallengeData2016/annotations_instance/{dirname}/" 23 | ) 24 | 25 | # img_id = 0 26 | ann_id = 1 27 | 28 | # json 29 | out_file = os.path.join(dataset_dir, f"ADEChallengeData2016/ade20k_instance_{name}.json") 30 | 31 | # json config 32 | instance_config_file = "datasets/ade20k_instance_imgCatIds.json" 33 | with open(instance_config_file) as f: 34 | category_dict = json.load(f)["categories"] 35 | 36 | # load catid mapping 37 | # it is important to share category id for both instance and panoptic annotations 38 | mapping_file = "datasets/ade20k_instance_catid_mapping.txt" 39 | with open(mapping_file) as f: 40 | map_id = {} 41 | for i, line in enumerate(f.readlines()): 42 | if i == 0: 43 | continue 44 | ins_id, sem_id, _ = line.strip().split() 45 | # shift id by 1 because we want it to start from 0! 46 | # ignore_label becomes 255 47 | map_id[int(ins_id)] = int(sem_id) - 1 48 | 49 | for cat in category_dict: 50 | cat["id"] = map_id[cat["id"]] 51 | 52 | filenames = sorted(glob.glob(os.path.join(image_dir, "*.jpg"))) 53 | 54 | ann_dict = {} 55 | images = [] 56 | annotations = [] 57 | 58 | for idx, filename in enumerate(tqdm.tqdm(filenames)): 59 | image = {} 60 | image_id = os.path.basename(filename).split(".")[0] 61 | 62 | image["id"] = image_id 63 | image["file_name"] = os.path.basename(filename) 64 | 65 | original_format = np.array(Image.open(filename)) 66 | image["width"] = original_format.shape[1] 67 | image["height"] = original_format.shape[0] 68 | 69 | images.append(image) 70 | 71 | filename_instance = os.path.join(instance_dir, image_id + ".png") 72 | ins_seg = np.asarray(Image.open(filename_instance)) 73 | assert ins_seg.dtype == np.uint8 74 | 75 | instance_cat_ids = ins_seg[..., 0] 76 | # instance id starts from 1! 
77 | # because 0 is reserved as VOID label 78 | instance_ins_ids = ins_seg[..., 1] 79 | 80 | # process things 81 | for thing_id in np.unique(instance_ins_ids): 82 | if thing_id == 0: 83 | continue 84 | mask = instance_ins_ids == thing_id 85 | instance_cat_id = np.unique(instance_cat_ids[mask]) 86 | assert len(instance_cat_id) == 1 87 | 88 | anno = {} 89 | anno['id'] = ann_id 90 | ann_id += 1 91 | anno['image_id'] = image['id'] 92 | anno["iscrowd"] = int(0) 93 | anno["category_id"] = int(map_id[instance_cat_id[0]]) 94 | 95 | inds = np.nonzero(mask) 96 | ymin, ymax = inds[0].min(), inds[0].max() 97 | xmin, xmax = inds[1].min(), inds[1].max() 98 | anno["bbox"] = [int(xmin), int(ymin), int(xmax - xmin + 1), int(ymax - ymin + 1)] 99 | # if xmax <= xmin or ymax <= ymin: 100 | # continue 101 | rle = mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0] 102 | rle["counts"] = rle["counts"].decode("utf-8") 103 | anno["segmentation"] = rle 104 | anno["area"] = int(mask_util.area(rle)) 105 | annotations.append(anno) 106 | 107 | # save this 108 | ann_dict['images'] = images 109 | ann_dict['categories'] = category_dict 110 | ann_dict['annotations'] = annotations 111 | 112 | save_json(ann_dict, out_file) 113 | -------------------------------------------------------------------------------- /videocutler/datasets/prepare_ade20k_sem_seg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | import os 5 | from pathlib import Path 6 | 7 | import numpy as np 8 | import tqdm 9 | from PIL import Image 10 | 11 | 12 | def convert(input, output): 13 | img = np.asarray(Image.open(input)) 14 | assert img.dtype == np.uint8 15 | img = img - 1 # 0 (ignore) becomes 255. others are shifted by 1 16 | Image.fromarray(img).save(output) 17 | 18 | 19 | if __name__ == "__main__": 20 | dataset_dir = Path(os.getenv("DETECTRON2_DATASETS", "datasets")) / "ADEChallengeData2016" 21 | for name in ["training", "validation"]: 22 | annotation_dir = dataset_dir / "annotations" / name 23 | output_dir = dataset_dir / "annotations_detectron2" / name 24 | output_dir.mkdir(parents=True, exist_ok=True) 25 | for file in tqdm.tqdm(list(annotation_dir.iterdir())): 26 | output_file = output_dir / file.name 27 | convert(file, output_file) 28 | -------------------------------------------------------------------------------- /videocutler/datasets/prepare_coco_semantic_annos_from_panoptic_annos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) Facebook, Inc. and its affiliates. 
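# Derives per-pixel semantic-segmentation PNGs from COCO panoptic annotations;
# see separate_coco_semantic_from_panoptic below for the category-id mapping.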
4 | 5 | import functools 6 | import json 7 | import multiprocessing as mp 8 | import numpy as np 9 | import os 10 | import time 11 | from fvcore.common.download import download 12 | from panopticapi.utils import rgb2id 13 | from PIL import Image 14 | 15 | from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES 16 | 17 | 18 | def _process_panoptic_to_semantic(input_panoptic, output_semantic, segments, id_map): 19 | panoptic = np.asarray(Image.open(input_panoptic), dtype=np.uint32) 20 | panoptic = rgb2id(panoptic) 21 | output = np.zeros_like(panoptic, dtype=np.uint8) + 255 22 | for seg in segments: 23 | cat_id = seg["category_id"] 24 | new_cat_id = id_map[cat_id] 25 | output[panoptic == seg["id"]] = new_cat_id 26 | Image.fromarray(output).save(output_semantic) 27 | 28 | 29 | def separate_coco_semantic_from_panoptic(panoptic_json, panoptic_root, sem_seg_root, categories): 30 | """ 31 | Create semantic segmentation annotations from panoptic segmentation 32 | annotations, to be used by PanopticFPN. 33 | It maps all thing categories to class 0, and maps all unlabeled pixels to class 255. 34 | It maps all stuff categories to contiguous ids starting from 1. 35 | Args: 36 | panoptic_json (str): path to the panoptic json file, in COCO's format. 37 | panoptic_root (str): a directory with panoptic annotation files, in COCO's format. 38 | sem_seg_root (str): a directory to output semantic annotation files 39 | categories (list[dict]): category metadata. Each dict needs to have: 40 | "id": corresponds to the "category_id" in the json annotations 41 | "isthing": 0 or 1 42 | """ 43 | os.makedirs(sem_seg_root, exist_ok=True) 44 | 45 | id_map = {} # map from category id to id in the output semantic annotation 46 | assert len(categories) <= 254 47 | for i, k in enumerate(categories): 48 | id_map[k["id"]] = i 49 | # what is id = 0? 50 | # id_map[0] = 255 51 | print(id_map) 52 | 53 | with open(panoptic_json) as f: 54 | obj = json.load(f) 55 | 56 | pool = mp.Pool(processes=max(mp.cpu_count() // 2, 4)) 57 | 58 | def iter_annotations(): 59 | for anno in obj["annotations"]: 60 | file_name = anno["file_name"] 61 | segments = anno["segments_info"] 62 | input = os.path.join(panoptic_root, file_name) 63 | output = os.path.join(sem_seg_root, file_name) 64 | yield input, output, segments 65 | 66 | print("Start writing to {} ...".format(sem_seg_root)) 67 | start = time.time() 68 | pool.starmap( 69 | functools.partial(_process_panoptic_to_semantic, id_map=id_map), 70 | iter_annotations(), 71 | chunksize=100, 72 | ) 73 | print("Finished. 
time: {:.2f}s".format(time.time() - start)) 74 | 75 | 76 | if __name__ == "__main__": 77 | dataset_dir = os.path.join(os.getenv("DETECTRON2_DATASETS", "datasets"), "coco") 78 | for s in ["val2017", "train2017"]: 79 | separate_coco_semantic_from_panoptic( 80 | os.path.join(dataset_dir, "annotations/panoptic_{}.json".format(s)), 81 | os.path.join(dataset_dir, "panoptic_{}".format(s)), 82 | os.path.join(dataset_dir, "panoptic_semseg_{}".format(s)), 83 | COCO_CATEGORIES, 84 | ) 85 | -------------------------------------------------------------------------------- /videocutler/demo.sh: -------------------------------------------------------------------------------- 1 | python demo_video/demo.py \ 2 | --config-file configs/imagenet_video/video_mask2former_R50_cls_agnostic.yaml \ 3 | --input docs/demo-videos/99c6b1acf2/*.jpg \ 4 | --confidence-threshold 0.8 \ 5 | --output demos/ \ 6 | # --save-frames True \ 7 | # --save-masks True \ 8 | --opts MODEL.WEIGHTS videocutler_m2f_rn50.pth -------------------------------------------------------------------------------- /videocutler/demo/README.md: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | ## VideoCutLER Demo 3 | 4 | We provide a command line tool to run a simple demo of builtin configs. 5 | The usage is explained in [GETTING_STARTED.md](../GETTING_STARTED.md). 6 | -------------------------------------------------------------------------------- /videocutler/docs/demo-videos/09773e4062/00100.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/videocutler/docs/demo-videos/09773e4062/00100.jpg -------------------------------------------------------------------------------- /videocutler/docs/demo-videos/09773e4062/00105.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/videocutler/docs/demo-videos/09773e4062/00105.jpg -------------------------------------------------------------------------------- /videocutler/docs/demo-videos/09773e4062/00110.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/videocutler/docs/demo-videos/09773e4062/00110.jpg -------------------------------------------------------------------------------- /videocutler/docs/demo-videos/09773e4062/00115.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/videocutler/docs/demo-videos/09773e4062/00115.jpg -------------------------------------------------------------------------------- /videocutler/docs/demo-videos/09773e4062/00120.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/videocutler/docs/demo-videos/09773e4062/00120.jpg -------------------------------------------------------------------------------- /videocutler/docs/demo-videos/09773e4062/00125.jpg: -------------------------------------------------------------------------------- 
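eval.sh and eval_ytvis.py below expect that inference has written `inference/results.json` under the output directory, in the YouTubeVIS results format consumed by `YTVOS.loadRes`. A sketch of one hypothetical entry, to make that call concrete (all field values here are placeholders):

import json

# One hypothetical entry; inference writes a JSON list of these dicts.
result = {
    "video_id": 1,
    "category_id": 1,  # single foreground class; VideoCutLER predicts class-agnostically
    "score": 0.97,
    "segmentations": [  # one COCO-style RLE per frame, None where the track is absent
        {"size": [720, 1280], "counts": "<rle-string>"},
        None,
    ],
}
with open("results.json", "w") as f:
    json.dump([result], f)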
-------------------------------------------------------------------------------- /videocutler/docs/videocutler_demos.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/videocutler/docs/videocutler_demos.gif -------------------------------------------------------------------------------- /videocutler/docs/videocutler_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/videocutler/docs/videocutler_pipeline.png -------------------------------------------------------------------------------- /videocutler/eval.sh: -------------------------------------------------------------------------------- 1 | export DETECTRON2_DATASETS=/shared/xudongw/DATASETS/ 2 | 3 | ###### eval YouTubeVIS-2019 ###### 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python train_net_video.py --num-gpus 4 \ 5 | --config-file configs/imagenet_video/videocutler_eval_ytvis2019.yaml \ 6 | --eval-only MODEL.WEIGHTS videocutler_m2f_rn50.pth \ 7 | OUTPUT_DIR OUTPUT/ytvis_2019 8 | 9 | python eval_ytvis.py --dataset-path ${DETECTRON2_DATASETS} --dataset-name 'ytvis_2019' --result-path 'OUTPUT/ytvis_2019/' 10 | 11 | ###### eval YouTubeVIS-2021 ###### 12 | # CUDA_VISIBLE_DEVICES=0,1,2,3 python train_net_video.py --num-gpus 4 \ 13 | # --config-file configs/imagenet_video/videocutler_eval_ytvis2021.yaml \ 14 | # --eval-only MODEL.WEIGHTS videocutler_m2f_rn50.pth \ 15 | # OUTPUT_DIR OUTPUT/ytvis_2021/ 16 | 17 | # python eval_ytvis.py --dataset-path ${DETECTRON2_DATASETS} --dataset-name 'ytvis_2021' --result-path 'OUTPUT/ytvis_2021/' -------------------------------------------------------------------------------- /videocutler/eval_ytvis.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 
2 | # Modified by XuDong Wang from detectron2 and cocoapi
3 | 
4 | import argparse
5 | import os
6 | 
7 | from mask2former_video.data_video.datasets.ytvis_api.ytvoseval import YTVOSeval
8 | from mask2former_video.data_video.datasets.ytvis_api.ytvos import YTVOS
9 | 
10 | def print_and_summary(cocoEval):
11 |     # join all YTVOSeval stats into one comma-separated percentage string
12 |     str_print = ""
13 |     for stat in cocoEval.stats:
14 |         str_print += "{:.2f},".format(stat * 100)
15 |     return str_print
16 | 
17 | def get_parser():
18 |     parser = argparse.ArgumentParser(description="YouTubeVIS evaluation configs")
19 |     parser.add_argument(
20 |         "--dataset-path", default="DATASETS", help="root directory of the datasets",
21 |     )
22 |     parser.add_argument(
23 |         "--dataset-name", default="ytvis_2019", help="name of the dataset, e.g. ytvis_2019 or ytvis_2021",
24 |     )
25 |     parser.add_argument(
26 |         "--result-path", default="OUTPUT", help="path to the result directory",
27 |     )
28 |     return parser
29 | 
30 | if __name__ == "__main__":
31 |     args = get_parser().parse_args()
32 | 
33 |     annFile = os.path.join(args.dataset_path, args.dataset_name, 'train.json')
34 |     cocoGt = YTVOS(annFile)
35 | 
36 |     resFile = os.path.join(args.result_path, 'inference/results.json')
37 |     cocoDt = cocoGt.loadRes(resFile)
38 | 
39 |     annType = 'segm'
40 |     print('Running {} evaluation.'.format(annType))
41 |     cocoEval = YTVOSeval(cocoGt, cocoDt, annType)
42 |     cocoEval.params.useCats = 0  # class-agnostic evaluation
43 |     cocoEval.evaluate()
44 |     cocoEval.accumulate()
45 |     cocoEval.summarize()
46 |     copypaste = print_and_summary(cocoEval)
47 |     print(copypaste)
--------------------------------------------------------------------------------
/videocutler/mask2former/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from . import data  # register all new datasets
3 | from . import modeling
4 | 
5 | # config
6 | from .config import add_maskformer2_config
7 | 
8 | # dataset loading
9 | from .data.dataset_mappers.coco_instance_new_baseline_dataset_mapper import COCOInstanceNewBaselineDatasetMapper
10 | from .data.dataset_mappers.coco_panoptic_new_baseline_dataset_mapper import COCOPanopticNewBaselineDatasetMapper
11 | from .data.dataset_mappers.mask_former_instance_dataset_mapper import (
12 |     MaskFormerInstanceDatasetMapper,
13 | )
14 | from .data.dataset_mappers.mask_former_panoptic_dataset_mapper import (
15 |     MaskFormerPanopticDatasetMapper,
16 | )
17 | from .data.dataset_mappers.mask_former_semantic_dataset_mapper import (
18 |     MaskFormerSemanticDatasetMapper,
19 | )
20 | 
21 | # models
22 | from .maskformer_model import MaskFormer
23 | from .test_time_augmentation import SemanticSegmentorWithTTA
24 | 
25 | # evaluation
26 | from .evaluation.instance_evaluation import InstanceSegEvaluator
27 | 
--------------------------------------------------------------------------------
/videocutler/mask2former/config.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) Facebook, Inc. and its affiliates.
3 | from detectron2.config import CfgNode as CN
4 | 
5 | 
6 | def add_maskformer2_config(cfg):
7 |     """
8 |     Add config for MASK_FORMER.
9 |     """
10 |     # NOTE: configs from original maskformer
11 |     # data config
12 |     # select the dataset mapper
13 |     cfg.INPUT.DATASET_MAPPER_NAME = "mask_former_semantic"
14 |     # Color augmentation
15 |     cfg.INPUT.COLOR_AUG_SSD = False
16 |     # We retry random cropping until no single category in semantic segmentation GT occupies more
17 |     # than `SINGLE_CATEGORY_MAX_AREA` part of the crop.
18 | cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0 19 | # Pad image and segmentation GT in dataset mapper. 20 | cfg.INPUT.SIZE_DIVISIBILITY = -1 21 | 22 | # solver config 23 | # weight decay on embedding 24 | cfg.SOLVER.WEIGHT_DECAY_EMBED = 0.0 25 | # optimizer 26 | cfg.SOLVER.OPTIMIZER = "ADAMW" 27 | cfg.SOLVER.BACKBONE_MULTIPLIER = 0.1 28 | 29 | # mask_former model config 30 | cfg.MODEL.MASK_FORMER = CN() 31 | 32 | # loss 33 | cfg.MODEL.MASK_FORMER.DEEP_SUPERVISION = True 34 | cfg.MODEL.MASK_FORMER.NO_OBJECT_WEIGHT = 0.1 35 | cfg.MODEL.MASK_FORMER.CLASS_WEIGHT = 1.0 36 | cfg.MODEL.MASK_FORMER.DICE_WEIGHT = 1.0 37 | cfg.MODEL.MASK_FORMER.MASK_WEIGHT = 20.0 38 | cfg.MODEL.MASK_FORMER.POSITIVE_BANK_IOU_THRESH = 0.01 39 | 40 | # transformer config 41 | cfg.MODEL.MASK_FORMER.NHEADS = 8 42 | cfg.MODEL.MASK_FORMER.DROPOUT = 0.1 43 | cfg.MODEL.MASK_FORMER.DIM_FEEDFORWARD = 2048 44 | cfg.MODEL.MASK_FORMER.ENC_LAYERS = 0 45 | cfg.MODEL.MASK_FORMER.DEC_LAYERS = 6 46 | cfg.MODEL.MASK_FORMER.PRE_NORM = False 47 | 48 | cfg.MODEL.MASK_FORMER.HIDDEN_DIM = 256 49 | cfg.MODEL.MASK_FORMER.NUM_OBJECT_QUERIES = 100 50 | 51 | cfg.MODEL.MASK_FORMER.TRANSFORMER_IN_FEATURE = "res5" 52 | cfg.MODEL.MASK_FORMER.ENFORCE_INPUT_PROJ = False 53 | 54 | # mask_former inference config 55 | cfg.MODEL.MASK_FORMER.TEST = CN() 56 | cfg.MODEL.MASK_FORMER.TEST.SEMANTIC_ON = True 57 | cfg.MODEL.MASK_FORMER.TEST.INSTANCE_ON = False 58 | cfg.MODEL.MASK_FORMER.TEST.PANOPTIC_ON = False 59 | cfg.MODEL.MASK_FORMER.TEST.OBJECT_MASK_THRESHOLD = 0.0 60 | cfg.MODEL.MASK_FORMER.TEST.OVERLAP_THRESHOLD = 0.0 61 | cfg.MODEL.MASK_FORMER.TEST.SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE = False 62 | 63 | # Sometimes `backbone.size_divisibility` is set to 0 for some backbone (e.g. ResNet) 64 | # you can use this config to override 65 | cfg.MODEL.MASK_FORMER.SIZE_DIVISIBILITY = 32 66 | 67 | # pixel decoder config 68 | cfg.MODEL.SEM_SEG_HEAD.MASK_DIM = 256 69 | # adding transformer in pixel decoder 70 | cfg.MODEL.SEM_SEG_HEAD.TRANSFORMER_ENC_LAYERS = 0 71 | # pixel decoder 72 | cfg.MODEL.SEM_SEG_HEAD.PIXEL_DECODER_NAME = "BasePixelDecoder" 73 | 74 | # swin transformer backbone 75 | cfg.MODEL.SWIN = CN() 76 | cfg.MODEL.SWIN.PRETRAIN_IMG_SIZE = 224 77 | cfg.MODEL.SWIN.PATCH_SIZE = 4 78 | cfg.MODEL.SWIN.EMBED_DIM = 96 79 | cfg.MODEL.SWIN.DEPTHS = [2, 2, 6, 2] 80 | cfg.MODEL.SWIN.NUM_HEADS = [3, 6, 12, 24] 81 | cfg.MODEL.SWIN.WINDOW_SIZE = 7 82 | cfg.MODEL.SWIN.MLP_RATIO = 4.0 83 | cfg.MODEL.SWIN.QKV_BIAS = True 84 | cfg.MODEL.SWIN.QK_SCALE = None 85 | cfg.MODEL.SWIN.DROP_RATE = 0.0 86 | cfg.MODEL.SWIN.ATTN_DROP_RATE = 0.0 87 | cfg.MODEL.SWIN.DROP_PATH_RATE = 0.3 88 | cfg.MODEL.SWIN.APE = False 89 | cfg.MODEL.SWIN.PATCH_NORM = True 90 | cfg.MODEL.SWIN.OUT_FEATURES = ["res2", "res3", "res4", "res5"] 91 | cfg.MODEL.SWIN.USE_CHECKPOINT = False 92 | 93 | # NOTE: maskformer2 extra configs 94 | # transformer module 95 | cfg.MODEL.MASK_FORMER.TRANSFORMER_DECODER_NAME = "MultiScaleMaskedTransformerDecoder" 96 | 97 | # LSJ aug 98 | cfg.INPUT.IMAGE_SIZE = 1024 99 | cfg.INPUT.MIN_SCALE = 0.1 100 | cfg.INPUT.MAX_SCALE = 2.0 101 | 102 | # MSDeformAttn encoder configs 103 | cfg.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES = ["res3", "res4", "res5"] 104 | cfg.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_N_POINTS = 4 105 | cfg.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_N_HEADS = 8 106 | 107 | # point loss configs 108 | # Number of points sampled during training for a mask point head. 
109 |     cfg.MODEL.MASK_FORMER.TRAIN_NUM_POINTS = 112 * 112
110 |     # Oversampling parameter for PointRend point sampling during training. Parameter `k` in the
111 |     # original paper.
112 |     cfg.MODEL.MASK_FORMER.OVERSAMPLE_RATIO = 3.0
113 |     # Importance sampling parameter for PointRend point sampling during training. Parameter `beta` in
114 |     # the original paper.
115 |     cfg.MODEL.MASK_FORMER.IMPORTANCE_SAMPLE_RATIO = 0.75
116 | 
--------------------------------------------------------------------------------
/videocutler/mask2former/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from . import datasets
3 | 
--------------------------------------------------------------------------------
/videocutler/mask2former/data/dataset_mappers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | 
--------------------------------------------------------------------------------
/videocutler/mask2former/data/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from . import (
3 |     register_ade20k_full,
4 |     register_ade20k_panoptic,
5 |     register_coco_stuff_10k,
6 |     register_mapillary_vistas,
7 |     register_coco_panoptic_annos_semseg,
8 |     register_ade20k_instance,
9 |     register_mapillary_vistas_panoptic,
10 | )
11 | 
--------------------------------------------------------------------------------
/videocutler/mask2former/data/datasets/register_ade20k_instance.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import json 3 | import logging 4 | import numpy as np 5 | import os 6 | from PIL import Image 7 | 8 | from detectron2.data import DatasetCatalog, MetadataCatalog 9 | from detectron2.data.datasets.coco import load_coco_json, register_coco_instances 10 | from detectron2.utils.file_io import PathManager 11 | 12 | ADE_CATEGORIES = [{'id': 7, 'name': 'bed'}, {'id': 8, 'name': 'windowpane'}, {'id': 10, 'name': 'cabinet'}, {'id': 12, 'name': 'person'}, {'id': 14, 'name': 'door'}, {'id': 15, 'name': 'table'}, {'id': 18, 'name': 'curtain'}, {'id': 19, 'name': 'chair'}, {'id': 20, 'name': 'car'}, {'id': 22, 'name': 'painting'}, {'id': 23, 'name': 'sofa'}, {'id': 24, 'name': 'shelf'}, {'id': 27, 'name': 'mirror'}, {'id': 30, 'name': 'armchair'}, {'id': 31, 'name': 'seat'}, {'id': 32, 'name': 'fence'}, {'id': 33, 'name': 'desk'}, {'id': 35, 'name': 'wardrobe'}, {'id': 36, 'name': 'lamp'}, {'id': 37, 'name': 'bathtub'}, {'id': 38, 'name': 'railing'}, {'id': 39, 'name': 'cushion'}, {'id': 41, 'name': 'box'}, {'id': 42, 'name': 'column'}, {'id': 43, 'name': 'signboard'}, {'id': 44, 'name': 'chest of drawers'}, {'id': 45, 'name': 'counter'}, {'id': 47, 'name': 'sink'}, {'id': 49, 'name': 'fireplace'}, {'id': 50, 'name': 'refrigerator'}, {'id': 53, 'name': 'stairs'}, {'id': 55, 'name': 'case'}, {'id': 56, 'name': 'pool table'}, {'id': 57, 'name': 'pillow'}, {'id': 58, 'name': 'screen door'}, {'id': 62, 'name': 'bookcase'}, {'id': 64, 'name': 'coffee table'}, {'id': 65, 'name': 'toilet'}, {'id': 66, 'name': 'flower'}, {'id': 67, 'name': 'book'}, {'id': 69, 'name': 'bench'}, {'id': 70, 'name': 'countertop'}, {'id': 71, 'name': 'stove'}, {'id': 72, 'name': 'palm'}, {'id': 73, 'name': 'kitchen island'}, {'id': 74, 'name': 'computer'}, {'id': 75, 'name': 'swivel chair'}, {'id': 76, 'name': 'boat'}, {'id': 78, 'name': 'arcade machine'}, {'id': 80, 'name': 'bus'}, {'id': 81, 'name': 'towel'}, {'id': 82, 'name': 'light'}, {'id': 83, 'name': 'truck'}, {'id': 85, 'name': 'chandelier'}, {'id': 86, 'name': 'awning'}, {'id': 87, 'name': 'streetlight'}, {'id': 88, 'name': 'booth'}, {'id': 89, 'name': 'television receiver'}, {'id': 90, 'name': 'airplane'}, {'id': 92, 'name': 'apparel'}, {'id': 93, 'name': 'pole'}, {'id': 95, 'name': 'bannister'}, {'id': 97, 'name': 'ottoman'}, {'id': 98, 'name': 'bottle'}, {'id': 102, 'name': 'van'}, {'id': 103, 'name': 'ship'}, {'id': 104, 'name': 'fountain'}, {'id': 107, 'name': 'washer'}, {'id': 108, 'name': 'plaything'}, {'id': 110, 'name': 'stool'}, {'id': 111, 'name': 'barrel'}, {'id': 112, 'name': 'basket'}, {'id': 115, 'name': 'bag'}, {'id': 116, 'name': 'minibike'}, {'id': 118, 'name': 'oven'}, {'id': 119, 'name': 'ball'}, {'id': 120, 'name': 'food'}, {'id': 121, 'name': 'step'}, {'id': 123, 'name': 'trade name'}, {'id': 124, 'name': 'microwave'}, {'id': 125, 'name': 'pot'}, {'id': 126, 'name': 'animal'}, {'id': 127, 'name': 'bicycle'}, {'id': 129, 'name': 'dishwasher'}, {'id': 130, 'name': 'screen'}, {'id': 132, 'name': 'sculpture'}, {'id': 133, 'name': 'hood'}, {'id': 134, 'name': 'sconce'}, {'id': 135, 'name': 'vase'}, {'id': 136, 'name': 'traffic light'}, {'id': 137, 'name': 'tray'}, {'id': 138, 'name': 'ashcan'}, {'id': 139, 'name': 'fan'}, {'id': 142, 'name': 'plate'}, {'id': 143, 'name': 'monitor'}, {'id': 144, 'name': 'bulletin board'}, {'id': 146, 'name': 'radiator'}, {'id': 147, 'name': 'glass'}, {'id': 148, 'name': 'clock'}, {'id': 149, 'name': 'flag'}] 13 | 14 | 15 | _PREDEFINED_SPLITS = { 16 | # point annotations without masks 17 | "ade20k_instance_train": ( 18 
| "ADEChallengeData2016/images/training", 19 | "ADEChallengeData2016/ade20k_instance_train.json", 20 | ), 21 | "ade20k_instance_val": ( 22 | "ADEChallengeData2016/images/validation", 23 | "ADEChallengeData2016/ade20k_instance_val.json", 24 | ), 25 | } 26 | 27 | 28 | def _get_ade_instances_meta(): 29 | thing_ids = [k["id"] for k in ADE_CATEGORIES] 30 | assert len(thing_ids) == 100, len(thing_ids) 31 | # Mapping from the incontiguous ADE category id to an id in [0, 99] 32 | thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)} 33 | thing_classes = [k["name"] for k in ADE_CATEGORIES] 34 | ret = { 35 | "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id, 36 | "thing_classes": thing_classes, 37 | } 38 | return ret 39 | 40 | 41 | def register_all_ade20k_instance(root): 42 | for key, (image_root, json_file) in _PREDEFINED_SPLITS.items(): 43 | # Assume pre-defined datasets live in `./datasets`. 44 | register_coco_instances( 45 | key, 46 | _get_ade_instances_meta(), 47 | os.path.join(root, json_file) if "://" not in json_file else json_file, 48 | os.path.join(root, image_root), 49 | ) 50 | 51 | 52 | _root = os.getenv("DETECTRON2_DATASETS", "datasets") 53 | register_all_ade20k_instance(_root) 54 | -------------------------------------------------------------------------------- /videocutler/mask2former/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/videocutler/mask2former/evaluation/__init__.py -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .backbone.swin import D2SwinTransformer 3 | from .pixel_decoder.fpn import BasePixelDecoder 4 | from .pixel_decoder.msdeformattn import MSDeformAttnPixelDecoder 5 | from .meta_arch.mask_former_head import MaskFormerHead 6 | from .meta_arch.per_pixel_baseline import PerPixelBaselineHead, PerPixelBaselinePlusHead 7 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/ops/functions/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | # Copyright (c) Facebook, Inc. and its affiliates. 10 | # Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 11 | 12 | from .ms_deform_attn_func import MSDeformAttnFunction 13 | 14 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/ops/functions/ms_deform_attn_func.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | # Copyright (c) Facebook, Inc. and its affiliates. 10 | # Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 11 | 12 | from __future__ import absolute_import 13 | from __future__ import print_function 14 | from __future__ import division 15 | 16 | import torch 17 | import torch.nn.functional as F 18 | from torch.autograd import Function 19 | from torch.autograd.function import once_differentiable 20 | 21 | try: 22 | import MultiScaleDeformableAttention as MSDA 23 | except ModuleNotFoundError as e: 24 | info_string = ( 25 | "\n\nPlease compile MultiScaleDeformableAttention CUDA op with the following commands:\n" 26 | "\t`cd mask2former/modeling/pixel_decoder/ops`\n" 27 | "\t`sh make.sh`\n" 28 | ) 29 | raise ModuleNotFoundError(info_string) 30 | 31 | 32 | class MSDeformAttnFunction(Function): 33 | @staticmethod 34 | def forward(ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step): 35 | ctx.im2col_step = im2col_step 36 | output = MSDA.ms_deform_attn_forward( 37 | value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, ctx.im2col_step) 38 | ctx.save_for_backward(value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights) 39 | return output 40 | 41 | @staticmethod 42 | @once_differentiable 43 | def backward(ctx, grad_output): 44 | value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors 45 | grad_value, grad_sampling_loc, grad_attn_weight = \ 46 | MSDA.ms_deform_attn_backward( 47 | value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, grad_output, ctx.im2col_step) 48 | 49 | return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None 50 | 51 | 52 | def ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights): 53 | # for debug and test only, 54 | # need to use cuda version instead 55 | N_, S_, M_, D_ = value.shape 56 | _, Lq_, M_, L_, P_, _ = sampling_locations.shape 57 | value_list = 
value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1) 58 | sampling_grids = 2 * sampling_locations - 1 59 | sampling_value_list = [] 60 | for lid_, (H_, W_) in enumerate(value_spatial_shapes): 61 | # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_ 62 | value_l_ = value_list[lid_].flatten(2).transpose(1, 2).reshape(N_*M_, D_, H_, W_) 63 | # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2 64 | sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1) 65 | # N_*M_, D_, Lq_, P_ 66 | sampling_value_l_ = F.grid_sample(value_l_, sampling_grid_l_, 67 | mode='bilinear', padding_mode='zeros', align_corners=False) 68 | sampling_value_list.append(sampling_value_l_) 69 | # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_, M_, 1, Lq_, L_*P_) 70 | attention_weights = attention_weights.transpose(1, 2).reshape(N_*M_, 1, Lq_, L_*P_) 71 | output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights).sum(-1).view(N_, M_*D_, Lq_) 72 | return output.transpose(1, 2).contiguous() 73 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/ops/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # ------------------------------------------------------------------------------------------------ 3 | # Deformable DETR 4 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------------------------------ 7 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | # ------------------------------------------------------------------------------------------------ 9 | 10 | # Copyright (c) Facebook, Inc. and its affiliates. 11 | # Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 12 | 13 | python setup.py build install 14 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/ops/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | # Copyright (c) Facebook, Inc. and its affiliates. 10 | # Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 11 | 12 | from .ms_deform_attn import MSDeformAttn 13 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/ops/setup.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | # Copyright (c) Facebook, Inc. and its affiliates. 10 | # Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 11 | 12 | import os 13 | import glob 14 | 15 | import torch 16 | 17 | from torch.utils.cpp_extension import CUDA_HOME 18 | from torch.utils.cpp_extension import CppExtension 19 | from torch.utils.cpp_extension import CUDAExtension 20 | 21 | from setuptools import find_packages 22 | from setuptools import setup 23 | 24 | requirements = ["torch", "torchvision"] 25 | 26 | def get_extensions(): 27 | this_dir = os.path.dirname(os.path.abspath(__file__)) 28 | extensions_dir = os.path.join(this_dir, "src") 29 | 30 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 31 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 32 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 33 | 34 | sources = main_file + source_cpu 35 | extension = CppExtension 36 | extra_compile_args = {"cxx": []} 37 | define_macros = [] 38 | 39 | # Build the CUDA extension when FORCE_CUDA is set or a CUDA runtime is available; FORCE_CUDA allows building on machines where torch.cuda.is_available() is False (CUDA_HOME must still be set). 40 | if (os.environ.get('FORCE_CUDA') or torch.cuda.is_available()) and CUDA_HOME is not None: 41 | extension = CUDAExtension 42 | sources += source_cuda 43 | define_macros += [("WITH_CUDA", None)] 44 | extra_compile_args["nvcc"] = [ 45 | "-DCUDA_HAS_FP16=1", 46 | "-D__CUDA_NO_HALF_OPERATORS__", 47 | "-D__CUDA_NO_HALF_CONVERSIONS__", 48 | "-D__CUDA_NO_HALF2_OPERATORS__", 49 | ] 50 | else: 51 | if CUDA_HOME is None: 52 | raise NotImplementedError('CUDA_HOME is None. Please set environment variable CUDA_HOME.') 53 | else: 54 | raise NotImplementedError('No CUDA runtime found. Please set FORCE_CUDA=1 or check torch.cuda.is_available().') 55 | 56 | sources = [os.path.join(extensions_dir, s) for s in sources] 57 | include_dirs = [extensions_dir] 58 | ext_modules = [ 59 | extension( 60 | "MultiScaleDeformableAttention", 61 | sources, 62 | include_dirs=include_dirs, 63 | define_macros=define_macros, 64 | extra_compile_args=extra_compile_args, 65 | ) 66 | ] 67 | return ext_modules 68 | 69 | setup( 70 | name="MultiScaleDeformableAttention", 71 | version="1.0", 72 | author="Weijie Su", 73 | url="https://github.com/fundamentalvision/Deformable-DETR", 74 | description="PyTorch Wrapper for CUDA Functions of Multi-Scale Deformable Attention", 75 | packages=find_packages(exclude=("configs", "tests",)), 76 | ext_modules=get_extensions(), 77 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 78 | ) 79 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/ops/src/cpu/ms_deform_attn_cpu.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved.
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | /*! 12 | * Copyright (c) Facebook, Inc. and its affiliates. 13 | * Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 14 | */ 15 | 16 | #include <vector> 17 | 18 | #include <ATen/ATen.h> 19 | #include <ATen/cuda/CUDAContext.h> 20 | 21 | 22 | at::Tensor 23 | ms_deform_attn_cpu_forward( 24 | const at::Tensor &value, 25 | const at::Tensor &spatial_shapes, 26 | const at::Tensor &level_start_index, 27 | const at::Tensor &sampling_loc, 28 | const at::Tensor &attn_weight, 29 | const int im2col_step) 30 | { 31 | AT_ERROR("Not implemented on the CPU"); 32 | } 33 | 34 | std::vector<at::Tensor> 35 | ms_deform_attn_cpu_backward( 36 | const at::Tensor &value, 37 | const at::Tensor &spatial_shapes, 38 | const at::Tensor &level_start_index, 39 | const at::Tensor &sampling_loc, 40 | const at::Tensor &attn_weight, 41 | const at::Tensor &grad_output, 42 | const int im2col_step) 43 | { 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/ops/src/cpu/ms_deform_attn_cpu.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | /*! 12 | * Copyright (c) Facebook, Inc. and its affiliates. 13 | * Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 14 | */ 15 | 16 | #pragma once 17 | #include <torch/extension.h> 18 | 19 | at::Tensor 20 | ms_deform_attn_cpu_forward( 21 | const at::Tensor &value, 22 | const at::Tensor &spatial_shapes, 23 | const at::Tensor &level_start_index, 24 | const at::Tensor &sampling_loc, 25 | const at::Tensor &attn_weight, 26 | const int im2col_step); 27 | 28 | std::vector<at::Tensor> 29 | ms_deform_attn_cpu_backward( 30 | const at::Tensor &value, 31 | const at::Tensor &spatial_shapes, 32 | const at::Tensor &level_start_index, 33 | const at::Tensor &sampling_loc, 34 | const at::Tensor &attn_weight, 35 | const at::Tensor &grad_output, 36 | const int im2col_step); 37 | 38 | 39 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/ops/src/cuda/ms_deform_attn_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved.
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | /*! 12 | * Copyright (c) Facebook, Inc. and its affiliates. 13 | * Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 14 | */ 15 | 16 | #pragma once 17 | #include <torch/extension.h> 18 | 19 | at::Tensor ms_deform_attn_cuda_forward( 20 | const at::Tensor &value, 21 | const at::Tensor &spatial_shapes, 22 | const at::Tensor &level_start_index, 23 | const at::Tensor &sampling_loc, 24 | const at::Tensor &attn_weight, 25 | const int im2col_step); 26 | 27 | std::vector<at::Tensor> ms_deform_attn_cuda_backward( 28 | const at::Tensor &value, 29 | const at::Tensor &spatial_shapes, 30 | const at::Tensor &level_start_index, 31 | const at::Tensor &sampling_loc, 32 | const at::Tensor &attn_weight, 33 | const at::Tensor &grad_output, 34 | const int im2col_step); 35 | 36 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/ops/src/ms_deform_attn.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | /*! 12 | * Copyright (c) Facebook, Inc. and its affiliates.
13 | * Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 14 | */ 15 | 16 | #pragma once 17 | 18 | #include "cpu/ms_deform_attn_cpu.h" 19 | 20 | #ifdef WITH_CUDA 21 | #include "cuda/ms_deform_attn_cuda.h" 22 | #endif 23 | 24 | 25 | at::Tensor 26 | ms_deform_attn_forward( 27 | const at::Tensor &value, 28 | const at::Tensor &spatial_shapes, 29 | const at::Tensor &level_start_index, 30 | const at::Tensor &sampling_loc, 31 | const at::Tensor &attn_weight, 32 | const int im2col_step) 33 | { 34 | if (value.type().is_cuda()) 35 | { 36 | #ifdef WITH_CUDA 37 | return ms_deform_attn_cuda_forward( 38 | value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step); 39 | #else 40 | AT_ERROR("Not compiled with GPU support"); 41 | #endif 42 | } 43 | AT_ERROR("Not implemented on the CPU"); 44 | } 45 | 46 | std::vector<at::Tensor> 47 | ms_deform_attn_backward( 48 | const at::Tensor &value, 49 | const at::Tensor &spatial_shapes, 50 | const at::Tensor &level_start_index, 51 | const at::Tensor &sampling_loc, 52 | const at::Tensor &attn_weight, 53 | const at::Tensor &grad_output, 54 | const int im2col_step) 55 | { 56 | if (value.type().is_cuda()) 57 | { 58 | #ifdef WITH_CUDA 59 | return ms_deform_attn_cuda_backward( 60 | value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step); 61 | #else 62 | AT_ERROR("Not compiled with GPU support"); 63 | #endif 64 | } 65 | AT_ERROR("Not implemented on the CPU"); 66 | } 67 | 68 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/ops/src/vision.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | /*! 12 | * Copyright (c) Facebook, Inc. and its affiliates. 13 | * Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 14 | */ 15 | 16 | #include "ms_deform_attn.h" 17 | 18 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 19 | m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward"); 20 | m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward"); 21 | } 22 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/ops/test.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved.
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | # Copyright (c) Facebook, Inc. and its affiliates. 10 | # Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 11 | 12 | from __future__ import absolute_import 13 | from __future__ import print_function 14 | from __future__ import division 15 | 16 | import time 17 | import torch 18 | import torch.nn as nn 19 | from torch.autograd import gradcheck 20 | 21 | from functions.ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch 22 | 23 | 24 | N, M, D = 1, 2, 2 25 | Lq, L, P = 2, 2, 2 26 | shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long).cuda() 27 | level_start_index = torch.cat((shapes.new_zeros((1, )), shapes.prod(1).cumsum(0)[:-1])) 28 | S = sum([(H*W).item() for H, W in shapes]) 29 | 30 | 31 | torch.manual_seed(3) 32 | 33 | 34 | @torch.no_grad() 35 | def check_forward_equal_with_pytorch_double(): 36 | value = torch.rand(N, S, M, D).cuda() * 0.01 37 | sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() 38 | attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 39 | attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) 40 | im2col_step = 2 41 | output_pytorch = ms_deform_attn_core_pytorch(value.double(), shapes, sampling_locations.double(), attention_weights.double()).detach().cpu() 42 | output_cuda = MSDeformAttnFunction.apply(value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step).detach().cpu() 43 | fwdok = torch.allclose(output_cuda, output_pytorch) 44 | max_abs_err = (output_cuda - output_pytorch).abs().max() 45 | max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() 46 | 47 | print(f'* {fwdok} check_forward_equal_with_pytorch_double: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}') 48 | 49 | 50 | @torch.no_grad() 51 | def check_forward_equal_with_pytorch_float(): 52 | value = torch.rand(N, S, M, D).cuda() * 0.01 53 | sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() 54 | attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 55 | attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) 56 | im2col_step = 2 57 | output_pytorch = ms_deform_attn_core_pytorch(value, shapes, sampling_locations, attention_weights).detach().cpu() 58 | output_cuda = MSDeformAttnFunction.apply(value, shapes, level_start_index, sampling_locations, attention_weights, im2col_step).detach().cpu() 59 | fwdok = torch.allclose(output_cuda, output_pytorch, rtol=1e-2, atol=1e-3) 60 | max_abs_err = (output_cuda - output_pytorch).abs().max() 61 | max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() 62 | 63 | print(f'* {fwdok} check_forward_equal_with_pytorch_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}') 64 | 65 | 66 | def check_gradient_numerical(channels=4, grad_value=True, grad_sampling_loc=True, grad_attn_weight=True): 67 | 68 | value = torch.rand(N, S, M, channels).cuda() * 0.01 69 | sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() 70 | attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 71 | attention_weights /= 
attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) 72 | im2col_step = 2 73 | func = MSDeformAttnFunction.apply 74 | 75 | value.requires_grad = grad_value 76 | sampling_locations.requires_grad = grad_sampling_loc 77 | attention_weights.requires_grad = grad_attn_weight 78 | 79 | gradok = gradcheck(func, (value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step)) 80 | 81 | print(f'* {gradok} check_gradient_numerical(D={channels})') 82 | 83 | 84 | if __name__ == '__main__': 85 | check_forward_equal_with_pytorch_double() 86 | check_forward_equal_with_pytorch_float() 87 | 88 | for channels in [30, 32, 64, 71, 1025, 2048, 3096]: 89 | check_gradient_numerical(channels, True, True, True) 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/transformer_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .maskformer_transformer_decoder import StandardTransformerDecoder 3 | from .mask2former_transformer_decoder import MultiScaleMaskedTransformerDecoder 4 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/transformer_decoder/position_encoding.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # # Modified by Bowen Cheng from: https://github.com/facebookresearch/detr/blob/master/models/position_encoding.py 3 | """ 4 | Various positional encodings for the transformer. 5 | """ 6 | import math 7 | 8 | import torch 9 | from torch import nn 10 | 11 | 12 | class PositionEmbeddingSine(nn.Module): 13 | """ 14 | This is a more standard version of the position embedding, very similar to the one 15 | used by the Attention is all you need paper, generalized to work on images. 
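For an input feature map of shape (B, C, H, W), forward() returns a positional encoding of shape (B, 2 * num_pos_feats, H, W).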
16 | """ 17 | 18 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None): 19 | super().__init__() 20 | self.num_pos_feats = num_pos_feats 21 | self.temperature = temperature 22 | self.normalize = normalize 23 | if scale is not None and normalize is False: 24 | raise ValueError("normalize should be True if scale is passed") 25 | if scale is None: 26 | scale = 2 * math.pi 27 | self.scale = scale 28 | 29 | def forward(self, x, mask=None): 30 | if mask is None: 31 | mask = torch.zeros((x.size(0), x.size(2), x.size(3)), device=x.device, dtype=torch.bool) 32 | not_mask = ~mask 33 | y_embed = not_mask.cumsum(1, dtype=torch.float32) 34 | x_embed = not_mask.cumsum(2, dtype=torch.float32) 35 | if self.normalize: 36 | eps = 1e-6 37 | y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale 38 | x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale 39 | 40 | dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) 41 | dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) 42 | 43 | pos_x = x_embed[:, :, :, None] / dim_t 44 | pos_y = y_embed[:, :, :, None] / dim_t 45 | pos_x = torch.stack( 46 | (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4 47 | ).flatten(3) 48 | pos_y = torch.stack( 49 | (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4 50 | ).flatten(3) 51 | pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) 52 | return pos 53 | 54 | def __repr__(self, _repr_indent=4): 55 | head = "Positional encoding " + self.__class__.__name__ 56 | body = [ 57 | "num_pos_feats: {}".format(self.num_pos_feats), 58 | "temperature: {}".format(self.temperature), 59 | "normalize: {}".format(self.normalize), 60 | "scale: {}".format(self.scale), 61 | ] 62 | # _repr_indent = 4 63 | lines = [head] + [" " * _repr_indent + line for line in body] 64 | return "\n".join(lines) 65 | -------------------------------------------------------------------------------- /videocutler/mask2former/test_time_augmentation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import copy 3 | import logging 4 | from itertools import count 5 | 6 | import numpy as np 7 | import torch 8 | from fvcore.transforms import HFlipTransform 9 | from torch import nn 10 | from torch.nn.parallel import DistributedDataParallel 11 | 12 | from detectron2.data.detection_utils import read_image 13 | from detectron2.modeling import DatasetMapperTTA 14 | 15 | 16 | __all__ = [ 17 | "SemanticSegmentorWithTTA", 18 | ] 19 | 20 | 21 | class SemanticSegmentorWithTTA(nn.Module): 22 | """ 23 | A SemanticSegmentor with test-time augmentation enabled. 24 | Its :meth:`__call__` method has the same interface as :meth:`SemanticSegmentor.forward`. 25 | """ 26 | 27 | def __init__(self, cfg, model, tta_mapper=None, batch_size=1): 28 | """ 29 | Args: 30 | cfg (CfgNode): 31 | model (SemanticSegmentor): a SemanticSegmentor to apply TTA on. 32 | tta_mapper (callable): takes a dataset dict and returns a list of 33 | augmented versions of the dataset dict. Defaults to 34 | `DatasetMapperTTA(cfg)`. 35 | batch_size (int): batch the augmented images into this batch size for inference. 
36 | """ 37 | super().__init__() 38 | if isinstance(model, DistributedDataParallel): 39 | model = model.module 40 | self.cfg = cfg.clone() 41 | 42 | self.model = model 43 | 44 | if tta_mapper is None: 45 | tta_mapper = DatasetMapperTTA(cfg) 46 | self.tta_mapper = tta_mapper 47 | self.batch_size = batch_size 48 | 49 | def __call__(self, batched_inputs): 50 | """ 51 | Same input/output format as :meth:`SemanticSegmentor.forward` 52 | """ 53 | 54 | def _maybe_read_image(dataset_dict): 55 | ret = copy.copy(dataset_dict) 56 | if "image" not in ret: 57 | image = read_image(ret.pop("file_name"), self.model.input_format) 58 | image = torch.from_numpy(np.ascontiguousarray(image.transpose(2, 0, 1))) # CHW 59 | ret["image"] = image 60 | if "height" not in ret and "width" not in ret: 61 | ret["height"] = image.shape[1] 62 | ret["width"] = image.shape[2] 63 | return ret 64 | 65 | processed_results = [] 66 | for x in batched_inputs: 67 | result = self._inference_one_image(_maybe_read_image(x)) 68 | processed_results.append(result) 69 | return processed_results 70 | 71 | def _inference_one_image(self, input): 72 | """ 73 | Args: 74 | input (dict): one dataset dict with "image" field being a CHW tensor 75 | Returns: 76 | dict: one output dict 77 | """ 78 | orig_shape = (input["height"], input["width"]) 79 | augmented_inputs, tfms = self._get_augmented_inputs(input) 80 | 81 | final_predictions = None 82 | count_predictions = 0 83 | for input, tfm in zip(augmented_inputs, tfms): 84 | count_predictions += 1 85 | with torch.no_grad(): 86 | if final_predictions is None: 87 | if any(isinstance(t, HFlipTransform) for t in tfm.transforms): 88 | final_predictions = self.model([input])[0].pop("sem_seg").flip(dims=[2]) 89 | else: 90 | final_predictions = self.model([input])[0].pop("sem_seg") 91 | else: 92 | if any(isinstance(t, HFlipTransform) for t in tfm.transforms): 93 | final_predictions += self.model([input])[0].pop("sem_seg").flip(dims=[2]) 94 | else: 95 | final_predictions += self.model([input])[0].pop("sem_seg") 96 | 97 | final_predictions = final_predictions / count_predictions 98 | return {"sem_seg": final_predictions} 99 | 100 | def _get_augmented_inputs(self, input): 101 | augmented_inputs = self.tta_mapper(input) 102 | tfms = [x.pop("transforms") for x in augmented_inputs] 103 | return augmented_inputs, tfms 104 | -------------------------------------------------------------------------------- /videocutler/mask2former/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /videocutler/mask2former/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # Modified by Bowen Cheng from https://github.com/facebookresearch/detr/blob/master/util/misc.py 3 | """ 4 | Misc functions, including distributed helpers. 5 | 6 | Mostly copy-paste from torchvision references. 
7 | """ 8 | from typing import List, Optional 9 | 10 | import torch 11 | import torch.distributed as dist 12 | import torchvision 13 | from torch import Tensor 14 | 15 | 16 | def _max_by_axis(the_list): 17 | # type: (List[List[int]]) -> List[int] 18 | maxes = the_list[0] 19 | for sublist in the_list[1:]: 20 | for index, item in enumerate(sublist): 21 | maxes[index] = max(maxes[index], item) 22 | return maxes 23 | 24 | 25 | class NestedTensor(object): 26 | def __init__(self, tensors, mask: Optional[Tensor]): 27 | self.tensors = tensors 28 | self.mask = mask 29 | 30 | def to(self, device): 31 | # type: (Device) -> NestedTensor # noqa 32 | cast_tensor = self.tensors.to(device) 33 | mask = self.mask 34 | if mask is not None: 35 | assert mask is not None 36 | cast_mask = mask.to(device) 37 | else: 38 | cast_mask = None 39 | return NestedTensor(cast_tensor, cast_mask) 40 | 41 | def decompose(self): 42 | return self.tensors, self.mask 43 | 44 | def __repr__(self): 45 | return str(self.tensors) 46 | 47 | 48 | def nested_tensor_from_tensor_list(tensor_list: List[Tensor]): 49 | # TODO make this more general 50 | if tensor_list[0].ndim == 3: 51 | if torchvision._is_tracing(): 52 | # nested_tensor_from_tensor_list() does not export well to ONNX 53 | # call _onnx_nested_tensor_from_tensor_list() instead 54 | return _onnx_nested_tensor_from_tensor_list(tensor_list) 55 | 56 | # TODO make it support different-sized images 57 | max_size = _max_by_axis([list(img.shape) for img in tensor_list]) 58 | # min_size = tuple(min(s) for s in zip(*[img.shape for img in tensor_list])) 59 | batch_shape = [len(tensor_list)] + max_size 60 | b, c, h, w = batch_shape 61 | dtype = tensor_list[0].dtype 62 | device = tensor_list[0].device 63 | tensor = torch.zeros(batch_shape, dtype=dtype, device=device) 64 | mask = torch.ones((b, h, w), dtype=torch.bool, device=device) 65 | for img, pad_img, m in zip(tensor_list, tensor, mask): 66 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 67 | m[: img.shape[1], : img.shape[2]] = False 68 | else: 69 | raise ValueError("not supported") 70 | return NestedTensor(tensor, mask) 71 | 72 | 73 | # _onnx_nested_tensor_from_tensor_list() is an implementation of 74 | # nested_tensor_from_tensor_list() that is supported by ONNX tracing. 
75 | @torch.jit.unused 76 | def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> NestedTensor: 77 | max_size = [] 78 | for i in range(tensor_list[0].dim()): 79 | max_size_i = torch.max( 80 | torch.stack([img.shape[i] for img in tensor_list]).to(torch.float32) 81 | ).to(torch.int64) 82 | max_size.append(max_size_i) 83 | max_size = tuple(max_size) 84 | 85 | # work around for 86 | # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 87 | # m[: img.shape[1], :img.shape[2]] = False 88 | # which is not yet supported in onnx 89 | padded_imgs = [] 90 | padded_masks = [] 91 | for img in tensor_list: 92 | padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))] 93 | padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0])) 94 | padded_imgs.append(padded_img) 95 | 96 | m = torch.zeros_like(img[0], dtype=torch.int, device=img.device) 97 | padded_mask = torch.nn.functional.pad(m, (0, padding[2], 0, padding[1]), "constant", 1) 98 | padded_masks.append(padded_mask.to(torch.bool)) 99 | 100 | tensor = torch.stack(padded_imgs) 101 | mask = torch.stack(padded_masks) 102 | 103 | return NestedTensor(tensor, mask=mask) 104 | 105 | 106 | def is_dist_avail_and_initialized(): 107 | if not dist.is_available(): 108 | return False 109 | if not dist.is_initialized(): 110 | return False 111 | return True 112 | -------------------------------------------------------------------------------- /videocutler/mask2former_video/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from . import modeling 3 | 4 | # config 5 | from .config import add_maskformer2_video_config 6 | 7 | # models 8 | from .video_maskformer_model import VideoMaskFormer 9 | 10 | # video 11 | from .data_video import ( 12 | YTVISDatasetMapper, 13 | YTVISEvaluator, 14 | build_detection_train_loader, 15 | build_detection_test_loader, 16 | get_detection_dataset_dicts, 17 | ) 18 | 19 | # copy-paste 20 | from .engine import * -------------------------------------------------------------------------------- /videocutler/mask2former_video/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Meta Platforms, Inc. and affiliates. 3 | # Modified by XuDong Wang from https://github.com/facebookresearch/Mask2Former/tree/main/mask2former_video 4 | 5 | from detectron2.config import CfgNode as CN 6 | 7 | 8 | def add_maskformer2_video_config(cfg): 9 | # video data 10 | # DataLoader 11 | cfg.INPUT.SAMPLING_FRAME_NUM = 2 12 | cfg.INPUT.SAMPLING_FRAME_RANGE = 20 13 | cfg.INPUT.SAMPLING_FRAME_SHUFFLE = False 14 | cfg.INPUT.AUGMENTATIONS = [] # "brightness", "contrast", "saturation", "rotation" 15 | 16 | cfg.DATALOADER.COPY_PASTE = False 17 | cfg.DATALOADER.COPY_PASTE_RATE = 0.0 18 | cfg.DATALOADER.COPY_PASTE_MIN_RATIO = 0.5 19 | cfg.DATALOADER.COPY_PASTE_MAX_RATIO = 1.0 20 | cfg.DATALOADER.COPY_PASTE_RANDOM_NUM = True # random select number of instances 21 | cfg.DATALOADER.VISUALIZE_COPY_PASTE = False 22 | 23 | cfg.SOLVER.BASE_LR_MULTIPLIER = 1 24 | cfg.SOLVER.BASE_LR_MULTIPLIER_NAMES = [] 25 | -------------------------------------------------------------------------------- /videocutler/mask2former_video/data_video/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # Modified by Bowen Cheng from https://github.com/sukjunhwang/IFC 3 | 4 | from .dataset_mapper import YTVISDatasetMapper, CocoClipDatasetMapper 5 | from .build import * 6 | 7 | from .datasets import * 8 | from .ytvis_eval import YTVISEvaluator 9 | -------------------------------------------------------------------------------- /videocutler/mask2former_video/data_video/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # Modified by XuDong Wang from https://github.com/facebookresearch/Mask2Former/tree/main/mask2former_video 3 | 4 | from . import builtin # ensure the builtin datasets are registered 5 | 6 | __all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")] 7 | -------------------------------------------------------------------------------- /videocutler/mask2former_video/data_video/datasets/builtin.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # Modified by XuDong Wang from https://github.com/facebookresearch/Mask2Former/tree/main/mask2former_video 3 | 4 | import os 5 | 6 | from .ytvis import ( 7 | register_ytvis_instances, 8 | _get_ytvis_2019_instances_meta, 9 | _get_ytvis_2021_instances_meta, 10 | _get_imagenet_cls_agnostic_instances_meta, 11 | ) 12 | 13 | # ==== Predefined splits for YTVIS 2019 =========== 14 | _PREDEFINED_SPLITS_YTVIS_2019 = { 15 | "ytvis_2019_train": ("ytvis_2019/train/JPEGImages", 16 | "ytvis_2019/train.json"), 17 | "ytvis_2019_val": ("ytvis_2019/valid/JPEGImages", 18 | "ytvis_2019/valid.json"), 19 | "ytvis_2019_test": ("ytvis_2019/test/JPEGImages", 20 | "ytvis_2019/test.json"), 21 | "ytvis_2019_train_5perc": ("ytvis_2019/train/JPEGImages", 22 | "ytvis_2019/train_5percent.json"), 23 | "ytvis_2019_train_10perc": ("ytvis_2019/train/JPEGImages", 24 | "ytvis_2019/train_10percent.json"), 25 | "ytvis_2019_train_20perc": ("ytvis_2019/train/JPEGImages", 26 | "ytvis_2019/train_20percent.json"), 27 | "ytvis_2019_train_30perc": ("ytvis_2019/train/JPEGImages", 28 | "ytvis_2019/train_30percent.json"), 29 | "ytvis_2019_train_40perc": ("ytvis_2019/train/JPEGImages", 30 | "ytvis_2019/train_40percent.json"), 31 | "ytvis_2019_train_50perc": ("ytvis_2019/train/JPEGImages", 32 | "ytvis_2019/train_50percent.json"), 33 | } 34 | 35 | # ==== Predefined splits for YTVIS 2021 =========== 36 | _PREDEFINED_SPLITS_YTVIS_2021 = { 37 | "ytvis_2021_train": ("ytvis_2021/train/JPEGImages", 38 | "ytvis_2021/train.json"), 39 | "ytvis_2021_val": ("ytvis_2021/valid/JPEGImages", 40 | "ytvis_2021/valid.json"), 41 | "ytvis_2021_test": ("ytvis_2021/test/JPEGImages", 42 | "ytvis_2021/test.json"), 43 | "ytvis_2021_minus_2019_train": ("ytvis_2021/train/JPEGImages", 44 | "ytvis_2021/instances_val_sub.json"), 45 | } 46 | 47 | _PREDEFINED_SPLITS_ImageNet_CLS_AGNOSTIC = { 48 | "imagenet_video_train_cls_agnostic": ("imagenet/train", 49 | "imagenet/annotations/video_imagenet_train_fixsize480_tau0.15_N3.json"), 50 | } 51 | 52 | 53 | def register_all_ytvis_2019(root): 54 | for key, (image_root, json_file) in _PREDEFINED_SPLITS_YTVIS_2019.items(): 55 | # Assume pre-defined datasets live in `./datasets`. 
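# A json_file path containing "://" is treated as a remote URI and passed
# through unchanged; any other path is resolved relative to `root`
# (i.e. $DETECTRON2_DATASETS, defaulting to ./datasets).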
56 | register_ytvis_instances( 57 | key, 58 | _get_ytvis_2019_instances_meta(), 59 | os.path.join(root, json_file) if "://" not in json_file else json_file, 60 | os.path.join(root, image_root), 61 | ) 62 | 63 | 64 | def register_all_ytvis_2021(root): 65 | for key, (image_root, json_file) in _PREDEFINED_SPLITS_YTVIS_2021.items(): 66 | # Assume pre-defined datasets live in `./datasets`. 67 | register_ytvis_instances( 68 | key, 69 | _get_ytvis_2021_instances_meta(), 70 | os.path.join(root, json_file) if "://" not in json_file else json_file, 71 | os.path.join(root, image_root), 72 | ) 73 | 74 | def register_all_imagenet_cls_agnostic(root): 75 | for key, (image_root, json_file) in _PREDEFINED_SPLITS_ImageNet_CLS_AGNOSTIC.items(): 76 | # Assume pre-defined datasets live in `./datasets`. 77 | register_ytvis_instances( 78 | key, 79 | _get_imagenet_cls_agnostic_instances_meta(), 80 | os.path.join(root, json_file) if "://" not in json_file else json_file, 81 | os.path.join(root, image_root), 82 | ) 83 | 84 | if __name__.endswith(".builtin"): 85 | # Assume pre-defined datasets live in `./datasets`. 86 | _root = os.getenv("DETECTRON2_DATASETS", "datasets") 87 | register_all_ytvis_2019(_root) 88 | register_all_ytvis_2021(_root) 89 | register_all_imagenet_cls_agnostic(_root) -------------------------------------------------------------------------------- /videocutler/mask2former_video/data_video/datasets/ytvis_api/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # Modified by Bowen Cheng from https://github.com/youtubevos/cocoapi 3 | -------------------------------------------------------------------------------- /videocutler/mask2former_video/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | from .train_loop import * 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | 7 | from .defaults import * -------------------------------------------------------------------------------- /videocutler/mask2former_video/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .transformer_decoder.video_mask2former_transformer_decoder import VideoMultiScaleMaskedTransformerDecoder 3 | -------------------------------------------------------------------------------- /videocutler/mask2former_video/modeling/transformer_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .video_mask2former_transformer_decoder import VideoMultiScaleMaskedTransformerDecoder 3 | -------------------------------------------------------------------------------- /videocutler/mask2former_video/modeling/transformer_decoder/position_encoding.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # # Modified by Bowen Cheng from: https://github.com/facebookresearch/detr/blob/master/models/position_encoding.py 3 | """ 4 | Various positional encodings for the transformer. 
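The 3D variant below adds a temporal (frame-index) sine term on top of the 2D spatial encoding, for video inputs of shape (B, T, C, H, W).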
5 | """ 6 | import math 7 | 8 | import torch 9 | from torch import nn 10 | 11 | 12 | class PositionEmbeddingSine3D(nn.Module): 13 | """ 14 | This is a more standard version of the position embedding, very similar to the one 15 | used by the Attention is all you need paper, generalized to work on images. 16 | """ 17 | 18 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None): 19 | super().__init__() 20 | self.num_pos_feats = num_pos_feats 21 | self.temperature = temperature 22 | self.normalize = normalize 23 | if scale is not None and normalize is False: 24 | raise ValueError("normalize should be True if scale is passed") 25 | if scale is None: 26 | scale = 2 * math.pi 27 | self.scale = scale 28 | 29 | def forward(self, x, mask=None): 30 | # b, t, c, h, w 31 | assert x.dim() == 5, f"{x.shape} should be a 5-dimensional Tensor, got {x.dim()}-dimensional Tensor instead" 32 | if mask is None: 33 | mask = torch.zeros((x.size(0), x.size(1), x.size(3), x.size(4)), device=x.device, dtype=torch.bool) 34 | not_mask = ~mask 35 | z_embed = not_mask.cumsum(1, dtype=torch.float32) 36 | y_embed = not_mask.cumsum(2, dtype=torch.float32) 37 | x_embed = not_mask.cumsum(3, dtype=torch.float32) 38 | if self.normalize: 39 | eps = 1e-6 40 | z_embed = z_embed / (z_embed[:, -1:, :, :] + eps) * self.scale 41 | y_embed = y_embed / (y_embed[:, :, -1:, :] + eps) * self.scale 42 | x_embed = x_embed / (x_embed[:, :, :, -1:] + eps) * self.scale 43 | 44 | dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) 45 | dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) 46 | 47 | dim_t_z = torch.arange((self.num_pos_feats * 2), dtype=torch.float32, device=x.device) 48 | dim_t_z = self.temperature ** (2 * (dim_t_z // 2) / (self.num_pos_feats * 2)) 49 | 50 | pos_x = x_embed[:, :, :, :, None] / dim_t 51 | pos_y = y_embed[:, :, :, :, None] / dim_t 52 | pos_z = z_embed[:, :, :, :, None] / dim_t_z 53 | pos_x = torch.stack((pos_x[:, :, :, :, 0::2].sin(), pos_x[:, :, :, :, 1::2].cos()), dim=5).flatten(4) 54 | pos_y = torch.stack((pos_y[:, :, :, :, 0::2].sin(), pos_y[:, :, :, :, 1::2].cos()), dim=5).flatten(4) 55 | pos_z = torch.stack((pos_z[:, :, :, :, 0::2].sin(), pos_z[:, :, :, :, 1::2].cos()), dim=5).flatten(4) 56 | pos = (torch.cat((pos_y, pos_x), dim=4) + pos_z).permute(0, 1, 4, 2, 3) # b, t, c, h, w 57 | return pos 58 | -------------------------------------------------------------------------------- /videocutler/mask2former_video/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /videocutler/mask2former_video/utils/memory.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import logging 4 | from contextlib import contextmanager 5 | from functools import wraps 6 | import torch 7 | from torch.cuda.amp import autocast 8 | 9 | __all__ = ["retry_if_cuda_oom"] 10 | 11 | 12 | @contextmanager 13 | def _ignore_torch_cuda_oom(): 14 | """ 15 | A context which ignores CUDA OOM exception from pytorch. 16 | """ 17 | try: 18 | yield 19 | except RuntimeError as e: 20 | # NOTE: the string may change? 21 | if "CUDA out of memory. 
" in str(e): 22 | pass 23 | else: 24 | raise 25 | 26 | 27 | def retry_if_cuda_oom(func): 28 | """ 29 | Makes a function retry itself after encountering 30 | pytorch's CUDA OOM error. 31 | It will first retry after calling `torch.cuda.empty_cache()`. 32 | If that still fails, it will then retry by trying to convert inputs to CPUs. 33 | In this case, it expects the function to dispatch to CPU implementation. 34 | The return values may become CPU tensors as well and it's user's 35 | responsibility to convert it back to CUDA tensor if needed. 36 | Args: 37 | func: a stateless callable that takes tensor-like objects as arguments 38 | Returns: 39 | a callable which retries `func` if OOM is encountered. 40 | Examples: 41 | :: 42 | output = retry_if_cuda_oom(some_torch_function)(input1, input2) 43 | # output may be on CPU even if inputs are on GPU 44 | Note: 45 | 1. When converting inputs to CPU, it will only look at each argument and check 46 | if it has `.device` and `.to` for conversion. Nested structures of tensors 47 | are not supported. 48 | 2. Since the function might be called more than once, it has to be 49 | stateless. 50 | """ 51 | 52 | def maybe_to_cpu(x): 53 | try: 54 | like_gpu_tensor = x.device.type == "cuda" and hasattr(x, "to") 55 | except AttributeError: 56 | like_gpu_tensor = False 57 | if like_gpu_tensor: 58 | return x.to(device="cpu").to(torch.float32) 59 | else: 60 | return x 61 | 62 | @wraps(func) 63 | def wrapped(*args, **kwargs): 64 | with _ignore_torch_cuda_oom(): 65 | return func(*args, **kwargs) 66 | 67 | # Clear cache and retry 68 | torch.cuda.empty_cache() 69 | with _ignore_torch_cuda_oom(): 70 | return func(*args, **kwargs) 71 | 72 | # Try on CPU. This slows down the code significantly, therefore print a notice. 73 | logger = logging.getLogger(__name__) 74 | logger.info("Attempting to copy inputs to CPU due to CUDA OOM") 75 | new_args = (maybe_to_cpu(x) for x in args) 76 | new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()} 77 | with autocast(enabled=False): 78 | return func(*new_args, **new_kwargs) 79 | 80 | return wrapped 81 | -------------------------------------------------------------------------------- /videocutler/predict.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, "Mask2Former") 3 | import tempfile 4 | from pathlib import Path 5 | import numpy as np 6 | import cv2 7 | import cog 8 | 9 | # import some common detectron2 utilities 10 | from detectron2.config import CfgNode as CN 11 | from detectron2.engine import DefaultPredictor 12 | from detectron2.config import get_cfg 13 | from detectron2.utils.visualizer import Visualizer, ColorMode 14 | from detectron2.data import MetadataCatalog 15 | from detectron2.projects.deeplab import add_deeplab_config 16 | 17 | # import Mask2Former project 18 | from mask2former import add_maskformer2_config 19 | 20 | 21 | class Predictor(cog.Predictor): 22 | def setup(self): 23 | cfg = get_cfg() 24 | add_deeplab_config(cfg) 25 | add_maskformer2_config(cfg) 26 | cfg.merge_from_file("Mask2Former/configs/coco/panoptic-segmentation/swin/maskformer2_swin_large_IN21k_384_bs16_100ep.yaml") 27 | cfg.MODEL.WEIGHTS = 'model_final_f07440.pkl' 28 | cfg.MODEL.MASK_FORMER.TEST.SEMANTIC_ON = True 29 | cfg.MODEL.MASK_FORMER.TEST.INSTANCE_ON = True 30 | cfg.MODEL.MASK_FORMER.TEST.PANOPTIC_ON = True 31 | self.predictor = DefaultPredictor(cfg) 32 | self.coco_metadata = MetadataCatalog.get("coco_2017_val_panoptic") 33 | 34 | 35 | @cog.input( 36 | "image", 37 | 
type=Path, 38 | help="Input image for segmentation. Output will be the concatenation of panoptic segmentation (top), " 39 | "instance segmentation (middle), and semantic segmentation (bottom).", 40 | ) 41 | def predict(self, image): 42 | im = cv2.imread(str(image)) 43 | outputs = self.predictor(im) 44 | v = Visualizer(im[:, :, ::-1], self.coco_metadata, scale=1.2, instance_mode=ColorMode.IMAGE_BW) 45 | panoptic_result = v.draw_panoptic_seg(outputs["panoptic_seg"][0].to("cpu"), 46 | outputs["panoptic_seg"][1]).get_image() 47 | v = Visualizer(im[:, :, ::-1], self.coco_metadata, scale=1.2, instance_mode=ColorMode.IMAGE_BW) 48 | instance_result = v.draw_instance_predictions(outputs["instances"].to("cpu")).get_image() 49 | v = Visualizer(im[:, :, ::-1], self.coco_metadata, scale=1.2, instance_mode=ColorMode.IMAGE_BW) 50 | semantic_result = v.draw_sem_seg(outputs["sem_seg"].argmax(0).to("cpu")).get_image() 51 | result = np.concatenate((panoptic_result, instance_result, semantic_result), axis=0)[:, :, ::-1] 52 | out_path = Path(tempfile.mkdtemp()) / "out.png" 53 | cv2.imwrite(str(out_path), result) 54 | return out_path 55 | -------------------------------------------------------------------------------- /videocutler/requirements.txt: -------------------------------------------------------------------------------- 1 | cython 2 | scipy 3 | shapely 4 | timm 5 | h5py 6 | submitit 7 | scikit-image 8 | -------------------------------------------------------------------------------- /videocutler/single-node-video_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | MASTER_NODE=$(scontrol show hostname "$SLURM_NODELIST" | head -n1) 3 | DIST_URL="tcp://$MASTER_NODE:12392" 4 | SOCKET_NAME=$(ip r | grep default | awk '{print $5}') 5 | export GLOO_SOCKET_IFNAME=ens32 6 | 7 | python -u train_net_video.py --num-gpus 8 --num-machines 1 --machine-rank "$SLURM_NODEID" --dist-url "$DIST_URL" "$@" 8 | -------------------------------------------------------------------------------- /videocutler/tools/README.md: -------------------------------------------------------------------------------- 1 | This directory contains a few tools for MaskFormer. 2 | 3 | * `convert-torchvision-to-d2.py` 4 | 5 | Tool to convert torchvision pre-trained weights for D2. 6 | 7 | ``` 8 | wget https://download.pytorch.org/models/resnet101-63fe2227.pth 9 | python tools/convert-torchvision-to-d2.py resnet101-63fe2227.pth R-101.pkl 10 | ``` 11 | 12 | * `convert-pretrained-swin-model-to-d2.py` 13 | 14 | Tool to convert Swin Transformer pre-trained weights for D2.
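The commands below install `timm` first because the Swin backbone implementation imports it; each `wget`/`python` pair downloads an official Swin checkpoint and re-serializes it as a D2-compatible `.pkl`.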
15 | 16 | ``` 17 | pip install timm 18 | 19 | wget https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth 20 | python tools/convert-pretrained-swin-model-to-d2.py swin_tiny_patch4_window7_224.pth swin_tiny_patch4_window7_224.pkl 21 | 22 | wget https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth 23 | python tools/convert-pretrained-swin-model-to-d2.py swin_small_patch4_window7_224.pth swin_small_patch4_window7_224.pkl 24 | 25 | wget https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22k.pth 26 | python tools/convert-pretrained-swin-model-to-d2.py swin_base_patch4_window12_384_22k.pth swin_base_patch4_window12_384_22k.pkl 27 | 28 | wget https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth 29 | python tools/convert-pretrained-swin-model-to-d2.py swin_large_patch4_window12_384_22k.pth swin_large_patch4_window12_384_22k.pkl 30 | ``` 31 | 32 | * `evaluate_pq_for_semantic_segmentation.py` 33 | 34 | Tool to evaluate PQ (PQ-stuff) for semantic segmentation predictions. 35 | 36 | Usage: 37 | 38 | ``` 39 | python tools/evaluate_pq_for_semantic_segmentation.py --dataset-name ade20k_sem_seg_val --json-file OUTPUT_DIR/inference/sem_seg_predictions.json 40 | ``` 41 | 42 | where `OUTPUT_DIR` is set in the config file. 43 | 44 | * `evaluate_coco_boundary_ap.py` 45 | 46 | Tool to evaluate Boundary AP for instance segmentation predictions. 47 | 48 | Usage: 49 | 50 | ``` 51 | python tools/evaluate_coco_boundary_ap.py --gt-json-file COCO_GT_JSON --dt-json-file COCO_DT_JSON 52 | ``` 53 | 54 | To install Boundary IoU API, run: 55 | 56 | ``` 57 | pip install git+https://github.com/bowenc0221/boundary-iou-api.git 58 | ``` 59 | 60 | * `analyze_model.py` 61 | 62 | Tool to analyze model parameters and flops. 63 | 64 | Usage for semantic segmentation (ADE20K only, use with caution!): 65 | 66 | ``` 67 | python tools/analyze_model.py --num-inputs 1 --tasks flop --use-fixed-input-size --config-file CONFIG_FILE 68 | ``` 69 | 70 | Note that, for semantic segmentation (ADE20K only), we use a dummy image with a fixed size of `cfg.INPUT.CROP.SIZE[0] x cfg.INPUT.CROP.SIZE[0]`. 71 | Please do not use `--use-fixed-input-size` for calculating FLOPs on other datasets like Cityscapes! 72 | 73 | Usage for panoptic and instance segmentation: 74 | 75 | ``` 76 | python tools/analyze_model.py --num-inputs 100 --tasks flop --config-file CONFIG_FILE 77 | ``` 78 | 79 | Note that, for panoptic and instance segmentation, we compute the average flops over 100 real validation images. 80 | -------------------------------------------------------------------------------- /videocutler/tools/convert-pretrained-swin-model-to-d2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates.
--------------------------------------------------------------------------------
/videocutler/tools/convert-pretrained-swin-model-to-d2.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

import pickle as pkl
import sys

import torch

"""
Usage:
  # download pretrained swin model:
  wget https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth
  # run the conversion
  ./convert-pretrained-swin-model-to-d2.py swin_tiny_patch4_window7_224.pth swin_tiny_patch4_window7_224.pkl
  # Then, use swin_tiny_patch4_window7_224.pkl with the following changes in config:
MODEL:
  WEIGHTS: "/path/to/swin_tiny_patch4_window7_224.pkl"
INPUT:
  FORMAT: "RGB"
"""

if __name__ == "__main__":
    input_path = sys.argv[1]

    # The released Swin checkpoints store the weights under the "model" key.
    obj = torch.load(input_path, map_location="cpu")["model"]

    # Keep the keys as-is; D2's checkpoint loader matches them heuristically.
    res = {"model": obj, "__author__": "third_party", "matching_heuristics": True}

    with open(sys.argv[2], "wb") as f:
        pkl.dump(res, f)
--------------------------------------------------------------------------------
/videocutler/tools/convert-torchvision-to-d2.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates.

import pickle as pkl
import sys

import torch

"""
Usage:
  # download one of the ResNet{18,34,50,101,152} models from torchvision:
  wget https://download.pytorch.org/models/resnet50-19c8e357.pth -O r50.pth
  # run the conversion
  ./convert-torchvision-to-d2.py r50.pth r50.pkl
  # Then, use r50.pkl with the following changes in config:
MODEL:
  WEIGHTS: "/path/to/r50.pkl"
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  RESNETS:
    DEPTH: 50
    STRIDE_IN_1X1: False
INPUT:
  FORMAT: "RGB"
"""

if __name__ == "__main__":
    input_path = sys.argv[1]

    obj = torch.load(input_path, map_location="cpu")

    newmodel = {}
    for k in list(obj.keys()):
        old_k = k
        # Everything outside the residual stages gets the "stem." prefix.
        if "layer" not in k:
            k = "stem." + k
        # torchvision "layerN" corresponds to D2 "res{N+1}".
        for t in [1, 2, 3, 4]:
            k = k.replace("layer{}".format(t), "res{}".format(t + 1))
        # BatchNorm layers are stored as "convN.norm" in D2.
        for t in [1, 2, 3]:
            k = k.replace("bn{}".format(t), "conv{}.norm".format(t))
        # Downsampling projections become "shortcut" branches.
        k = k.replace("downsample.0", "shortcut")
        k = k.replace("downsample.1", "shortcut.norm")
        print(old_k, "->", k)
        newmodel[k] = obj.pop(old_k).detach().numpy()

    res = {"model": newmodel, "__author__": "torchvision", "matching_heuristics": True}

    with open(sys.argv[2], "wb") as f:
        pkl.dump(res, f)
    if obj:
        print("Unconverted keys:", obj.keys())
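# Illustrative examples of the renaming performed above, assuming a standard
# torchvision ResNet-50 state dict (key names shown for orientation only):
#   "conv1.weight"                 -> "stem.conv1.weight"
#   "bn1.running_mean"             -> "stem.conv1.norm.running_mean"
#   "layer1.0.conv1.weight"        -> "res2.0.conv1.weight"
#   "layer1.0.downsample.0.weight" -> "res2.0.shortcut.weight"
#   "layer1.0.downsample.1.weight" -> "res2.0.shortcut.norm.weight"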
--------------------------------------------------------------------------------
/videocutler/tools/evaluate_coco_boundary_ap.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# Modified by Bowen Cheng from: https://github.com/bowenc0221/boundary-iou-api/blob/master/tools/coco_instance_evaluation.py

"""
Evaluation for COCO val2017:
python ./tools/evaluate_coco_boundary_ap.py \
    --gt-json-file COCO_GT_JSON \
    --dt-json-file COCO_DT_JSON
"""
import argparse
import json

from boundary_iou.coco_instance_api.coco import COCO
from boundary_iou.coco_instance_api.cocoeval import COCOeval


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--gt-json-file", default="")
    parser.add_argument("--dt-json-file", default="")
    parser.add_argument("--iou-type", default="boundary")
    parser.add_argument("--dilation-ratio", default=0.020, type=float)
    args = parser.parse_args()
    print(args)

    annFile = args.gt_json_file
    dilation_ratio = args.dilation_ratio
    # Boundary IoU requires precomputing mask boundaries on the ground truth.
    get_boundary = args.iou_type == "boundary"
    cocoGt = COCO(annFile, get_boundary=get_boundary, dilation_ratio=dilation_ratio)

    # The detection file is a standard COCO results list of dicts; drop the box
    # predictions so that evaluation is based on masks only.
    with open(args.dt_json_file) as f:
        detections = json.load(f)
    for d in detections:
        d.pop("bbox", None)

    cocoDt = cocoGt.loadRes(detections)
    cocoEval = COCOeval(cocoGt, cocoDt, iouType=args.iou_type, dilation_ratio=dilation_ratio)
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/videocutler/train-1node.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#SBATCH -p learnfair
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --gres=gpu:8
#SBATCH --gpus-per-node=8
#SBATCH --cpus-per-task=48
#SBATCH --time 10000
#SBATCH -o "submitit/videocutler/slurm-%j.out"

# Forward all arguments (config file and overrides) to the per-node launcher.
srun single-node-video_run.sh "$@"
--------------------------------------------------------------------------------