├── .gitignore ├── .gitmodules ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── INSTALL.md ├── LICENSE ├── README.md ├── cog.yaml ├── cutler ├── __init__.py ├── config │ ├── __init__.py │ └── cutler_config.py ├── data │ ├── __init__.py │ ├── build.py │ ├── dataset_mapper.py │ ├── datasets │ │ ├── __init__.py │ │ ├── builtin.py │ │ ├── builtin_meta.py │ │ └── coco.py │ ├── detection_utils.py │ └── transforms │ │ ├── __init__.py │ │ ├── augmentation_impl.py │ │ └── transform.py ├── demo │ ├── __init__.py │ ├── demo.py │ ├── imgs │ │ ├── demo1.jpg │ │ ├── demo2.jpg │ │ ├── demo3.jpg │ │ ├── demo4.jpg │ │ ├── demo5.jpg │ │ ├── demo6.jpg │ │ ├── demo7.jpg │ │ └── demo8.jpg │ └── predictor.py ├── engine │ ├── __init__.py │ ├── defaults.py │ └── train_loop.py ├── evaluation │ ├── __init__.py │ └── coco_evaluation.py ├── model_zoo │ └── configs │ │ ├── Base-RCNN-FPN.yaml │ │ ├── COCO-Semisupervised │ │ ├── cascade_mask_rcnn_R_50_FPN_100perc.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_10perc.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_1perc.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_20perc.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_2perc.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_30perc.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_40perc.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_50perc.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_5perc.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_60perc.yaml │ │ └── cascade_mask_rcnn_R_50_FPN_80perc.yaml │ │ └── CutLER-ImageNet │ │ ├── cascade_mask_rcnn_R_50_FPN.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_demo.yaml │ │ ├── cascade_mask_rcnn_R_50_FPN_self_train.yaml │ │ └── mask_rcnn_R_50_FPN.yaml ├── modeling │ ├── __init__.py │ ├── meta_arch │ │ ├── __init__.py │ │ ├── build.py │ │ └── rcnn.py │ └── roi_heads │ │ ├── __init__.py │ │ ├── custom_cascade_rcnn.py │ │ ├── fast_rcnn.py │ │ └── roi_heads.py ├── solver │ ├── __init__.py │ └── build.py ├── structures │ ├── __init__.py │ └── boxes.py ├── tools │ ├── eval.sh │ ├── get_self_training_ann.py │ ├── run_with_submitit.sh │ ├── run_with_submitit_ssl.sh │ ├── single-node_run.sh │ └── train-1node.sh └── train_net.py ├── datasets └── README.md ├── docs ├── cutler-demo.jpg ├── demos_videocutler.gif ├── maskcut-demo.jpg ├── maskcut.gif ├── pipeline.jpg └── teaser_img.jpg ├── maskcut ├── colormap.py ├── crf.py ├── demo.py ├── dino.py ├── imgs │ ├── demo1.jpg │ ├── demo2.jpg │ ├── demo3.jpg │ ├── demo4.jpg │ ├── demo5.jpg │ ├── demo6.jpg │ ├── demo7.jpg │ └── demo8.jpg ├── maskcut.py ├── maskcut_with_submitit.py ├── merge_jsons.py ├── predict.py ├── run_maskcut_with_submitit.sh └── run_with_submitit_maskcut_array.py ├── requirements.txt └── videocutler ├── INSTALL.md ├── README.md ├── configs ├── imagenet │ └── instance-segmentation │ │ ├── Base-COCO-InstanceSegmentation.yaml │ │ ├── Base-imagenet-InstanceSegmentation.yaml │ │ └── mask2former_R50_imagenet.yaml └── imagenet_video │ ├── Base-YouTubeVIS-VideoInstanceSegmentation.yaml │ ├── video_mask2former_R50_cls_agnostic.yaml │ ├── videocutler_eval_ytvis2019.yaml │ └── videocutler_eval_ytvis2021.yaml ├── datasets ├── README.md ├── ade20k_instance_catid_mapping.txt ├── ade20k_instance_imgCatIds.json ├── prepare_ade20k_ins_seg.py ├── prepare_ade20k_pan_seg.py ├── prepare_ade20k_sem_seg.py └── prepare_coco_semantic_annos_from_panoptic_annos.py ├── demo.sh ├── demo ├── README.md ├── demo.py └── predictor.py ├── demo_video ├── colormap.py ├── demo.py ├── predictor.py └── visualizer.py ├── docs ├── demo-videos │ ├── 09773e4062 │ │ ├── 00100.jpg │ │ ├── 00105.jpg │ │ ├── 00110.jpg │ │ ├── 00115.jpg │ │ ├── 00120.jpg │ 
│ ├── 00125.jpg │ │ ├── 00130.jpg │ │ ├── 00135.jpg │ │ ├── 00140.jpg │ │ ├── 00145.jpg │ │ ├── 00150.jpg │ │ ├── 00155.jpg │ │ ├── 00160.jpg │ │ ├── 00165.jpg │ │ ├── 00170.jpg │ │ ├── 00175.jpg │ │ ├── 00180.jpg │ │ ├── 00185.jpg │ │ ├── 00190.jpg │ │ ├── 00195.jpg │ │ ├── 00200.jpg │ │ ├── 00205.jpg │ │ ├── 00210.jpg │ │ ├── 00215.jpg │ │ └── 00220.jpg │ ├── 4c7710908f │ │ ├── 00000.jpg │ │ ├── 00010.jpg │ │ ├── 00020.jpg │ │ ├── 00030.jpg │ │ ├── 00040.jpg │ │ ├── 00050.jpg │ │ ├── 00060.jpg │ │ ├── 00070.jpg │ │ ├── 00080.jpg │ │ ├── 00090.jpg │ │ ├── 00100.jpg │ │ ├── 00110.jpg │ │ ├── 00120.jpg │ │ ├── 00130.jpg │ │ ├── 00140.jpg │ │ ├── 00150.jpg │ │ ├── 00160.jpg │ │ └── 00170.jpg │ ├── 8b4f6d1186 │ │ ├── 00000.jpg │ │ ├── 00010.jpg │ │ ├── 00020.jpg │ │ ├── 00030.jpg │ │ ├── 00040.jpg │ │ ├── 00050.jpg │ │ ├── 00060.jpg │ │ ├── 00070.jpg │ │ ├── 00080.jpg │ │ ├── 00090.jpg │ │ ├── 00100.jpg │ │ ├── 00110.jpg │ │ ├── 00120.jpg │ │ ├── 00130.jpg │ │ ├── 00140.jpg │ │ ├── 00150.jpg │ │ ├── 00160.jpg │ │ └── 00170.jpg │ ├── 99c6b1acf2 │ │ ├── 00075.jpg │ │ ├── 00080.jpg │ │ ├── 00085.jpg │ │ ├── 00090.jpg │ │ ├── 00095.jpg │ │ ├── 00100.jpg │ │ ├── 00105.jpg │ │ ├── 00110.jpg │ │ ├── 00115.jpg │ │ ├── 00120.jpg │ │ ├── 00125.jpg │ │ ├── 00130.jpg │ │ ├── 00135.jpg │ │ ├── 00140.jpg │ │ ├── 00145.jpg │ │ ├── 00150.jpg │ │ ├── 00155.jpg │ │ ├── 00160.jpg │ │ ├── 00165.jpg │ │ └── 00170.jpg │ └── eea827bdda │ │ ├── 00000.jpg │ │ ├── 00005.jpg │ │ ├── 00010.jpg │ │ ├── 00015.jpg │ │ ├── 00020.jpg │ │ ├── 00025.jpg │ │ ├── 00030.jpg │ │ ├── 00035.jpg │ │ ├── 00040.jpg │ │ ├── 00045.jpg │ │ ├── 00050.jpg │ │ ├── 00055.jpg │ │ ├── 00060.jpg │ │ ├── 00065.jpg │ │ ├── 00070.jpg │ │ ├── 00075.jpg │ │ ├── 00080.jpg │ │ ├── 00085.jpg │ │ ├── 00090.jpg │ │ ├── 00095.jpg │ │ ├── 00100.jpg │ │ ├── 00105.jpg │ │ ├── 00110.jpg │ │ ├── 00115.jpg │ │ ├── 00120.jpg │ │ ├── 00125.jpg │ │ ├── 00130.jpg │ │ ├── 00135.jpg │ │ ├── 00140.jpg │ │ ├── 00145.jpg │ │ ├── 00150.jpg │ │ ├── 00155.jpg │ │ ├── 00160.jpg │ │ ├── 00165.jpg │ │ ├── 00170.jpg │ │ ├── 00175.jpg │ │ ├── 00180.jpg │ │ ├── 00185.jpg │ │ ├── 00190.jpg │ │ ├── 00195.jpg │ │ ├── 00200.jpg │ │ ├── 00205.jpg │ │ ├── 00210.jpg │ │ ├── 00215.jpg │ │ ├── 00220.jpg │ │ ├── 00225.jpg │ │ ├── 00230.jpg │ │ ├── 00235.jpg │ │ ├── 00240.jpg │ │ ├── 00245.jpg │ │ ├── 00250.jpg │ │ ├── 00255.jpg │ │ ├── 00260.jpg │ │ ├── 00265.jpg │ │ ├── 00270.jpg │ │ ├── 00275.jpg │ │ ├── 00280.jpg │ │ ├── 00285.jpg │ │ ├── 00290.jpg │ │ └── 00295.jpg ├── videocutler_demos.gif └── videocutler_pipeline.png ├── eval.sh ├── eval_ytvis.py ├── mask2former ├── __init__.py ├── config.py ├── data │ ├── __init__.py │ ├── dataset_mappers │ │ ├── __init__.py │ │ ├── coco_instance_new_baseline_dataset_mapper.py │ │ ├── coco_panoptic_new_baseline_dataset_mapper.py │ │ ├── mask_former_instance_dataset_mapper.py │ │ ├── mask_former_panoptic_dataset_mapper.py │ │ └── mask_former_semantic_dataset_mapper.py │ └── datasets │ │ ├── __init__.py │ │ ├── register_ade20k_full.py │ │ ├── register_ade20k_instance.py │ │ ├── register_ade20k_panoptic.py │ │ ├── register_coco_panoptic_annos_semseg.py │ │ ├── register_coco_stuff_10k.py │ │ ├── register_mapillary_vistas.py │ │ └── register_mapillary_vistas_panoptic.py ├── evaluation │ ├── __init__.py │ └── instance_evaluation.py ├── maskformer_model.py ├── modeling │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ └── swin.py │ ├── criterion.py │ ├── matcher.py │ ├── meta_arch │ │ ├── __init__.py │ │ ├── mask_former_head.py │ │ └── 
per_pixel_baseline.py │ ├── pixel_decoder │ │ ├── __init__.py │ │ ├── fpn.py │ │ ├── msdeformattn.py │ │ └── ops │ │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── ms_deform_attn_func.py │ │ │ ├── make.sh │ │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── ms_deform_attn.py │ │ │ ├── setup.py │ │ │ ├── src │ │ │ ├── cpu │ │ │ │ ├── ms_deform_attn_cpu.cpp │ │ │ │ └── ms_deform_attn_cpu.h │ │ │ ├── cuda │ │ │ │ ├── ms_deform_attn_cuda.cu │ │ │ │ ├── ms_deform_attn_cuda.h │ │ │ │ └── ms_deform_im2col_cuda.cuh │ │ │ ├── ms_deform_attn.h │ │ │ └── vision.cpp │ │ │ └── test.py │ └── transformer_decoder │ │ ├── __init__.py │ │ ├── mask2former_transformer_decoder.py │ │ ├── maskformer_transformer_decoder.py │ │ ├── position_encoding.py │ │ └── transformer.py ├── test_time_augmentation.py └── utils │ ├── __init__.py │ └── misc.py ├── mask2former_video ├── __init__.py ├── config.py ├── data_video │ ├── __init__.py │ ├── augmentation.py │ ├── build.py │ ├── dataset_mapper.py │ ├── datasets │ │ ├── __init__.py │ │ ├── builtin.py │ │ ├── ytvis.py │ │ └── ytvis_api │ │ │ ├── __init__.py │ │ │ ├── ytvos.py │ │ │ └── ytvoseval.py │ └── ytvis_eval.py ├── engine │ ├── __init__.py │ ├── defaults.py │ └── train_loop.py ├── modeling │ ├── __init__.py │ ├── criterion.py │ ├── matcher.py │ └── transformer_decoder │ │ ├── __init__.py │ │ ├── position_encoding.py │ │ └── video_mask2former_transformer_decoder.py ├── utils │ ├── __init__.py │ └── memory.py └── video_maskformer_model.py ├── predict.py ├── requirements.txt ├── single-node-video_run.sh ├── tools ├── README.md ├── analyze_model.py ├── convert-pretrained-swin-model-to-d2.py ├── convert-torchvision-to-d2.py ├── evaluate_coco_boundary_ap.py └── evaluate_pq_for_semantic_segmentation.py ├── train-1node.sh ├── train_net.py └── train_net_video.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | __MACOSX/ 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # dataset and wandb cache files 32 | */datasets/ 33 | */OUTPUT/ 34 | */wandb/ 35 | 36 | # local scripts 37 | */*.sh 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .nox/ 53 | .coverage 54 | .coverage.* 55 | .cache 56 | nosetests.xml 57 | coverage.xml 58 | *.cover 59 | *.py,cover 60 | .hypothesis/ 61 | .pytest_cache/ 62 | debug.ipynb 63 | */OUTPUT-DIR* 64 | */debug* 65 | 66 | # Translations 67 | *.mo 68 | *.pot 69 | 70 | # Django stuff: 71 | *.log 72 | local_settings.py 73 | db.sqlite3 74 | db.sqlite3-journal 75 | 76 | # Flask stuff: 77 | instance/ 78 | .webassets-cache 79 | 80 | # Scrapy stuff: 81 | .scrapy 82 | 83 | # Sphinx documentation 84 | docs/_build/ 85 | 86 | # PyBuilder 87 | target/ 88 | 89 | # Jupyter Notebook 90 | .ipynb_checkpoints 91 | 92 | # pretrained models 93 | videocutler/pretrain 94 | *.pth 95 | 96 | # demo results 97 | demos/ 98 | 99 | # IPython 100 | profile_default/ 101 | ipython_config.py 102 | 103 | # pyenv 104 | .python-version 105 | 106 | # pipenv 107 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 108 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 109 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 110 | # install all needed dependencies. 111 | #Pipfile.lock 112 | 113 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 114 | __pypackages__/ 115 | 116 | # Celery stuff 117 | celerybeat-schedule 118 | celerybeat.pid 119 | 120 | # SageMath parsed files 121 | *.sage.py 122 | 123 | # Environments 124 | .env 125 | .venv 126 | env/ 127 | venv/ 128 | ENV/ 129 | env.bak/ 130 | venv.bak/ 131 | 132 | # Spyder project settings 133 | .spyderproject 134 | .spyproject 135 | 136 | # Rope project settings 137 | .ropeproject 138 | 139 | # mkdocs documentation 140 | /site -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/TokenCut"] 2 | path = third_party/TokenCut 3 | url = https://github.com/YangtaoWANG95/TokenCut.git 4 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | This Code of Conduct also applies outside the project spaces when there is a 56 | reasonable belief that an individual's behavior may have a negative impact on 57 | the project or its community. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported by contacting the project team at opensource-conduct@fb.com. All 63 | complaints will be reviewed and investigated and will result in a response that 64 | is deemed necessary and appropriate to the circumstances. The project team is 65 | obligated to maintain confidentiality with regard to the reporter of an incident. 66 | Further details of specific enforcement policies may be posted separately. 67 | 68 | Project maintainers who do not follow or enforce the Code of Conduct in good 69 | faith may face temporary or permanent repercussions as determined by other 70 | members of the project's leadership. 
71 | 72 | ## Attribution 73 | 74 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 75 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 76 | 77 | [homepage]: https://www.contributor-covenant.org 78 | 79 | For answers to common questions about this code of conduct, see 80 | https://www.contributor-covenant.org/faq 81 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to CutLER 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `main`. 9 | 2. If you've added code that should be tested, add tests. 10 | 3. If you've changed APIs, update the documentation. 11 | 4. Ensure the test suite passes. 12 | 5. Make sure your code lints. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 14 | 15 | ## Contributor License Agreement ("CLA") 16 | In order to accept your pull request, we need you to submit a CLA. You only need 17 | to do this once to work on any of Facebook's open source projects. 18 | 19 | Complete your CLA here: https://code.facebook.com/cla 20 | 21 | ## Issues 22 | We use GitHub issues to track public bugs. Please ensure your description is 23 | clear and has sufficient instructions to be able to reproduce the issue. 24 | 25 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 26 | disclosure of security bugs. In those cases, please go through the process 27 | outlined on that page and do not file a public issue. 28 | 29 | ## License 30 | By contributing to CutLER, you agree that your contributions will be licensed 31 | under the LICENSE file in the root directory of this source tree. -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | 2 | # Installation 3 | 4 | ## Requirements 5 | - Linux or macOS with Python ≥ 3.8 6 | - PyTorch ≥ 1.8 and [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation. 7 | Install them together at [pytorch.org](https://pytorch.org) to make sure of this. 8 | Note: please check that your PyTorch version matches the one required by Detectron2. 9 | - Detectron2: follow the [Detectron2 installation instructions](https://detectron2.readthedocs.io/tutorials/install.html). 10 | - OpenCV ≥ 4.6 is needed for the demo and visualization. 11 | 12 | ## Example conda environment setup 13 | 14 | ```bash 15 | conda create --name cutler python=3.8 -y 16 | conda activate cutler 17 | conda install pytorch==1.8.1 torchvision==0.9.1 torchaudio==0.8.1 -c pytorch 18 | pip install git+https://github.com/lucasb-eyer/pydensecrf.git 19 | 20 | # under your working directory 21 | git clone git@github.com:facebookresearch/detectron2.git 22 | cd detectron2 23 | pip install -e . 24 | pip install git+https://github.com/cocodataset/panopticapi.git 25 | pip install git+https://github.com/mcordts/cityscapesScripts.git 26 | 27 | cd .. 28 | git clone --recursive git@github.com:facebookresearch/CutLER.git 29 | cd CutLER 30 | pip install -r requirements.txt 31 | ``` 32 | 33 | ## Datasets 34 | If you want to train/evaluate on the datasets, please see [datasets/README.md](datasets/README.md) for how we prepare the datasets used in this project. 
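35 | 
36 | ## Verifying the installation
37 | 
38 | As a quick sanity check (a minimal sketch, assuming the environment above; CutLER itself is run from the source tree rather than installed as a package, so only its dependencies are importable):
39 | 
40 | ```bash
41 | python -c "import torch, detectron2, cv2, pydensecrf.densecrf; print(torch.__version__, detectron2.__version__, cv2.__version__)"
42 | ```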
-------------------------------------------------------------------------------- /cog.yaml: -------------------------------------------------------------------------------- 1 | build: 2 | gpu: true 3 | cuda: "11.6" 4 | python_version: "3.8" 5 | python_packages: 6 | - "torch==1.11.0" 7 | - "torchvision==0.12.0" 8 | - "faiss-gpu==1.7.2" 9 | - "opencv-python==4.6.0.66" 10 | - "scikit-image==0.19.2" 11 | - "scikit-learn==1.1.1" 12 | - "shapely==1.8.2" 13 | - "timm==0.5.4" 14 | - "pyyaml==6.0" 15 | - "colored==1.4.4" 16 | - "fvcore==0.1.5.post20220512" 17 | - "gdown==4.5.4" 18 | - "pycocotools==2.0.6" 19 | - "numpy==1.20.0" 20 | 21 | run: 22 | - pip install git+https://github.com/lucasb-eyer/pydensecrf.git 23 | 24 | predict: "maskcut/predict.py:Predictor" 25 | -------------------------------------------------------------------------------- /cutler/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | import config 4 | import engine 5 | import modeling 6 | import structures 7 | import tools 8 | import demo 9 | 10 | # dataset loading 11 | from . import data # register all new datasets 12 | from data import datasets # register all new datasets 13 | from solver import * 14 | 15 | # from .data import register_all_imagenet -------------------------------------------------------------------------------- /cutler/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | from .cutler_config import add_cutler_config -------------------------------------------------------------------------------- /cutler/config/cutler_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | from detectron2.config import CfgNode as CN 4 | 5 | def add_cutler_config(cfg): 6 | cfg.DATALOADER.COPY_PASTE = False 7 | cfg.DATALOADER.COPY_PASTE_RATE = 0.0 8 | cfg.DATALOADER.COPY_PASTE_MIN_RATIO = 0.5 9 | cfg.DATALOADER.COPY_PASTE_MAX_RATIO = 1.0 10 | cfg.DATALOADER.COPY_PASTE_RANDOM_NUM = True 11 | cfg.DATALOADER.VISUALIZE_COPY_PASTE = False 12 | 13 | cfg.MODEL.ROI_HEADS.USE_DROPLOSS = False 14 | cfg.MODEL.ROI_HEADS.DROPLOSS_IOU_THRESH = 0.0 15 | 16 | cfg.SOLVER.BASE_LR_MULTIPLIER = 1 17 | cfg.SOLVER.BASE_LR_MULTIPLIER_NAMES = [] 18 | 19 | cfg.TEST.NO_SEGM = False -------------------------------------------------------------------------------- /cutler/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | from . import datasets # ensure the builtin datasets are registered 4 | from .detection_utils import * # isort:skip 5 | from .build import ( 6 | build_batch_data_loader, 7 | build_detection_train_loader, 8 | build_detection_test_loader, 9 | get_detection_dataset_dicts, 10 | load_proposals_into_dataset, 11 | print_instances_class_histogram, 12 | ) 13 | from detectron2.data.common import * 14 | 15 | __all__ = [k for k in globals().keys() if not k.startswith("_")] -------------------------------------------------------------------------------- /cutler/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 
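# Importing this package pulls in the COCO-style loaders below and the register_all_* helpers from builtin.py;
# cutler/data/__init__.py imports it so that all builtin benchmarks are registered with detectron2's DatasetCatalog.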
2 | from .coco import load_coco_json, load_sem_seg, register_coco_instances, convert_to_coco_json 3 | from .builtin import ( 4 | register_all_imagenet, 5 | register_all_uvo, 6 | register_all_coco_ca, 7 | register_all_coco_semi, 8 | register_all_lvis, 9 | register_all_voc, 10 | register_all_cross_domain, 11 | register_all_kitti, 12 | register_all_objects365, 13 | register_all_openimages, 14 | ) 15 | 16 | __all__ = [k for k in globals().keys() if not k.startswith("_")] -------------------------------------------------------------------------------- /cutler/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # Modified by XuDong Wang from https://github.com/facebookresearch/detectron2/blob/main/detectron2/data/transforms/__init__.py 3 | 4 | from fvcore.transforms.transform import * 5 | from .transform import * 6 | from detectron2.data.transforms.augmentation import * 7 | from .augmentation_impl import * 8 | 9 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 10 | 11 | 12 | from detectron2.utils.env import fixup_module_metadata 13 | 14 | fixup_module_metadata(__name__, globals(), __all__) 15 | del fixup_module_metadata -------------------------------------------------------------------------------- /cutler/demo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | from demo import * 3 | from predictor import * 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] -------------------------------------------------------------------------------- /cutler/demo/imgs/demo1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/cutler/demo/imgs/demo1.jpg -------------------------------------------------------------------------------- /cutler/demo/imgs/demo2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/cutler/demo/imgs/demo2.jpg -------------------------------------------------------------------------------- /cutler/demo/imgs/demo3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/cutler/demo/imgs/demo3.jpg -------------------------------------------------------------------------------- /cutler/demo/imgs/demo4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/cutler/demo/imgs/demo4.jpg -------------------------------------------------------------------------------- /cutler/demo/imgs/demo5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/cutler/demo/imgs/demo5.jpg -------------------------------------------------------------------------------- /cutler/demo/imgs/demo6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/cutler/demo/imgs/demo6.jpg 
-------------------------------------------------------------------------------- /cutler/demo/imgs/demo7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/cutler/demo/imgs/demo7.jpg -------------------------------------------------------------------------------- /cutler/demo/imgs/demo8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/cutler/demo/imgs/demo8.jpg -------------------------------------------------------------------------------- /cutler/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | from .train_loop import * 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | 7 | from .defaults import * -------------------------------------------------------------------------------- /cutler/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | from .coco_evaluation import COCOEvaluator -------------------------------------------------------------------------------- /cutler/model_zoo/configs/Base-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 
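# POST_NMS_TOPK_* below is the total number of proposals kept per image after NMS,
# pooled over all FPN levels (unlike PRE_NMS_TOPK_*, which is applied per level).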
19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | ROI_BOX_HEAD: 25 | NAME: "FastRCNNConvFCHead" 26 | NUM_FC: 2 27 | POOLER_RESOLUTION: 7 28 | ROI_MASK_HEAD: 29 | NAME: "MaskRCNNConvUpsampleHead" 30 | NUM_CONV: 4 31 | POOLER_RESOLUTION: 14 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.02 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | INPUT: 41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 42 | VERSION: 2 43 | -------------------------------------------------------------------------------- /cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_100perc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | MASK_ON: True 7 | BACKBONE: 8 | FREEZE_AT: 0 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | STRIDE_IN_1X1: False 13 | FPN: 14 | NORM: "SyncBN" 15 | ROI_BOX_HEAD: 16 | CLS_AGNOSTIC_BBOX_REG: True 17 | ROI_HEADS: 18 | NAME: CustomCascadeROIHeads 19 | RPN: 20 | POST_NMS_TOPK_TRAIN: 2000 21 | DATASETS: 22 | TRAIN: ("coco_2017_train",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | STEPS: (60000, 80000) 28 | MAX_ITER: 90000 29 | BASE_LR_MULTIPLIER: 2 30 | BASE_LR_MULTIPLIER_NAMES: ['roi_heads.mask_head.predictor', 'roi_heads.box_predictor.0.cls_score', 'roi_heads.box_predictor.0.bbox_pred', 'roi_heads.box_predictor.1.cls_score', 'roi_heads.box_predictor.1.bbox_pred', 'roi_heads.box_predictor.2.cls_score', 'roi_heads.box_predictor.2.bbox_pred'] 31 | INPUT: 32 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 33 | MAX_SIZE_TRAIN: 1333 34 | MASK_FORMAT: "bitmask" 35 | FORMAT: "RGB" 36 | TEST: 37 | PRECISE_BN: 38 | ENABLED: True 39 | EVAL_PERIOD: 5000 40 | OUTPUT_DIR: "output/100perc" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_10perc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | MASK_ON: True 7 | BACKBONE: 8 | FREEZE_AT: 0 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | STRIDE_IN_1X1: False 13 | FPN: 14 | NORM: "SyncBN" 15 | ROI_BOX_HEAD: 16 | CLS_AGNOSTIC_BBOX_REG: True 17 | ROI_HEADS: 18 | NAME: CustomCascadeROIHeads 19 | RPN: 20 | POST_NMS_TOPK_TRAIN: 2000 21 | DATASETS: 22 | TRAIN: ("coco_semi_10perc",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.04 27 | STEPS: (6000, 8000) 28 | MAX_ITER: 9000 29 | BASE_LR_MULTIPLIER: 4 30 | BASE_LR_MULTIPLIER_NAMES: ['roi_heads.mask_head.predictor', 'roi_heads.box_predictor.0.cls_score', 'roi_heads.box_predictor.0.bbox_pred', 'roi_heads.box_predictor.1.cls_score', 'roi_heads.box_predictor.1.bbox_pred', 'roi_heads.box_predictor.2.cls_score', 'roi_heads.box_predictor.2.bbox_pred'] 31 | INPUT: 32 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 33 | MAX_SIZE_TRAIN: 1333 34 | MASK_FORMAT: "bitmask" 35 | FORMAT: "RGB" 36 | TEST: 37 | PRECISE_BN: 
38 | ENABLED: True 39 | EVAL_PERIOD: 5000 40 | OUTPUT_DIR: "output/10perc" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_1perc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | MASK_ON: True 7 | BACKBONE: 8 | FREEZE_AT: 0 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | STRIDE_IN_1X1: False 13 | FPN: 14 | NORM: "SyncBN" 15 | ROI_BOX_HEAD: 16 | CLS_AGNOSTIC_BBOX_REG: True 17 | ROI_HEADS: 18 | NAME: CustomCascadeROIHeads 19 | RPN: 20 | POST_NMS_TOPK_TRAIN: 2000 21 | DATASETS: 22 | TRAIN: ("coco_semi_1perc",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.04 27 | STEPS: (2400, 3200) 28 | MAX_ITER: 3600 29 | WARMUP_FACTOR: 0.001 30 | WARMUP_ITERS: 1000 31 | BASE_LR_MULTIPLIER: 4 32 | BASE_LR_MULTIPLIER_NAMES: ['roi_heads.mask_head.predictor', 'roi_heads.box_predictor.0.cls_score', 'roi_heads.box_predictor.0.bbox_pred', 'roi_heads.box_predictor.1.cls_score', 'roi_heads.box_predictor.1.bbox_pred', 'roi_heads.box_predictor.2.cls_score', 'roi_heads.box_predictor.2.bbox_pred'] 33 | INPUT: 34 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 35 | MAX_SIZE_TRAIN: 1333 36 | MASK_FORMAT: "bitmask" 37 | FORMAT: "RGB" 38 | TEST: 39 | PRECISE_BN: 40 | ENABLED: True 41 | EVAL_PERIOD: 5000 42 | OUTPUT_DIR: "output/1perc" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_20perc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | MASK_ON: True 7 | BACKBONE: 8 | FREEZE_AT: 0 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | STRIDE_IN_1X1: False 13 | FPN: 14 | NORM: "SyncBN" 15 | ROI_BOX_HEAD: 16 | CLS_AGNOSTIC_BBOX_REG: True 17 | ROI_HEADS: 18 | NAME: CustomCascadeROIHeads 19 | RPN: 20 | POST_NMS_TOPK_TRAIN: 2000 21 | DATASETS: 22 | TRAIN: ("coco_semi_20perc",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.04 27 | STEPS: (12000, 16000) 28 | MAX_ITER: 18000 29 | BASE_LR_MULTIPLIER: 4 30 | BASE_LR_MULTIPLIER_NAMES: ['roi_heads.mask_head.predictor', 'roi_heads.box_predictor.0.cls_score', 'roi_heads.box_predictor.0.bbox_pred', 'roi_heads.box_predictor.1.cls_score', 'roi_heads.box_predictor.1.bbox_pred', 'roi_heads.box_predictor.2.cls_score', 'roi_heads.box_predictor.2.bbox_pred'] 31 | INPUT: 32 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 33 | MAX_SIZE_TRAIN: 1333 34 | MASK_FORMAT: "bitmask" 35 | FORMAT: "RGB" 36 | TEST: 37 | PRECISE_BN: 38 | ENABLED: True 39 | EVAL_PERIOD: 5000 40 | OUTPUT_DIR: "output/20perc" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_2perc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: 
"http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | MASK_ON: True 7 | BACKBONE: 8 | FREEZE_AT: 0 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | STRIDE_IN_1X1: False 13 | FPN: 14 | NORM: "SyncBN" 15 | ROI_BOX_HEAD: 16 | CLS_AGNOSTIC_BBOX_REG: True 17 | ROI_HEADS: 18 | NAME: CustomCascadeROIHeads 19 | RPN: 20 | POST_NMS_TOPK_TRAIN: 2000 21 | DATASETS: 22 | TRAIN: ("coco_semi_2perc",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.04 27 | STEPS: (2400, 3200) 28 | MAX_ITER: 3600 29 | WARMUP_FACTOR: 0.001 30 | WARMUP_ITERS: 1000 31 | BASE_LR_MULTIPLIER: 4 32 | BASE_LR_MULTIPLIER_NAMES: ['roi_heads.mask_head.predictor', 'roi_heads.box_predictor.0.cls_score', 'roi_heads.box_predictor.0.bbox_pred', 'roi_heads.box_predictor.1.cls_score', 'roi_heads.box_predictor.1.bbox_pred', 'roi_heads.box_predictor.2.cls_score', 'roi_heads.box_predictor.2.bbox_pred'] 33 | INPUT: 34 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 35 | MAX_SIZE_TRAIN: 1333 36 | MASK_FORMAT: "bitmask" 37 | FORMAT: "RGB" 38 | TEST: 39 | PRECISE_BN: 40 | ENABLED: True 41 | EVAL_PERIOD: 5000 42 | OUTPUT_DIR: "output/2perc" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_30perc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | MASK_ON: True 7 | BACKBONE: 8 | FREEZE_AT: 0 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | STRIDE_IN_1X1: False 13 | FPN: 14 | NORM: "SyncBN" 15 | ROI_BOX_HEAD: 16 | CLS_AGNOSTIC_BBOX_REG: True 17 | ROI_HEADS: 18 | NAME: CustomCascadeROIHeads 19 | RPN: 20 | POST_NMS_TOPK_TRAIN: 2000 21 | DATASETS: 22 | TRAIN: ("coco_semi_30perc",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.04 27 | STEPS: (18000, 24000) 28 | MAX_ITER: 27000 29 | BASE_LR_MULTIPLIER: 4 30 | BASE_LR_MULTIPLIER_NAMES: ['roi_heads.mask_head.predictor', 'roi_heads.box_predictor.0.cls_score', 'roi_heads.box_predictor.0.bbox_pred', 'roi_heads.box_predictor.1.cls_score', 'roi_heads.box_predictor.1.bbox_pred', 'roi_heads.box_predictor.2.cls_score', 'roi_heads.box_predictor.2.bbox_pred'] 31 | INPUT: 32 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 33 | MAX_SIZE_TRAIN: 1333 34 | MASK_FORMAT: "bitmask" 35 | FORMAT: "RGB" 36 | TEST: 37 | PRECISE_BN: 38 | ENABLED: True 39 | EVAL_PERIOD: 5000 40 | OUTPUT_DIR: "output/30perc" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_40perc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | MASK_ON: True 7 | BACKBONE: 8 | FREEZE_AT: 0 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | STRIDE_IN_1X1: False 13 | FPN: 14 | NORM: "SyncBN" 15 | ROI_BOX_HEAD: 16 | CLS_AGNOSTIC_BBOX_REG: True 17 | ROI_HEADS: 18 | NAME: CustomCascadeROIHeads 19 | RPN: 20 | POST_NMS_TOPK_TRAIN: 2000 21 | DATASETS: 22 | TRAIN: ("coco_semi_40perc",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 
26 | BASE_LR: 0.04 27 | STEPS: (24000, 32000) 28 | MAX_ITER: 36000 29 | BASE_LR_MULTIPLIER: 4 30 | BASE_LR_MULTIPLIER_NAMES: ['roi_heads.mask_head.predictor', 'roi_heads.box_predictor.0.cls_score', 'roi_heads.box_predictor.0.bbox_pred', 'roi_heads.box_predictor.1.cls_score', 'roi_heads.box_predictor.1.bbox_pred', 'roi_heads.box_predictor.2.cls_score', 'roi_heads.box_predictor.2.bbox_pred'] 31 | INPUT: 32 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 33 | MAX_SIZE_TRAIN: 1333 34 | MASK_FORMAT: "bitmask" 35 | FORMAT: "RGB" 36 | TEST: 37 | PRECISE_BN: 38 | ENABLED: True 39 | EVAL_PERIOD: 5000 40 | OUTPUT_DIR: "output/40perc" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_50perc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | MASK_ON: True 7 | BACKBONE: 8 | FREEZE_AT: 0 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | STRIDE_IN_1X1: False 13 | FPN: 14 | NORM: "SyncBN" 15 | ROI_BOX_HEAD: 16 | CLS_AGNOSTIC_BBOX_REG: True 17 | ROI_HEADS: 18 | NAME: CustomCascadeROIHeads 19 | RPN: 20 | POST_NMS_TOPK_TRAIN: 2000 21 | DATASETS: 22 | TRAIN: ("coco_semi_50perc",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | STEPS: (30000, 40000) 28 | MAX_ITER: 45000 29 | BASE_LR_MULTIPLIER: 2 30 | BASE_LR_MULTIPLIER_NAMES: ['roi_heads.mask_head.predictor', 'roi_heads.box_predictor.0.cls_score', 'roi_heads.box_predictor.0.bbox_pred', 'roi_heads.box_predictor.1.cls_score', 'roi_heads.box_predictor.1.bbox_pred', 'roi_heads.box_predictor.2.cls_score', 'roi_heads.box_predictor.2.bbox_pred'] 31 | INPUT: 32 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 33 | MAX_SIZE_TRAIN: 1333 34 | MASK_FORMAT: "bitmask" 35 | FORMAT: "RGB" 36 | TEST: 37 | PRECISE_BN: 38 | ENABLED: True 39 | EVAL_PERIOD: 5000 40 | OUTPUT_DIR: "output/50perc" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_5perc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | MASK_ON: True 7 | BACKBONE: 8 | FREEZE_AT: 0 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | STRIDE_IN_1X1: False 13 | FPN: 14 | NORM: "SyncBN" 15 | ROI_BOX_HEAD: 16 | CLS_AGNOSTIC_BBOX_REG: True 17 | ROI_HEADS: 18 | NAME: CustomCascadeROIHeads 19 | RPN: 20 | POST_NMS_TOPK_TRAIN: 2000 21 | DATASETS: 22 | TRAIN: ("coco_semi_5perc",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.04 27 | STEPS: (3000, 4000) 28 | MAX_ITER: 4500 29 | WARMUP_FACTOR: 0.001 30 | WARMUP_ITERS: 1000 31 | BASE_LR_MULTIPLIER: 4 32 | BASE_LR_MULTIPLIER_NAMES: ['roi_heads.mask_head.predictor', 'roi_heads.box_predictor.0.cls_score', 'roi_heads.box_predictor.0.bbox_pred', 'roi_heads.box_predictor.1.cls_score', 'roi_heads.box_predictor.1.bbox_pred', 'roi_heads.box_predictor.2.cls_score', 'roi_heads.box_predictor.2.bbox_pred'] 33 | INPUT: 34 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 35 | MAX_SIZE_TRAIN: 1333 36 | 
MASK_FORMAT: "bitmask" 37 | FORMAT: "RGB" 38 | TEST: 39 | PRECISE_BN: 40 | ENABLED: True 41 | EVAL_PERIOD: 5000 42 | OUTPUT_DIR: "output/5perc" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_60perc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | MASK_ON: True 7 | BACKBONE: 8 | FREEZE_AT: 0 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | STRIDE_IN_1X1: False 13 | FPN: 14 | NORM: "SyncBN" 15 | ROI_BOX_HEAD: 16 | CLS_AGNOSTIC_BBOX_REG: True 17 | ROI_HEADS: 18 | NAME: CustomCascadeROIHeads 19 | RPN: 20 | POST_NMS_TOPK_TRAIN: 2000 21 | DATASETS: 22 | TRAIN: ("coco_semi_60perc",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | STEPS: (36000, 48000) 28 | MAX_ITER: 54000 29 | BASE_LR_MULTIPLIER: 2 30 | BASE_LR_MULTIPLIER_NAMES: ['roi_heads.mask_head.predictor', 'roi_heads.box_predictor.0.cls_score', 'roi_heads.box_predictor.0.bbox_pred', 'roi_heads.box_predictor.1.cls_score', 'roi_heads.box_predictor.1.bbox_pred', 'roi_heads.box_predictor.2.cls_score', 'roi_heads.box_predictor.2.bbox_pred'] 31 | INPUT: 32 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 33 | MAX_SIZE_TRAIN: 1333 34 | MASK_FORMAT: "bitmask" 35 | FORMAT: "RGB" 36 | TEST: 37 | PRECISE_BN: 38 | ENABLED: True 39 | EVAL_PERIOD: 5000 40 | OUTPUT_DIR: "output/60perc" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_80perc.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | MASK_ON: True 7 | BACKBONE: 8 | FREEZE_AT: 0 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | STRIDE_IN_1X1: False 13 | FPN: 14 | NORM: "SyncBN" 15 | ROI_BOX_HEAD: 16 | CLS_AGNOSTIC_BBOX_REG: True 17 | ROI_HEADS: 18 | NAME: CustomCascadeROIHeads 19 | RPN: 20 | POST_NMS_TOPK_TRAIN: 2000 21 | DATASETS: 22 | TRAIN: ("coco_semi_80perc",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | STEPS: (48000, 64000) 28 | MAX_ITER: 72000 29 | BASE_LR_MULTIPLIER: 2 30 | BASE_LR_MULTIPLIER_NAMES: ['roi_heads.mask_head.predictor', 'roi_heads.box_predictor.0.cls_score', 'roi_heads.box_predictor.0.bbox_pred', 'roi_heads.box_predictor.1.cls_score', 'roi_heads.box_predictor.1.bbox_pred', 'roi_heads.box_predictor.2.cls_score', 'roi_heads.box_predictor.2.bbox_pred'] 31 | INPUT: 32 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 33 | MAX_SIZE_TRAIN: 1333 34 | MASK_FORMAT: "bitmask" 35 | FORMAT: "RGB" 36 | TEST: 37 | PRECISE_BN: 38 | ENABLED: True 39 | EVAL_PERIOD: 5000 40 | OUTPUT_DIR: "output/80perc" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/CutLER-ImageNet/cascade_mask_rcnn_R_50_FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | DATALOADER: 3 | COPY_PASTE: True 4 | COPY_PASTE_RATE: 1.0 5 | VISUALIZE_COPY_PASTE: False 6 | 
COPY_PASTE_RANDOM_NUM: True 7 | COPY_PASTE_MIN_RATIO: 0.3 8 | COPY_PASTE_MAX_RATIO: 1.0 9 | NUM_WORKERS: 0 10 | MODEL: 11 | PIXEL_MEAN: [123.675, 116.280, 103.530] 12 | PIXEL_STD: [58.395, 57.120, 57.375] 13 | WEIGHTS: 'http://dl.fbaipublicfiles.com/cutler/checkpoints/dino_RN50_pretrain_d2_format.pkl' 14 | MASK_ON: True 15 | BACKBONE: 16 | FREEZE_AT: 0 17 | RESNETS: 18 | DEPTH: 50 19 | NORM: "SyncBN" 20 | STRIDE_IN_1X1: False 21 | FPN: 22 | NORM: "SyncBN" 23 | ROI_BOX_HEAD: 24 | CLS_AGNOSTIC_BBOX_REG: True 25 | ROI_HEADS: 26 | NAME: CustomCascadeROIHeads 27 | NUM_CLASSES: 1 28 | SCORE_THRESH_TEST: 0.0 29 | POSITIVE_FRACTION: 0.25 30 | USE_DROPLOSS: True 31 | DROPLOSS_IOU_THRESH: 0.01 32 | RPN: 33 | POST_NMS_TOPK_TRAIN: 4000 34 | NMS_THRESH: 0.65 35 | DATASETS: 36 | TRAIN: ("imagenet_train",) 37 | SOLVER: 38 | IMS_PER_BATCH: 16 39 | BASE_LR: 0.01 40 | WEIGHT_DECAY: 0.00005 41 | STEPS: (80000,) 42 | MAX_ITER: 160000 43 | GAMMA: 0.02 44 | CLIP_GRADIENTS: 45 | CLIP_TYPE: norm 46 | CLIP_VALUE: 1.0 47 | ENABLED: true 48 | NORM_TYPE: 2.0 49 | AMP: 50 | ENABLED: True 51 | INPUT: 52 | MIN_SIZE_TRAIN: (240, 320, 480, 640, 672, 704, 736, 768, 800, 1024) 53 | MAX_SIZE_TRAIN: 1333 54 | MASK_FORMAT: "bitmask" 55 | FORMAT: "RGB" 56 | TEST: 57 | PRECISE_BN: 58 | ENABLED: True 59 | NUM_ITER: 200 60 | DETECTIONS_PER_IMAGE: 100 61 | OUTPUT_DIR: "output/" -------------------------------------------------------------------------------- /cutler/model_zoo/configs/CutLER-ImageNet/cascade_mask_rcnn_R_50_FPN_demo.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | DATALOADER: 3 | COPY_PASTE: True 4 | COPY_PASTE_RATE: 1.0 5 | VISUALIZE_COPY_PASTE: False 6 | COPY_PASTE_RANDOM_NUM: True 7 | COPY_PASTE_MIN_RATIO: 0.3 8 | COPY_PASTE_MAX_RATIO: 1.0 9 | NUM_WORKERS: 0 10 | MODEL: 11 | PIXEL_MEAN: [123.675, 116.280, 103.530] 12 | PIXEL_STD: [58.395, 57.120, 57.375] 13 | WEIGHTS: 'http://dl.fbaipublicfiles.com/cutler/checkpoints/dino_RN50_pretrain_d2_format.pkl' 14 | MASK_ON: True 15 | BACKBONE: 16 | FREEZE_AT: 0 17 | RESNETS: 18 | DEPTH: 50 19 | NORM: "SyncBN" 20 | STRIDE_IN_1X1: False 21 | FPN: 22 | NORM: "SyncBN" 23 | ROI_BOX_HEAD: 24 | CLS_AGNOSTIC_BBOX_REG: True 25 | ROI_HEADS: 26 | NAME: CustomCascadeROIHeads 27 | NUM_CLASSES: 1 28 | SCORE_THRESH_TEST: 0.0 29 | POSITIVE_FRACTION: 0.25 30 | USE_DROPLOSS: True 31 | DROPLOSS_IOU_THRESH: 0.01 32 | RPN: 33 | POST_NMS_TOPK_TRAIN: 4000 34 | NMS_THRESH: 0.65 35 | DATASETS: 36 | TRAIN: ("imagenet_train",) 37 | TEST: ("imagenet_train",) 38 | SOLVER: 39 | IMS_PER_BATCH: 16 40 | BASE_LR: 0.01 41 | WEIGHT_DECAY: 0.00005 42 | STEPS: (80000,) 43 | MAX_ITER: 160000 44 | GAMMA: 0.02 45 | CLIP_GRADIENTS: 46 | CLIP_TYPE: norm 47 | CLIP_VALUE: 1.0 48 | ENABLED: true 49 | NORM_TYPE: 2.0 50 | AMP: 51 | ENABLED: True 52 | INPUT: 53 | MIN_SIZE_TRAIN: (240, 320, 480, 640, 672, 704, 736, 768, 800, 1024) 54 | MAX_SIZE_TRAIN: 1333 55 | MASK_FORMAT: "bitmask" 56 | FORMAT: "RGB" 57 | TEST: 58 | PRECISE_BN: 59 | ENABLED: True 60 | NUM_ITER: 200 61 | DETECTIONS_PER_IMAGE: 100 62 | OUTPUT_DIR: "output/" 63 | -------------------------------------------------------------------------------- /cutler/model_zoo/configs/CutLER-ImageNet/cascade_mask_rcnn_R_50_FPN_self_train.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | DATALOADER: 3 | COPY_PASTE: True 4 | COPY_PASTE_RATE: 1.0 5 | VISUALIZE_COPY_PASTE: False 6 | COPY_PASTE_RANDOM_NUM: True 7 | 
COPY_PASTE_MIN_RATIO: 0.5 8 | COPY_PASTE_MAX_RATIO: 1.0 9 | NUM_WORKERS: 2 10 | MODEL: 11 | PIXEL_MEAN: [123.675, 116.280, 103.530] 12 | PIXEL_STD: [58.395, 57.120, 57.375] 13 | WEIGHTS: 'http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_r1.pth' # round 1 14 | # WEIGHTS: 'http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_r2.pth' # round 2 15 | MASK_ON: True 16 | BACKBONE: 17 | FREEZE_AT: 0 18 | RESNETS: 19 | DEPTH: 50 20 | NORM: "SyncBN" 21 | STRIDE_IN_1X1: False 22 | FPN: 23 | NORM: "SyncBN" 24 | ROI_BOX_HEAD: 25 | CLS_AGNOSTIC_BBOX_REG: True 26 | ROI_HEADS: 27 | NAME: CustomCascadeROIHeads 28 | NUM_CLASSES: 1 29 | SCORE_THRESH_TEST: 0.0 30 | POSITIVE_FRACTION: 0.25 31 | USE_DROPLOSS: False 32 | DROPLOSS_IOU_THRESH: 0.01 33 | DATASETS: 34 | TRAIN: ("imagenet_train_r1",) # round 1 35 | # TRAIN: ("imagenet_train_r2",) # round 2 36 | SOLVER: 37 | IMS_PER_BATCH: 16 38 | BASE_LR: 0.005 39 | STEPS: (79999,) 40 | MAX_ITER: 80000 41 | GAMMA: 1.0 42 | CLIP_GRADIENTS: 43 | CLIP_TYPE: norm 44 | CLIP_VALUE: 1.0 45 | ENABLED: true 46 | NORM_TYPE: 2.0 47 | AMP: 48 | ENABLED: True 49 | INPUT: 50 | MIN_SIZE_TRAIN: (240, 320, 480, 640, 672, 704, 736, 768, 800, 1024) 51 | MAX_SIZE_TRAIN: 1333 52 | MASK_FORMAT: "bitmask" 53 | FORMAT: "RGB" 54 | TEST: 55 | PRECISE_BN: 56 | ENABLED: True 57 | NUM_ITER: 200 58 | DETECTIONS_PER_IMAGE: 100 59 | OUTPUT_DIR: "output/self-train-r1/" # round 1 60 | # OUTPUT_DIR: "output/self-train-r2/" # round 2 -------------------------------------------------------------------------------- /cutler/model_zoo/configs/CutLER-ImageNet/mask_rcnn_R_50_FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | DATALOADER: 3 | COPY_PASTE: True 4 | COPY_PASTE_RATE: 1.0 5 | VISUALIZE_COPY_PASTE: False 6 | COPY_PASTE_RANDOM_NUM: True 7 | COPY_PASTE_MIN_RATIO: 0.3 8 | COPY_PASTE_MAX_RATIO: 1.0 9 | MODEL: 10 | PIXEL_MEAN: [123.675, 116.280, 103.530] 11 | PIXEL_STD: [58.395, 57.120, 57.375] 12 | WEIGHTS: 'http://dl.fbaipublicfiles.com/cutler/checkpoints/dino_RN50_pretrain_d2_format.pkl' 13 | MASK_ON: True 14 | BACKBONE: 15 | FREEZE_AT: 0 16 | RESNETS: 17 | DEPTH: 50 18 | NORM: "SyncBN" 19 | STRIDE_IN_1X1: False 20 | FPN: 21 | NORM: "SyncBN" 22 | ROI_HEADS: 23 | NAME: "CustomStandardROIHeads" 24 | NUM_CLASSES: 1 25 | SCORE_THRESH_TEST: 0.0 26 | USE_DROPLOSS: True 27 | DROPLOSS_IOU_THRESH: 0.01 28 | RPN: 29 | POST_NMS_TOPK_TRAIN: 4000 30 | NMS_THRESH: 0.65 31 | DATASETS: 32 | TRAIN: ("imagenet_train",) 33 | SOLVER: 34 | IMS_PER_BATCH: 16 35 | BASE_LR: 0.01 36 | WEIGHT_DECAY: 0.00005 37 | STEPS: (80000,) 38 | MAX_ITER: 160000 39 | CLIP_GRADIENTS: 40 | CLIP_TYPE: norm 41 | CLIP_VALUE: 1.0 42 | ENABLED: true 43 | NORM_TYPE: 2.0 44 | INPUT: 45 | MIN_SIZE_TRAIN: (240, 320, 480, 640, 672, 704, 736, 768, 800, 1024) 46 | MAX_SIZE_TRAIN: 1333 47 | MASK_FORMAT: "bitmask" 48 | FORMAT: "RGB" 49 | TEST: 50 | PRECISE_BN: 51 | ENABLED: True 52 | OUTPUT_DIR: "output/" -------------------------------------------------------------------------------- /cutler/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 
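# Importing these submodules also triggers their registry decorators, so configs can
# refer to CutLER's heads by name (e.g. ROI_HEADS.NAME: CustomCascadeROIHeads).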
2 | 3 | from .roi_heads import ( 4 | ROI_HEADS_REGISTRY, 5 | ROIHeads, 6 | CustomStandardROIHeads, 7 | FastRCNNOutputLayers, 8 | build_roi_heads, 9 | ) 10 | from .roi_heads.custom_cascade_rcnn import CustomCascadeROIHeads 11 | from .roi_heads.fast_rcnn import FastRCNNOutputLayers 12 | from .meta_arch.rcnn import GeneralizedRCNN, ProposalNetwork 13 | from .meta_arch.build import build_model 14 | 15 | _EXCLUDE = {"ShapeSpec"} 16 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] -------------------------------------------------------------------------------- /cutler/modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Meta Platforms, Inc. and affiliates. 3 | # Modified by XuDong Wang from https://github.com/facebookresearch/detectron2/blob/main/detectron2/modeling/meta_arch/__init__.py 4 | 5 | from .build import META_ARCH_REGISTRY, build_model # isort:skip 6 | 7 | __all__ = list(globals().keys()) 8 | -------------------------------------------------------------------------------- /cutler/modeling/meta_arch/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # Modified by XuDong Wang from https://github.com/facebookresearch/detectron2/blob/main/detectron2/modeling/meta_arch/build.py 3 | 4 | import torch 5 | 6 | from detectron2.utils.logger import _log_api_usage 7 | from detectron2.utils.registry import Registry 8 | 9 | META_ARCH_REGISTRY = Registry("META_ARCH") # noqa F401 isort:skip 10 | META_ARCH_REGISTRY.__doc__ = """ 11 | Registry for meta-architectures, i.e. the whole model. 12 | 13 | The registered object will be called with `obj(cfg)` 14 | and expected to return a `nn.Module` object. 15 | """ 16 | 17 | 18 | def build_model(cfg): 19 | """ 20 | Build the whole model architecture, defined by ``cfg.MODEL.META_ARCHITECTURE``. 21 | Note that it does not load any weights from ``cfg``. 22 | """ 23 | meta_arch = cfg.MODEL.META_ARCHITECTURE 24 | model = META_ARCH_REGISTRY.get(meta_arch)(cfg) 25 | model.to(torch.device(cfg.MODEL.DEVICE)) 26 | _log_api_usage("modeling.meta_arch." + meta_arch) 27 | return model 28 | -------------------------------------------------------------------------------- /cutler/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | from .roi_heads import ( 4 | ROI_HEADS_REGISTRY, 5 | ROIHeads, 6 | Res5ROIHeads, 7 | CustomStandardROIHeads, 8 | build_roi_heads, 9 | select_foreground_proposals, 10 | ) 11 | from .custom_cascade_rcnn import CustomCascadeROIHeads 12 | from .fast_rcnn import FastRCNNOutputLayers 13 | 14 | from . import custom_cascade_rcnn # isort:skip 15 | 16 | __all__ = list(globals().keys()) 17 | -------------------------------------------------------------------------------- /cutler/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 
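# build.py wraps detectron2's optimizer/scheduler construction; presumably this is where
# SOLVER.BASE_LR_MULTIPLIER and SOLVER.BASE_LR_MULTIPLIER_NAMES (added in
# cutler/config/cutler_config.py) are applied to the matching parameter groups.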
2 | 3 | from .build import build_lr_scheduler, build_optimizer, get_default_optimizer_params 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | -------------------------------------------------------------------------------- /cutler/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | from .boxes import pairwise_iou_max_scores 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | 7 | 8 | from detectron2.utils.env import fixup_module_metadata 9 | 10 | fixup_module_metadata(__name__, globals(), __all__) 11 | del fixup_module_metadata 12 | -------------------------------------------------------------------------------- /cutler/structures/boxes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # Modified by XuDong Wang from https://github.com/facebookresearch/detectron2/blob/main/detectron2/structures/boxes.py 3 | 4 | import torch 5 | 6 | def pairwise_iou_max_scores(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor: 7 | """ 8 | Given two lists of boxes of size N and M, compute the IoU (intersection 9 | over union) between all N x M pairs, then return, for each box in boxes1, 10 | the maximum IoU over boxes2. The box order must be (xmin, ymin, xmax, ymax). 11 | 12 | Args: 13 | boxes1, boxes2 (Tensor): box tensors of shape [N, 4] and [M, 4], respectively. 14 | 15 | Returns: 16 | Tensor: per-box maximum IoU, sized [N]. 17 | """ 18 | area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) # [N] 19 | area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) # [M] 20 | 21 | width_height = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) - torch.max( 22 | boxes1[:, None, :2], boxes2[:, :2] 23 | ) # [N,M,2] 24 | 25 | width_height.clamp_(min=0) # [N,M,2] 26 | inter = width_height.prod(dim=2) # [N,M] 27 | 28 | # handle empty boxes 29 | iou = torch.where( 30 | inter > 0, 31 | inter / (area1[:, None] + area2 - inter), 32 | torch.zeros(1, dtype=inter.dtype, device=inter.device), 33 | ) 34 | iou_max, _ = torch.max(iou, dim=1) 35 | return iou_max -------------------------------------------------------------------------------- /cutler/tools/eval.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # link to the dataset folder, model weights and the config file. 
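# DETECTRON2_DATASETS should point at the directory containing the prepared
# benchmark folders (see datasets/README.md for the expected layout).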
4 | export DETECTRON2_DATASETS=/path/to/DETECTRON2_DATASETS/ 5 | model_weights="http://dl.fbaipublicfiles.com/cutler/checkpoints/cutler_cascade_final.pth" 6 | config_file="model_zoo/configs/CutLER-ImageNet/cascade_mask_rcnn_R_50_FPN.yaml" 7 | num_gpus=2 8 | 9 | echo "========== start evaluating the model on all 11 datasets ==========" 10 | 11 | test_dataset='cls_agnostic_clipart' 12 | echo "========== evaluating ${test_dataset} ==========" 13 | python train_net.py --num-gpus ${num_gpus} \ 14 | --config-file ${config_file} \ 15 | --test-dataset ${test_dataset} --no-segm \ 16 | --eval-only MODEL.WEIGHTS ${model_weights} 17 | 18 | test_dataset='cls_agnostic_watercolor' 19 | echo "========== evaluating ${test_dataset} ==========" 20 | python train_net.py --num-gpus ${num_gpus} \ 21 | --config-file ${config_file} \ 22 | --test-dataset ${test_dataset} --no-segm \ 23 | --eval-only MODEL.WEIGHTS ${model_weights} 24 | 25 | test_dataset='cls_agnostic_comic' 26 | echo "========== evaluating ${test_dataset} ==========" 27 | python train_net.py --num-gpus ${num_gpus} \ 28 | --config-file ${config_file} \ 29 | --test-dataset ${test_dataset} --no-segm \ 30 | --eval-only MODEL.WEIGHTS ${model_weights} 31 | 32 | test_dataset='cls_agnostic_voc' 33 | echo "========== evaluating ${test_dataset} ==========" 34 | python train_net.py --num-gpus ${num_gpus} \ 35 | --config-file ${config_file} \ 36 | --test-dataset ${test_dataset} --no-segm \ 37 | --eval-only MODEL.WEIGHTS ${model_weights} 38 | 39 | test_dataset='cls_agnostic_objects365' 40 | echo "========== evaluating ${test_dataset} ==========" 41 | python train_net.py --num-gpus ${num_gpus} \ 42 | --config-file ${config_file} \ 43 | --test-dataset ${test_dataset} --no-segm \ 44 | --eval-only MODEL.WEIGHTS ${model_weights} 45 | 46 | test_dataset='cls_agnostic_openimages' 47 | echo "========== evaluating ${test_dataset} ==========" 48 | python train_net.py --num-gpus ${num_gpus} \ 49 | --config-file ${config_file} \ 50 | --test-dataset ${test_dataset} --no-segm \ 51 | --eval-only MODEL.WEIGHTS ${model_weights} 52 | 53 | test_dataset='cls_agnostic_kitti' 54 | echo "========== evaluating ${test_dataset} ==========" 55 | python train_net.py --num-gpus ${num_gpus} \ 56 | --config-file ${config_file} \ 57 | --test-dataset ${test_dataset} --no-segm \ 58 | --eval-only MODEL.WEIGHTS ${model_weights} 59 | 60 | test_dataset='cls_agnostic_coco' 61 | echo "========== evaluating ${test_dataset} ==========" 62 | python train_net.py --num-gpus ${num_gpus} \ 63 | --config-file ${config_file} \ 64 | --test-dataset ${test_dataset} \ 65 | --eval-only MODEL.WEIGHTS ${model_weights} 66 | 67 | test_dataset='cls_agnostic_coco20k' 68 | echo "========== evaluating ${test_dataset} ==========" 69 | python train_net.py --num-gpus ${num_gpus} \ 70 | --config-file ${config_file} \ 71 | --test-dataset ${test_dataset} \ 72 | --eval-only MODEL.WEIGHTS ${model_weights} 73 | 74 | test_dataset='cls_agnostic_lvis' 75 | echo "========== evaluating ${test_dataset} ==========" 76 | # LVIS should set TEST.DETECTIONS_PER_IMAGE=300 77 | python train_net.py --num-gpus ${num_gpus} \ 78 | --config-file ${config_file} \ 79 | --test-dataset ${test_dataset} \ 80 | --eval-only MODEL.WEIGHTS ${model_weights} TEST.DETECTIONS_PER_IMAGE 300 81 | 82 | test_dataset='cls_agnostic_uvo' 83 | echo "========== evaluating ${test_dataset} ==========" 84 | python train_net.py --num-gpus ${num_gpus} \ 85 | --config-file ${config_file} \ 86 | --test-dataset ${test_dataset} \ 87 | --eval-only MODEL.WEIGHTS ${model_weights} 88 | 
89 | echo "========== evaluation is completed ==========" -------------------------------------------------------------------------------- /cutler/tools/run_with_submitit.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | sbatch tools/train-1node.sh \ 3 | --config-file model_zoo/configs/CutLER-ImageNet/cascade_mask_rcnn_R_50_FPN.yaml \ 4 | OUTPUT_DIR /path/to/output -------------------------------------------------------------------------------- /cutler/tools/run_with_submitit_ssl.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | sbatch tools/train-1node.sh \ 3 | --config-file /private/home/xudongw/cutler-code-release/CutLER/cutler/model_zoo/configs/COCO-Semisupervised/cascade_mask_rcnn_R_50_FPN_50perc.yaml -------------------------------------------------------------------------------- /cutler/tools/single-node_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) Meta Platforms, Inc. and affiliates. 3 | export DETECTRON2_DATASETS=/path/to/DETECTRON2_DATASETS/ 4 | MASTER_NODE=$(scontrol show hostname "$SLURM_NODELIST" | head -n1) 5 | DIST_URL="tcp://$MASTER_NODE:12399" 6 | SOCKET_NAME=$(ip r | grep default | awk '{print $5}') 7 | export GLOO_SOCKET_IFNAME=$SOCKET_NAME 8 | 9 | python -u train_net.py --num-gpus 8 --num-machines 1 --machine-rank "$SLURM_NODEID" --dist-url "$DIST_URL" "$@" -------------------------------------------------------------------------------- /cutler/tools/train-1node.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash
2 | # Copyright (c) Meta Platforms, Inc. and affiliates. 3 | #SBATCH -p devlab 4 | #SBATCH --nodes=1 5 | #SBATCH --gres=gpu:8 6 | #SBATCH --gpus-per-node=8 7 | #SBATCH --cpus-per-task=80 8 | #SBATCH --mem=512G 9 | #SBATCH --time 2000 10 | #SBATCH -o "submitit/slurm-%j.out" 11 | 12 | srun tools/single-node_run.sh "$@" -------------------------------------------------------------------------------- /docs/cutler-demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/docs/cutler-demo.jpg -------------------------------------------------------------------------------- /docs/demos_videocutler.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/docs/demos_videocutler.gif -------------------------------------------------------------------------------- /docs/maskcut-demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/docs/maskcut-demo.jpg -------------------------------------------------------------------------------- /docs/maskcut.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/docs/maskcut.gif -------------------------------------------------------------------------------- /docs/pipeline.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/docs/pipeline.jpg -------------------------------------------------------------------------------- /docs/teaser_img.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/docs/teaser_img.jpg -------------------------------------------------------------------------------- /maskcut/colormap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # copied from https://github.com/facebookresearch/detectron2/blob/main/detectron2/utils/colormap.py 3 | 4 | """ 5 | An awesome colormap for really neat visualizations. 6 | Copied from Detectron, and removed gray colors.
7 | """ 8 | 9 | import numpy as np 10 | import random 11 | 12 | __all__ = ["colormap", "random_color", "random_colors"] 13 | 14 | # fmt: off 15 | # RGB: 16 | _COLORS = np.array( 17 | [ 18 | 0.000, 0.447, 0.741, 19 | 0.850, 0.325, 0.098, 20 | 0.929, 0.694, 0.125, 21 | 0.494, 0.184, 0.556, 22 | 0.466, 0.674, 0.188, 23 | 0.301, 0.745, 0.933, 24 | 0.635, 0.078, 0.184, 25 | 0.300, 0.300, 0.300, 26 | 0.600, 0.600, 0.600, 27 | 1.000, 0.000, 0.000, 28 | 1.000, 0.500, 0.000, 29 | 0.749, 0.749, 0.000, 30 | 0.000, 1.000, 0.000, 31 | 0.000, 0.000, 1.000, 32 | 0.667, 0.000, 1.000, 33 | 0.333, 0.333, 0.000, 34 | 0.333, 0.667, 0.000, 35 | 0.333, 1.000, 0.000, 36 | 0.667, 0.333, 0.000, 37 | 0.667, 0.667, 0.000, 38 | 0.667, 1.000, 0.000, 39 | 1.000, 0.333, 0.000, 40 | 1.000, 0.667, 0.000, 41 | 1.000, 1.000, 0.000, 42 | 0.000, 0.333, 0.500, 43 | 0.000, 0.667, 0.500, 44 | 0.000, 1.000, 0.500, 45 | 0.333, 0.000, 0.500, 46 | 0.333, 0.333, 0.500, 47 | 0.333, 0.667, 0.500, 48 | 0.333, 1.000, 0.500, 49 | 0.667, 0.000, 0.500, 50 | 0.667, 0.333, 0.500, 51 | 0.667, 0.667, 0.500, 52 | 0.667, 1.000, 0.500, 53 | 1.000, 0.000, 0.500, 54 | 1.000, 0.333, 0.500, 55 | 1.000, 0.667, 0.500, 56 | 1.000, 1.000, 0.500, 57 | 0.000, 0.333, 1.000, 58 | 0.000, 0.667, 1.000, 59 | 0.000, 1.000, 1.000, 60 | 0.333, 0.000, 1.000, 61 | 0.333, 0.333, 1.000, 62 | 0.333, 0.667, 1.000, 63 | 0.333, 1.000, 1.000, 64 | 0.667, 0.000, 1.000, 65 | 0.667, 0.333, 1.000, 66 | 0.667, 0.667, 1.000, 67 | 0.667, 1.000, 1.000, 68 | 1.000, 0.000, 1.000, 69 | 1.000, 0.333, 1.000, 70 | 1.000, 0.667, 1.000, 71 | 0.333, 0.000, 0.000, 72 | 0.500, 0.000, 0.000, 73 | 0.667, 0.000, 0.000, 74 | 0.833, 0.000, 0.000, 75 | 1.000, 0.000, 0.000, 76 | 0.000, 0.167, 0.000, 77 | 0.000, 0.333, 0.000, 78 | 0.000, 0.500, 0.000, 79 | 0.000, 0.667, 0.000, 80 | 0.000, 0.833, 0.000, 81 | 0.000, 1.000, 0.000, 82 | 0.000, 0.000, 0.167, 83 | 0.000, 0.000, 0.333, 84 | 0.000, 0.000, 0.500, 85 | 0.000, 0.000, 0.667, 86 | 0.000, 0.000, 0.833, 87 | 0.000, 0.000, 1.000, 88 | 0.000, 0.000, 0.000, 89 | 0.143, 0.143, 0.143, 90 | 0.857, 0.857, 0.857, 91 | 1.000, 1.000, 1.000 92 | ] 93 | ).astype(np.float32).reshape(-1, 3) 94 | # fmt: on 95 | 96 | 97 | def colormap(rgb=False, maximum=255): 98 | """ 99 | Args: 100 | rgb (bool): whether to return RGB colors or BGR colors. 101 | maximum (int): either 255 or 1 102 | Returns: 103 | ndarray: a float32 array of Nx3 colors, in range [0, 255] or [0, 1] 104 | """ 105 | assert maximum in [255, 1], maximum 106 | c = _COLORS * maximum 107 | if not rgb: 108 | c = c[:, ::-1] 109 | return c 110 | 111 | 112 | def random_color(rgb=False, maximum=255): 113 | """ 114 | Args: 115 | rgb (bool): whether to return RGB colors or BGR colors. 116 | maximum (int): either 255 or 1 117 | Returns: 118 | ndarray: a vector of 3 numbers 119 | """ 120 | idx = np.random.randint(0, len(_COLORS)) 121 | ret = _COLORS[idx] * maximum 122 | if not rgb: 123 | ret = ret[::-1] 124 | return ret 125 | 126 | 127 | def random_colors(N, rgb=False, maximum=255): 128 | """ 129 | Args: 130 | N (int): number of unique colors needed 131 | rgb (bool): whether to return RGB colors or BGR colors. 
132 | maximum (int): either 255 or 1 133 | Returns: 134 | ndarray: a list of random_color 135 | """ 136 | indices = random.sample(range(len(_COLORS)), N) 137 | ret = [_COLORS[i] * maximum for i in indices] 138 | if not rgb: 139 | ret = [x[::-1] for x in ret] 140 | return ret 141 | 142 | 143 | if __name__ == "__main__": 144 | import cv2 145 | 146 | size = 100 147 | H, W = 10, 10 148 | canvas = np.random.rand(H * size, W * size, 3).astype("float32") 149 | for h in range(H): 150 | for w in range(W): 151 | idx = h * W + w 152 | if idx >= len(_COLORS): 153 | break 154 | canvas[h * size : (h + 1) * size, w * size : (w + 1) * size] = _COLORS[idx] 155 | cv2.imshow("a", canvas) 156 | cv2.waitKey(0) -------------------------------------------------------------------------------- /maskcut/crf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # modified by Xudong Wang based on https://github.com/lucasb-eyer/pydensecrf/blob/master/pydensecrf/tests/test_dcrf.py and third_party/TokenCut 3 | 4 | import numpy as np 5 | import pydensecrf.densecrf as dcrf 6 | import pydensecrf.utils as utils 7 | import torch 8 | import torch.nn.functional as F 9 | import torchvision.transforms.functional as VF 10 | 11 | MAX_ITER = 10 12 | POS_W = 7 13 | POS_XY_STD = 3 14 | Bi_W = 10 15 | Bi_XY_STD = 50 16 | Bi_RGB_STD = 5 17 | 18 | def densecrf(image, mask): 19 | h, w = mask.shape 20 | mask = mask.reshape(1, h, w) 21 | fg = mask.astype(float) 22 | bg = 1 - fg 23 | output_logits = torch.from_numpy(np.concatenate((bg,fg), axis=0)) 24 | 25 | H, W = image.shape[:2] 26 | image = np.ascontiguousarray(image) 27 | 28 | output_logits = F.interpolate(output_logits.unsqueeze(0), size=(H, W), mode="bilinear").squeeze() 29 | output_probs = F.softmax(output_logits, dim=0).cpu().numpy() 30 | 31 | c = output_probs.shape[0] 32 | h = output_probs.shape[1] 33 | w = output_probs.shape[2] 34 | 35 | U = utils.unary_from_softmax(output_probs) 36 | U = np.ascontiguousarray(U) 37 | 38 | d = dcrf.DenseCRF2D(w, h, c) 39 | d.setUnaryEnergy(U) 40 | d.addPairwiseGaussian(sxy=POS_XY_STD, compat=POS_W) 41 | d.addPairwiseBilateral(sxy=Bi_XY_STD, srgb=Bi_RGB_STD, rgbim=image, compat=Bi_W) 42 | 43 | Q = d.inference(MAX_ITER) 44 | Q = np.array(Q).reshape((c, h, w)) 45 | MAP = np.argmax(Q, axis=0).reshape((h,w)).astype(np.float32) 46 | return MAP 47 | -------------------------------------------------------------------------------- /maskcut/imgs/demo1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/maskcut/imgs/demo1.jpg -------------------------------------------------------------------------------- /maskcut/imgs/demo2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/maskcut/imgs/demo2.jpg -------------------------------------------------------------------------------- /maskcut/imgs/demo3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/maskcut/imgs/demo3.jpg -------------------------------------------------------------------------------- /maskcut/imgs/demo4.jpg: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/maskcut/imgs/demo4.jpg -------------------------------------------------------------------------------- /maskcut/imgs/demo5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/maskcut/imgs/demo5.jpg -------------------------------------------------------------------------------- /maskcut/imgs/demo6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/maskcut/imgs/demo6.jpg -------------------------------------------------------------------------------- /maskcut/imgs/demo7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/maskcut/imgs/demo7.jpg -------------------------------------------------------------------------------- /maskcut/imgs/demo8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/maskcut/imgs/demo8.jpg -------------------------------------------------------------------------------- /maskcut/merge_jsons.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # Merge all ImageNet annotation files into a single one. 3 | 4 | import os 5 | import json 6 | import argparse 7 | 8 | if __name__ == "__main__": 9 | # load model arguments 10 | parser = argparse.ArgumentParser(description='Merge json files') 11 | parser.add_argument('--base-dir', type=str, 12 | default='annotations/', 13 | help='Dir to the generated annotation files with MaskCut') 14 | parser.add_argument('--save-path', type=str, default="imagenet_train_fixsize480_tau0.15_N3.json", 15 | help='Path to save the merged annotation file') 16 | # following arguments should be consistent with maskcut.py or maskcut_with_submitit.py (if using submitit) 17 | parser.add_argument('--num-folder-per-job', type=int, default=1, 18 | help='Number of folders per json file') 19 | parser.add_argument('--fixed-size', type=int, default=480, 20 | help='rescale the input images to a fixed size') 21 | parser.add_argument('--tau', type=float, default=0.15, help='threshold used for producing binary graph') 22 | parser.add_argument('--N', type=int, default=3, help='the maximum number of pseudo-masks per image') 23 | 24 | args = parser.parse_args() 25 | 26 | base_name = 'imagenet_train_fixsize{}_tau{}_N{}'.format(args.fixed_size, args.tau, args.N) 27 | 28 | start_idx = 0 29 | every_k = args.num_folder_per_job 30 | missed_folders = [] 31 | tobe_merged_ann_dicts = [] 32 | 33 | # check if pseudo-masks for all 1000 ImageNet-1K folders are available.
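# Each MaskCut job wrote either a grouped "{base_name}_{start}_{start+num_folder_per_job}.json"
# or, as a fallback, a per-folder "{base_name}_{start}_{start+1}.json"; the loop below tries the
# grouped file first, then the single-folder file, and records folders that have neither.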
34 | while start_idx < 1000: 35 | end_idx = start_idx + every_k 36 | filename = "{}_{}_{}.json".format(base_name, start_idx, end_idx) 37 | tobe_merged = os.path.join(args.base_dir, filename) 38 | if not os.path.isfile(tobe_merged): 39 | end_idx = start_idx + 1 40 | tobe_merged_ = "{}_{}_{}.json".format(base_name, start_idx, end_idx) 41 | if not os.path.isfile(tobe_merged_): 42 | missed_folders.append(start_idx) 43 | start_idx += 1 44 | continue 45 | else: 46 | tobe_merged = tobe_merged_ 47 | start_idx += 1 48 | else: 49 | start_idx += every_k 50 | tobe_merged_ann_dict = json.load(open(tobe_merged)) 51 | tobe_merged_ann_dicts.append(tobe_merged_ann_dict) 52 | 53 | print("Warning: these folders are not found: ", missed_folders) 54 | 55 | # filter out repeated image info 56 | for idx, ann_dict in enumerate(tobe_merged_ann_dicts): 57 | images = [] 58 | images_ids = [] 59 | for image in ann_dict['images']: 60 | if image['id'] in images_ids: 61 | continue 62 | else: 63 | images.append(image) 64 | images_ids.append(image['id']) 65 | ann_dict['images'] = images 66 | 67 | # re-generate image_id and segment_id, and combine annotation info and image info 68 | # from all annotation files 69 | base_ann_dict = tobe_merged_ann_dicts[0] 70 | image_id = base_ann_dict['images'][-1]['id'] + 1 71 | segment_id = base_ann_dict['annotations'][-1]['id'] + 1 72 | segment_id_list = [ann['id'] for ann in base_ann_dict['annotations']] 73 | for tobe_merged_ann_dict in tobe_merged_ann_dicts[1:]: 74 | file_name_and_id = {} 75 | for i, image in enumerate(tobe_merged_ann_dict['images']): 76 | file_name_and_id[str(image['id'])] = image_id 77 | image['id'] = image_id 78 | base_ann_dict['images'].append(image) 79 | image_id = image_id + 1 80 | 81 | for i, annotation_info in enumerate(tobe_merged_ann_dict['annotations']): 82 | annotation_info["image_id"] = file_name_and_id[str(annotation_info["image_id"])] 83 | annotation_info["id"] = segment_id 84 | annotation_info["iscrowd"] = 0 85 | segment_id_list.append(segment_id) 86 | base_ann_dict['annotations'].append(annotation_info) 87 | segment_id = segment_id + 1 88 | 89 | segment_id = 1 90 | for ann in base_ann_dict['annotations']: 91 | ann["id"] = segment_id 92 | segment_id += 1 93 | 94 | # save the final json file. 
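# (the two id lists below are collected only for debugging / sanity checks;
# they are not written into the saved annotation file)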
95 | anns = [ann['id'] for ann in base_ann_dict['annotations']] 96 | anns_image_id = [ann['image_id'] for ann in base_ann_dict['annotations']] 97 | json.dump(base_ann_dict, open(args.save_path, 'w')) 98 | print("Done: {} images; {} anns.".format(len(base_ann_dict['images']), len(base_ann_dict['annotations']))) 99 | -------------------------------------------------------------------------------- /maskcut/predict.py: -------------------------------------------------------------------------------- 1 | """ 2 | download pretrained weights to ./weights 3 | wget https://dl.fbaipublicfiles.com/dino/dino_vitbase8_pretrain/dino_vitbase8_pretrain.pth 4 | wget https://dl.fbaipublicfiles.com/dino/dino_deitsmall8_300ep_pretrain/dino_deitsmall8_300ep_pretrain.pth 5 | """ 6 | 7 | import sys 8 | 9 | sys.path.append("maskcut") 10 | import numpy as np 11 | import PIL.Image as Image 12 | import torch 13 | from scipy import ndimage 14 | from colormap import random_color 15 | 16 | import dino 17 | from third_party.TokenCut.unsupervised_saliency_detection import metric 18 | from crf import densecrf 19 | from maskcut import maskcut 20 | 21 | from cog import BasePredictor, Input, Path 22 | 23 | 24 | class Predictor(BasePredictor): 25 | def setup(self): 26 | """Load the model into memory to make running multiple predictions efficient""" 27 | 28 | # DINO pre-trained model 29 | vit_features = "k" 30 | self.patch_size = 8 31 | # adapted dino.ViTFeat to load from local pretrained_path 32 | self.backbone_base = dino.ViTFeat( 33 | "weights/dino_vitbase8_pretrain.pth", 34 | 768, 35 | "base", 36 | vit_features, 37 | self.patch_size, 38 | ) 39 | 40 | self.backbone_small = dino.ViTFeat( 41 | "weights/dino_deitsmall8_300ep_pretrain.pth", 42 | 384, 43 | "small", 44 | vit_features, 45 | self.patch_size, 46 | ) 47 | self.backbone_base.eval() 48 | self.backbone_base.cuda() 49 | self.backbone_small.eval() 50 | self.backbone_small.cuda() 51 | 52 | def predict( 53 | self, 54 | image: Path = Input( 55 | description="Input image", 56 | ), 57 | model: str = Input( 58 | description="Choose the model architecture", 59 | default="base", 60 | choices=["small", "base"] 61 | ), 62 | n_pseudo_masks: int = Input( 63 | description="The maximum number of pseudo-masks per image", 64 | default=3, 65 | ), 66 | tau: float = Input( 67 | description="Threshold used for producing binary graph", 68 | default=0.15, 69 | ), 70 | ) -> Path: 71 | """Run a single prediction on the model""" 72 | 73 | backbone = self.backbone_base if model == "base" else self.backbone_small 74 | 75 | # MaskCut hyperparameters 76 | fixed_size = 480 77 | 78 | # get pseudo-masks with MaskCut 79 | bipartitions, _, I_new = maskcut( 80 | str(image), 81 | backbone, 82 | self.patch_size, 83 | tau, 84 | N=n_pseudo_masks, 85 | fixed_size=fixed_size, 86 | cpu=False, 87 | ) 88 | 89 | I = Image.open(str(image)).convert("RGB") 90 | width, height = I.size 91 | pseudo_mask_list = [] 92 | for idx, bipartition in enumerate(bipartitions): 93 | # post-process pseudo-masks with CRF 94 | pseudo_mask = densecrf(np.array(I_new), bipartition) 95 | pseudo_mask = ndimage.binary_fill_holes(pseudo_mask >= 0.5) 96 | 97 | # filter out the masks that have a very different pseudo-mask after the CRF 98 | mask1 = torch.from_numpy(bipartition).cuda() 99 | mask2 = torch.from_numpy(pseudo_mask).cuda() 100 | 101 | if metric.IoU(mask1, mask2) < 0.5: 102 | pseudo_mask = pseudo_mask * -1 103 | 104 | # construct binary pseudo-masks 105 | pseudo_mask[pseudo_mask < 0] = 0 106 | pseudo_mask =
Image.fromarray(np.uint8(pseudo_mask * 255)) 107 | pseudo_mask = np.asarray(pseudo_mask.resize((width, height))) 108 | 109 | pseudo_mask = pseudo_mask.astype(np.uint8) 110 | upper = np.max(pseudo_mask) 111 | lower = np.min(pseudo_mask) 112 | thresh = upper / 2.0 113 | pseudo_mask[pseudo_mask > thresh] = upper 114 | pseudo_mask[pseudo_mask <= thresh] = lower 115 | pseudo_mask_list.append(pseudo_mask) 116 | 117 | out = np.array(I) 118 | for pseudo_mask in pseudo_mask_list: 119 | 120 | out = vis_mask(out, pseudo_mask, random_color(rgb=True)) 121 | 122 | output_path = "/tmp/out.png" 123 | 124 | out.save(str(output_path)) 125 | 126 | return Path(output_path) 127 | 128 | 129 | def vis_mask(input, mask, mask_color): 130 | fg = mask > 0.5 131 | rgb = np.copy(input) 132 | rgb[fg] = (rgb[fg] * 0.3 + np.array(mask_color) * 0.7).astype(np.uint8) 133 | return Image.fromarray(rgb) 134 | -------------------------------------------------------------------------------- /maskcut/run_maskcut_with_submitit.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | python run_with_submitit_maskcut_array.py \ 3 | --ngpus 1 \ 4 | --nodes 1 \ 5 | --timeout 1200 \ 6 | --partition learnfair \ 7 | --vit-arch base \ 8 | --patch-size 8 \ 9 | --dataset-path /path/to/imagenet/ \ 10 | --tau 0.15 \ 11 | --out-dir /path/to/save/annotations/ \ 12 | --num-folder-per-job 2 \ 13 | --job-index 0 \ 14 | --fixed_size 480 \ 15 | --N 3 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | submitit 2 | # torch==1.8.1 3 | # torchvision==0.9.1 4 | faiss-gpu==1.7.2 5 | opencv-python==4.6.0.66 6 | scikit-image==0.19.2 7 | scikit-learn==1.1.1 8 | shapely==1.8.2 9 | timm==0.5.4 10 | pyyaml==6.0 11 | colored 12 | fvcore==0.1.5.post20220512 13 | gdown==4.5.4 -------------------------------------------------------------------------------- /videocutler/INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 3 | ### Requirements 4 | - Linux or macOS with Python ≥ 3.6 5 | - PyTorch ≥ 1.9 and [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation. 6 | Install them together at [pytorch.org](https://pytorch.org) to make sure of this. Note: please check that 7 | the PyTorch version matches the one required by Detectron2. 8 | - Detectron2: follow [Detectron2 installation instructions](https://detectron2.readthedocs.io/tutorials/install.html). 9 | - OpenCV is optional but needed by the demo and visualization 10 | - `pip install -r requirements.txt` 11 | 12 | ### Example conda environment setup 13 | 14 | ```bash 15 | conda create --name videocutler python=3.8 -y 16 | conda activate videocutler 17 | conda install pytorch==1.9.0 torchvision==0.10.0 cudatoolkit=11.1 -c pytorch -c nvidia 18 | pip install -U opencv-python 19 | 20 | # under your working directory 21 | git clone git@github.com:facebookresearch/detectron2.git 22 | cd detectron2 23 | pip install -e . 24 | pip install git+https://github.com/cocodataset/panopticapi.git 25 | pip install git+https://github.com/mcordts/cityscapesScripts.git 26 | ``` 27 | 28 | ### CUDA kernel for MSDeformAttn 29 | After preparing the required environment, run the following command to compile the CUDA kernel for MSDeformAttn: 30 | 31 | `CUDA_HOME` must be defined and point to the directory of the installed CUDA toolkit.
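For example, assuming the toolkit is installed in the default location for CUDA 11.1 (the path below is an assumption; adjust it for your system):
```bash
export CUDA_HOME=/usr/local/cuda-11.1
```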
32 | ```bash 33 | pip install -r videocutler/requirements.txt 34 | cd videocutler/mask2former/modeling/pixel_decoder/ops 35 | sh make.sh 36 | ``` 37 | 38 | #### Building on another system 39 | To build on a system that does not have a GPU device but provides the drivers: 40 | ```bash 41 | TORCH_CUDA_ARCH_LIST='8.0' FORCE_CUDA=1 python setup.py build install 42 | ``` 43 | After preparing the required environment, run the following command to compile the CUDA kernel for MSDeformAttn: 44 | 45 | `CUDA_HOME` must be defined and point to the directory of the installed CUDA toolkit. 46 | ```bash 47 | cd videocutler/mask2former/modeling/pixel_decoder/ops 48 | sh make.sh 49 | ``` 50 | -------------------------------------------------------------------------------- /videocutler/configs/imagenet/instance-segmentation/Base-COCO-InstanceSegmentation.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | BACKBONE: 3 | FREEZE_AT: 0 4 | NAME: "build_resnet_backbone" 5 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 6 | PIXEL_MEAN: [123.675, 116.280, 103.530] 7 | PIXEL_STD: [58.395, 57.120, 57.375] 8 | RESNETS: 9 | DEPTH: 50 10 | STEM_TYPE: "basic" # not used 11 | STEM_OUT_CHANNELS: 64 12 | STRIDE_IN_1X1: False 13 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 14 | # NORM: "SyncBN" 15 | RES5_MULTI_GRID: [1, 1, 1] # not used 16 | DATASETS: 17 | TRAIN: ("coco_2017_train",) 18 | TEST: ("coco_2017_val",) 19 | SOLVER: 20 | IMS_PER_BATCH: 16 21 | BASE_LR: 0.0001 22 | STEPS: (327778, 355092) 23 | MAX_ITER: 368750 24 | WARMUP_FACTOR: 1.0 25 | WARMUP_ITERS: 10 26 | WEIGHT_DECAY: 0.05 27 | OPTIMIZER: "ADAMW" 28 | BACKBONE_MULTIPLIER: 0.1 29 | CLIP_GRADIENTS: 30 | ENABLED: True 31 | CLIP_TYPE: "full_model" 32 | CLIP_VALUE: 0.01 33 | NORM_TYPE: 2.0 34 | AMP: 35 | ENABLED: True 36 | INPUT: 37 | IMAGE_SIZE: 1024 38 | MIN_SCALE: 0.1 39 | MAX_SCALE: 2.0 40 | FORMAT: "RGB" 41 | DATASET_MAPPER_NAME: "coco_instance_lsj" 42 | TEST: 43 | EVAL_PERIOD: 5000 44 | DATALOADER: 45 | FILTER_EMPTY_ANNOTATIONS: True 46 | NUM_WORKERS: 4 47 | VERSION: 2 48 | -------------------------------------------------------------------------------- /videocutler/configs/imagenet/instance-segmentation/Base-imagenet-InstanceSegmentation.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | BACKBONE: 3 | FREEZE_AT: 0 4 | NAME: "build_resnet_backbone" 5 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 6 | PIXEL_MEAN: [123.675, 116.280, 103.530] 7 | PIXEL_STD: [58.395, 57.120, 57.375] 8 | RESNETS: 9 | DEPTH: 50 10 | STEM_TYPE: "basic" # not used 11 | STEM_OUT_CHANNELS: 64 12 | STRIDE_IN_1X1: False 13 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 14 | # NORM: "SyncBN" 15 | RES5_MULTI_GRID: [1, 1, 1] # not used 16 | DATASETS: 17 | TRAIN: ("imagenet_train_tau0.15_fixsize480_w_painting3Inst_crf_centerprior_polygon",) 18 | TEST: ("imagenet_val",) 19 | SOLVER: 20 | IMS_PER_BATCH: 16 21 | BASE_LR: 0.0001 22 | STEPS: (80000,) 23 | MAX_ITER: 160000 24 | WARMUP_FACTOR: 1.0 25 | WARMUP_ITERS: 10 26 | WEIGHT_DECAY: 0.05 27 | OPTIMIZER: "ADAMW" 28 | BACKBONE_MULTIPLIER: 0.1 29 | CLIP_GRADIENTS: 30 | ENABLED: True 31 | CLIP_TYPE: "full_model" 32 | CLIP_VALUE: 0.01 33 | NORM_TYPE: 2.0 34 | AMP: 35 | ENABLED: True 36 | INPUT: 37 | IMAGE_SIZE: 1024 38 | MIN_SCALE: 0.1 39 | MAX_SCALE: 2.0 40 | # MASK_FORMAT: "bitmask" 41 | FORMAT: "RGB" 42 | DATASET_MAPPER_NAME: "coco_instance_lsj" 43 | TEST: 44 | PRECISE_BN: 45 | ENABLED: True 46 |
EVAL_PERIOD: 5000 47 | DATALOADER: 48 | FILTER_EMPTY_ANNOTATIONS: True 49 | NUM_WORKERS: 4 50 | VERSION: 2 51 | -------------------------------------------------------------------------------- /videocutler/configs/imagenet/instance-segmentation/mask2former_R50_imagenet.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-imagenet-InstanceSegmentation.yaml 2 | DATALOADER: 3 | FILTER_EMPTY_ANNOTATIONS: True 4 | NUM_WORKERS: 8 5 | COPY_PASTE: True 6 | COPY_PASTE_RATE: 1.0 7 | VISUALIZE_COPY_PASTE: False 8 | COPY_PASTE_RANDOM_NUM: True 9 | COPY_PASTE_MIN_RATIO: 0.3 10 | COPY_PASTE_MAX_RATIO: 1.0 11 | MODEL: 12 | META_ARCHITECTURE: "MaskFormer" 13 | SEM_SEG_HEAD: 14 | NAME: "MaskFormerHead" 15 | IGNORE_VALUE: 255 16 | NUM_CLASSES: 1 17 | LOSS_WEIGHT: 1.0 18 | CONVS_DIM: 256 19 | MASK_DIM: 256 20 | NORM: "GN" 21 | # pixel decoder 22 | PIXEL_DECODER_NAME: "MSDeformAttnPixelDecoder" 23 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 24 | DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: ["res3", "res4", "res5"] 25 | COMMON_STRIDE: 4 26 | TRANSFORMER_ENC_LAYERS: 6 27 | WEIGHTS: 'http://dl.fbaipublicfiles.com/cutler/checkpoints/dino_RN50_pretrain_d2_format.pkl' 28 | MASK_FORMER: 29 | POSITIVE_BANK_IOU_THRESH: 0.0 30 | TRANSFORMER_DECODER_NAME: "MultiScaleMaskedTransformerDecoder" 31 | TRANSFORMER_IN_FEATURE: "multi_scale_pixel_decoder" 32 | DEEP_SUPERVISION: True 33 | NO_OBJECT_WEIGHT: 0.1 34 | CLASS_WEIGHT: 2.0 35 | MASK_WEIGHT: 5.0 36 | DICE_WEIGHT: 5.0 37 | HIDDEN_DIM: 256 38 | NUM_OBJECT_QUERIES: 100 39 | NHEADS: 8 40 | DROPOUT: 0.3 41 | DIM_FEEDFORWARD: 2048 42 | ENC_LAYERS: 0 43 | PRE_NORM: False 44 | ENFORCE_INPUT_PROJ: False 45 | SIZE_DIVISIBILITY: 32 46 | DEC_LAYERS: 10 # 9 decoder layers, add one for the loss on learnable query 47 | TRAIN_NUM_POINTS: 12544 48 | OVERSAMPLE_RATIO: 3.0 49 | IMPORTANCE_SAMPLE_RATIO: 0.75 50 | TEST: 51 | SEMANTIC_ON: False 52 | INSTANCE_ON: True 53 | PANOPTIC_ON: False 54 | OVERLAP_THRESHOLD: 0.8 55 | OBJECT_MASK_THRESHOLD: 0.8 56 | DATASETS: 57 | TRAIN: ("imagenet_train",) 58 | INPUT: 59 | IMAGE_SIZE: 896 60 | MIN_SCALE: 0.1 61 | MAX_SCALE: 2.0 62 | # MASK_FORMAT: "bitmask" 63 | FORMAT: "RGB" 64 | DATASET_MAPPER_NAME: "coco_instance_lsj" 65 | TEST: 66 | PRECISE_BN: 67 | ENABLED: True 68 | EVAL_PERIOD: 10000 69 | DETECTIONS_PER_IMAGE: 100 # Test MS-COCO: 100; Test LVIS: 300 70 | SOLVER: 71 | IMS_PER_BATCH: 16 72 | BASE_LR: 0.00002 73 | STEPS: (80000,) 74 | MAX_ITER: 160000 75 | WARMUP_FACTOR: 1.0 76 | WARMUP_ITERS: 10 77 | WEIGHT_DECAY: 0.05 78 | OPTIMIZER: "ADAMW" 79 | BACKBONE_MULTIPLIER: 0.1 80 | CLIP_GRADIENTS: 81 | ENABLED: True 82 | CLIP_TYPE: "full_model" 83 | CLIP_VALUE: 0.01 84 | NORM_TYPE: 2.0 85 | AMP: 86 | ENABLED: True 87 | OUTPUT_DIR: "OUTPUT" -------------------------------------------------------------------------------- /videocutler/configs/imagenet_video/Base-YouTubeVIS-VideoInstanceSegmentation.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | BACKBONE: 3 | FREEZE_AT: 0 4 | NAME: "build_resnet_backbone" 5 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 6 | PIXEL_MEAN: [123.675, 116.280, 103.530] 7 | PIXEL_STD: [58.395, 57.120, 57.375] 8 | MASK_ON: True 9 | RESNETS: 10 | DEPTH: 50 11 | STEM_TYPE: "basic" # not used 12 | STEM_OUT_CHANNELS: 64 13 | STRIDE_IN_1X1: False 14 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 15 | # NORM: "SyncBN" 16 | RES5_MULTI_GRID: [1, 1, 1] # not used 17 | DATASETS: 18 | TRAIN: ("ytvis_2019_train",) 
19 | TEST: ("ytvis_2019_val",) 20 | SOLVER: 21 | IMS_PER_BATCH: 16 22 | BASE_LR: 0.0001 23 | STEPS: (4000,) 24 | MAX_ITER: 6000 25 | WARMUP_FACTOR: 1.0 26 | WARMUP_ITERS: 10 27 | WEIGHT_DECAY: 0.05 28 | OPTIMIZER: "ADAMW" 29 | BACKBONE_MULTIPLIER: 0.1 30 | CLIP_GRADIENTS: 31 | ENABLED: True 32 | CLIP_TYPE: "full_model" 33 | CLIP_VALUE: 0.01 34 | NORM_TYPE: 2.0 35 | AMP: 36 | ENABLED: True 37 | INPUT: 38 | MIN_SIZE_TRAIN_SAMPLING: "choice_by_clip" 39 | RANDOM_FLIP: "flip_by_clip" 40 | AUGMENTATIONS: [] 41 | MIN_SIZE_TRAIN: (360, 480) 42 | MIN_SIZE_TEST: 360 43 | CROP: 44 | ENABLED: False 45 | TYPE: "absolute_range" 46 | SIZE: (600, 720) 47 | FORMAT: "RGB" 48 | TEST: 49 | EVAL_PERIOD: 0 50 | DATALOADER: 51 | FILTER_EMPTY_ANNOTATIONS: False 52 | NUM_WORKERS: 4 53 | VERSION: 2 54 | -------------------------------------------------------------------------------- /videocutler/configs/imagenet_video/video_mask2former_R50_cls_agnostic.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: Base-YouTubeVIS-VideoInstanceSegmentation.yaml 2 | MODEL: 3 | WEIGHTS: "pretrain/cutler_m2f_rn50.pth" 4 | META_ARCHITECTURE: "VideoMaskFormer" 5 | SEM_SEG_HEAD: 6 | NAME: "MaskFormerHead" 7 | IGNORE_VALUE: 255 8 | NUM_CLASSES: 1 # class-agnostic 9 | LOSS_WEIGHT: 1.0 10 | CONVS_DIM: 256 11 | MASK_DIM: 256 12 | NORM: "GN" 13 | # pixel decoder 14 | PIXEL_DECODER_NAME: "MSDeformAttnPixelDecoder" 15 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 16 | DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: ["res3", "res4", "res5"] 17 | COMMON_STRIDE: 4 18 | TRANSFORMER_ENC_LAYERS: 6 19 | MASK_FORMER: 20 | TRANSFORMER_DECODER_NAME: "VideoMultiScaleMaskedTransformerDecoder" 21 | TRANSFORMER_IN_FEATURE: "multi_scale_pixel_decoder" 22 | DEEP_SUPERVISION: True 23 | NO_OBJECT_WEIGHT: 0.1 24 | CLASS_WEIGHT: 2.0 25 | MASK_WEIGHT: 5.0 26 | DICE_WEIGHT: 5.0 27 | HIDDEN_DIM: 256 28 | NUM_OBJECT_QUERIES: 100 29 | NHEADS: 8 30 | DROPOUT: 0.3 31 | DIM_FEEDFORWARD: 2048 32 | ENC_LAYERS: 0 33 | PRE_NORM: False 34 | ENFORCE_INPUT_PROJ: False 35 | SIZE_DIVISIBILITY: 32 36 | DEC_LAYERS: 10 # 9 decoder layers, add one for the loss on learnable query 37 | TRAIN_NUM_POINTS: 12544 38 | OVERSAMPLE_RATIO: 3.0 39 | IMPORTANCE_SAMPLE_RATIO: 0.75 40 | TEST: 41 | SEMANTIC_ON: False 42 | INSTANCE_ON: True 43 | PANOPTIC_ON: False 44 | OVERLAP_THRESHOLD: 0.8 45 | OBJECT_MASK_THRESHOLD: 0.8 46 | DATASETS: 47 | TRAIN: ("imagenet_video_train_cls_agnostic",) 48 | TEST: ("ytvis_2019_train",) 49 | SOLVER: 50 | IMS_PER_BATCH: 16 51 | BASE_LR: 0.00002 52 | STEPS: (79999,) 53 | MAX_ITER: 80000 54 | WARMUP_FACTOR: 1.0 55 | WARMUP_ITERS: 10 56 | WEIGHT_DECAY: 0.05 57 | OPTIMIZER: "ADAMW" 58 | BACKBONE_MULTIPLIER: 0.1 59 | CLIP_GRADIENTS: 60 | ENABLED: True 61 | CLIP_TYPE: "full_model" 62 | CLIP_VALUE: 0.01 63 | NORM_TYPE: 2.0 64 | AMP: 65 | ENABLED: True 66 | DATALOADER: 67 | FILTER_EMPTY_ANNOTATIONS: False 68 | NUM_WORKERS: 0 69 | COPY_PASTE: True 70 | COPY_PASTE_RATE: 1.0 71 | VISUALIZE_COPY_PASTE: False 72 | COPY_PASTE_RANDOM_NUM: False 73 | COPY_PASTE_MIN_RATIO: 0.8 74 | COPY_PASTE_MAX_RATIO: 1.0 75 | INPUT: 76 | SAMPLING_FRAME_NUM: 3 77 | MIN_SIZE_TRAIN_SAMPLING: "choice_by_clip" 78 | RANDOM_FLIP: "flip_by_clip" 79 | AUGMENTATIONS: ['brightness', 'contrast', 'rotation'] 80 | MIN_SIZE_TRAIN: (360, 480) 81 | MIN_SIZE_TEST: 360 82 | CROP: 83 | ENABLED: True 84 | TYPE: "absolute_range" 85 | SIZE: (600, 720) 86 | OUTPUT_DIR: "OUTPUT/" -------------------------------------------------------------------------------- 
/videocutler/configs/imagenet_video/videocutler_eval_ytvis2019.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: video_mask2former_R50_cls_agnostic.yaml 2 | DATASETS: 3 | TEST: ("ytvis_2019_train",) -------------------------------------------------------------------------------- /videocutler/configs/imagenet_video/videocutler_eval_ytvis2021.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: video_mask2former_R50_cls_agnostic.yaml 2 | DATASETS: 3 | TEST: ("ytvis_2021_train",) -------------------------------------------------------------------------------- /videocutler/datasets/README.md: -------------------------------------------------------------------------------- 1 | # Prepare Datasets for VideoCutLER 2 | 3 | A dataset can be used by accessing [DatasetCatalog](https://detectron2.readthedocs.io/modules/data.html#detectron2.data.DatasetCatalog) 4 | for its data, or [MetadataCatalog](https://detectron2.readthedocs.io/modules/data.html#detectron2.data.MetadataCatalog) for its metadata (class names, etc). 5 | This document explains how to set up the builtin datasets so they can be used by the above APIs. 6 | [Use Custom Datasets](https://detectron2.readthedocs.io/tutorials/datasets.html) gives a deeper dive on how to use `DatasetCatalog` and `MetadataCatalog`, 7 | and how to add new datasets to them. 8 | 9 | VideoCutLER has builtin support for a few datasets. 10 | The datasets are assumed to exist in a directory specified by the environment variable 11 | `DETECTRON2_DATASETS`. 12 | Under this directory, detectron2 will look for datasets in the structure described below, if needed. 13 | ``` 14 | $DETECTRON2_DATASETS/ 15 | imagenet/ 16 | ytvis_2019/ 17 | ytvis_2021/ 18 | ``` 19 | 20 | You can set the location of the builtin datasets with `export DETECTRON2_DATASETS=/path/to/datasets`. 21 | If left unset, the default is `./datasets` relative to your current working directory. 22 | 23 | Please check the expected dataset structure for ImageNet-1K [here](../../datasets/README.md). You can directly [download](https://drive.google.com/file/d/1gllHvrZQNVXphnk-IQxMcXh87Qs86ofT/view?usp=sharing) the pre-processed ImageNet-1K annotations produced by MaskCut in YouTubeVIS format and place them under the "imagenet/annotations/" directory. 24 | 25 | Alternatively, you can refer to the instructions on generating pseudo-masks with MaskCut [here](../../README.md#generating-annotations-for-imagenet-1k-with-maskcut). You'll need to convert these annotations into the [YouTubeVIS](https://competitions.codalab.org/competitions/20128) format (MaskCut produces MSCOCO-format annotations); a minimal conversion sketch is given below. This format conversion is a necessary step to ensure compatibility with the training process of VideoCutLER.
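As a reference, a minimal conversion sketch could look like the following, treating each image as a one-frame video. `coco_to_ytvis` is a hypothetical helper (not part of this repo), and the field names should be verified against the official YouTubeVIS jsons:

```python
import json

def coco_to_ytvis(coco_json_path, out_json_path):
    """Wrap each COCO image as a single-frame 'video' in YouTubeVIS style."""
    coco = json.load(open(coco_json_path))
    videos = [
        {
            "id": img["id"],
            "width": img["width"],
            "height": img["height"],
            "length": 1,  # one frame per pseudo-video
            "file_names": [img["file_name"]],
        }
        for img in coco["images"]
    ]
    annotations = [
        {
            "id": ann["id"],
            "video_id": ann["image_id"],
            "category_id": ann["category_id"],
            "iscrowd": ann.get("iscrowd", 0),
            # YouTubeVIS stores per-frame lists; a single entry for the single frame
            "segmentations": [ann["segmentation"]],
            "bboxes": [ann["bbox"]],
            "areas": [ann["area"]],
        }
        for ann in coco["annotations"]
    ]
    ytvis = {"videos": videos, "annotations": annotations, "categories": coco["categories"]}
    json.dump(ytvis, open(out_json_path, "w"))
```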
26 | 27 | 28 | ## Expected dataset structure for [YouTubeVIS 2019](https://competitions.codalab.org/competitions/20128): 29 | 30 | ``` 31 | ytvis_2019/ 32 | {train,valid,test}.json 33 | {train,valid,test}/ 34 | Annotations/ 35 | JPEGImages/ 36 | ``` 37 | 38 | ## Expected dataset structure for [YouTubeVIS 2021](https://competitions.codalab.org/competitions/28988): 39 | 40 | ``` 41 | ytvis_2021/ 42 | {train,valid,test}.json 43 | {train,valid,test}/ 44 | Annotations/ 45 | JPEGImages/ 46 | ``` 47 | -------------------------------------------------------------------------------- /videocutler/datasets/ade20k_instance_catid_mapping.txt: -------------------------------------------------------------------------------- 1 | Instacne100 SceneParse150 FullADE20K 2 | 1 8 165 3 | 2 9 3055 4 | 3 11 350 5 | 4 13 1831 6 | 5 15 774 7 | 5 15 783 8 | 6 16 2684 9 | 7 19 687 10 | 8 20 471 11 | 9 21 401 12 | 10 23 1735 13 | 11 24 2473 14 | 12 25 2329 15 | 13 28 1564 16 | 14 31 57 17 | 15 32 2272 18 | 16 33 907 19 | 17 34 724 20 | 18 36 2985 21 | 18 36 533 22 | 19 37 1395 23 | 20 38 155 24 | 21 39 2053 25 | 22 40 689 26 | 23 42 266 27 | 24 43 581 28 | 25 44 2380 29 | 26 45 491 30 | 27 46 627 31 | 28 48 2388 32 | 29 50 943 33 | 30 51 2096 34 | 31 54 2530 35 | 32 56 420 36 | 33 57 1948 37 | 34 58 1869 38 | 35 59 2251 39 | 36 63 239 40 | 37 65 571 41 | 38 66 2793 42 | 39 67 978 43 | 40 68 236 44 | 41 70 181 45 | 42 71 629 46 | 43 72 2598 47 | 44 73 1744 48 | 45 74 1374 49 | 46 75 591 50 | 47 76 2679 51 | 48 77 223 52 | 49 79 47 53 | 50 81 327 54 | 51 82 2821 55 | 52 83 1451 56 | 53 84 2880 57 | 54 86 480 58 | 55 87 77 59 | 56 88 2616 60 | 57 89 246 61 | 57 89 247 62 | 58 90 2733 63 | 59 91 14 64 | 60 93 38 65 | 61 94 1936 66 | 62 96 120 67 | 63 98 1702 68 | 64 99 249 69 | 65 103 2928 70 | 66 104 2337 71 | 67 105 1023 72 | 68 108 2989 73 | 69 109 1930 74 | 70 111 2586 75 | 71 112 131 76 | 72 113 146 77 | 73 116 95 78 | 74 117 1563 79 | 75 119 1708 80 | 76 120 103 81 | 77 121 1002 82 | 78 122 2569 83 | 79 124 2833 84 | 80 125 1551 85 | 81 126 1981 86 | 82 127 29 87 | 83 128 187 88 | 84 130 747 89 | 85 131 2254 90 | 86 133 2262 91 | 87 134 1260 92 | 88 135 2243 93 | 89 136 2932 94 | 90 137 2836 95 | 91 138 2850 96 | 92 139 64 97 | 93 140 894 98 | 94 143 1919 99 | 95 144 1583 100 | 96 145 318 101 | 97 147 2046 102 | 98 148 1098 103 | 99 149 530 104 | 100 150 954 105 | -------------------------------------------------------------------------------- /videocutler/datasets/prepare_ade20k_ins_seg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) Facebook, Inc. and its affiliates. 
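# Converts the ADE20K per-pixel instance annotations (channel 0 of each annotation
# PNG holds the category id, channel 1 the instance id) into a COCO-format
# instance-segmentation json with RLE-encoded masks.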
4 | import glob 5 | import json 6 | import os 7 | from collections import Counter 8 | 9 | import numpy as np 10 | import tqdm 11 | from panopticapi.utils import IdGenerator, save_json 12 | from PIL import Image 13 | import pycocotools.mask as mask_util 14 | 15 | 16 | if __name__ == "__main__": 17 | dataset_dir = os.getenv("DETECTRON2_DATASETS", "datasets") 18 | 19 | for name, dirname in [("train", "training"), ("val", "validation")]: 20 | image_dir = os.path.join(dataset_dir, f"ADEChallengeData2016/images/{dirname}/") 21 | instance_dir = os.path.join( 22 | dataset_dir, f"ADEChallengeData2016/annotations_instance/{dirname}/" 23 | ) 24 | 25 | # img_id = 0 26 | ann_id = 1 27 | 28 | # json 29 | out_file = os.path.join(dataset_dir, f"ADEChallengeData2016/ade20k_instance_{name}.json") 30 | 31 | # json config 32 | instance_config_file = "datasets/ade20k_instance_imgCatIds.json" 33 | with open(instance_config_file) as f: 34 | category_dict = json.load(f)["categories"] 35 | 36 | # load catid mapping 37 | # it is important to share category id for both instance and panoptic annotations 38 | mapping_file = "datasets/ade20k_instance_catid_mapping.txt" 39 | with open(mapping_file) as f: 40 | map_id = {} 41 | for i, line in enumerate(f.readlines()): 42 | if i == 0: 43 | continue 44 | ins_id, sem_id, _ = line.strip().split() 45 | # shift id by 1 because we want it to start from 0! 46 | # ignore_label becomes 255 47 | map_id[int(ins_id)] = int(sem_id) - 1 48 | 49 | for cat in category_dict: 50 | cat["id"] = map_id[cat["id"]] 51 | 52 | filenames = sorted(glob.glob(os.path.join(image_dir, "*.jpg"))) 53 | 54 | ann_dict = {} 55 | images = [] 56 | annotations = [] 57 | 58 | for idx, filename in enumerate(tqdm.tqdm(filenames)): 59 | image = {} 60 | image_id = os.path.basename(filename).split(".")[0] 61 | 62 | image["id"] = image_id 63 | image["file_name"] = os.path.basename(filename) 64 | 65 | original_format = np.array(Image.open(filename)) 66 | image["width"] = original_format.shape[1] 67 | image["height"] = original_format.shape[0] 68 | 69 | images.append(image) 70 | 71 | filename_instance = os.path.join(instance_dir, image_id + ".png") 72 | ins_seg = np.asarray(Image.open(filename_instance)) 73 | assert ins_seg.dtype == np.uint8 74 | 75 | instance_cat_ids = ins_seg[..., 0] 76 | # instance id starts from 1! 
77 | # because 0 is reserved as VOID label 78 | instance_ins_ids = ins_seg[..., 1] 79 | 80 | # process things 81 | for thing_id in np.unique(instance_ins_ids): 82 | if thing_id == 0: 83 | continue 84 | mask = instance_ins_ids == thing_id 85 | instance_cat_id = np.unique(instance_cat_ids[mask]) 86 | assert len(instance_cat_id) == 1 87 | 88 | anno = {} 89 | anno['id'] = ann_id 90 | ann_id += 1 91 | anno['image_id'] = image['id'] 92 | anno["iscrowd"] = int(0) 93 | anno["category_id"] = int(map_id[instance_cat_id[0]]) 94 | 95 | inds = np.nonzero(mask) 96 | ymin, ymax = inds[0].min(), inds[0].max() 97 | xmin, xmax = inds[1].min(), inds[1].max() 98 | anno["bbox"] = [int(xmin), int(ymin), int(xmax - xmin + 1), int(ymax - ymin + 1)] 99 | # if xmax <= xmin or ymax <= ymin: 100 | # continue 101 | rle = mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0] 102 | rle["counts"] = rle["counts"].decode("utf-8") 103 | anno["segmentation"] = rle 104 | anno["area"] = int(mask_util.area(rle)) 105 | annotations.append(anno) 106 | 107 | # save this 108 | ann_dict['images'] = images 109 | ann_dict['categories'] = category_dict 110 | ann_dict['annotations'] = annotations 111 | 112 | save_json(ann_dict, out_file) 113 | -------------------------------------------------------------------------------- /videocutler/datasets/prepare_ade20k_sem_seg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | import os 5 | from pathlib import Path 6 | 7 | import numpy as np 8 | import tqdm 9 | from PIL import Image 10 | 11 | 12 | def convert(input, output): 13 | img = np.asarray(Image.open(input)) 14 | assert img.dtype == np.uint8 15 | img = img - 1 # 0 (ignore) becomes 255. others are shifted by 1 16 | Image.fromarray(img).save(output) 17 | 18 | 19 | if __name__ == "__main__": 20 | dataset_dir = Path(os.getenv("DETECTRON2_DATASETS", "datasets")) / "ADEChallengeData2016" 21 | for name in ["training", "validation"]: 22 | annotation_dir = dataset_dir / "annotations" / name 23 | output_dir = dataset_dir / "annotations_detectron2" / name 24 | output_dir.mkdir(parents=True, exist_ok=True) 25 | for file in tqdm.tqdm(list(annotation_dir.iterdir())): 26 | output_file = output_dir / file.name 27 | convert(file, output_file) 28 | -------------------------------------------------------------------------------- /videocutler/datasets/prepare_coco_semantic_annos_from_panoptic_annos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) Facebook, Inc. and its affiliates. 
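# Derives per-pixel semantic-segmentation PNGs from COCO panoptic annotations;
# see separate_coco_semantic_from_panoptic below for the category-id mapping.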
4 | 5 | import functools 6 | import json 7 | import multiprocessing as mp 8 | import numpy as np 9 | import os 10 | import time 11 | from fvcore.common.download import download 12 | from panopticapi.utils import rgb2id 13 | from PIL import Image 14 | 15 | from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES 16 | 17 | 18 | def _process_panoptic_to_semantic(input_panoptic, output_semantic, segments, id_map): 19 | panoptic = np.asarray(Image.open(input_panoptic), dtype=np.uint32) 20 | panoptic = rgb2id(panoptic) 21 | output = np.zeros_like(panoptic, dtype=np.uint8) + 255 22 | for seg in segments: 23 | cat_id = seg["category_id"] 24 | new_cat_id = id_map[cat_id] 25 | output[panoptic == seg["id"]] = new_cat_id 26 | Image.fromarray(output).save(output_semantic) 27 | 28 | 29 | def separate_coco_semantic_from_panoptic(panoptic_json, panoptic_root, sem_seg_root, categories): 30 | """ 31 | Create semantic segmentation annotations from panoptic segmentation 32 | annotations, to be used by PanopticFPN. 33 | It maps all thing categories to class 0, and maps all unlabeled pixels to class 255. 34 | It maps all stuff categories to contiguous ids starting from 1. 35 | Args: 36 | panoptic_json (str): path to the panoptic json file, in COCO's format. 37 | panoptic_root (str): a directory with panoptic annotation files, in COCO's format. 38 | sem_seg_root (str): a directory to output semantic annotation files 39 | categories (list[dict]): category metadata. Each dict needs to have: 40 | "id": corresponds to the "category_id" in the json annotations 41 | "isthing": 0 or 1 42 | """ 43 | os.makedirs(sem_seg_root, exist_ok=True) 44 | 45 | id_map = {} # map from category id to id in the output semantic annotation 46 | assert len(categories) <= 254 47 | for i, k in enumerate(categories): 48 | id_map[k["id"]] = i 49 | # what is id = 0? 50 | # id_map[0] = 255 51 | print(id_map) 52 | 53 | with open(panoptic_json) as f: 54 | obj = json.load(f) 55 | 56 | pool = mp.Pool(processes=max(mp.cpu_count() // 2, 4)) 57 | 58 | def iter_annotations(): 59 | for anno in obj["annotations"]: 60 | file_name = anno["file_name"] 61 | segments = anno["segments_info"] 62 | input = os.path.join(panoptic_root, file_name) 63 | output = os.path.join(sem_seg_root, file_name) 64 | yield input, output, segments 65 | 66 | print("Start writing to {} ...".format(sem_seg_root)) 67 | start = time.time() 68 | pool.starmap( 69 | functools.partial(_process_panoptic_to_semantic, id_map=id_map), 70 | iter_annotations(), 71 | chunksize=100, 72 | ) 73 | print("Finished. 
time: {:.2f}s".format(time.time() - start)) 74 | 75 | 76 | if __name__ == "__main__": 77 | dataset_dir = os.path.join(os.getenv("DETECTRON2_DATASETS", "datasets"), "coco") 78 | for s in ["val2017", "train2017"]: 79 | separate_coco_semantic_from_panoptic( 80 | os.path.join(dataset_dir, "annotations/panoptic_{}.json".format(s)), 81 | os.path.join(dataset_dir, "panoptic_{}".format(s)), 82 | os.path.join(dataset_dir, "panoptic_semseg_{}".format(s)), 83 | COCO_CATEGORIES, 84 | ) 85 | -------------------------------------------------------------------------------- /videocutler/demo.sh: -------------------------------------------------------------------------------- 1 | python demo_video/demo.py \ 2 | --config-file configs/imagenet_video/video_mask2former_R50_cls_agnostic.yaml \ 3 | --input docs/demo-videos/99c6b1acf2/*.jpg \ 4 | --confidence-threshold 0.8 \ 5 | --output demos/ \ 6 | # --save-frames True \ 7 | # --save-masks True \ 8 | --opts MODEL.WEIGHTS videocutler_m2f_rn50.pth -------------------------------------------------------------------------------- /videocutler/demo/README.md: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | ## VideoCutLER Demo 3 | 4 | We provide a command line tool to run a simple demo of builtin configs. 5 | The usage is explained in [GETTING_STARTED.md](../GETTING_STARTED.md). 6 | -------------------------------------------------------------------------------- /videocutler/docs/demo-videos/09773e4062/00100.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/videocutler/docs/demo-videos/09773e4062/00100.jpg -------------------------------------------------------------------------------- /videocutler/docs/demo-videos/09773e4062/00105.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/videocutler/docs/demo-videos/09773e4062/00105.jpg -------------------------------------------------------------------------------- /videocutler/docs/demo-videos/09773e4062/00110.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/videocutler/docs/demo-videos/09773e4062/00110.jpg -------------------------------------------------------------------------------- /videocutler/docs/demo-videos/09773e4062/00115.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/videocutler/docs/demo-videos/09773e4062/00115.jpg -------------------------------------------------------------------------------- /videocutler/docs/demo-videos/09773e4062/00120.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/videocutler/docs/demo-videos/09773e4062/00120.jpg -------------------------------------------------------------------------------- /videocutler/docs/demo-videos/09773e4062/00125.jpg: -------------------------------------------------------------------------------- 
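eval.sh and eval_ytvis.py below expect that inference has written `inference/results.json` under the output directory, in the YouTubeVIS results format consumed by `YTVOS.loadRes`. A sketch of one hypothetical entry, to make that call concrete (all field values here are placeholders):

import json

# One hypothetical entry; inference writes a JSON list of these dicts.
result = {
    "video_id": 1,
    "category_id": 1,  # single foreground class; VideoCutLER predicts class-agnostically
    "score": 0.97,
    "segmentations": [  # one COCO-style RLE per frame, None where the track is absent
        {"size": [720, 1280], "counts": "<rle-string>"},
        None,
    ],
}
with open("results.json", "w") as f:
    json.dump([result], f)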
-------------------------------------------------------------------------------- /videocutler/docs/videocutler_demos.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/videocutler/docs/videocutler_demos.gif -------------------------------------------------------------------------------- /videocutler/docs/videocutler_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/videocutler/docs/videocutler_pipeline.png -------------------------------------------------------------------------------- /videocutler/eval.sh: -------------------------------------------------------------------------------- 1 | export DETECTRON2_DATASETS=/shared/xudongw/DATASETS/ 2 | 3 | ###### eval YouTubeVIS-2019 ###### 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python train_net_video.py --num-gpus 4 \ 5 | --config-file configs/imagenet_video/videocutler_eval_ytvis2019.yaml \ 6 | --eval-only MODEL.WEIGHTS videocutler_m2f_rn50.pth \ 7 | OUTPUT_DIR OUTPUT/ytvis_2019 8 | 9 | python eval_ytvis.py --dataset-path ${DETECTRON2_DATASETS} --dataset-name 'ytvis_2019' --result-path 'OUTPUT/ytvis_2019/' 10 | 11 | ###### eval YouTubeVIS-2021 ###### 12 | # CUDA_VISIBLE_DEVICES=0,1,2,3 python train_net_video.py --num-gpus 4 \ 13 | # --config-file configs/imagenet_video/videocutler_eval_ytvis2021.yaml \ 14 | # --eval-only MODEL.WEIGHTS videocutler_m2f_rn50.pth \ 15 | # OUTPUT_DIR OUTPUT/ytvis_2021/ 16 | 17 | # python eval_ytvis.py --dataset-path ${DETECTRON2_DATASETS} --dataset-name 'ytvis_2021' --result-path 'OUTPUT/ytvis_2021/' -------------------------------------------------------------------------------- /videocutler/eval_ytvis.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 
2 | # Modified by XuDong Wang from detectron2 and cocoapi
3 | 
4 | import argparse
5 | import os
6 | 
7 | from mask2former_video.data_video.datasets.ytvis_api.ytvoseval import YTVOSeval
8 | from mask2former_video.data_video.datasets.ytvis_api.ytvos import YTVOS
9 | 
10 | def print_and_summary(cocoEval):
11 |     # join all YTVOSeval stats into one comma-separated percentage string
12 |     str_print = ""
13 |     for stat in cocoEval.stats:
14 |         str_print += "{:.2f},".format(stat * 100)
15 |     return str_print
16 | 
17 | def get_parser():
18 |     parser = argparse.ArgumentParser(description="YouTubeVIS evaluation configs")
19 |     parser.add_argument(
20 |         "--dataset-path", default="DATASETS", help="root directory of the datasets",
21 |     )
22 |     parser.add_argument(
23 |         "--dataset-name", default="ytvis_2019", help="name of the dataset, e.g. ytvis_2019 or ytvis_2021",
24 |     )
25 |     parser.add_argument(
26 |         "--result-path", default="OUTPUT", help="path to the result directory",
27 |     )
28 |     return parser
29 | 
30 | if __name__ == "__main__":
31 |     args = get_parser().parse_args()
32 | 
33 |     annFile = os.path.join(args.dataset_path, args.dataset_name, 'train.json')
34 |     cocoGt = YTVOS(annFile)
35 | 
36 |     resFile = os.path.join(args.result_path, 'inference/results.json')
37 |     cocoDt = cocoGt.loadRes(resFile)
38 | 
39 |     annType = 'segm'
40 |     print('Running {} evaluation.'.format(annType))
41 |     cocoEval = YTVOSeval(cocoGt, cocoDt, annType)
42 |     cocoEval.params.useCats = 0  # class-agnostic evaluation
43 |     cocoEval.evaluate()
44 |     cocoEval.accumulate()
45 |     cocoEval.summarize()
46 |     copypaste = print_and_summary(cocoEval)
47 |     print(copypaste)
--------------------------------------------------------------------------------
/videocutler/mask2former/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from . import data  # register all new datasets
3 | from . import modeling
4 | 
5 | # config
6 | from .config import add_maskformer2_config
7 | 
8 | # dataset loading
9 | from .data.dataset_mappers.coco_instance_new_baseline_dataset_mapper import COCOInstanceNewBaselineDatasetMapper
10 | from .data.dataset_mappers.coco_panoptic_new_baseline_dataset_mapper import COCOPanopticNewBaselineDatasetMapper
11 | from .data.dataset_mappers.mask_former_instance_dataset_mapper import (
12 |     MaskFormerInstanceDatasetMapper,
13 | )
14 | from .data.dataset_mappers.mask_former_panoptic_dataset_mapper import (
15 |     MaskFormerPanopticDatasetMapper,
16 | )
17 | from .data.dataset_mappers.mask_former_semantic_dataset_mapper import (
18 |     MaskFormerSemanticDatasetMapper,
19 | )
20 | 
21 | # models
22 | from .maskformer_model import MaskFormer
23 | from .test_time_augmentation import SemanticSegmentorWithTTA
24 | 
25 | # evaluation
26 | from .evaluation.instance_evaluation import InstanceSegEvaluator
27 | 
--------------------------------------------------------------------------------
/videocutler/mask2former/config.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) Facebook, Inc. and its affiliates.
3 | from detectron2.config import CfgNode as CN
4 | 
5 | 
6 | def add_maskformer2_config(cfg):
7 |     """
8 |     Add config for MASK_FORMER.
9 |     """
10 |     # NOTE: configs from original maskformer
11 |     # data config
12 |     # select the dataset mapper
13 |     cfg.INPUT.DATASET_MAPPER_NAME = "mask_former_semantic"
14 |     # Color augmentation
15 |     cfg.INPUT.COLOR_AUG_SSD = False
16 |     # We retry random cropping until no single category in semantic segmentation GT occupies more
17 |     # than `SINGLE_CATEGORY_MAX_AREA` part of the crop.
18 | cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0 19 | # Pad image and segmentation GT in dataset mapper. 20 | cfg.INPUT.SIZE_DIVISIBILITY = -1 21 | 22 | # solver config 23 | # weight decay on embedding 24 | cfg.SOLVER.WEIGHT_DECAY_EMBED = 0.0 25 | # optimizer 26 | cfg.SOLVER.OPTIMIZER = "ADAMW" 27 | cfg.SOLVER.BACKBONE_MULTIPLIER = 0.1 28 | 29 | # mask_former model config 30 | cfg.MODEL.MASK_FORMER = CN() 31 | 32 | # loss 33 | cfg.MODEL.MASK_FORMER.DEEP_SUPERVISION = True 34 | cfg.MODEL.MASK_FORMER.NO_OBJECT_WEIGHT = 0.1 35 | cfg.MODEL.MASK_FORMER.CLASS_WEIGHT = 1.0 36 | cfg.MODEL.MASK_FORMER.DICE_WEIGHT = 1.0 37 | cfg.MODEL.MASK_FORMER.MASK_WEIGHT = 20.0 38 | cfg.MODEL.MASK_FORMER.POSITIVE_BANK_IOU_THRESH = 0.01 39 | 40 | # transformer config 41 | cfg.MODEL.MASK_FORMER.NHEADS = 8 42 | cfg.MODEL.MASK_FORMER.DROPOUT = 0.1 43 | cfg.MODEL.MASK_FORMER.DIM_FEEDFORWARD = 2048 44 | cfg.MODEL.MASK_FORMER.ENC_LAYERS = 0 45 | cfg.MODEL.MASK_FORMER.DEC_LAYERS = 6 46 | cfg.MODEL.MASK_FORMER.PRE_NORM = False 47 | 48 | cfg.MODEL.MASK_FORMER.HIDDEN_DIM = 256 49 | cfg.MODEL.MASK_FORMER.NUM_OBJECT_QUERIES = 100 50 | 51 | cfg.MODEL.MASK_FORMER.TRANSFORMER_IN_FEATURE = "res5" 52 | cfg.MODEL.MASK_FORMER.ENFORCE_INPUT_PROJ = False 53 | 54 | # mask_former inference config 55 | cfg.MODEL.MASK_FORMER.TEST = CN() 56 | cfg.MODEL.MASK_FORMER.TEST.SEMANTIC_ON = True 57 | cfg.MODEL.MASK_FORMER.TEST.INSTANCE_ON = False 58 | cfg.MODEL.MASK_FORMER.TEST.PANOPTIC_ON = False 59 | cfg.MODEL.MASK_FORMER.TEST.OBJECT_MASK_THRESHOLD = 0.0 60 | cfg.MODEL.MASK_FORMER.TEST.OVERLAP_THRESHOLD = 0.0 61 | cfg.MODEL.MASK_FORMER.TEST.SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE = False 62 | 63 | # Sometimes `backbone.size_divisibility` is set to 0 for some backbone (e.g. ResNet) 64 | # you can use this config to override 65 | cfg.MODEL.MASK_FORMER.SIZE_DIVISIBILITY = 32 66 | 67 | # pixel decoder config 68 | cfg.MODEL.SEM_SEG_HEAD.MASK_DIM = 256 69 | # adding transformer in pixel decoder 70 | cfg.MODEL.SEM_SEG_HEAD.TRANSFORMER_ENC_LAYERS = 0 71 | # pixel decoder 72 | cfg.MODEL.SEM_SEG_HEAD.PIXEL_DECODER_NAME = "BasePixelDecoder" 73 | 74 | # swin transformer backbone 75 | cfg.MODEL.SWIN = CN() 76 | cfg.MODEL.SWIN.PRETRAIN_IMG_SIZE = 224 77 | cfg.MODEL.SWIN.PATCH_SIZE = 4 78 | cfg.MODEL.SWIN.EMBED_DIM = 96 79 | cfg.MODEL.SWIN.DEPTHS = [2, 2, 6, 2] 80 | cfg.MODEL.SWIN.NUM_HEADS = [3, 6, 12, 24] 81 | cfg.MODEL.SWIN.WINDOW_SIZE = 7 82 | cfg.MODEL.SWIN.MLP_RATIO = 4.0 83 | cfg.MODEL.SWIN.QKV_BIAS = True 84 | cfg.MODEL.SWIN.QK_SCALE = None 85 | cfg.MODEL.SWIN.DROP_RATE = 0.0 86 | cfg.MODEL.SWIN.ATTN_DROP_RATE = 0.0 87 | cfg.MODEL.SWIN.DROP_PATH_RATE = 0.3 88 | cfg.MODEL.SWIN.APE = False 89 | cfg.MODEL.SWIN.PATCH_NORM = True 90 | cfg.MODEL.SWIN.OUT_FEATURES = ["res2", "res3", "res4", "res5"] 91 | cfg.MODEL.SWIN.USE_CHECKPOINT = False 92 | 93 | # NOTE: maskformer2 extra configs 94 | # transformer module 95 | cfg.MODEL.MASK_FORMER.TRANSFORMER_DECODER_NAME = "MultiScaleMaskedTransformerDecoder" 96 | 97 | # LSJ aug 98 | cfg.INPUT.IMAGE_SIZE = 1024 99 | cfg.INPUT.MIN_SCALE = 0.1 100 | cfg.INPUT.MAX_SCALE = 2.0 101 | 102 | # MSDeformAttn encoder configs 103 | cfg.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES = ["res3", "res4", "res5"] 104 | cfg.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_N_POINTS = 4 105 | cfg.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_N_HEADS = 8 106 | 107 | # point loss configs 108 | # Number of points sampled during training for a mask point head. 
109 |     cfg.MODEL.MASK_FORMER.TRAIN_NUM_POINTS = 112 * 112
110 |     # Oversampling parameter for PointRend point sampling during training. Parameter `k` in the
111 |     # original paper.
112 |     cfg.MODEL.MASK_FORMER.OVERSAMPLE_RATIO = 3.0
113 |     # Importance sampling parameter for PointRend point sampling during training. Parameter `beta` in
114 |     # the original paper.
115 |     cfg.MODEL.MASK_FORMER.IMPORTANCE_SAMPLE_RATIO = 0.75
116 | 
--------------------------------------------------------------------------------
/videocutler/mask2former/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from . import datasets
3 | 
--------------------------------------------------------------------------------
/videocutler/mask2former/data/dataset_mappers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | 
--------------------------------------------------------------------------------
/videocutler/mask2former/data/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | from . import (
3 |     register_ade20k_full,
4 |     register_ade20k_panoptic,
5 |     register_coco_stuff_10k,
6 |     register_mapillary_vistas,
7 |     register_coco_panoptic_annos_semseg,
8 |     register_ade20k_instance,
9 |     register_mapillary_vistas_panoptic,
10 | )
11 | 
--------------------------------------------------------------------------------
/videocutler/mask2former/data/datasets/register_ade20k_instance.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import json 3 | import logging 4 | import numpy as np 5 | import os 6 | from PIL import Image 7 | 8 | from detectron2.data import DatasetCatalog, MetadataCatalog 9 | from detectron2.data.datasets.coco import load_coco_json, register_coco_instances 10 | from detectron2.utils.file_io import PathManager 11 | 12 | ADE_CATEGORIES = [{'id': 7, 'name': 'bed'}, {'id': 8, 'name': 'windowpane'}, {'id': 10, 'name': 'cabinet'}, {'id': 12, 'name': 'person'}, {'id': 14, 'name': 'door'}, {'id': 15, 'name': 'table'}, {'id': 18, 'name': 'curtain'}, {'id': 19, 'name': 'chair'}, {'id': 20, 'name': 'car'}, {'id': 22, 'name': 'painting'}, {'id': 23, 'name': 'sofa'}, {'id': 24, 'name': 'shelf'}, {'id': 27, 'name': 'mirror'}, {'id': 30, 'name': 'armchair'}, {'id': 31, 'name': 'seat'}, {'id': 32, 'name': 'fence'}, {'id': 33, 'name': 'desk'}, {'id': 35, 'name': 'wardrobe'}, {'id': 36, 'name': 'lamp'}, {'id': 37, 'name': 'bathtub'}, {'id': 38, 'name': 'railing'}, {'id': 39, 'name': 'cushion'}, {'id': 41, 'name': 'box'}, {'id': 42, 'name': 'column'}, {'id': 43, 'name': 'signboard'}, {'id': 44, 'name': 'chest of drawers'}, {'id': 45, 'name': 'counter'}, {'id': 47, 'name': 'sink'}, {'id': 49, 'name': 'fireplace'}, {'id': 50, 'name': 'refrigerator'}, {'id': 53, 'name': 'stairs'}, {'id': 55, 'name': 'case'}, {'id': 56, 'name': 'pool table'}, {'id': 57, 'name': 'pillow'}, {'id': 58, 'name': 'screen door'}, {'id': 62, 'name': 'bookcase'}, {'id': 64, 'name': 'coffee table'}, {'id': 65, 'name': 'toilet'}, {'id': 66, 'name': 'flower'}, {'id': 67, 'name': 'book'}, {'id': 69, 'name': 'bench'}, {'id': 70, 'name': 'countertop'}, {'id': 71, 'name': 'stove'}, {'id': 72, 'name': 'palm'}, {'id': 73, 'name': 'kitchen island'}, {'id': 74, 'name': 'computer'}, {'id': 75, 'name': 'swivel chair'}, {'id': 76, 'name': 'boat'}, {'id': 78, 'name': 'arcade machine'}, {'id': 80, 'name': 'bus'}, {'id': 81, 'name': 'towel'}, {'id': 82, 'name': 'light'}, {'id': 83, 'name': 'truck'}, {'id': 85, 'name': 'chandelier'}, {'id': 86, 'name': 'awning'}, {'id': 87, 'name': 'streetlight'}, {'id': 88, 'name': 'booth'}, {'id': 89, 'name': 'television receiver'}, {'id': 90, 'name': 'airplane'}, {'id': 92, 'name': 'apparel'}, {'id': 93, 'name': 'pole'}, {'id': 95, 'name': 'bannister'}, {'id': 97, 'name': 'ottoman'}, {'id': 98, 'name': 'bottle'}, {'id': 102, 'name': 'van'}, {'id': 103, 'name': 'ship'}, {'id': 104, 'name': 'fountain'}, {'id': 107, 'name': 'washer'}, {'id': 108, 'name': 'plaything'}, {'id': 110, 'name': 'stool'}, {'id': 111, 'name': 'barrel'}, {'id': 112, 'name': 'basket'}, {'id': 115, 'name': 'bag'}, {'id': 116, 'name': 'minibike'}, {'id': 118, 'name': 'oven'}, {'id': 119, 'name': 'ball'}, {'id': 120, 'name': 'food'}, {'id': 121, 'name': 'step'}, {'id': 123, 'name': 'trade name'}, {'id': 124, 'name': 'microwave'}, {'id': 125, 'name': 'pot'}, {'id': 126, 'name': 'animal'}, {'id': 127, 'name': 'bicycle'}, {'id': 129, 'name': 'dishwasher'}, {'id': 130, 'name': 'screen'}, {'id': 132, 'name': 'sculpture'}, {'id': 133, 'name': 'hood'}, {'id': 134, 'name': 'sconce'}, {'id': 135, 'name': 'vase'}, {'id': 136, 'name': 'traffic light'}, {'id': 137, 'name': 'tray'}, {'id': 138, 'name': 'ashcan'}, {'id': 139, 'name': 'fan'}, {'id': 142, 'name': 'plate'}, {'id': 143, 'name': 'monitor'}, {'id': 144, 'name': 'bulletin board'}, {'id': 146, 'name': 'radiator'}, {'id': 147, 'name': 'glass'}, {'id': 148, 'name': 'clock'}, {'id': 149, 'name': 'flag'}] 13 | 14 | 15 | _PREDEFINED_SPLITS = { 16 | # point annotations without masks 17 | "ade20k_instance_train": ( 18 
| "ADEChallengeData2016/images/training", 19 | "ADEChallengeData2016/ade20k_instance_train.json", 20 | ), 21 | "ade20k_instance_val": ( 22 | "ADEChallengeData2016/images/validation", 23 | "ADEChallengeData2016/ade20k_instance_val.json", 24 | ), 25 | } 26 | 27 | 28 | def _get_ade_instances_meta(): 29 | thing_ids = [k["id"] for k in ADE_CATEGORIES] 30 | assert len(thing_ids) == 100, len(thing_ids) 31 | # Mapping from the incontiguous ADE category id to an id in [0, 99] 32 | thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)} 33 | thing_classes = [k["name"] for k in ADE_CATEGORIES] 34 | ret = { 35 | "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id, 36 | "thing_classes": thing_classes, 37 | } 38 | return ret 39 | 40 | 41 | def register_all_ade20k_instance(root): 42 | for key, (image_root, json_file) in _PREDEFINED_SPLITS.items(): 43 | # Assume pre-defined datasets live in `./datasets`. 44 | register_coco_instances( 45 | key, 46 | _get_ade_instances_meta(), 47 | os.path.join(root, json_file) if "://" not in json_file else json_file, 48 | os.path.join(root, image_root), 49 | ) 50 | 51 | 52 | _root = os.getenv("DETECTRON2_DATASETS", "datasets") 53 | register_all_ade20k_instance(_root) 54 | -------------------------------------------------------------------------------- /videocutler/mask2former/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/CutLER/c47b5dfc84f4480ffb5fae65a3d618a5c0c14d3e/videocutler/mask2former/evaluation/__init__.py -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .backbone.swin import D2SwinTransformer 3 | from .pixel_decoder.fpn import BasePixelDecoder 4 | from .pixel_decoder.msdeformattn import MSDeformAttnPixelDecoder 5 | from .meta_arch.mask_former_head import MaskFormerHead 6 | from .meta_arch.per_pixel_baseline import PerPixelBaselineHead, PerPixelBaselinePlusHead 7 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/ops/functions/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | # Copyright (c) Facebook, Inc. and its affiliates. 10 | # Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 11 | 12 | from .ms_deform_attn_func import MSDeformAttnFunction 13 | 14 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/ops/functions/ms_deform_attn_func.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | # Copyright (c) Facebook, Inc. and its affiliates. 10 | # Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 11 | 12 | from __future__ import absolute_import 13 | from __future__ import print_function 14 | from __future__ import division 15 | 16 | import torch 17 | import torch.nn.functional as F 18 | from torch.autograd import Function 19 | from torch.autograd.function import once_differentiable 20 | 21 | try: 22 | import MultiScaleDeformableAttention as MSDA 23 | except ModuleNotFoundError as e: 24 | info_string = ( 25 | "\n\nPlease compile MultiScaleDeformableAttention CUDA op with the following commands:\n" 26 | "\t`cd mask2former/modeling/pixel_decoder/ops`\n" 27 | "\t`sh make.sh`\n" 28 | ) 29 | raise ModuleNotFoundError(info_string) 30 | 31 | 32 | class MSDeformAttnFunction(Function): 33 | @staticmethod 34 | def forward(ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step): 35 | ctx.im2col_step = im2col_step 36 | output = MSDA.ms_deform_attn_forward( 37 | value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, ctx.im2col_step) 38 | ctx.save_for_backward(value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights) 39 | return output 40 | 41 | @staticmethod 42 | @once_differentiable 43 | def backward(ctx, grad_output): 44 | value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors 45 | grad_value, grad_sampling_loc, grad_attn_weight = \ 46 | MSDA.ms_deform_attn_backward( 47 | value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, grad_output, ctx.im2col_step) 48 | 49 | return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None 50 | 51 | 52 | def ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights): 53 | # for debug and test only, 54 | # need to use cuda version instead 55 | N_, S_, M_, D_ = value.shape 56 | _, Lq_, M_, L_, P_, _ = sampling_locations.shape 57 | value_list = 
value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1) 58 | sampling_grids = 2 * sampling_locations - 1 59 | sampling_value_list = [] 60 | for lid_, (H_, W_) in enumerate(value_spatial_shapes): 61 | # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_ 62 | value_l_ = value_list[lid_].flatten(2).transpose(1, 2).reshape(N_*M_, D_, H_, W_) 63 | # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2 64 | sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1) 65 | # N_*M_, D_, Lq_, P_ 66 | sampling_value_l_ = F.grid_sample(value_l_, sampling_grid_l_, 67 | mode='bilinear', padding_mode='zeros', align_corners=False) 68 | sampling_value_list.append(sampling_value_l_) 69 | # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_, M_, 1, Lq_, L_*P_) 70 | attention_weights = attention_weights.transpose(1, 2).reshape(N_*M_, 1, Lq_, L_*P_) 71 | output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights).sum(-1).view(N_, M_*D_, Lq_) 72 | return output.transpose(1, 2).contiguous() 73 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/ops/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # ------------------------------------------------------------------------------------------------ 3 | # Deformable DETR 4 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------------------------------ 7 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | # ------------------------------------------------------------------------------------------------ 9 | 10 | # Copyright (c) Facebook, Inc. and its affiliates. 11 | # Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 12 | 13 | python setup.py build install 14 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/ops/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | # Copyright (c) Facebook, Inc. and its affiliates. 10 | # Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 11 | 12 | from .ms_deform_attn import MSDeformAttn 13 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/ops/setup.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved. 
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | # Copyright (c) Facebook, Inc. and its affiliates. 10 | # Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 11 | 12 | import os 13 | import glob 14 | 15 | import torch 16 | 17 | from torch.utils.cpp_extension import CUDA_HOME 18 | from torch.utils.cpp_extension import CppExtension 19 | from torch.utils.cpp_extension import CUDAExtension 20 | 21 | from setuptools import find_packages 22 | from setuptools import setup 23 | 24 | requirements = ["torch", "torchvision"] 25 | 26 | def get_extensions(): 27 | this_dir = os.path.dirname(os.path.abspath(__file__)) 28 | extensions_dir = os.path.join(this_dir, "src") 29 | 30 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 31 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 32 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 33 | 34 | sources = main_file + source_cpu 35 | extension = CppExtension 36 | extra_compile_args = {"cxx": []} 37 | define_macros = [] 38 | 39 | # Build the CUDA extension when FORCE_CUDA is set or a CUDA runtime is available; FORCE_CUDA allows building on machines where torch.cuda.is_available() is False (CUDA_HOME must still be set). 40 | if (os.environ.get('FORCE_CUDA') or torch.cuda.is_available()) and CUDA_HOME is not None: 41 | extension = CUDAExtension 42 | sources += source_cuda 43 | define_macros += [("WITH_CUDA", None)] 44 | extra_compile_args["nvcc"] = [ 45 | "-DCUDA_HAS_FP16=1", 46 | "-D__CUDA_NO_HALF_OPERATORS__", 47 | "-D__CUDA_NO_HALF_CONVERSIONS__", 48 | "-D__CUDA_NO_HALF2_OPERATORS__", 49 | ] 50 | else: 51 | if CUDA_HOME is None: 52 | raise NotImplementedError('CUDA_HOME is None. Please set environment variable CUDA_HOME.') 53 | else: 54 | raise NotImplementedError('No CUDA runtime found. Please set FORCE_CUDA=1 or check torch.cuda.is_available().') 55 | 56 | sources = [os.path.join(extensions_dir, s) for s in sources] 57 | include_dirs = [extensions_dir] 58 | ext_modules = [ 59 | extension( 60 | "MultiScaleDeformableAttention", 61 | sources, 62 | include_dirs=include_dirs, 63 | define_macros=define_macros, 64 | extra_compile_args=extra_compile_args, 65 | ) 66 | ] 67 | return ext_modules 68 | 69 | setup( 70 | name="MultiScaleDeformableAttention", 71 | version="1.0", 72 | author="Weijie Su", 73 | url="https://github.com/fundamentalvision/Deformable-DETR", 74 | description="PyTorch Wrapper for CUDA Functions of Multi-Scale Deformable Attention", 75 | packages=find_packages(exclude=("configs", "tests",)), 76 | ext_modules=get_extensions(), 77 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 78 | ) 79 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/ops/src/cpu/ms_deform_attn_cpu.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved.
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | /*! 12 | * Copyright (c) Facebook, Inc. and its affiliates. 13 | * Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 14 | */ 15 | 16 | #include <vector> 17 | 18 | #include <ATen/ATen.h> 19 | #include <ATen/cuda/CUDAContext.h> 20 | 21 | 22 | at::Tensor 23 | ms_deform_attn_cpu_forward( 24 | const at::Tensor &value, 25 | const at::Tensor &spatial_shapes, 26 | const at::Tensor &level_start_index, 27 | const at::Tensor &sampling_loc, 28 | const at::Tensor &attn_weight, 29 | const int im2col_step) 30 | { 31 | AT_ERROR("Not implemented on the CPU"); 32 | } 33 | 34 | std::vector<at::Tensor> 35 | ms_deform_attn_cpu_backward( 36 | const at::Tensor &value, 37 | const at::Tensor &spatial_shapes, 38 | const at::Tensor &level_start_index, 39 | const at::Tensor &sampling_loc, 40 | const at::Tensor &attn_weight, 41 | const at::Tensor &grad_output, 42 | const int im2col_step) 43 | { 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/ops/src/cpu/ms_deform_attn_cpu.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | /*! 12 | * Copyright (c) Facebook, Inc. and its affiliates. 13 | * Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 14 | */ 15 | 16 | #pragma once 17 | #include <torch/extension.h> 18 | 19 | at::Tensor 20 | ms_deform_attn_cpu_forward( 21 | const at::Tensor &value, 22 | const at::Tensor &spatial_shapes, 23 | const at::Tensor &level_start_index, 24 | const at::Tensor &sampling_loc, 25 | const at::Tensor &attn_weight, 26 | const int im2col_step); 27 | 28 | std::vector<at::Tensor> 29 | ms_deform_attn_cpu_backward( 30 | const at::Tensor &value, 31 | const at::Tensor &spatial_shapes, 32 | const at::Tensor &level_start_index, 33 | const at::Tensor &sampling_loc, 34 | const at::Tensor &attn_weight, 35 | const at::Tensor &grad_output, 36 | const int im2col_step); 37 | 38 | 39 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/ops/src/cuda/ms_deform_attn_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved.
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | /*! 12 | * Copyright (c) Facebook, Inc. and its affiliates. 13 | * Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 14 | */ 15 | 16 | #pragma once 17 | #include <torch/extension.h> 18 | 19 | at::Tensor ms_deform_attn_cuda_forward( 20 | const at::Tensor &value, 21 | const at::Tensor &spatial_shapes, 22 | const at::Tensor &level_start_index, 23 | const at::Tensor &sampling_loc, 24 | const at::Tensor &attn_weight, 25 | const int im2col_step); 26 | 27 | std::vector<at::Tensor> ms_deform_attn_cuda_backward( 28 | const at::Tensor &value, 29 | const at::Tensor &spatial_shapes, 30 | const at::Tensor &level_start_index, 31 | const at::Tensor &sampling_loc, 32 | const at::Tensor &attn_weight, 33 | const at::Tensor &grad_output, 34 | const int im2col_step); 35 | 36 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/ops/src/ms_deform_attn.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | /*! 12 | * Copyright (c) Facebook, Inc. and its affiliates.
13 | * Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 14 | */ 15 | 16 | #pragma once 17 | 18 | #include "cpu/ms_deform_attn_cpu.h" 19 | 20 | #ifdef WITH_CUDA 21 | #include "cuda/ms_deform_attn_cuda.h" 22 | #endif 23 | 24 | 25 | at::Tensor 26 | ms_deform_attn_forward( 27 | const at::Tensor &value, 28 | const at::Tensor &spatial_shapes, 29 | const at::Tensor &level_start_index, 30 | const at::Tensor &sampling_loc, 31 | const at::Tensor &attn_weight, 32 | const int im2col_step) 33 | { 34 | if (value.type().is_cuda()) 35 | { 36 | #ifdef WITH_CUDA 37 | return ms_deform_attn_cuda_forward( 38 | value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step); 39 | #else 40 | AT_ERROR("Not compiled with GPU support"); 41 | #endif 42 | } 43 | AT_ERROR("Not implemented on the CPU"); 44 | } 45 | 46 | std::vector<at::Tensor> 47 | ms_deform_attn_backward( 48 | const at::Tensor &value, 49 | const at::Tensor &spatial_shapes, 50 | const at::Tensor &level_start_index, 51 | const at::Tensor &sampling_loc, 52 | const at::Tensor &attn_weight, 53 | const at::Tensor &grad_output, 54 | const int im2col_step) 55 | { 56 | if (value.type().is_cuda()) 57 | { 58 | #ifdef WITH_CUDA 59 | return ms_deform_attn_cuda_backward( 60 | value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step); 61 | #else 62 | AT_ERROR("Not compiled with GPU support"); 63 | #endif 64 | } 65 | AT_ERROR("Not implemented on the CPU"); 66 | } 67 | 68 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/ops/src/vision.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | /*! 12 | * Copyright (c) Facebook, Inc. and its affiliates. 13 | * Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 14 | */ 15 | 16 | #include "ms_deform_attn.h" 17 | 18 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 19 | m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward"); 20 | m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward"); 21 | } 22 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/pixel_decoder/ops/test.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------------------ 2 | # Deformable DETR 3 | # Copyright (c) 2020 SenseTime. All Rights Reserved.
4 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 5 | # ------------------------------------------------------------------------------------------------ 6 | # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 7 | # ------------------------------------------------------------------------------------------------ 8 | 9 | # Copyright (c) Facebook, Inc. and its affiliates. 10 | # Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR 11 | 12 | from __future__ import absolute_import 13 | from __future__ import print_function 14 | from __future__ import division 15 | 16 | import time 17 | import torch 18 | import torch.nn as nn 19 | from torch.autograd import gradcheck 20 | 21 | from functions.ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch 22 | 23 | 24 | N, M, D = 1, 2, 2 25 | Lq, L, P = 2, 2, 2 26 | shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long).cuda() 27 | level_start_index = torch.cat((shapes.new_zeros((1, )), shapes.prod(1).cumsum(0)[:-1])) 28 | S = sum([(H*W).item() for H, W in shapes]) 29 | 30 | 31 | torch.manual_seed(3) 32 | 33 | 34 | @torch.no_grad() 35 | def check_forward_equal_with_pytorch_double(): 36 | value = torch.rand(N, S, M, D).cuda() * 0.01 37 | sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() 38 | attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 39 | attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) 40 | im2col_step = 2 41 | output_pytorch = ms_deform_attn_core_pytorch(value.double(), shapes, sampling_locations.double(), attention_weights.double()).detach().cpu() 42 | output_cuda = MSDeformAttnFunction.apply(value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step).detach().cpu() 43 | fwdok = torch.allclose(output_cuda, output_pytorch) 44 | max_abs_err = (output_cuda - output_pytorch).abs().max() 45 | max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() 46 | 47 | print(f'* {fwdok} check_forward_equal_with_pytorch_double: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}') 48 | 49 | 50 | @torch.no_grad() 51 | def check_forward_equal_with_pytorch_float(): 52 | value = torch.rand(N, S, M, D).cuda() * 0.01 53 | sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() 54 | attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 55 | attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) 56 | im2col_step = 2 57 | output_pytorch = ms_deform_attn_core_pytorch(value, shapes, sampling_locations, attention_weights).detach().cpu() 58 | output_cuda = MSDeformAttnFunction.apply(value, shapes, level_start_index, sampling_locations, attention_weights, im2col_step).detach().cpu() 59 | fwdok = torch.allclose(output_cuda, output_pytorch, rtol=1e-2, atol=1e-3) 60 | max_abs_err = (output_cuda - output_pytorch).abs().max() 61 | max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() 62 | 63 | print(f'* {fwdok} check_forward_equal_with_pytorch_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}') 64 | 65 | 66 | def check_gradient_numerical(channels=4, grad_value=True, grad_sampling_loc=True, grad_attn_weight=True): 67 | 68 | value = torch.rand(N, S, M, channels).cuda() * 0.01 69 | sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() 70 | attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 71 | attention_weights /= 
attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) 72 | im2col_step = 2 73 | func = MSDeformAttnFunction.apply 74 | 75 | value.requires_grad = grad_value 76 | sampling_locations.requires_grad = grad_sampling_loc 77 | attention_weights.requires_grad = grad_attn_weight 78 | 79 | gradok = gradcheck(func, (value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step)) 80 | 81 | print(f'* {gradok} check_gradient_numerical(D={channels})') 82 | 83 | 84 | if __name__ == '__main__': 85 | check_forward_equal_with_pytorch_double() 86 | check_forward_equal_with_pytorch_float() 87 | 88 | for channels in [30, 32, 64, 71, 1025, 2048, 3096]: 89 | check_gradient_numerical(channels, True, True, True) 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/transformer_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .maskformer_transformer_decoder import StandardTransformerDecoder 3 | from .mask2former_transformer_decoder import MultiScaleMaskedTransformerDecoder 4 | -------------------------------------------------------------------------------- /videocutler/mask2former/modeling/transformer_decoder/position_encoding.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # # Modified by Bowen Cheng from: https://github.com/facebookresearch/detr/blob/master/models/position_encoding.py 3 | """ 4 | Various positional encodings for the transformer. 5 | """ 6 | import math 7 | 8 | import torch 9 | from torch import nn 10 | 11 | 12 | class PositionEmbeddingSine(nn.Module): 13 | """ 14 | This is a more standard version of the position embedding, very similar to the one 15 | used by the Attention is all you need paper, generalized to work on images. 
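For an input feature map of shape (B, C, H, W), forward() returns a positional encoding of shape (B, 2 * num_pos_feats, H, W).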
16 | """ 17 | 18 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None): 19 | super().__init__() 20 | self.num_pos_feats = num_pos_feats 21 | self.temperature = temperature 22 | self.normalize = normalize 23 | if scale is not None and normalize is False: 24 | raise ValueError("normalize should be True if scale is passed") 25 | if scale is None: 26 | scale = 2 * math.pi 27 | self.scale = scale 28 | 29 | def forward(self, x, mask=None): 30 | if mask is None: 31 | mask = torch.zeros((x.size(0), x.size(2), x.size(3)), device=x.device, dtype=torch.bool) 32 | not_mask = ~mask 33 | y_embed = not_mask.cumsum(1, dtype=torch.float32) 34 | x_embed = not_mask.cumsum(2, dtype=torch.float32) 35 | if self.normalize: 36 | eps = 1e-6 37 | y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale 38 | x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale 39 | 40 | dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) 41 | dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) 42 | 43 | pos_x = x_embed[:, :, :, None] / dim_t 44 | pos_y = y_embed[:, :, :, None] / dim_t 45 | pos_x = torch.stack( 46 | (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4 47 | ).flatten(3) 48 | pos_y = torch.stack( 49 | (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4 50 | ).flatten(3) 51 | pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) 52 | return pos 53 | 54 | def __repr__(self, _repr_indent=4): 55 | head = "Positional encoding " + self.__class__.__name__ 56 | body = [ 57 | "num_pos_feats: {}".format(self.num_pos_feats), 58 | "temperature: {}".format(self.temperature), 59 | "normalize: {}".format(self.normalize), 60 | "scale: {}".format(self.scale), 61 | ] 62 | # _repr_indent = 4 63 | lines = [head] + [" " * _repr_indent + line for line in body] 64 | return "\n".join(lines) 65 | -------------------------------------------------------------------------------- /videocutler/mask2former/test_time_augmentation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import copy 3 | import logging 4 | from itertools import count 5 | 6 | import numpy as np 7 | import torch 8 | from fvcore.transforms import HFlipTransform 9 | from torch import nn 10 | from torch.nn.parallel import DistributedDataParallel 11 | 12 | from detectron2.data.detection_utils import read_image 13 | from detectron2.modeling import DatasetMapperTTA 14 | 15 | 16 | __all__ = [ 17 | "SemanticSegmentorWithTTA", 18 | ] 19 | 20 | 21 | class SemanticSegmentorWithTTA(nn.Module): 22 | """ 23 | A SemanticSegmentor with test-time augmentation enabled. 24 | Its :meth:`__call__` method has the same interface as :meth:`SemanticSegmentor.forward`. 25 | """ 26 | 27 | def __init__(self, cfg, model, tta_mapper=None, batch_size=1): 28 | """ 29 | Args: 30 | cfg (CfgNode): 31 | model (SemanticSegmentor): a SemanticSegmentor to apply TTA on. 32 | tta_mapper (callable): takes a dataset dict and returns a list of 33 | augmented versions of the dataset dict. Defaults to 34 | `DatasetMapperTTA(cfg)`. 35 | batch_size (int): batch the augmented images into this batch size for inference. 
36 | """ 37 | super().__init__() 38 | if isinstance(model, DistributedDataParallel): 39 | model = model.module 40 | self.cfg = cfg.clone() 41 | 42 | self.model = model 43 | 44 | if tta_mapper is None: 45 | tta_mapper = DatasetMapperTTA(cfg) 46 | self.tta_mapper = tta_mapper 47 | self.batch_size = batch_size 48 | 49 | def __call__(self, batched_inputs): 50 | """ 51 | Same input/output format as :meth:`SemanticSegmentor.forward` 52 | """ 53 | 54 | def _maybe_read_image(dataset_dict): 55 | ret = copy.copy(dataset_dict) 56 | if "image" not in ret: 57 | image = read_image(ret.pop("file_name"), self.model.input_format) 58 | image = torch.from_numpy(np.ascontiguousarray(image.transpose(2, 0, 1))) # CHW 59 | ret["image"] = image 60 | if "height" not in ret and "width" not in ret: 61 | ret["height"] = image.shape[1] 62 | ret["width"] = image.shape[2] 63 | return ret 64 | 65 | processed_results = [] 66 | for x in batched_inputs: 67 | result = self._inference_one_image(_maybe_read_image(x)) 68 | processed_results.append(result) 69 | return processed_results 70 | 71 | def _inference_one_image(self, input): 72 | """ 73 | Args: 74 | input (dict): one dataset dict with "image" field being a CHW tensor 75 | Returns: 76 | dict: one output dict 77 | """ 78 | orig_shape = (input["height"], input["width"]) 79 | augmented_inputs, tfms = self._get_augmented_inputs(input) 80 | 81 | final_predictions = None 82 | count_predictions = 0 83 | for input, tfm in zip(augmented_inputs, tfms): 84 | count_predictions += 1 85 | with torch.no_grad(): 86 | if final_predictions is None: 87 | if any(isinstance(t, HFlipTransform) for t in tfm.transforms): 88 | final_predictions = self.model([input])[0].pop("sem_seg").flip(dims=[2]) 89 | else: 90 | final_predictions = self.model([input])[0].pop("sem_seg") 91 | else: 92 | if any(isinstance(t, HFlipTransform) for t in tfm.transforms): 93 | final_predictions += self.model([input])[0].pop("sem_seg").flip(dims=[2]) 94 | else: 95 | final_predictions += self.model([input])[0].pop("sem_seg") 96 | 97 | final_predictions = final_predictions / count_predictions 98 | return {"sem_seg": final_predictions} 99 | 100 | def _get_augmented_inputs(self, input): 101 | augmented_inputs = self.tta_mapper(input) 102 | tfms = [x.pop("transforms") for x in augmented_inputs] 103 | return augmented_inputs, tfms 104 | -------------------------------------------------------------------------------- /videocutler/mask2former/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /videocutler/mask2former/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # Modified by Bowen Cheng from https://github.com/facebookresearch/detr/blob/master/util/misc.py 3 | """ 4 | Misc functions, including distributed helpers. 5 | 6 | Mostly copy-paste from torchvision references. 
7 | """ 8 | from typing import List, Optional 9 | 10 | import torch 11 | import torch.distributed as dist 12 | import torchvision 13 | from torch import Tensor 14 | 15 | 16 | def _max_by_axis(the_list): 17 | # type: (List[List[int]]) -> List[int] 18 | maxes = the_list[0] 19 | for sublist in the_list[1:]: 20 | for index, item in enumerate(sublist): 21 | maxes[index] = max(maxes[index], item) 22 | return maxes 23 | 24 | 25 | class NestedTensor(object): 26 | def __init__(self, tensors, mask: Optional[Tensor]): 27 | self.tensors = tensors 28 | self.mask = mask 29 | 30 | def to(self, device): 31 | # type: (Device) -> NestedTensor # noqa 32 | cast_tensor = self.tensors.to(device) 33 | mask = self.mask 34 | if mask is not None: 35 | assert mask is not None 36 | cast_mask = mask.to(device) 37 | else: 38 | cast_mask = None 39 | return NestedTensor(cast_tensor, cast_mask) 40 | 41 | def decompose(self): 42 | return self.tensors, self.mask 43 | 44 | def __repr__(self): 45 | return str(self.tensors) 46 | 47 | 48 | def nested_tensor_from_tensor_list(tensor_list: List[Tensor]): 49 | # TODO make this more general 50 | if tensor_list[0].ndim == 3: 51 | if torchvision._is_tracing(): 52 | # nested_tensor_from_tensor_list() does not export well to ONNX 53 | # call _onnx_nested_tensor_from_tensor_list() instead 54 | return _onnx_nested_tensor_from_tensor_list(tensor_list) 55 | 56 | # TODO make it support different-sized images 57 | max_size = _max_by_axis([list(img.shape) for img in tensor_list]) 58 | # min_size = tuple(min(s) for s in zip(*[img.shape for img in tensor_list])) 59 | batch_shape = [len(tensor_list)] + max_size 60 | b, c, h, w = batch_shape 61 | dtype = tensor_list[0].dtype 62 | device = tensor_list[0].device 63 | tensor = torch.zeros(batch_shape, dtype=dtype, device=device) 64 | mask = torch.ones((b, h, w), dtype=torch.bool, device=device) 65 | for img, pad_img, m in zip(tensor_list, tensor, mask): 66 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 67 | m[: img.shape[1], : img.shape[2]] = False 68 | else: 69 | raise ValueError("not supported") 70 | return NestedTensor(tensor, mask) 71 | 72 | 73 | # _onnx_nested_tensor_from_tensor_list() is an implementation of 74 | # nested_tensor_from_tensor_list() that is supported by ONNX tracing. 
75 | @torch.jit.unused 76 | def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> NestedTensor: 77 | max_size = [] 78 | for i in range(tensor_list[0].dim()): 79 | max_size_i = torch.max( 80 | torch.stack([img.shape[i] for img in tensor_list]).to(torch.float32) 81 | ).to(torch.int64) 82 | max_size.append(max_size_i) 83 | max_size = tuple(max_size) 84 | 85 | # work around for 86 | # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 87 | # m[: img.shape[1], :img.shape[2]] = False 88 | # which is not yet supported in onnx 89 | padded_imgs = [] 90 | padded_masks = [] 91 | for img in tensor_list: 92 | padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))] 93 | padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0])) 94 | padded_imgs.append(padded_img) 95 | 96 | m = torch.zeros_like(img[0], dtype=torch.int, device=img.device) 97 | padded_mask = torch.nn.functional.pad(m, (0, padding[2], 0, padding[1]), "constant", 1) 98 | padded_masks.append(padded_mask.to(torch.bool)) 99 | 100 | tensor = torch.stack(padded_imgs) 101 | mask = torch.stack(padded_masks) 102 | 103 | return NestedTensor(tensor, mask=mask) 104 | 105 | 106 | def is_dist_avail_and_initialized(): 107 | if not dist.is_available(): 108 | return False 109 | if not dist.is_initialized(): 110 | return False 111 | return True 112 | -------------------------------------------------------------------------------- /videocutler/mask2former_video/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from . import modeling 3 | 4 | # config 5 | from .config import add_maskformer2_video_config 6 | 7 | # models 8 | from .video_maskformer_model import VideoMaskFormer 9 | 10 | # video 11 | from .data_video import ( 12 | YTVISDatasetMapper, 13 | YTVISEvaluator, 14 | build_detection_train_loader, 15 | build_detection_test_loader, 16 | get_detection_dataset_dicts, 17 | ) 18 | 19 | # copy-paste 20 | from .engine import * -------------------------------------------------------------------------------- /videocutler/mask2former_video/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Meta Platforms, Inc. and affiliates. 3 | # Modified by XuDong Wang from https://github.com/facebookresearch/Mask2Former/tree/main/mask2former_video 4 | 5 | from detectron2.config import CfgNode as CN 6 | 7 | 8 | def add_maskformer2_video_config(cfg): 9 | # video data 10 | # DataLoader 11 | cfg.INPUT.SAMPLING_FRAME_NUM = 2 12 | cfg.INPUT.SAMPLING_FRAME_RANGE = 20 13 | cfg.INPUT.SAMPLING_FRAME_SHUFFLE = False 14 | cfg.INPUT.AUGMENTATIONS = [] # "brightness", "contrast", "saturation", "rotation" 15 | 16 | cfg.DATALOADER.COPY_PASTE = False 17 | cfg.DATALOADER.COPY_PASTE_RATE = 0.0 18 | cfg.DATALOADER.COPY_PASTE_MIN_RATIO = 0.5 19 | cfg.DATALOADER.COPY_PASTE_MAX_RATIO = 1.0 20 | cfg.DATALOADER.COPY_PASTE_RANDOM_NUM = True # random select number of instances 21 | cfg.DATALOADER.VISUALIZE_COPY_PASTE = False 22 | 23 | cfg.SOLVER.BASE_LR_MULTIPLIER = 1 24 | cfg.SOLVER.BASE_LR_MULTIPLIER_NAMES = [] 25 | -------------------------------------------------------------------------------- /videocutler/mask2former_video/data_video/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # Modified by Bowen Cheng from https://github.com/sukjunhwang/IFC 3 | 4 | from .dataset_mapper import YTVISDatasetMapper, CocoClipDatasetMapper 5 | from .build import * 6 | 7 | from .datasets import * 8 | from .ytvis_eval import YTVISEvaluator 9 | -------------------------------------------------------------------------------- /videocutler/mask2former_video/data_video/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # Modified by XuDong Wang from https://github.com/facebookresearch/Mask2Former/tree/main/mask2former_video 3 | 4 | from . import builtin # ensure the builtin datasets are registered 5 | 6 | __all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")] 7 | -------------------------------------------------------------------------------- /videocutler/mask2former_video/data_video/datasets/builtin.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # Modified by XuDong Wang from https://github.com/facebookresearch/Mask2Former/tree/main/mask2former_video 3 | 4 | import os 5 | 6 | from .ytvis import ( 7 | register_ytvis_instances, 8 | _get_ytvis_2019_instances_meta, 9 | _get_ytvis_2021_instances_meta, 10 | _get_imagenet_cls_agnostic_instances_meta, 11 | ) 12 | 13 | # ==== Predefined splits for YTVIS 2019 =========== 14 | _PREDEFINED_SPLITS_YTVIS_2019 = { 15 | "ytvis_2019_train": ("ytvis_2019/train/JPEGImages", 16 | "ytvis_2019/train.json"), 17 | "ytvis_2019_val": ("ytvis_2019/valid/JPEGImages", 18 | "ytvis_2019/valid.json"), 19 | "ytvis_2019_test": ("ytvis_2019/test/JPEGImages", 20 | "ytvis_2019/test.json"), 21 | "ytvis_2019_train_5perc": ("ytvis_2019/train/JPEGImages", 22 | "ytvis_2019/train_5percent.json"), 23 | "ytvis_2019_train_10perc": ("ytvis_2019/train/JPEGImages", 24 | "ytvis_2019/train_10percent.json"), 25 | "ytvis_2019_train_20perc": ("ytvis_2019/train/JPEGImages", 26 | "ytvis_2019/train_20percent.json"), 27 | "ytvis_2019_train_30perc": ("ytvis_2019/train/JPEGImages", 28 | "ytvis_2019/train_30percent.json"), 29 | "ytvis_2019_train_40perc": ("ytvis_2019/train/JPEGImages", 30 | "ytvis_2019/train_40percent.json"), 31 | "ytvis_2019_train_50perc": ("ytvis_2019/train/JPEGImages", 32 | "ytvis_2019/train_50percent.json"), 33 | } 34 | 35 | # ==== Predefined splits for YTVIS 2021 =========== 36 | _PREDEFINED_SPLITS_YTVIS_2021 = { 37 | "ytvis_2021_train": ("ytvis_2021/train/JPEGImages", 38 | "ytvis_2021/train.json"), 39 | "ytvis_2021_val": ("ytvis_2021/valid/JPEGImages", 40 | "ytvis_2021/valid.json"), 41 | "ytvis_2021_test": ("ytvis_2021/test/JPEGImages", 42 | "ytvis_2021/test.json"), 43 | "ytvis_2021_minus_2019_train": ("ytvis_2021/train/JPEGImages", 44 | "ytvis_2021/instances_val_sub.json"), 45 | } 46 | 47 | _PREDEFINED_SPLITS_ImageNet_CLS_AGNOSTIC = { 48 | "imagenet_video_train_cls_agnostic": ("imagenet/train", 49 | "imagenet/annotations/video_imagenet_train_fixsize480_tau0.15_N3.json"), 50 | } 51 | 52 | 53 | def register_all_ytvis_2019(root): 54 | for key, (image_root, json_file) in _PREDEFINED_SPLITS_YTVIS_2019.items(): 55 | # Assume pre-defined datasets live in `./datasets`. 
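# A json_file path containing "://" is treated as a remote URI and passed
# through unchanged; any other path is resolved relative to `root`
# (i.e. $DETECTRON2_DATASETS, defaulting to ./datasets).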
56 | register_ytvis_instances( 57 | key, 58 | _get_ytvis_2019_instances_meta(), 59 | os.path.join(root, json_file) if "://" not in json_file else json_file, 60 | os.path.join(root, image_root), 61 | ) 62 | 63 | 64 | def register_all_ytvis_2021(root): 65 | for key, (image_root, json_file) in _PREDEFINED_SPLITS_YTVIS_2021.items(): 66 | # Assume pre-defined datasets live in `./datasets`. 67 | register_ytvis_instances( 68 | key, 69 | _get_ytvis_2021_instances_meta(), 70 | os.path.join(root, json_file) if "://" not in json_file else json_file, 71 | os.path.join(root, image_root), 72 | ) 73 | 74 | def register_all_imagenet_cls_agnostic(root): 75 | for key, (image_root, json_file) in _PREDEFINED_SPLITS_ImageNet_CLS_AGNOSTIC.items(): 76 | # Assume pre-defined datasets live in `./datasets`. 77 | register_ytvis_instances( 78 | key, 79 | _get_imagenet_cls_agnostic_instances_meta(), 80 | os.path.join(root, json_file) if "://" not in json_file else json_file, 81 | os.path.join(root, image_root), 82 | ) 83 | 84 | if __name__.endswith(".builtin"): 85 | # Assume pre-defined datasets live in `./datasets`. 86 | _root = os.getenv("DETECTRON2_DATASETS", "datasets") 87 | register_all_ytvis_2019(_root) 88 | register_all_ytvis_2021(_root) 89 | register_all_imagenet_cls_agnostic(_root) -------------------------------------------------------------------------------- /videocutler/mask2former_video/data_video/datasets/ytvis_api/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # Modified by Bowen Cheng from https://github.com/youtubevos/cocoapi 3 | -------------------------------------------------------------------------------- /videocutler/mask2former_video/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | from .train_loop import * 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | 7 | from .defaults import * -------------------------------------------------------------------------------- /videocutler/mask2former_video/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .transformer_decoder.video_mask2former_transformer_decoder import VideoMultiScaleMaskedTransformerDecoder 3 | -------------------------------------------------------------------------------- /videocutler/mask2former_video/modeling/transformer_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | from .video_mask2former_transformer_decoder import VideoMultiScaleMaskedTransformerDecoder 3 | -------------------------------------------------------------------------------- /videocutler/mask2former_video/modeling/transformer_decoder/position_encoding.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # # Modified by Bowen Cheng from: https://github.com/facebookresearch/detr/blob/master/models/position_encoding.py 3 | """ 4 | Various positional encodings for the transformer. 
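The 3D variant below adds a temporal (frame-index) sine term on top of the 2D spatial encoding, for video inputs of shape (B, T, C, H, W).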
5 | """ 6 | import math 7 | 8 | import torch 9 | from torch import nn 10 | 11 | 12 | class PositionEmbeddingSine3D(nn.Module): 13 | """ 14 | This is a more standard version of the position embedding, very similar to the one 15 | used by the Attention is all you need paper, generalized to work on images. 16 | """ 17 | 18 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None): 19 | super().__init__() 20 | self.num_pos_feats = num_pos_feats 21 | self.temperature = temperature 22 | self.normalize = normalize 23 | if scale is not None and normalize is False: 24 | raise ValueError("normalize should be True if scale is passed") 25 | if scale is None: 26 | scale = 2 * math.pi 27 | self.scale = scale 28 | 29 | def forward(self, x, mask=None): 30 | # b, t, c, h, w 31 | assert x.dim() == 5, f"{x.shape} should be a 5-dimensional Tensor, got {x.dim()}-dimensional Tensor instead" 32 | if mask is None: 33 | mask = torch.zeros((x.size(0), x.size(1), x.size(3), x.size(4)), device=x.device, dtype=torch.bool) 34 | not_mask = ~mask 35 | z_embed = not_mask.cumsum(1, dtype=torch.float32) 36 | y_embed = not_mask.cumsum(2, dtype=torch.float32) 37 | x_embed = not_mask.cumsum(3, dtype=torch.float32) 38 | if self.normalize: 39 | eps = 1e-6 40 | z_embed = z_embed / (z_embed[:, -1:, :, :] + eps) * self.scale 41 | y_embed = y_embed / (y_embed[:, :, -1:, :] + eps) * self.scale 42 | x_embed = x_embed / (x_embed[:, :, :, -1:] + eps) * self.scale 43 | 44 | dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) 45 | dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) 46 | 47 | dim_t_z = torch.arange((self.num_pos_feats * 2), dtype=torch.float32, device=x.device) 48 | dim_t_z = self.temperature ** (2 * (dim_t_z // 2) / (self.num_pos_feats * 2)) 49 | 50 | pos_x = x_embed[:, :, :, :, None] / dim_t 51 | pos_y = y_embed[:, :, :, :, None] / dim_t 52 | pos_z = z_embed[:, :, :, :, None] / dim_t_z 53 | pos_x = torch.stack((pos_x[:, :, :, :, 0::2].sin(), pos_x[:, :, :, :, 1::2].cos()), dim=5).flatten(4) 54 | pos_y = torch.stack((pos_y[:, :, :, :, 0::2].sin(), pos_y[:, :, :, :, 1::2].cos()), dim=5).flatten(4) 55 | pos_z = torch.stack((pos_z[:, :, :, :, 0::2].sin(), pos_z[:, :, :, :, 1::2].cos()), dim=5).flatten(4) 56 | pos = (torch.cat((pos_y, pos_x), dim=4) + pos_z).permute(0, 1, 4, 2, 3) # b, t, c, h, w 57 | return pos 58 | -------------------------------------------------------------------------------- /videocutler/mask2former_video/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | -------------------------------------------------------------------------------- /videocutler/mask2former_video/utils/memory.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import logging 4 | from contextlib import contextmanager 5 | from functools import wraps 6 | import torch 7 | from torch.cuda.amp import autocast 8 | 9 | __all__ = ["retry_if_cuda_oom"] 10 | 11 | 12 | @contextmanager 13 | def _ignore_torch_cuda_oom(): 14 | """ 15 | A context which ignores CUDA OOM exception from pytorch. 16 | """ 17 | try: 18 | yield 19 | except RuntimeError as e: 20 | # NOTE: the string may change? 21 | if "CUDA out of memory. 
" in str(e): 22 | pass 23 | else: 24 | raise 25 | 26 | 27 | def retry_if_cuda_oom(func): 28 | """ 29 | Makes a function retry itself after encountering 30 | pytorch's CUDA OOM error. 31 | It will first retry after calling `torch.cuda.empty_cache()`. 32 | If that still fails, it will then retry by trying to convert inputs to CPUs. 33 | In this case, it expects the function to dispatch to CPU implementation. 34 | The return values may become CPU tensors as well and it's user's 35 | responsibility to convert it back to CUDA tensor if needed. 36 | Args: 37 | func: a stateless callable that takes tensor-like objects as arguments 38 | Returns: 39 | a callable which retries `func` if OOM is encountered. 40 | Examples: 41 | :: 42 | output = retry_if_cuda_oom(some_torch_function)(input1, input2) 43 | # output may be on CPU even if inputs are on GPU 44 | Note: 45 | 1. When converting inputs to CPU, it will only look at each argument and check 46 | if it has `.device` and `.to` for conversion. Nested structures of tensors 47 | are not supported. 48 | 2. Since the function might be called more than once, it has to be 49 | stateless. 50 | """ 51 | 52 | def maybe_to_cpu(x): 53 | try: 54 | like_gpu_tensor = x.device.type == "cuda" and hasattr(x, "to") 55 | except AttributeError: 56 | like_gpu_tensor = False 57 | if like_gpu_tensor: 58 | return x.to(device="cpu").to(torch.float32) 59 | else: 60 | return x 61 | 62 | @wraps(func) 63 | def wrapped(*args, **kwargs): 64 | with _ignore_torch_cuda_oom(): 65 | return func(*args, **kwargs) 66 | 67 | # Clear cache and retry 68 | torch.cuda.empty_cache() 69 | with _ignore_torch_cuda_oom(): 70 | return func(*args, **kwargs) 71 | 72 | # Try on CPU. This slows down the code significantly, therefore print a notice. 73 | logger = logging.getLogger(__name__) 74 | logger.info("Attempting to copy inputs to CPU due to CUDA OOM") 75 | new_args = (maybe_to_cpu(x) for x in args) 76 | new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()} 77 | with autocast(enabled=False): 78 | return func(*new_args, **new_kwargs) 79 | 80 | return wrapped 81 | -------------------------------------------------------------------------------- /videocutler/predict.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, "Mask2Former") 3 | import tempfile 4 | from pathlib import Path 5 | import numpy as np 6 | import cv2 7 | import cog 8 | 9 | # import some common detectron2 utilities 10 | from detectron2.config import CfgNode as CN 11 | from detectron2.engine import DefaultPredictor 12 | from detectron2.config import get_cfg 13 | from detectron2.utils.visualizer import Visualizer, ColorMode 14 | from detectron2.data import MetadataCatalog 15 | from detectron2.projects.deeplab import add_deeplab_config 16 | 17 | # import Mask2Former project 18 | from mask2former import add_maskformer2_config 19 | 20 | 21 | class Predictor(cog.Predictor): 22 | def setup(self): 23 | cfg = get_cfg() 24 | add_deeplab_config(cfg) 25 | add_maskformer2_config(cfg) 26 | cfg.merge_from_file("Mask2Former/configs/coco/panoptic-segmentation/swin/maskformer2_swin_large_IN21k_384_bs16_100ep.yaml") 27 | cfg.MODEL.WEIGHTS = 'model_final_f07440.pkl' 28 | cfg.MODEL.MASK_FORMER.TEST.SEMANTIC_ON = True 29 | cfg.MODEL.MASK_FORMER.TEST.INSTANCE_ON = True 30 | cfg.MODEL.MASK_FORMER.TEST.PANOPTIC_ON = True 31 | self.predictor = DefaultPredictor(cfg) 32 | self.coco_metadata = MetadataCatalog.get("coco_2017_val_panoptic") 33 | 34 | 35 | @cog.input( 36 | "image", 37 | 
type=Path, 38 | help="Input image for segmentation. Output will be the concatenation of panoptic segmentation (top), " 39 | "instance segmentation (middle), and semantic segmentation (bottom).", 40 | ) 41 | def predict(self, image): 42 | im = cv2.imread(str(image)) 43 | outputs = self.predictor(im) 44 | v = Visualizer(im[:, :, ::-1], self.coco_metadata, scale=1.2, instance_mode=ColorMode.IMAGE_BW) 45 | panoptic_result = v.draw_panoptic_seg(outputs["panoptic_seg"][0].to("cpu"), 46 | outputs["panoptic_seg"][1]).get_image() 47 | v = Visualizer(im[:, :, ::-1], self.coco_metadata, scale=1.2, instance_mode=ColorMode.IMAGE_BW) 48 | instance_result = v.draw_instance_predictions(outputs["instances"].to("cpu")).get_image() 49 | v = Visualizer(im[:, :, ::-1], self.coco_metadata, scale=1.2, instance_mode=ColorMode.IMAGE_BW) 50 | semantic_result = v.draw_sem_seg(outputs["sem_seg"].argmax(0).to("cpu")).get_image() 51 | result = np.concatenate((panoptic_result, instance_result, semantic_result), axis=0)[:, :, ::-1] 52 | out_path = Path(tempfile.mkdtemp()) / "out.png" 53 | cv2.imwrite(str(out_path), result) 54 | return out_path 55 | -------------------------------------------------------------------------------- /videocutler/requirements.txt: -------------------------------------------------------------------------------- 1 | cython 2 | scipy 3 | shapely 4 | timm 5 | h5py 6 | submitit 7 | scikit-image 8 | -------------------------------------------------------------------------------- /videocutler/single-node-video_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | MASTER_NODE=$(scontrol show hostname "$SLURM_NODELIST" | head -n1) 3 | DIST_URL="tcp://$MASTER_NODE:12392" 4 | SOCKET_NAME=$(ip r | grep default | awk '{print $5}') 5 | export GLOO_SOCKET_IFNAME=ens32 6 | 7 | python -u train_net_video.py --num-gpus 8 --num-machines 1 --machine-rank "$SLURM_NODEID" --dist-url "$DIST_URL" "$@" 8 | -------------------------------------------------------------------------------- /videocutler/tools/README.md: -------------------------------------------------------------------------------- 1 | This directory contains a few tools for MaskFormer. 2 | 3 | * `convert-torchvision-to-d2.py` 4 | 5 | Tool to convert torchvision pre-trained weights for D2. 6 | 7 | ``` 8 | wget https://download.pytorch.org/models/resnet101-63fe2227.pth 9 | python tools/convert-torchvision-to-d2.py resnet101-63fe2227.pth R-101.pkl 10 | ``` 11 | 12 | * `convert-pretrained-swin-model-to-d2.py` 13 | 14 | Tool to convert Swin Transformer pre-trained weights for D2.
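The commands below install `timm` first because the Swin backbone implementation imports it; each `wget`/`python` pair downloads an official Swin checkpoint and re-serializes it as a D2-compatible `.pkl`.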
15 | 16 | ``` 17 | pip install timm 18 | 19 | wget https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth 20 | python tools/convert-pretrained-swin-model-to-d2.py swin_tiny_patch4_window7_224.pth swin_tiny_patch4_window7_224.pkl 21 | 22 | wget https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth 23 | python tools/convert-pretrained-swin-model-to-d2.py swin_small_patch4_window7_224.pth swin_small_patch4_window7_224.pkl 24 | 25 | wget https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22k.pth 26 | python tools/convert-pretrained-swin-model-to-d2.py swin_base_patch4_window12_384_22k.pth swin_base_patch4_window12_384_22k.pkl 27 | 28 | wget https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth 29 | python tools/convert-pretrained-swin-model-to-d2.py swin_large_patch4_window12_384_22k.pth swin_large_patch4_window12_384_22k.pkl 30 | ``` 31 | 32 | * `evaluate_pq_for_semantic_segmentation.py` 33 | 34 | Tool to evaluate PQ (PQ-stuff) for semantic segmentation predictions. 35 | 36 | Usage: 37 | 38 | ``` 39 | python tools/evaluate_pq_for_semantic_segmentation.py --dataset-name ade20k_sem_seg_val --json-file OUTPUT_DIR/inference/sem_seg_predictions.json 40 | ``` 41 | 42 | where `OUTPUT_DIR` is set in the config file. 43 | 44 | * `evaluate_coco_boundary_ap.py` 45 | 46 | Tool to evaluate Boundary AP for instance segmentation predictions. 47 | 48 | Usage: 49 | 50 | ``` 51 | python tools/evaluate_coco_boundary_ap.py --gt-json-file COCO_GT_JSON --dt-json-file COCO_DT_JSON 52 | ``` 53 | 54 | To install Boundary IoU API, run: 55 | 56 | ``` 57 | pip install git+https://github.com/bowenc0221/boundary-iou-api.git 58 | ``` 59 | 60 | * `analyze_model.py` 61 | 62 | Tool to analyze model parameters and flops. 63 | 64 | Usage for semantic segmentation (ADE20K only, use with caution!): 65 | 66 | ``` 67 | python tools/analyze_model.py --num-inputs 1 --tasks flop --use-fixed-input-size --config-file CONFIG_FILE 68 | ``` 69 | 70 | Note that, for semantic segmentation (ADE20K only), we use a dummy image with a fixed size of `cfg.INPUT.CROP.SIZE[0] x cfg.INPUT.CROP.SIZE[0]`. 71 | Please do not use `--use-fixed-input-size` for calculating FLOPs on other datasets like Cityscapes! 72 | 73 | Usage for panoptic and instance segmentation: 74 | 75 | ``` 76 | python tools/analyze_model.py --num-inputs 100 --tasks flop --config-file CONFIG_FILE 77 | ``` 78 | 79 | Note that, for panoptic and instance segmentation, we compute the average flops over 100 real validation images. 80 | -------------------------------------------------------------------------------- /videocutler/tools/convert-pretrained-swin-model-to-d2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates.
--------------------------------------------------------------------------------
/videocutler/tools/convert-pretrained-swin-model-to-d2.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

import pickle as pkl
import sys

import torch

"""
Usage:
  # download pretrained swin model:
  wget https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth
  # run the conversion
  ./convert-pretrained-swin-model-to-d2.py swin_tiny_patch4_window7_224.pth swin_tiny_patch4_window7_224.pkl
  # Then, use swin_tiny_patch4_window7_224.pkl with the following changes in config:
MODEL:
  WEIGHTS: "/path/to/swin_tiny_patch4_window7_224.pkl"
INPUT:
  FORMAT: "RGB"
"""

if __name__ == "__main__":
    input_path = sys.argv[1]

    # The released Swin checkpoints store the weights under the "model" key.
    obj = torch.load(input_path, map_location="cpu")["model"]

    # Keep the keys as-is; D2's checkpoint loader matches them heuristically.
    res = {"model": obj, "__author__": "third_party", "matching_heuristics": True}

    with open(sys.argv[2], "wb") as f:
        pkl.dump(res, f)
--------------------------------------------------------------------------------
/videocutler/tools/convert-torchvision-to-d2.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates.

import pickle as pkl
import sys

import torch

"""
Usage:
  # download one of the ResNet{18,34,50,101,152} models from torchvision:
  wget https://download.pytorch.org/models/resnet50-19c8e357.pth -O r50.pth
  # run the conversion
  ./convert-torchvision-to-d2.py r50.pth r50.pkl
  # Then, use r50.pkl with the following changes in config:
MODEL:
  WEIGHTS: "/path/to/r50.pkl"
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  RESNETS:
    DEPTH: 50
    STRIDE_IN_1X1: False
INPUT:
  FORMAT: "RGB"
"""

if __name__ == "__main__":
    input_path = sys.argv[1]

    obj = torch.load(input_path, map_location="cpu")

    newmodel = {}
    for k in list(obj.keys()):
        old_k = k
        # Everything outside the residual stages gets the "stem." prefix.
        if "layer" not in k:
            k = "stem." + k
        # torchvision "layerN" corresponds to D2 "res{N+1}".
        for t in [1, 2, 3, 4]:
            k = k.replace("layer{}".format(t), "res{}".format(t + 1))
        # BatchNorm layers are stored as "convN.norm" in D2.
        for t in [1, 2, 3]:
            k = k.replace("bn{}".format(t), "conv{}.norm".format(t))
        # Downsampling projections become "shortcut" branches.
        k = k.replace("downsample.0", "shortcut")
        k = k.replace("downsample.1", "shortcut.norm")
        print(old_k, "->", k)
        newmodel[k] = obj.pop(old_k).detach().numpy()

    res = {"model": newmodel, "__author__": "torchvision", "matching_heuristics": True}

    with open(sys.argv[2], "wb") as f:
        pkl.dump(res, f)
    if obj:
        print("Unconverted keys:", obj.keys())
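# Illustrative examples of the renaming performed above, assuming a standard
# torchvision ResNet-50 state dict (key names shown for orientation only):
#   "conv1.weight"                 -> "stem.conv1.weight"
#   "bn1.running_mean"             -> "stem.conv1.norm.running_mean"
#   "layer1.0.conv1.weight"        -> "res2.0.conv1.weight"
#   "layer1.0.downsample.0.weight" -> "res2.0.shortcut.weight"
#   "layer1.0.downsample.1.weight" -> "res2.0.shortcut.norm.weight"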
--------------------------------------------------------------------------------
/videocutler/tools/evaluate_coco_boundary_ap.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# Modified by Bowen Cheng from: https://github.com/bowenc0221/boundary-iou-api/blob/master/tools/coco_instance_evaluation.py

"""
Evaluation for COCO val2017:
python ./tools/evaluate_coco_boundary_ap.py \
    --gt-json-file COCO_GT_JSON \
    --dt-json-file COCO_DT_JSON
"""
import argparse
import json

from boundary_iou.coco_instance_api.coco import COCO
from boundary_iou.coco_instance_api.cocoeval import COCOeval


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--gt-json-file", default="")
    parser.add_argument("--dt-json-file", default="")
    parser.add_argument("--iou-type", default="boundary")
    parser.add_argument("--dilation-ratio", default=0.020, type=float)
    args = parser.parse_args()
    print(args)

    annFile = args.gt_json_file
    dilation_ratio = args.dilation_ratio
    # Boundary IoU requires precomputing mask boundaries on the ground truth.
    get_boundary = args.iou_type == "boundary"
    cocoGt = COCO(annFile, get_boundary=get_boundary, dilation_ratio=dilation_ratio)

    # The detection file is a standard COCO results list of dicts; drop the box
    # predictions so that evaluation is based on masks only.
    with open(args.dt_json_file) as f:
        detections = json.load(f)
    for d in detections:
        d.pop("bbox", None)

    cocoDt = cocoGt.loadRes(detections)
    cocoEval = COCOeval(cocoGt, cocoDt, iouType=args.iou_type, dilation_ratio=dilation_ratio)
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/videocutler/train-1node.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#SBATCH -p learnfair
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --gres=gpu:8
#SBATCH --gpus-per-node=8
#SBATCH --cpus-per-task=48
#SBATCH --time 10000
#SBATCH -o "submitit/videocutler/slurm-%j.out"

# Forward all arguments (config file and overrides) to the per-node launcher.
srun single-node-video_run.sh "$@"
--------------------------------------------------------------------------------