├── .env ├── .gitignore ├── Docker ├── Dockerfile ├── install_CoAE_code.sh ├── install_cnnimageretrieval-pytorch.sh ├── install_maskrcnn_benchmark.sh ├── requirements.txt └── wget_gdrive.sh ├── FASTAPI.md ├── INSTALL.md ├── LICENSE ├── README.md ├── app.py ├── baselines ├── CoAE │ ├── INSTALL.md │ ├── README.md │ ├── _init_paths.py │ ├── cfgs │ │ ├── res101.yml │ │ └── res50.yml │ ├── experiments │ │ ├── launcher_coae_grozi_eval.py │ │ ├── launcher_coae_grozi_eval_collect.py │ │ ├── launcher_coae_grozi_train.py │ │ ├── launcher_coae_instre_eval.py │ │ ├── launcher_coae_instre_eval_collect.py │ │ ├── launcher_coae_instre_train.py │ │ └── parse_logs_to_pkl.py │ ├── lib │ │ ├── __init__.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── factory.py │ │ │ └── os2d.py │ │ └── roi_data_layer │ │ │ ├── __init__.py │ │ │ ├── minibatch.py │ │ │ ├── roibatchLoader.py │ │ │ └── roidb.py │ ├── test_net.py │ └── trainval_net.py ├── __init__.py └── detector_retrieval │ ├── INSTALL.md │ ├── README.md │ ├── __init__.py │ ├── detector │ ├── __init__.py │ ├── config │ │ ├── e2e_faster_rcnn_R_101_FPN_1x_multiscale.yaml │ │ ├── e2e_faster_rcnn_R_101_FPN_1x_multiscale_noClasses.yaml │ │ ├── e2e_faster_rcnn_R_101_FPN_1x_multiscale_noClasses_fromPytorch.yaml │ │ ├── e2e_faster_rcnn_R_50_FPN_1x_multiscale.yaml │ │ ├── e2e_faster_rcnn_R_50_FPN_1x_multiscale_noClasses.yaml │ │ └── e2e_faster_rcnn_R_50_FPN_1x_multiscale_noClasses_fromPytorch.yaml │ ├── detector_data.py │ ├── engine_inference.py │ ├── engine_trainer.py │ ├── experiments │ │ ├── launcher_train_detector.py │ │ └── launcher_train_detector_imagenet.py │ └── train_detector.py │ ├── evaluate_detector_retrieval.py │ ├── experiments │ ├── launcher_grozi_eval.py │ ├── launcher_grozi_eval_collect.py │ ├── launcher_imagenet_eval.py │ ├── launcher_imagenet_eval_collect.py │ ├── launcher_instre_eval.py │ └── launcher_instre_eval_collect.py │ ├── main_detector_retrieval.py │ ├── retrieval │ ├── __init__.py │ ├── experiments │ │ ├── launcher_grozi.py │ │ ├── launcher_imagenet.py │ │ └── launcher_instre.py │ ├── prepare_all_datasets.sh │ ├── prepare_dataset_retrieval.py │ ├── prepare_datasets_imagenet.sh │ ├── retrieval_data.py │ ├── test_and_learn_whitening.py │ └── train.py │ └── utils_maskrcnn.py ├── data ├── ImageNet-RepMet │ ├── pretrain │ │ ├── convert_resnet_pytorch_to_cirtorch.py │ │ ├── convert_resnet_pytorch_to_maskrcnnbenchmark.py │ │ ├── imagenet-repmet │ │ │ └── val_classes.txt │ │ ├── prepare_data_exclude_test_classes.py │ │ └── train_imagenet.py │ └── repmet_test_classes.txt ├── dataset_scales.txt ├── dataset_scales_imagenet.txt ├── demo │ ├── class_image_0.jpg │ ├── class_image_1.jpg │ └── input_image.jpg ├── get_dataset_scales.py └── get_dataset_scales_imagenet.py ├── demo-api.ipynb ├── demo.ipynb ├── docker-compose.yml ├── experiments ├── README.md ├── README_ImageNet.md ├── config_training.yml ├── launcher_exp1.py ├── launcher_exp1_collect.py ├── launcher_exp2.py ├── launcher_exp2_collect.py ├── launcher_exp3_instre.py ├── launcher_grozi_eval.py ├── launcher_grozi_eval_collect.py ├── launcher_imagenet_eval.py ├── launcher_imagenet_eval_collect.py ├── launcher_instre_eval.py └── launcher_instre_eval_collect.py ├── main.py ├── models ├── README.md ├── convert_resnet_caffe2_cirtorch_to_pytorch.py ├── convert_resnet_caffe2_groupnorm_to_pytorch.py ├── convert_resnet_cirtorch_to_pytorch.py └── convert_resnet_maskrcnnbenchmark_to_pytorch.py └── os2d ├── __init__.py ├── config.py ├── data ├── __init__.py ├── dataloader.py ├── dataset.py └── voc_eval.py 
├── engine ├── __init__.py ├── augmentation.py ├── evaluate.py ├── objective.py ├── optimization.py └── train.py ├── modeling ├── __init__.py ├── box_coder.py ├── feature_extractor.py ├── head.py └── model.py ├── structures ├── __init__.py ├── bounding_box.py ├── feature_map.py └── transforms.py └── utils ├── __init__.py ├── launcher.py ├── logger.py ├── plot_visdom.py ├── utils.py ├── visualization.py └── wget_gdrive.sh /.env: -------------------------------------------------------------------------------- 1 | USER_UID=1001 2 | USER_GID=${USER_UID} 3 | IMAGE_NAME=py-docker 4 | WORKDIR=/workspace 5 | CUSTOM_PORT=80 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.pyc 3 | *.pth 4 | *.log 5 | output/ 6 | data/grozi 7 | data/dairy 8 | data/paste 9 | data/instre 10 | baselines/CoAE/data 11 | -------------------------------------------------------------------------------- /Docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM pytorch/pytorch:1.13.0-cuda11.6-cudnn8-runtime 2 | 3 | ARG USERNAME=torch-docker 4 | ARG USER_UID=1000 5 | ARG USER_GID=$USER_UID 6 | ARG OS2D_ROOT=/workspace 7 | 8 | RUN apt-get update && apt-get upgrade -y 9 | 10 | RUN groupadd --gid $USER_GID $USERNAME \ 11 | && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \ 12 | && mkdir /etc/sudoers.d \ 13 | && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \ 14 | && chmod 0440 /etc/sudoers.d/$USERNAME 15 | 16 | RUN apt-get install -y \ 17 | sudo \ 18 | build-essential \ 19 | cmake \ 20 | git \ 21 | wget \ 22 | unzip \ 23 | yasm \ 24 | pkg-config \ 25 | libswscale-dev \ 26 | libtbb2 \ 27 | libtbb-dev \ 28 | libjpeg-dev \ 29 | libpng-dev \ 30 | libtiff-dev \ 31 | libavformat-dev \ 32 | libhdf5-dev \ 33 | libpq-dev 34 | 35 | RUN mkdir -p ${OS2D_ROOT} && chmod -R a+rwx ${OS2D_ROOT} 36 | WORKDIR ${OS2D_ROOT} 37 | 38 | RUN git clone https://github.com/aosokin/os2d.git ${OS2D_ROOT} 39 | 40 | COPY requirements.txt /tmp/requirements.txt 41 | RUN pip install -r /tmp/requirements.txt 42 | 43 | # COPY *.sh /tmp/ 44 | # RUN /tmp/install_maskrcnn_benchmark.sh 45 | # RUN /tmp/install_cnnimageretrieval-pytorch.sh 46 | # RUN /tmp/install_CoAE_code.sh 47 | RUN "${OS2D_ROOT}/os2d/utils/wget_gdrive.sh" "${OS2D_ROOT}/models/os2d_v2-train.pth" "1l_aanrxHj14d_QkCpein8wFmainNAzo8" 48 | 49 | USER ${USERNAME} 50 | 51 | ENV SHELL=/bin/bash -------------------------------------------------------------------------------- /Docker/install_CoAE_code.sh: -------------------------------------------------------------------------------- 1 | # !/bin/bash 2 | 3 | cd ${OS2D_ROOT}/baselines/CoAE 4 | 5 | # extra dependencies (from https://github.com/timy90022/One-Shot-Object-Detection/blob/master/requirements.txt) 6 | pip install easydict==1.9 7 | 8 | # clone the repo 9 | git clone https://github.com/timy90022/One-Shot-Object-Detection.git coae 10 | cd coae 11 | # I was on this commit: 12 | git checkout 2098ad3e90cb4aa9f1dd188a40efa29927ac3ab1 13 | 14 | # build binaries (see https://github.com/timy90022/One-Shot-Object-Detection/blob/master/README.md) 15 | cd ${OS2D_ROOT}/baselines/CoAE/coae/lib 16 | python setup.py build develop -------------------------------------------------------------------------------- /Docker/install_cnnimageretrieval-pytorch.sh: -------------------------------------------------------------------------------- 1 | # 
!/bin/bash 2 | 3 | cd $OS2D_ROOT/baselines/detector_retrieval/retrieval 4 | 5 | git clone https://github.com/filipradenovic/cnnimageretrieval-pytorch.git cnnimageretrieval-pytorch 6 | cd cnnimageretrieval-pytorch 7 | git checkout v1.1 8 | cd .. -------------------------------------------------------------------------------- /Docker/install_maskrcnn_benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # dependencies (quote the specifiers so bash does not treat ">=" as a redirection) 4 | pip install "ninja>=1.9.0" "cython>=0.29.15" 5 | pip install "opencv-python>=4.2.0.32" 6 | 7 | # the rest will be compiling from sources - set the path for that 8 | INSTALL_DIR=$HOME/local/software/pytorch/os2d 9 | mkdir -p $INSTALL_DIR 10 | 11 | # install pycocotools 12 | cd $INSTALL_DIR 13 | git clone https://github.com/cocodataset/cocoapi.git 14 | cd cocoapi/PythonAPI 15 | python setup.py build_ext install 16 | 17 | # install apex 18 | cd $INSTALL_DIR 19 | git clone https://github.com/NVIDIA/apex.git 20 | cd apex 21 | python setup.py install --cuda_ext --cpp_ext 22 | 23 | # install PyTorch Detection 24 | cd $INSTALL_DIR 25 | git clone https://github.com/facebookresearch/maskrcnn-benchmark.git 26 | cd maskrcnn-benchmark 27 | 28 | # difference from the standard instruction: get v0.1 29 | git checkout v0.1 30 | 31 | # the following will install the lib with 32 | # symbolic links, so that you can modify 33 | # the files if you want and won't need to 34 | # re-build it 35 | python setup.py build develop 36 | 37 | unset INSTALL_DIR -------------------------------------------------------------------------------- /Docker/wget_gdrive.sh: -------------------------------------------------------------------------------- 1 | # Command from here: https://medium.com/@acpanjan/download-google-drive-files-using-wget-3c2c025a8b99 2 | 3 | TARGET_PATH=$1 4 | FILEID=$2 5 | 6 | wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id='${FILEID} -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=${FILEID}" -O ${TARGET_PATH} && rm -rf /tmp/cookies.txt 7 | -------------------------------------------------------------------------------- /FASTAPI.md: -------------------------------------------------------------------------------- 1 | ## Running OS2D API Service 2 | 3 | ### Docker Compose 4 | ```bash 5 | # Please check all environment variables in the `.env` file 6 | 7 | # Build Docker image from Docker Compose 8 | docker-compose build 9 | 10 | # Run container 11 | docker-compose up -d 12 | ``` 13 | 14 | ### Docker 15 | ```bash 16 | # Build Docker image 17 | ## Replace all variables with their values 18 | ### See `.env` file 19 | docker build -t os2d:latest \ 20 | --build-arg USER_UID=${USER_UID} \ 21 | --build-arg USER_GID=${USER_GID} \ 22 | --build-arg OS2D_ROOT=${WORKDIR} \ 23 | ./Docker/ 24 | 25 | # Run container 26 | docker run -d \ 27 | --name os2d \ 28 | -p 80:80 \ 29 | os2d:latest \ 30 | uvicorn app:app --port 80 --host 0.0.0.0 31 | ``` -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Installation of OS2D 2 | ```bash 3 | # create a separate conda env 4 | conda create -n os2d python=3.7 5 | 6 | # activate it 7 | conda activate os2d 8 | 9 | # this installs the right pip and dependencies for the fresh python 10 | conda install
ipython pip 11 | 12 | # get pytorch and torchvision 13 | conda install pytorch=1.4 torchvision=0.5 cudatoolkit=10.0 -c pytorch 14 | 15 | # more dependencies 16 | conda install tqdm=4.42.1 pandas=1.0.1 matplotlib=3.1.3 pyyaml=5.3 scipy=1.4.1 17 | conda install -c conda-forge yacs=0.1.6 18 | 19 | # to monitor GPU usage on a cluster 20 | pip install gpustat==0.6.0 21 | 22 | # to view train logs in visdom 23 | pip install visdom==0.1.8.9 24 | ``` 25 | 26 | ## Installation of the baselines 27 | 1. To install the detector-retrieval baselines, see [instructions](baselines/detector_retrieval/INSTALL.md). 28 | 2. To install the CoAE baselines, see [instructions](baselines/CoAE/INSTALL.md). 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Anton Osokin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from fastapi import FastAPI 3 | from fastapi.responses import JSONResponse 4 | from pydantic import BaseModel 5 | 6 | import torch 7 | import torchvision.transforms as transforms 8 | 9 | from os2d.config import cfg 10 | from os2d.structures.bounding_box import filter_bbox, convert_xyxy_bbox_to_relative_coords 11 | from os2d.engine.optimization import create_optimizer 12 | from os2d.structures.feature_map import FeatureMapSize 13 | from os2d.modeling.model import build_os2d_from_config 14 | from os2d.utils import set_random_seed, get_trainable_parameters, mkdir, setup_logger, decode_base64_to_image, get_image_size_after_resize_preserving_aspect_ratio 15 | 16 | class ImageRequest(BaseModel): 17 | content: str 18 | 19 | class QueryImageResquest(BaseModel): 20 | image: ImageRequest 21 | query: List[ImageRequest] 22 | 23 | def preprocess_image(image, transform_image, target_size, cuda=True): 24 | h, w = get_image_size_after_resize_preserving_aspect_ratio(h=image.size[1], 25 | w=image.size[0], 26 | target_size=target_size) 27 | image = image.resize((w, h)) 28 | image = transform_image(image) 29 | if cuda: 30 | image = image.cuda() 31 | return image 32 | 33 | def init_logger(cfg): 34 | output_dir = cfg.output.path 35 | if output_dir: 36 | mkdir(output_dir) 37 | 38 | logger = setup_logger("OS2D", output_dir if cfg.output.save_log_to_file else None) 39 | 40 | model_path = "models/os2d_v2-train.pth" 41 | init_logger(cfg) 42 | 43 | app = FastAPI() 44 | 45 | cfg.visualization.eval.max_detections = 30 46 | cfg.visualization.eval.score_threshold = 0.45 47 | 48 | @app.post('/detect-all-instances') 49 | def query_image(request: QueryImageResquest): 50 | # set this to use faster convolutions 51 | cfg.is_cuda = torch.cuda.is_available() 52 | 53 | if cfg.is_cuda: 54 | assert torch.cuda.is_available(), "Do not have available GPU, but cfg.is_cuda == 1" 55 | torch.backends.cudnn.benchmark = True 56 | 57 | # random seed 58 | set_random_seed(cfg.random_seed, cfg.is_cuda) 59 | 60 | 61 | # Model 62 | cfg.init.model = model_path 63 | net, box_coder, criterion, img_normalization, optimizer_state = build_os2d_from_config(cfg) 64 | 65 | # Optimizer 66 | parameters = get_trainable_parameters(net) 67 | optimizer = create_optimizer(parameters, cfg.train.optim, optimizer_state) 68 | 69 | # load the dataset 70 | input_image = decode_base64_to_image(request.image.content) 71 | query_image = [decode_base64_to_image(image.content) for image in request.query] 72 | class_ids = [0 for _ in range(len(query_image))] 73 | 74 | transform_image = transforms.Compose([ 75 | transforms.ToTensor(), 76 | transforms.Normalize(img_normalization["mean"], img_normalization["std"]) 77 | ]) 78 | input_processed = preprocess_image(input_image, transform_image, 1500, cfg.is_cuda).unsqueeze(0) 79 | input_h, input_w = input_processed.size()[-2:] 80 | 81 | query_processed = [preprocess_image(image, transform_image, cfg.model.class_image_size, cfg.is_cuda) for image in query_image] 82 | 83 | with torch.no_grad(): 84 | loc_prediction_batch, class_prediction_batch, _, fm_size, transform_corners_batch = net(images=input_processed, class_images=query_processed) 85 | 86 | 87 | image_loc_scores_pyramid = [loc_prediction_batch[0]] 88 | image_class_scores_pyramid = [class_prediction_batch[0]] 89 | img_size_pyramid = [FeatureMapSize(img=input_processed)] 90 | 
transform_corners_pyramid = [transform_corners_batch[0]] 91 | 92 | boxes = box_coder.decode_pyramid(image_loc_scores_pyramid, image_class_scores_pyramid, 93 | img_size_pyramid, class_ids, 94 | nms_iou_threshold=cfg.eval.nms_iou_threshold, 95 | nms_score_threshold=cfg.eval.nms_score_threshold, 96 | transform_corners_pyramid=transform_corners_pyramid) 97 | 98 | # remove some fields to lighten visualization 99 | boxes.remove_field("default_boxes") 100 | 101 | scores, boxes_coords = filter_bbox(boxes, cfg.visualization.eval.score_threshold, cfg.visualization.eval.max_detections) 102 | boxes_coords = [convert_xyxy_bbox_to_relative_coords(box, im_height=input_h, im_width=input_w) for box in boxes_coords.tolist()] 103 | return JSONResponse(content={'scores': scores.tolist(), 'bboxes': boxes_coords}) -------------------------------------------------------------------------------- /baselines/CoAE/INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Installation of the CoAE code 2 | Needed for the CoAE baseline. These instructions assume OS2D is [installed](../../INSTALL.md), and maskrcnn_benchmark from the detector-retrieval baseline is [installed](../detector_retrieval/INSTALL.md). 3 | The instructions are based on [the CoAE repo](https://github.com/timy90022/One-Shot-Object-Detection). 4 | ```bash 5 | # activate the os2d env 6 | conda activate os2d 7 | cd ${OS2D_ROOT}/baselines/CoAE 8 | 9 | # extra dependencies (from https://github.com/timy90022/One-Shot-Object-Detection/blob/master/requirements.txt) 10 | pip install easydict==1.9 11 | 12 | # clone the repo 13 | git clone https://github.com/timy90022/One-Shot-Object-Detection.git coae 14 | cd coae 15 | # I was on this commit: 16 | git checkout 2098ad3e90cb4aa9f1dd188a40efa29927ac3ab1 17 | 18 | # build binaries (see https://github.com/timy90022/One-Shot-Object-Detection/blob/master/README.md) 19 | cd ${OS2D_ROOT}/baselines/CoAE/coae/lib 20 | python setup.py build develop 21 | ``` 22 | -------------------------------------------------------------------------------- /baselines/CoAE/README.md: -------------------------------------------------------------------------------- 1 | ## Experiments with the CoAE baseline 2 | 3 | ### Installation 4 | See [INSTALL.md](./INSTALL.md) 5 | 6 | ### Setup datasets 7 | ```bash 8 | cd ${OS2D_ROOT}/baselines/CoAE/data 9 | ln -s ${OS2D_ROOT}/data/grozi grozi 10 | ln -s ${OS2D_ROOT}/data/dairy dairy 11 | ln -s ${OS2D_ROOT}/data/paste paste 12 | ln -s ${OS2D_ROOT}/data/instre instre 13 | ``` 14 | 15 | ### Preparations 16 | ```bash 17 | cd ${OS2D_ROOT}/baselines/CoAE 18 | conda activate os2d 19 | export PYTHONPATH=$OS2D_ROOT:$PYTHONPATH 20 | ``` 21 | 22 | ### Train models 23 | ```bash 24 | # training runs 25 | python experiments/launcher_coae_grozi_train.py 26 | python experiments/launcher_coae_instre_train.py 27 | ``` 28 | 29 | ### View training logs 30 | ```bash 31 | # Convert text logs to the OS2D binary format 32 | python experiments/parse_logs_to_pkl.py --log_path output/grozi 33 | python experiments/parse_logs_to_pkl.py --log_path output/instre 34 | 35 | # View in Visdom 36 | python ../../os2d/utils/plot_visdom.py --log_path output/grozi 37 | python ../../os2d/utils/plot_visdom.py --log_path output/instre 38 | ``` 39 | 40 | ### Run evaluation 41 | ```bash 42 | # evaluation of the best models (selection of the best model on the validation set was done manually) 43 | python experiments/launcher_coae_grozi_eval.py 44 | python experiments/launcher_coae_instre_eval.py 45 | 
``` 46 | 47 | ### View results 48 | ```bash 49 | # Create tables 50 | # Table 3 51 | python experiments/launcher_coae_grozi_eval_collect.py 52 | # Table 4 53 | python experiments/launcher_coae_instre_eval_collect.py 54 | ``` 55 | -------------------------------------------------------------------------------- /baselines/CoAE/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, 'coae', 'lib') 12 | add_path(lib_path) 13 | 14 | coco_path = osp.join(this_dir, 'coae', 'data', 'coco', 'PythonAPI') 15 | add_path(coco_path) 16 | -------------------------------------------------------------------------------- /baselines/CoAE/cfgs/res101.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res101 2 | TRAIN: 3 | HAS_RPN: True 4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 5 | RPN_POSITIVE_OVERLAP: 0.7 6 | RPN_BATCHSIZE: 256 7 | PROPOSAL_METHOD: gt 8 | BG_THRESH_LO: 0.0 9 | DISPLAY: 20 10 | BATCH_SIZE: 128 11 | WEIGHT_DECAY: 0.0001 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: res50_faster_rcnn 14 | USE_FLIPPED: False 15 | TEST: 16 | HAS_RPN: True 17 | POOLING_SIZE: 7 18 | POOLING_MODE: align 19 | CROP_RESIZE_WITH_MAX_POOL: False 20 | ANCHOR_SCALES: [4,8,16,32] 21 | ANCHOR_RATIOS: [0.5,1,2] -------------------------------------------------------------------------------- /baselines/CoAE/cfgs/res50.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res50 2 | TRAIN: 3 | HAS_RPN: True 4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 5 | RPN_POSITIVE_OVERLAP: 0.7 6 | RPN_BATCHSIZE: 256 7 | PROPOSAL_METHOD: gt 8 | BG_THRESH_LO: 0.0 9 | DISPLAY: 20 10 | BATCH_SIZE: 128 11 | WEIGHT_DECAY: 0.0001 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: res50_faster_rcnn 14 | USE_FLIPPED: False 15 | TEST: 16 | HAS_RPN: True 17 | POOLING_MODE: align 18 | ANCHOR_SCALES: [4,8,16,32] 19 | ANCHOR_RATIOS: [0.5,1,2] -------------------------------------------------------------------------------- /baselines/CoAE/experiments/launcher_coae_grozi_eval.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | 4 | from os2d.utils import launcher as launcher 5 | 6 | 7 | if __name__ == "__main__": 8 | # load default launcher parameters 9 | parser = launcher.create_args_parser() 10 | args = parser.parse_args() 11 | 12 | script_path = os.path.dirname(os.path.abspath(__file__)) 13 | coae_path = os.path.join(script_path, "..") 14 | os2d_path = os.path.join(coae_path, "..", "..") 15 | 16 | train_launcher_path = os.path.join(coae_path, "test_net.py") 17 | main_command = f"PYTHONPATH={coae_path}:{os2d_path}:$PYTHONPATH python {train_launcher_path}" 18 | 19 | data_path = os.path.abspath(os.path.join(coae_path, "data")) 20 | 21 | config_job_name = "coae" 22 | log_path = os.path.abspath(os.path.join(coae_path, "output", "eval_grozi")) 23 | 24 | exp_job_names = [] 25 | exp_log_paths = [] 26 | exp_commands = [] 27 | exp_log_file_prefix = [] 28 | 29 | 30 | def add_job(arch, 31 | eval_dataset, 32 | model_path, 33 | model_checkpoint, 34 | folder_suffix="", 35 | extra_params=None, 36 | ): 37 | job_name = f"{config_job_name}.{eval_dataset}.{arch}" 38 | 39 | # set output folder 40 | log_folder = 
f"{config_job_name}" 41 | if folder_suffix: 42 | log_folder += "." + folder_suffix 43 | log_folder = os.path.join(log_path, log_folder) 44 | 45 | commands = [] 46 | 47 | d = OrderedDict() 48 | d["--cuda"] = "" 49 | if os.path.isfile(model_path): 50 | d["--weights"] = os.path.join(log_folder, model_path) 51 | else: 52 | d["--weights"] = os.path.join(model_path, model_checkpoint) 53 | d["--dataset"] = eval_dataset 54 | if eval_dataset == "paste-f": 55 | d["--class_image_augmentation"] = "rotation90" 56 | d["--net"] = arch 57 | d["--set"] = "" 58 | d["TRAIN.USE_FLIPPED"] = "False" 59 | d["DATA_DIR"] = data_path 60 | # put smth here, but those are not used in CoAE 61 | d["TRAIN.MAX_SIZE"] = "5000" 62 | d["TEST.MAX_SIZE"] = "5000" 63 | 64 | if extra_params: 65 | d.update(extra_params) 66 | 67 | commands += [main_command + " " + launcher.parameters_to_str(d)] 68 | 69 | exp_job_names.append(job_name) 70 | exp_commands.append(commands) 71 | exp_log_paths.append(log_folder) 72 | exp_log_file_prefix.append(f"eval_{eval_dataset}_") 73 | 74 | 75 | sub_index = 0 76 | arch = "res101" 77 | init_net_name = "Pytorch" 78 | query_size = 192 79 | scale = 900 80 | multi_scale_training = "ms" 81 | best_job_name = f"{sub_index}.{arch}_init{init_net_name}_query{query_size}_scale{scale}_{multi_scale_training}" 82 | 83 | dataset_scale = {} 84 | # compute renormalizations by using the same ratio of scale between datasets as in OS2D 85 | dataset_scale["grozi-val-new-cl"] = 1 86 | dataset_scale["grozi-val-old-cl"] = 1 87 | dataset_scale["dairy"] = 3500.0 / 1280.0 88 | dataset_scale["paste-v"] = 3500.0 / 1280.0 89 | dataset_scale["paste-f"] = 3500.0 / 1280.0 90 | 91 | for eval_dataset in ["grozi-val-new-cl", "grozi-val-old-cl", "dairy", "paste-v", "paste-f"]: 92 | cur_scale = int(scale * dataset_scale[eval_dataset]) 93 | scale_str = str(cur_scale) if multi_scale_training == "ss"\ 94 | else ",".join(str(int(m * cur_scale)) for m in [0.5, 0.625, 0.8, 1, 1.2, 1.4, 1.6]) 95 | add_job(arch, eval_dataset, 96 | "output/grozi/coae." + best_job_name, "best_model_1.pth", 97 | folder_suffix="best_train", 98 | extra_params={ 99 | "TRAIN.query_size": query_size, 100 | "TRAIN.SCALES": f"[{scale_str}]", 101 | "TEST.SCALES": f"[{cur_scale}]", 102 | } 103 | ) 104 | 105 | 106 | for job_name, log_path, commands, log_file_prefix in zip(exp_job_names, exp_log_paths, exp_commands, exp_log_file_prefix): 107 | launcher.add_job(job_name=job_name, 108 | log_path=log_path, 109 | commands=commands, 110 | log_file_prefix=log_file_prefix) 111 | launcher.launch_all_jobs(args) 112 | -------------------------------------------------------------------------------- /baselines/CoAE/experiments/launcher_coae_grozi_eval_collect.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | from os2d.utils.logger import extract_pattern_after_marked_line, numeric_const_pattern, mAP_percent_to_points 5 | 6 | 7 | if __name__ == "__main__": 8 | script_path = os.path.dirname(os.path.abspath(__file__)) 9 | coae_path = os.path.join(script_path, "..") 10 | 11 | config_job_name = "coae" 12 | log_path = os.path.abspath(os.path.join(coae_path, "output", "eval_grozi")) 13 | 14 | 15 | def get_result(eval_dataset, 16 | folder_suffix="", 17 | result_suffix="out.txt", 18 | ): 19 | 20 | # set output folder 21 | log_folder = f"{config_job_name}" 22 | if folder_suffix: 23 | log_folder += "." 
+ folder_suffix 24 | log_folder = os.path.join(log_path, log_folder) 25 | 26 | result_file = f"eval_{eval_dataset}_{result_suffix}" 27 | result_file = os.path.join(log_folder, result_file) 28 | 29 | dataset_pattern = "Evaluating detections" 30 | eval_pattern = f"mAP@0.50:\s({numeric_const_pattern})" 31 | 32 | mAP_value = extract_pattern_after_marked_line(result_file, dataset_pattern, eval_pattern) 33 | return mAP_percent_to_points(mAP_value) 34 | 35 | 36 | datasets = ["grozi-val-old-cl", "grozi-val-new-cl", "dairy", "paste-v", "paste-f"] 37 | table = pd.DataFrame(columns=datasets) 38 | d = {} 39 | for eval_dataset in datasets: 40 | d[eval_dataset] = get_result(eval_dataset, folder_suffix="best_train") 41 | table = table.append(d, ignore_index=True) 42 | 43 | print(table, sep='\n') 44 | -------------------------------------------------------------------------------- /baselines/CoAE/experiments/launcher_coae_grozi_train.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | 4 | from os2d.utils import launcher as launcher 5 | 6 | 7 | if __name__ == "__main__": 8 | # load default launcher parameters 9 | parser = launcher.create_args_parser() 10 | args = parser.parse_args() 11 | 12 | script_path = os.path.dirname(os.path.abspath(__file__)) 13 | coae_path = os.path.join(script_path, "..") 14 | os2d_path = os.path.join(coae_path, "..", "..") 15 | 16 | train_launcher_path = os.path.join(coae_path, "trainval_net.py") 17 | main_command = f"PYTHONPATH={coae_path}:{os2d_path}:$PYTHONPATH python {train_launcher_path}" 18 | 19 | data_path = os.path.abspath(os.path.join(coae_path, "data")) 20 | 21 | config_job_name = "coae" 22 | log_path = os.path.abspath(os.path.join(coae_path, "output", "grozi")) 23 | 24 | exp_job_names = [] 25 | exp_log_paths = [] 26 | exp_commands = [] 27 | 28 | 29 | def add_job(job_name, 30 | sub_index, 31 | arch, 32 | init_net, 33 | extra_params=None, 34 | ): 35 | job_name = f"{config_job_name}.{sub_index}.{job_name}" 36 | 37 | log_folder = job_name 38 | log_folder = os.path.join(log_path, log_folder) 39 | 40 | commands = [] 41 | 42 | # stage 1 43 | d = OrderedDict() 44 | d["--cuda"] = "" 45 | d["--dataset"] = "grozi-train" 46 | d["--dataset_val"] = "grozi-val-new-cl" 47 | d["--init_weights"] = init_net 48 | d["--disp_interval"] = "1" 49 | d["--val_interval"] = "10" 50 | d["--nw"] = "4" 51 | d["--bs"] = "4" 52 | d["--s"] = 1 53 | d["--epochs"] = "2000" 54 | d["--lr_decay_milestones"] = "1000 1500" 55 | d["--lr"] = 0.01 # default starting learning rate 56 | d["--lr_decay_gamma"] = 0.1 57 | d["--lr_reload_best_after_decay"] = "True" 58 | d["--save_dir"] = log_folder 59 | d["--net"] = arch 60 | d["--set"] = "" 61 | d["DATA_DIR"] = data_path 62 | # put smth here, but those are not used in CoAE 63 | d["TRAIN.MAX_SIZE"] = "3000" 64 | d["TEST.MAX_SIZE"] = "3000" 65 | 66 | if extra_params: 67 | d.update(extra_params) 68 | 69 | commands += [main_command + " " + launcher.parameters_to_str(d)] 70 | 71 | exp_job_names.append(job_name) 72 | exp_commands.append(commands) 73 | exp_log_paths.append(log_folder) 74 | 75 | 76 | arch = "res101" 77 | init_net = os.path.join(os2d_path, "models", "resnet101-5d3b4d8f.pth") 78 | init_net_name = "Pytorch" 79 | query_size = 192 80 | scale = 900 81 | ms = "ms" 82 | 83 | job_id = 0 84 | scale_str = str(scale) if ms == "ss" else ",".join(str(int(m * scale)) for m in [0.5, 0.625, 0.8, 1, 1.2, 1.4, 1.6]) 85 | 
add_job(f"{arch}_init{init_net_name}_query{query_size}_scale{scale}_{ms}", job_id, 86 | arch, init_net, 87 | { 88 | "TRAIN.query_size": query_size, 89 | "TRAIN.SCALES": f"[{scale_str}]", 90 | "TEST.SCALES": f"[{scale}]", # CAUTION! single scale here 91 | } 92 | ) 93 | 94 | for job_name, log_path, commands in zip(exp_job_names, exp_log_paths, exp_commands): 95 | launcher.add_job(job_name=job_name, 96 | log_path=log_path, 97 | commands=commands) 98 | launcher.launch_all_jobs(args) 99 | -------------------------------------------------------------------------------- /baselines/CoAE/experiments/launcher_coae_instre_eval.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | 4 | from os2d.utils import launcher as launcher 5 | 6 | 7 | if __name__ == "__main__": 8 | # load default launcher parameters 9 | parser = launcher.create_args_parser() 10 | args = parser.parse_args() 11 | 12 | script_path = os.path.dirname(os.path.abspath(__file__)) 13 | coae_path = os.path.join(script_path, "..") 14 | os2d_path = os.path.join(coae_path, "..", "..") 15 | 16 | train_launcher_path = os.path.join(coae_path, "test_net.py") 17 | main_command = f"PYTHONPATH={coae_path}:{os2d_path}:$PYTHONPATH python {train_launcher_path}" 18 | 19 | data_path = os.path.abspath(os.path.join(coae_path, "data")) 20 | 21 | config_job_name = "coae" 22 | log_path = os.path.abspath(os.path.join(coae_path, "output", "eval_instre")) 23 | 24 | exp_job_names = [] 25 | exp_log_paths = [] 26 | exp_commands = [] 27 | exp_log_file_prefix = [] 28 | 29 | 30 | def add_job(arch, 31 | eval_dataset, 32 | model_path, 33 | model_checkpoint, 34 | folder_suffix="", 35 | test_augment=None, 36 | extra_params=None, 37 | ): 38 | job_name = f"{config_job_name}.{eval_dataset}.{arch}" 39 | 40 | # set output folder 41 | log_folder = f"{config_job_name}" 42 | if folder_suffix: 43 | log_folder += "." 
+ folder_suffix 44 | log_folder = os.path.join(log_path, log_folder) 45 | 46 | commands = [] 47 | 48 | # stage 1 49 | d = OrderedDict() 50 | d["--cuda"] = "" 51 | if os.path.isfile(model_path): 52 | d["--weights"] = os.path.join(log_folder, model_path) 53 | else: 54 | d["--weights"] = os.path.join(model_path, model_checkpoint) 55 | d["--dataset"] = eval_dataset 56 | d["--net"] = arch 57 | if test_augment is not None: 58 | d["--class_image_augmentation"] = test_augment 59 | d["--set"] = "" 60 | d["TRAIN.USE_FLIPPED"] = "False" 61 | d["DATA_DIR"] = data_path 62 | # put smth here, but those are not used in CoAE 63 | d["TRAIN.MAX_SIZE"] = "5000" 64 | d["TEST.MAX_SIZE"] = "5000" 65 | 66 | if extra_params: 67 | d.update(extra_params) 68 | 69 | commands += [main_command + " " + launcher.parameters_to_str(d)] 70 | 71 | exp_job_names.append(job_name) 72 | exp_commands.append(commands) 73 | exp_log_paths.append(log_folder) 74 | exp_log_file_prefix.append(f"eval_{eval_dataset}_{arch}_") 75 | 76 | query_size = 192 77 | scale = 900 78 | multi_scale_training = "ms" 79 | dataset_scale = {} 80 | # compute renormalizations by using the same ratio of scale between datasets as in OS2D 81 | dataset_scale["instre-s1"] = 700.0 / 1280.0 82 | dataset_scale["instre-s2"] = 600.0 / 1280.0 83 | 84 | job_id = 0 85 | for dataset in ["instre-s1", "instre-s2"]: 86 | for arch, init_net in zip(["res101", "res50"], ["Pytorch", "Caffe2"]): 87 | eval_dataset = f"{dataset}-test" 88 | dataset_train = f"{dataset}-train" 89 | cur_scale = int(scale * dataset_scale[dataset]) 90 | scale_str = str(cur_scale) if multi_scale_training == "ss"\ 91 | else ",".join(str(int(m * cur_scale)) for m in [0.5, 0.625, 0.8, 1, 1.2, 1.4, 1.6]) 92 | 93 | best_job_name = f"{job_id}.{dataset_train}_{arch}_init{init_net}_query{query_size}_scale{cur_scale}_{multi_scale_training}" 94 | add_job(arch, eval_dataset, 95 | "output/instre/coae." + best_job_name, "best_model_1.pth", 96 | folder_suffix="best_train", 97 | test_augment="rotation90", 98 | extra_params={ 99 | "TRAIN.query_size": query_size, 100 | "TRAIN.SCALES": f"[{scale_str}]", 101 | "TEST.SCALES": f"[{cur_scale}]", 102 | } 103 | ) 104 | job_id += 1 105 | 106 | 107 | for job_name, log_path, commands, log_file_prefix in zip(exp_job_names, exp_log_paths, exp_commands, exp_log_file_prefix): 108 | launcher.add_job(job_name=job_name, 109 | log_path=log_path, 110 | commands=commands, 111 | log_file_prefix=log_file_prefix) 112 | launcher.launch_all_jobs(args) 113 | -------------------------------------------------------------------------------- /baselines/CoAE/experiments/launcher_coae_instre_eval_collect.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | from os2d.utils.logger import extract_pattern_after_marked_line, numeric_const_pattern, mAP_percent_to_points 5 | 6 | 7 | if __name__ == "__main__": 8 | script_path = os.path.dirname(os.path.abspath(__file__)) 9 | coae_path = os.path.join(script_path, "..") 10 | 11 | config_job_name = "coae" 12 | log_path = os.path.abspath(os.path.join(coae_path, "output", "eval_instre")) 13 | 14 | 15 | def get_result(arch, 16 | eval_dataset, 17 | folder_suffix="", 18 | result_suffix="out.txt", 19 | ): 20 | 21 | # set output folder 22 | log_folder = f"{config_job_name}" 23 | if folder_suffix: 24 | log_folder += "." 
+ folder_suffix 25 | log_folder = os.path.join(log_path, log_folder) 26 | 27 | result_file = f"eval_{eval_dataset}_{arch}_{result_suffix}" 28 | result_file = os.path.join(log_folder, result_file) 29 | 30 | dataset_pattern = "Evaluating detections" 31 | eval_pattern = f"mAP@0.50:\s({numeric_const_pattern})" 32 | 33 | mAP_value = extract_pattern_after_marked_line(result_file, dataset_pattern, eval_pattern) 34 | return mAP_percent_to_points(mAP_value) 35 | 36 | 37 | datasets = ["instre-s1-test", "instre-s2-test"] 38 | archs = ["res50", "res101"] 39 | table = pd.DataFrame(columns=["arch"] + datasets) 40 | for arch in archs: 41 | d = {} 42 | d["arch"] = arch 43 | for eval_dataset in datasets: 44 | d[eval_dataset] = get_result(arch, eval_dataset, folder_suffix="best_train") 45 | table = table.append(d, ignore_index=True) 46 | 47 | print(table, sep='\n') 48 | -------------------------------------------------------------------------------- /baselines/CoAE/experiments/launcher_coae_instre_train.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | 4 | from os2d.utils import launcher as launcher 5 | 6 | 7 | if __name__ == "__main__": 8 | # load default launcher parameters 9 | parser = launcher.create_args_parser() 10 | args = parser.parse_args() 11 | 12 | script_path = os.path.dirname(os.path.abspath(__file__)) 13 | coae_path = os.path.join(script_path, "..") 14 | os2d_path = os.path.join(coae_path, "..", "..") 15 | 16 | train_launcher_path = os.path.join(coae_path, "trainval_net.py") 17 | main_command = f"PYTHONPATH={coae_path}:{os2d_path}:$PYTHONPATH python {train_launcher_path}" 18 | 19 | data_path = os.path.abspath(os.path.join(coae_path, "data")) 20 | 21 | config_job_name = "coae" 22 | log_path = os.path.abspath(os.path.join(coae_path, "output", "instre")) 23 | 24 | exp_job_names = [] 25 | exp_log_paths = [] 26 | exp_commands = [] 27 | 28 | 29 | def add_job(job_name, 30 | sub_index, 31 | arch, 32 | init_net, 33 | dataset_train, 34 | dataset_val, 35 | extra_params=None, 36 | ): 37 | job_name = f"{config_job_name}.{sub_index}.{job_name}" 38 | 39 | log_folder = job_name 40 | log_folder = os.path.join(log_path, log_folder) 41 | 42 | commands = [] 43 | 44 | # stage 1 45 | d = OrderedDict() 46 | d["--cuda"] = "" 47 | d["--dataset"] = dataset_train 48 | d["--dataset_val"] = dataset_val 49 | d["--init_weights"] = init_net 50 | d["--disp_interval"] = "1" 51 | d["--val_interval"] = "1" 52 | d["--nw"] = "4" 53 | d["--bs"] = "4" 54 | d["--s"] = 1 55 | d["--epochs"] = "100" # set 20x less epochs for instre comapared to grozi as ezch epoch is 20x bigger 56 | d["--lr_decay_milestones"] = "50 75" 57 | d["--lr"] = 0.01 # default starting learning rate 58 | d["--lr_decay_gamma"] = 0.1 59 | d["--lr_reload_best_after_decay"] = "True" 60 | d["--save_dir"] = log_folder 61 | d["--net"] = arch 62 | d["--class_image_augmentation"] = "rotation90" 63 | d["--set"] = "" 64 | d["DATA_DIR"] = data_path 65 | # put smth here, but those are not used in CoAE 66 | d["TRAIN.MAX_SIZE"] = "3000" 67 | d["TEST.MAX_SIZE"] = "3000" 68 | 69 | if extra_params: 70 | d.update(extra_params) 71 | 72 | commands += [main_command + " " + launcher.parameters_to_str(d)] 73 | 74 | exp_job_names.append(job_name) 75 | exp_commands.append(commands) 76 | exp_log_paths.append(log_folder) 77 | 78 | 79 | query_size = 192 80 | scale = 900 81 | multi_scale_training = "ms" 82 | dataset_scale = {} 83 | 84 | # compute renormalizations by using the same ratio of scale 
between datasets as in OS2D 85 | dataset_scale["instre-s1"] = 700.0 / 1280.0 86 | dataset_scale["instre-s2"] = 600.0 / 1280.0 87 | 88 | job_id = 0 89 | for dataset in ["instre-s1", "instre-s2"]: 90 | dataset_train = f"{dataset}-train" 91 | dataset_val = f"{dataset}-val" 92 | cur_scale = int(scale * dataset_scale[dataset]) 93 | scale_str = str(cur_scale) if multi_scale_training == "ss"\ 94 | else ",".join(str(int(m * cur_scale)) for m in [0.5, 0.625, 0.8, 1, 1.2, 1.4, 1.6]) 95 | 96 | arch = "res101" 97 | init_net_name = "Pytorch" 98 | init_net = os.path.join(os2d_path, "models", "resnet101-5d3b4d8f.pth") 99 | 100 | add_job(f"{dataset_train}_{arch}_init{init_net_name}_query{query_size}_scale{cur_scale}_{multi_scale_training}", job_id, 101 | arch, init_net, 102 | dataset_train, dataset_val, 103 | { 104 | "TRAIN.query_size": query_size, 105 | "TRAIN.SCALES": f"[{scale_str}]", 106 | "TEST.SCALES": f"[{cur_scale}]", 107 | } 108 | ) 109 | job_id += 1 110 | 111 | arch = "res50" 112 | init_net_name = "Caffe2" 113 | init_net = os.path.join(os2d_path, "models", "imagenet-caffe-resnet50-features-ac468af-converted.pth") 114 | 115 | add_job(f"{dataset_train}_{arch}_init{init_net_name}_query{query_size}_scale{cur_scale}_{multi_scale_training}", job_id, 116 | arch, init_net, 117 | dataset_train, dataset_val, 118 | { 119 | "TRAIN.query_size": query_size, 120 | "TRAIN.SCALES": f"[{scale_str}]", 121 | "TEST.SCALES": f"[{cur_scale}]", 122 | } 123 | ) 124 | job_id += 1 125 | 126 | 127 | for job_name, log_path, commands in zip(exp_job_names, exp_log_paths, exp_commands): 128 | launcher.add_job(job_name=job_name, 129 | log_path=log_path, 130 | commands=commands) 131 | launcher.launch_all_jobs(args) 132 | -------------------------------------------------------------------------------- /baselines/CoAE/experiments/parse_logs_to_pkl.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import argparse 4 | import glob 5 | 6 | 7 | parser = argparse.ArgumentParser() 8 | 9 | parser.add_argument("--log_path", default=None, type=str, help="Folder to search for logs") 10 | parser.add_argument("--log_names", default=[], nargs="+", type=str, help="Plot logs from these folder") 11 | parser.add_argument("--log_file_names", default=["out.txt"], nargs="+", type=str, help="Plot logs from these folder") 12 | 13 | args = parser.parse_args() 14 | 15 | if args.log_path is not None and not os.path.isdir(args.log_path): 16 | raise RuntimeError("Log path %s does not exist" % args.log_path) 17 | 18 | log_path = args.log_path if args.log_path else "" 19 | if len(args.log_names) == 0: 20 | print("--log_names was not specified, scanning folder %s" % args.log_path) 21 | log_names = sorted(glob.glob( os.path.join(log_path, "*"))) 22 | else: 23 | log_names = [os.path.join(log_path, name) for name in args.log_names] 24 | 25 | log_file_names = args.log_file_names 26 | target_pickle_file = "train_log.pkl" 27 | 28 | prefix = "mAP@0.50: " 29 | plot_name = "mAP@0.50" 30 | 31 | iter_log_step = 10 32 | iter_plot_name = "iter" 33 | 34 | 35 | def update_meter(log, name, num_log_steps, value): 36 | # create entry if needed 37 | if name not in log: 38 | log[name] = [] 39 | meter = log[name] 40 | # add missing values if any 41 | while len(meter) < num_log_steps - 1: 42 | meter.append(float("nan")) 43 | # add the new value 44 | meter.append(value) 45 | 46 | for log_name in log_names: 47 | log_pkl = {} 48 | num_log_steps = 0 49 | 50 | for log_file_name in log_file_names: 51 | log_file = 
os.path.join(log_name, log_file_name) 52 | if not os.path.isfile(log_file): 53 | print("Missing file", log_file) 54 | continue 55 | with open(log_file, 'r') as log: 56 | for l in log: 57 | if l.startswith(prefix): 58 | num_log_steps += 1 59 | value = float(l[len(prefix):]) 60 | update_meter(log_pkl, plot_name, num_log_steps, value) 61 | update_meter(log_pkl, iter_plot_name, num_log_steps, num_log_steps * iter_log_step) 62 | if log_pkl: 63 | pickle.dump(log_pkl, open(os.path.join(log_name, "train_log.pkl"), "wb")) 64 | -------------------------------------------------------------------------------- /baselines/CoAE/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aosokin/os2d/817ffb15a8cbc353e4f94debe419902d4dd83a22/baselines/CoAE/lib/__init__.py -------------------------------------------------------------------------------- /baselines/CoAE/lib/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aosokin/os2d/817ffb15a8cbc353e4f94debe419902d4dd83a22/baselines/CoAE/lib/datasets/__init__.py -------------------------------------------------------------------------------- /baselines/CoAE/lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name. 9 | This file is a replacement of 10 | https://github.com/timy90022/One-Shot-Object-Detection/blob/master/lib/datasets/factory.py 11 | """ 12 | 13 | from lib.datasets.os2d import build_os2d_dataset_by_name 14 | 15 | 16 | # __sets = {} 17 | 18 | # # Set up the grozi dataset 19 | # for split in ["train", "val-old-cl", "val-new-cl", "val-all", "train-mini"]: 20 | # name = 'grozi-{}'.format(split) 21 | # __sets[name] = (lambda name=name: build_os2d_dataset_by_name(name, data_path=None)) 22 | 23 | # # Set up OS2D evaluation datasets 24 | # for name in ["dairy", "paste-v", "paste-f"]: 25 | # __sets[name] = (lambda name=name: build_os2d_dataset_by_name(name, data_path=None)) 26 | 27 | 28 | # # Set up the grozi dataset 29 | # for split in ["train", "val-old-cl", "val-new-cl", "val-all", "train-mini"]: 30 | # name = 'grozi-{}'.format(split) 31 | # __sets[name] = (lambda name=name: build_os2d_dataset_by_name(name, data_path=None, eval_scale=1280.0)) 32 | 33 | # # Set up evaluation datasets 34 | # dataset_eval_scale = {} 35 | # dataset_eval_scale["dairy"] = 3500.0 36 | # dataset_eval_scale["paste-v"] = 3500.0 37 | # dataset_eval_scale["paste-f"] = 2000.0 38 | 39 | # for name in dataset_eval_scale: 40 | # __sets[name] = (lambda name=name: build_os2d_dataset_by_name(name, data_path=None, eval_scale=dataset_eval_scale[name])) 41 | 42 | 43 | def get_imdb(name): 44 | """Get an imdb (image database) by name.""" 45 | return build_os2d_dataset_by_name(name) 46 | 47 | # if name not in __sets: 48 | # raise KeyError('Unknown dataset: {}'.format(name)) 49 | # return __sets[name]() 50 | 51 | 52 | # def list_imdbs(): 53 | # """List all registered imdbs.""" 54 | # return list(__sets.keys()) 55 | -------------------------------------------------------------------------------- /baselines/CoAE/lib/roi_data_layer/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aosokin/os2d/817ffb15a8cbc353e4f94debe419902d4dd83a22/baselines/CoAE/lib/roi_data_layer/__init__.py -------------------------------------------------------------------------------- /baselines/CoAE/lib/roi_data_layer/minibatch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | 8 | """Compute minibatch blobs for training a Fast R-CNN network. 9 | This file is a changed version of 10 | https://github.com/timy90022/One-Shot-Object-Detection/blob/master/lib/roi_data_layer/minibatch.py 11 | """ 12 | 13 | import numpy as np 14 | import numpy.random as npr 15 | from PIL import Image 16 | 17 | from model.utils.config import cfg 18 | from model.utils.blob import prep_im_for_blob, im_list_to_blob 19 | 20 | 21 | def imread(image_path): 22 | with open(image_path, "rb") as f: 23 | img = Image.open(f) 24 | if img.mode != "RGB": 25 | img = img.convert("RGB") 26 | img.load() 27 | return np.array(img) 28 | 29 | 30 | def get_minibatch(roidb, num_classes): 31 | """Given a roidb, construct a minibatch sampled from it.""" 32 | num_images = len(roidb) 33 | # Sample random scales to use for each image in this batch 34 | random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES), 35 | size=num_images) 36 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ 37 | 'num_images ({}) must divide BATCH_SIZE ({})'. \ 38 | format(num_images, cfg.TRAIN.BATCH_SIZE) 39 | 40 | # Get the input image blob, formatted for caffe 41 | im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) 42 | 43 | blobs = {'data': im_blob} 44 | 45 | assert len(im_scales) == 1, "Single batch only" 46 | assert len(roidb) == 1, "Single batch only" 47 | 48 | # gt boxes: (x1, y1, x2, y2, cls) 49 | if cfg.TRAIN.USE_ALL_GT: 50 | # Include all ground truth boxes 51 | gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] 52 | else: 53 | # For the COCO ground truth boxes, exclude the ones that are ''iscrowd'' 54 | gt_inds = np.where((roidb[0]['gt_classes'] != 0) & np.all(roidb[0]['gt_overlaps'].toarray() > -1.0, axis=1))[0] 55 | gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) 56 | gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0] 57 | gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds] 58 | blobs['gt_boxes'] = gt_boxes 59 | blobs['im_info'] = np.array( 60 | [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], 61 | dtype=np.float32) 62 | 63 | blobs['img_id'] = roidb[0]['img_id'] 64 | 65 | return blobs 66 | 67 | def _get_image_blob(roidb, scale_inds): 68 | """Builds an input blob from the images in the roidb at the specified 69 | scales. 
70 | """ 71 | num_images = len(roidb) 72 | 73 | processed_ims = [] 74 | im_scales = [] 75 | for i in range(num_images): 76 | #im = cv2.imread(roidb[i]['image']) 77 | im = imread(roidb[i]['image']) 78 | 79 | if len(im.shape) == 2: 80 | im = im[:,:,np.newaxis] 81 | im = np.concatenate((im,im,im), axis=2) 82 | # flip the channel, since the original one using cv2 83 | # rgb -> bgr 84 | # im = im[:,:,::-1] 85 | 86 | if roidb[i]['flipped']: 87 | im = im[:, ::-1, :] 88 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 89 | im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, 90 | cfg.TRAIN.MAX_SIZE) 91 | im_scales.append(im_scale) 92 | processed_ims.append(im) 93 | 94 | # Create a blob to hold the input images 95 | blob = im_list_to_blob(processed_ims) 96 | 97 | return blob, im_scales 98 | -------------------------------------------------------------------------------- /baselines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aosokin/os2d/817ffb15a8cbc353e4f94debe419902d4dd83a22/baselines/__init__.py -------------------------------------------------------------------------------- /baselines/detector_retrieval/INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Installation of the detector-retrieval benchmark 2 | 3 | ### Installation of the maskrcnn_benchmark 4 | Needed for the detector-retrieval baseline. These instructions assume OS2D is [installed](../../INSTALL.md) and largely follow the [ones of maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/INSTALL.md) 5 | ```bash 6 | # activate the os2d env 7 | conda activate os2d 8 | 9 | # dependencies 10 | conda install ninja=1.9.0 cython=0.29.15 11 | pip install opencv-python==4.2.0.32 12 | 13 | # the rest will be compiling from sources - set the path for that 14 | INSTALL_DIR=$HOME/local/software/pytorch/os2d 15 | mkdir -p $INSTALL_DIR 16 | 17 | # install pycocotools 18 | cd $INSTALL_DIR 19 | git clone https://github.com/cocodataset/cocoapi.git 20 | cd cocoapi/PythonAPI 21 | python setup.py build_ext install 22 | 23 | # install apex 24 | cd $INSTALL_DIR 25 | git clone https://github.com/NVIDIA/apex.git 26 | cd apex 27 | python setup.py install --cuda_ext --cpp_ext 28 | 29 | # install PyTorch Detection 30 | cd $INSTALL_DIR 31 | git clone https://github.com/facebookresearch/maskrcnn-benchmark.git 32 | cd maskrcnn-benchmark 33 | 34 | # difference from the standard instruction: get v0.1 35 | git checkout v0.1 36 | 37 | # the following will install the lib with 38 | # symbolic links, so that you can modify 39 | # the files if you want and won't need to 40 | # re-build it 41 | python setup.py build develop 42 | 43 | unset INSTALL_DIR 44 | ``` 45 | 46 | ### Installation of cnnimageretrieval-pytorch 47 | 48 | ```bash 49 | # activate the os2d env 50 | conda activate os2d 51 | 52 | # move to the folder of the baseline 53 | cd $OS2D_ROOT/baselines/detector_retrieval/retrieval 54 | 55 | git clone https://github.com/filipradenovic/cnnimageretrieval-pytorch.git cnnimageretrieval-pytorch 56 | cd cnnimageretrieval-pytorch 57 | git checkout v1.1 58 | cd .. 
59 | ``` 60 | -------------------------------------------------------------------------------- /baselines/detector_retrieval/README.md: -------------------------------------------------------------------------------- 1 | ## Experiments with the detector-retrieval baseline 2 | The detector-retrieval baseline consists of two models that need to be trained independently. 3 | 4 | ### Installation 5 | See [INSTALL.md](./INSTALL.md) 6 | 7 | ### Preparations 8 | ```bash 9 | conda activate os2d 10 | export PYTHONPATH=$OS2D_ROOT:$PYTHONPATH 11 | ``` 12 | 13 | ### Train the detector 14 | The main training script is `detector/train_detector.py`. The training runs can be launched with `detector/experiments/launcher_train_detector.py`. 15 | ```bash 16 | cd $OS2D_ROOT/baselines/detector_retrieval/detector 17 | 18 | # 6 jobs for training class-agnostic detectors, 2 jobs for training class-aware detectors 19 | python experiments/launcher_train_detector.py 20 | ``` 21 | Trained models will be stored in `$OS2D_ROOT/baselines/detector_retrieval/detector/output`. 22 | 23 | 24 | ### Train the retrieval system 25 | 26 | #### Prepare datasets 27 | One needs to convert the datasets to the format of [cnnimageretrieval-pytorch](https://github.com/filipradenovic/cnnimageretrieval-pytorch). This script does the conversion: 28 | ```bash 29 | cd $OS2D_ROOT/baselines/detector_retrieval/retrieval 30 | 31 | bash prepare_all_datasets.sh 32 | ``` 33 | 34 | #### Run training 35 | ```bash 36 | cd $OS2D_ROOT/baselines/detector_retrieval/retrieval 37 | 38 | # Grozi 39 | python experiments/launcher_grozi.py 40 | # INSTRE 41 | python experiments/launcher_instre.py 42 | ``` 43 | 44 | ### Run evaluation 45 | ```bash 46 | cd $OS2D_ROOT/baselines/detector_retrieval 47 | 48 | # Grozi 49 | python experiments/launcher_grozi_eval.py 50 | # INSTRE 51 | python experiments/launcher_instre_eval.py 52 | ``` 53 | 54 | ### View results 55 | ```bash 56 | cd $OS2D_ROOT/baselines/detector_retrieval 57 | 58 | # Create tables 59 | # Table 3 60 | python experiments/launcher_grozi_eval_collect.py 61 | # Table 4 62 | python experiments/launcher_instre_eval_collect.py 63 | ``` 64 | -------------------------------------------------------------------------------- /baselines/detector_retrieval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aosokin/os2d/817ffb15a8cbc353e4f94debe419902d4dd83a22/baselines/detector_retrieval/__init__.py -------------------------------------------------------------------------------- /baselines/detector_retrieval/detector/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aosokin/os2d/817ffb15a8cbc353e4f94debe419902d4dd83a22/baselines/detector_retrieval/detector/__init__.py -------------------------------------------------------------------------------- /baselines/detector_retrieval/detector/config/e2e_faster_rcnn_R_101_FPN_1x_multiscale.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | SCORE_THRESH: -10.0 18 | ROI_BOX_HEAD: 19 |
POOLER_RESOLUTION: 7 20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | NUM_CLASSES: -1 25 | DATASETS: 26 | TRAIN: ("grozi-train",) 27 | TEST: ("grozi-val-old-cl",) 28 | INPUT: 29 | MIN_SIZE_TRAIN: (480, 600, 768, 960, 1152, 1344, 1536) 30 | MAX_SIZE_TRAIN: 2048 31 | MIN_SIZE_TEST: 960 32 | MAX_SIZE_TEST: 1280 33 | DATALOADER: 34 | SIZE_DIVISIBILITY: 32 35 | SOLVER: 36 | BASE_LR: 0.02 37 | WEIGHT_DECAY: 0.0001 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | CHECKPOINT_PERIOD: 100000 41 | -------------------------------------------------------------------------------- /baselines/detector_retrieval/detector/config/e2e_faster_rcnn_R_101_FPN_1x_multiscale_noClasses.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | SCORE_THRESH: -10.0 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 7 20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | NUM_CLASSES: 0 25 | DATASETS: 26 | TRAIN: ("grozi-train",) 27 | TEST: ("grozi-val-all",) 28 | INPUT: 29 | MIN_SIZE_TRAIN: (480, 600, 768, 960, 1152, 1344, 1536) 30 | MAX_SIZE_TRAIN: 2048 31 | MIN_SIZE_TEST: 960 32 | MAX_SIZE_TEST: 1280 33 | DATALOADER: 34 | SIZE_DIVISIBILITY: 32 35 | SOLVER: 36 | BASE_LR: 0.02 37 | WEIGHT_DECAY: 0.0001 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | CHECKPOINT_PERIOD: 100000 41 | -------------------------------------------------------------------------------- /baselines/detector_retrieval/detector/config/e2e_faster_rcnn_R_101_FPN_1x_multiscale_noClasses_fromPytorch.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" 4 | BACKBONE: 5 | CONV_BODY: "R-101-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | SCORE_THRESH: -10.0 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 7 20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | NUM_CLASSES: 0 25 | DATASETS: 26 | TRAIN: ("grozi-train",) 27 | TEST: ("grozi-val-all",) 28 | INPUT: 29 | MIN_SIZE_TRAIN: (480, 600, 768, 960, 1152, 1344, 1536) 30 | MAX_SIZE_TRAIN: 2048 31 | MIN_SIZE_TEST: 960 32 | MAX_SIZE_TEST: 1280 33 | TO_BGR255: False 34 | PIXEL_MEAN: (0.485, 0.456, 0.406) 35 | PIXEL_STD: (0.229, 0.224, 0.225) 36 | DATALOADER: 37 | SIZE_DIVISIBILITY: 32 38 | SOLVER: 39 | BASE_LR: 0.02 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | CHECKPOINT_PERIOD: 100000 44 | -------------------------------------------------------------------------------- 
/baselines/detector_retrieval/detector/config/e2e_faster_rcnn_R_50_FPN_1x_multiscale.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | SCORE_THRESH: -10.0 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 7 20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | NUM_CLASSES: -1 25 | DATASETS: 26 | TRAIN: ("grozi-train",) 27 | TEST: ("grozi-val-old-cl",) 28 | INPUT: 29 | MIN_SIZE_TRAIN: (480, 600, 768, 960, 1152, 1344, 1536) 30 | MAX_SIZE_TRAIN: 2048 31 | MIN_SIZE_TEST: 960 32 | MAX_SIZE_TEST: 1280 33 | DATALOADER: 34 | SIZE_DIVISIBILITY: 32 35 | SOLVER: 36 | BASE_LR: 0.02 37 | WEIGHT_DECAY: 0.0001 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | CHECKPOINT_PERIOD: 100000 41 | -------------------------------------------------------------------------------- /baselines/detector_retrieval/detector/config/e2e_faster_rcnn_R_50_FPN_1x_multiscale_noClasses.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | SCORE_THRESH: -10.0 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 7 20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | NUM_CLASSES: 0 25 | DATASETS: 26 | TRAIN: ("grozi-train",) 27 | TEST: ("grozi-val-all",) 28 | INPUT: 29 | MIN_SIZE_TRAIN: (480, 600, 768, 960, 1152, 1344, 1536) 30 | MAX_SIZE_TRAIN: 2048 31 | MIN_SIZE_TEST: 960 32 | MAX_SIZE_TEST: 1280 33 | DATALOADER: 34 | SIZE_DIVISIBILITY: 32 35 | SOLVER: 36 | BASE_LR: 0.02 37 | WEIGHT_DECAY: 0.0001 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | CHECKPOINT_PERIOD: 100000 41 | -------------------------------------------------------------------------------- /baselines/detector_retrieval/detector/config/e2e_faster_rcnn_R_50_FPN_1x_multiscale_noClasses_fromPytorch.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" 4 | BACKBONE: 5 | CONV_BODY: "R-50-FPN" 6 | RESNETS: 7 | BACKBONE_OUT_CHANNELS: 256 8 | RPN: 9 | USE_FPN: True 10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64) 11 | PRE_NMS_TOP_N_TRAIN: 2000 12 | PRE_NMS_TOP_N_TEST: 1000 13 | POST_NMS_TOP_N_TEST: 1000 14 | FPN_POST_NMS_TOP_N_TEST: 1000 15 | ROI_HEADS: 16 | USE_FPN: True 17 | SCORE_THRESH: -10.0 18 | ROI_BOX_HEAD: 19 | POOLER_RESOLUTION: 7 20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) 21 | POOLER_SAMPLING_RATIO: 2 22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" 23 | PREDICTOR: "FPNPredictor" 24 | NUM_CLASSES: 0 25 | DATASETS: 26 | 
TRAIN: ("grozi-train",) 27 | TEST: ("grozi-val-all",) 28 | INPUT: 29 | MIN_SIZE_TRAIN: (480, 600, 768, 960, 1152, 1344, 1536) 30 | MAX_SIZE_TRAIN: 2048 31 | MIN_SIZE_TEST: 960 32 | MAX_SIZE_TEST: 1280 33 | TO_BGR255: False 34 | PIXEL_MEAN: (0.485, 0.456, 0.406) 35 | PIXEL_STD: (0.229, 0.224, 0.225) 36 | DATALOADER: 37 | SIZE_DIVISIBILITY: 32 38 | SOLVER: 39 | BASE_LR: 0.02 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | CHECKPOINT_PERIOD: 100000 44 | -------------------------------------------------------------------------------- /baselines/detector_retrieval/detector/engine_inference.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import logging 3 | import time 4 | import os 5 | from collections import OrderedDict 6 | from tqdm import tqdm 7 | 8 | import torch 9 | 10 | from maskrcnn_benchmark.config import cfg 11 | from maskrcnn_benchmark.data.datasets.evaluation import evaluate 12 | from maskrcnn_benchmark.utils.comm import is_main_process, get_world_size 13 | from maskrcnn_benchmark.utils.comm import all_gather 14 | from maskrcnn_benchmark.utils.comm import synchronize 15 | from maskrcnn_benchmark.utils.timer import Timer, get_time_str 16 | from maskrcnn_benchmark.engine.bbox_aug import im_detect_bbox_aug 17 | 18 | from os2d.data.voc_eval import do_voc_evaluation as eval_detection_voc 19 | from detector_data import convert_boxlist_maskrcnn_to_os2d 20 | 21 | def compute_on_dataset(model, data_loader, device, timer=None): 22 | model.eval() 23 | results_dict = {} 24 | cpu_device = torch.device("cpu") 25 | for _, batch in enumerate(tqdm(data_loader)): 26 | images, targets, image_ids = batch 27 | with torch.no_grad(): 28 | if timer: 29 | timer.tic() 30 | if cfg.TEST.BBOX_AUG.ENABLED: 31 | output = im_detect_bbox_aug(model, images, device) 32 | else: 33 | output = model(images.to(device)) 34 | if timer: 35 | if not cfg.MODEL.DEVICE == 'cpu': 36 | torch.cuda.synchronize() 37 | timer.toc() 38 | output = [o.to(cpu_device) for o in output] 39 | results_dict.update( 40 | {img_id: result for img_id, result in zip(image_ids, output)} 41 | ) 42 | return results_dict 43 | 44 | 45 | def _accumulate_predictions_from_multiple_gpus(predictions_per_gpu): 46 | all_predictions = all_gather(predictions_per_gpu) 47 | if not is_main_process(): 48 | return 49 | # merge the list of dicts 50 | predictions = {} 51 | for p in all_predictions: 52 | predictions.update(p) 53 | # convert a dict where the key is the index in a list 54 | image_ids = list(sorted(predictions.keys())) 55 | if len(image_ids) != image_ids[-1] + 1: 56 | logger = logging.getLogger("maskrcnn_benchmark.inference") 57 | logger.warning( 58 | "Number of images that were gathered from multiple processes is not " 59 | "a contiguous set. 
Some images might be missing from the evaluation" 60 | ) 61 | 62 | # convert to a list 63 | predictions = [predictions[i] for i in image_ids] 64 | return predictions 65 | 66 | 67 | def inference( 68 | model, 69 | data_loader, 70 | dataset_name, 71 | device="cuda" 72 | ): 73 | # convert to a torch.device for efficiency 74 | device = torch.device(device) 75 | num_devices = get_world_size() 76 | logger = logging.getLogger("maskrcnn_benchmark.inference") 77 | dataset = data_loader.dataset 78 | logger.info("Start evaluation on {} dataset({} images).".format(dataset_name, len(dataset))) 79 | total_timer = Timer() 80 | inference_timer = Timer() 81 | total_timer.tic() 82 | predictions = compute_on_dataset(model, data_loader, device, inference_timer) 83 | # wait for all processes to complete before measuring the time 84 | synchronize() 85 | total_time = total_timer.toc() 86 | total_time_str = get_time_str(total_time) 87 | logger.info( 88 | "Total run time: {} ({} s / img per device, on {} devices)".format( 89 | total_time_str, total_time * num_devices / len(dataset), num_devices 90 | ) 91 | ) 92 | total_infer_time = get_time_str(inference_timer.total_time) 93 | logger.info( 94 | "Model inference time: {} ({} s / img per device, on {} devices)".format( 95 | total_infer_time, 96 | inference_timer.total_time * num_devices / len(dataset), 97 | num_devices, 98 | ) 99 | ) 100 | 101 | predictions = _accumulate_predictions_from_multiple_gpus(predictions) 102 | if not is_main_process(): 103 | return 104 | 105 | results = do_voc_evaluation(dataset=dataset, 106 | predictions=predictions, 107 | logger=logger) 108 | return results 109 | 110 | 111 | def do_voc_evaluation(dataset, predictions, logger): 112 | pred_boxlists = [] 113 | gt_boxlists = [] 114 | for image_id, prediction in enumerate(predictions): 115 | gt_boxlist = dataset.get_groundtruth(image_id) 116 | gt_boxlist = convert_boxlist_maskrcnn_to_os2d(gt_boxlist) 117 | gt_boxlists.append(gt_boxlist) 118 | prediction = convert_boxlist_maskrcnn_to_os2d(prediction) 119 | pred_boxlists.append(prediction) 120 | 121 | result = OrderedDict() 122 | for iou_thresh in [0.5]: 123 | ap_data = eval_detection_voc(pred_boxlists, gt_boxlists, iou_thresh=iou_thresh, use_07_metric=False) 124 | result[f"mAP@{iou_thresh}"] = ap_data['map'] 125 | result[f"recall@{iou_thresh}"] = ap_data['recall'] 126 | 127 | result_strs = ["{0}: {1:.4f}".format(k,v) for k, v in result.items()] 128 | result_str = ', '.join(result_strs) 129 | 130 | logger.info(result_str) 131 | return result 132 | -------------------------------------------------------------------------------- /baselines/detector_retrieval/detector/engine_trainer.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | import os 4 | import time 5 | from typing import Optional, Callable 6 | 7 | import numpy as np 8 | import torch 9 | from maskrcnn_benchmark.engine.trainer import reduce_loss_dict 10 | from maskrcnn_benchmark.utils.metric_logger import MetricLogger 11 | 12 | 13 | def do_train( 14 | model, 15 | data_loader, 16 | optimizer, 17 | scheduler, 18 | checkpointer, 19 | device, 20 | validation_period, 21 | checkpoint_period, 22 | arguments, 23 | run_validation): 24 | logger = logging.getLogger("maskrcnn_benchmark.trainer") 25 | logger.info("Start training") 26 | meters = MetricLogger(delimiter=" ") 27 | max_iter = len(data_loader) 28 | start_iter = arguments["iteration"] 29 | model.train() 30 | start_training_time = time.time() 31 | end = time.time() 32 
| 33 | saved_models = {} 34 | best_metric = float("-inf") 35 | best_model_iter = None 36 | 37 | for iteration, (images, targets, _) in enumerate(data_loader, start_iter): 38 | data_time = time.time() - end 39 | batch_start = time.time() 40 | arguments["iteration"] = iteration 41 | 42 | if iteration % validation_period == 0: 43 | results = validate_and_log(model, run_validation, iteration) 44 | 45 | first_dataset_results = next(iter(results.items())) 46 | dataset_name = first_dataset_results[0] 47 | metric_name = "mAP@0.5" 48 | metric = first_dataset_results[1][metric_name] 49 | 50 | if metric > best_metric: 51 | logger.info( f"Found a new current best model: iter {iteration}, {metric_name} on {dataset_name} = {metric:0.4f}" ) 52 | # checkpoint the best model 53 | best_metric = metric 54 | best_model_iter = iteration 55 | model_filename = 'model_best' 56 | checkpointer.save(model_filename, **arguments) 57 | 58 | if iteration % checkpoint_period == 0: 59 | model_filename = 'model_{:07d}'.format(iteration) 60 | checkpointer.save(model_filename, **arguments) 61 | saved_models[iteration] = model_filename + '.pth' 62 | 63 | model.train() 64 | 65 | scheduler.step() 66 | 67 | images = images.to(device) 68 | targets = [target.to(device) for target in targets] 69 | 70 | loss_dict = model(images, targets) 71 | 72 | losses = sum(loss for loss in loss_dict.values()) 73 | 74 | # reduce losses over all GPUs for logging purposes 75 | loss_dict_reduced = reduce_loss_dict(loss_dict) 76 | losses_reduced = sum(loss for loss in loss_dict_reduced.values()) 77 | meters.update(loss=losses_reduced, **loss_dict_reduced) 78 | 79 | optimizer.zero_grad() 80 | losses.backward() 81 | optimizer.step() 82 | 83 | batch_time = time.time() - batch_start 84 | end = time.time() 85 | meters.update(time=batch_time, data=data_time) 86 | 87 | eta_seconds = meters.time.global_avg * (max_iter - iteration) 88 | eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) 89 | 90 | logger.info( 91 | meters.delimiter.join( 92 | [ 93 | "eta: {eta}", 94 | "iter: {iter}", 95 | "{meters}", 96 | "lr: {lr:.6f}", 97 | "max mem: {memory:.0f}", 98 | ] 99 | ).format( 100 | eta=eta_string, 101 | iter=iteration, 102 | meters=str(meters), 103 | lr=optimizer.param_groups[0]["lr"], 104 | memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, 105 | ) 106 | ) 107 | 108 | losses_str = meters.delimiter.join( ["Loss: {:.4f}".format(losses.item())] + ["{0}: {1:.4f}".format(k, v.item()) for k,v in loss_dict_reduced.items()]) 109 | logger.info(losses_str) 110 | 111 | validate_and_log(model, run_validation, arguments["iteration"]) 112 | checkpointer.save("model_final", **arguments) 113 | 114 | if max_iter > 0: 115 | total_training_time = time.time() - start_training_time 116 | total_time_str = str(datetime.timedelta(seconds=total_training_time)) 117 | logger.info( 118 | "Total training time: {} ({:.4f} s / it)".format( 119 | total_time_str, total_training_time / (max_iter) 120 | ) 121 | ) 122 | 123 | 124 | def validate_and_log(model, run_validation, iteration): 125 | results = run_validation(model, iteration) 126 | return results 127 | -------------------------------------------------------------------------------- /baselines/detector_retrieval/detector/experiments/launcher_train_detector.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | from collections import OrderedDict 4 | 5 | from os2d.utils import launcher as launcher 6 | 7 | 8 | if __name__ == "__main__": 9 | # load default 
launcher parameters 10 | parser = launcher.create_args_parser() 11 | args = parser.parse_args() 12 | 13 | script_path = os.path.dirname(os.path.abspath(__file__)) 14 | detector_root_path = os.path.join(script_path, "..") 15 | config_path = os.path.abspath(os.path.join(detector_root_path, "config")) 16 | log_path = os.path.abspath(os.path.join(detector_root_path, "output")) 17 | 18 | os2d_path = os.path.abspath(os.path.join(detector_root_path, "..", "..", "..")) 19 | 20 | main_command = f"PYTHONPATH={os2d_path}:$PYTHONPATH python {os.path.join(detector_root_path, 'train_detector.py')}" 21 | 22 | exp_job_names = [] 23 | exp_log_paths = [] 24 | exp_commands = [] 25 | 26 | def add_job(job_id, 27 | model, 28 | use_classes, 29 | dataset, 30 | ): 31 | job_name = f"exp{job_id:04}-{model}-{'withCl' if use_classes else 'noCl'}-{dataset}" 32 | commands = [] 33 | 34 | d = OrderedDict() 35 | if model == "R-50" and use_classes: 36 | config_file = os.path.join(config_path, "e2e_faster_rcnn_R_50_FPN_1x_multiscale.yaml") 37 | elif model == "R-101" and use_classes: 38 | config_file = os.path.join(config_path, "e2e_faster_rcnn_R_101_FPN_1x_multiscale.yaml") 39 | elif model == "R-50" and not use_classes: 40 | config_file = os.path.join(config_path, "e2e_faster_rcnn_R_50_FPN_1x_multiscale_noClasses.yaml") 41 | elif model == "R-101" and not use_classes: 42 | config_file = os.path.join(config_path, "e2e_faster_rcnn_R_101_FPN_1x_multiscale_noClasses.yaml") 43 | else: 44 | raise RuntimeError(f"Do not know config for model {model} and use_classes {use_classes}") 45 | 46 | d["--config-file"] = config_file 47 | 48 | if dataset == "grozi": 49 | d["DATASETS.TRAIN"] = "[\\\"grozi-train\\\"]" 50 | if use_classes: 51 | d["DATASETS.TEST"] = "[\\\"grozi-val-old-cl\\\"]" 52 | else: 53 | d["DATASETS.TEST"] = "[\\\"grozi-val-all\\\"]" 54 | d["INPUT.MIN_SIZE_TRAIN"] = "[480,600,768,960,1152,1344,1536]" 55 | d["INPUT.MAX_SIZE_TRAIN"] = 2048 56 | d["INPUT.MIN_SIZE_TEST"] = 960 57 | d["INPUT.MAX_SIZE_TEST"] = 1280 58 | elif dataset == "instre-s1": 59 | d["DATASETS.TRAIN"] = "[\\\"instre-s1-train\\\"]" 60 | d["DATASETS.TEST"] = "[\\\"instre-s1-val\\\"]" 61 | d["INPUT.MIN_SIZE_TRAIN"] = "[210,262,336,420,504,588,672]" 62 | d["INPUT.MAX_SIZE_TRAIN"] = 2048 63 | d["INPUT.MIN_SIZE_TEST"] = 420 64 | d["INPUT.MAX_SIZE_TEST"] = 1280 65 | elif dataset == "instre-s2": 66 | d["DATASETS.TRAIN"] = "[\\\"instre-s2-train\\\"]" 67 | d["DATASETS.TEST"] = "[\\\"instre-s2-val\\\"]" 68 | d["INPUT.MIN_SIZE_TRAIN"] = "[180,225,288,360,432,504,576]" 69 | d["INPUT.MAX_SIZE_TRAIN"] = 2048 70 | d["INPUT.MIN_SIZE_TEST"] = 360 71 | d["INPUT.MAX_SIZE_TEST"] = 1280 72 | else: 73 | raise RuntimeError(f"Unknown dataset {dataset}") 74 | 75 | log_folder = os.path.join(log_path, job_name) 76 | 77 | d["OUTPUT_DIR"] = log_folder 78 | 79 | commands.append(main_command + " " + launcher.parameters_to_str(d)) 80 | 81 | # testing 82 | if not use_classes: 83 | d_testing = OrderedDict() 84 | d_testing["--test_weights"] = os.path.join(log_folder, "model_best.pth") 85 | d_testing.update(d) 86 | 87 | datasets_test = ["[\\\"grozi-val-all\\\"]", 88 | "[\\\"instre-s1-val\\\",\\\"instre-s1-test\\\"]", 89 | "[\\\"instre-s2-val\\\",\\\"instre-s2-test\\\"]"] 90 | scales_test = ["[480,600,768,960,1152,1344,1536]", 91 | "[210,262,336,420,504,588,672]", 92 | "[180,225,288,360,432,504,576]"] 93 | 94 | for dataset, scales in zip(datasets_test, scales_test): 95 | d_testing_local = copy.deepcopy(d_testing) 96 | d_testing_local["DATASETS.TEST"] = dataset 97 | 
d_testing_local["TEST.BBOX_AUG.ENABLED"] = True 98 | d_testing_local["TEST.BBOX_AUG.SCALES"] = scales 99 | 100 | commands.append(main_command + " " + launcher.parameters_to_str(d_testing_local)) 101 | 102 | exp_job_names.append(job_name) 103 | exp_commands.append(commands) 104 | exp_log_paths.append(log_folder) 105 | 106 | 107 | job_id = 0 108 | # Train class-agnostic detectors for the detector-retrieval baseline 109 | for model in ["R-50", "R-101"]: 110 | for dataset in ["grozi", "instre-s1", "instre-s2"]: 111 | add_job(job_id, model, False, dataset) 112 | job_id += 1 113 | 114 | # Train class-aware detectors as baselines 115 | for model in ["R-50", "R-101"]: 116 | add_job(job_id, model, True, "grozi") 117 | job_id += 1 118 | 119 | 120 | for job_name, log_path, commands in zip(exp_job_names, exp_log_paths, exp_commands): 121 | launcher.add_job(job_name=job_name, 122 | log_path=log_path, 123 | commands=commands) 124 | launcher.launch_all_jobs(args) 125 | -------------------------------------------------------------------------------- /baselines/detector_retrieval/detector/experiments/launcher_train_detector_imagenet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | from collections import OrderedDict 4 | 5 | from os2d.utils import launcher as launcher 6 | 7 | 8 | if __name__ == "__main__": 9 | # load default launcher parameters 10 | parser = launcher.create_args_parser() 11 | args = parser.parse_args() 12 | 13 | script_path = os.path.dirname(os.path.abspath(__file__)) 14 | detector_root_path = os.path.join(script_path, "..") 15 | config_path = os.path.abspath(os.path.join(detector_root_path, "config")) 16 | log_path = os.path.abspath(os.path.join(detector_root_path, "output")) 17 | 18 | os2d_path = os.path.abspath(os.path.join(detector_root_path, "..", "..", "..")) 19 | 20 | main_command = f"PYTHONPATH={os2d_path}:$PYTHONPATH python {os.path.join(detector_root_path, 'train_detector.py')}" 21 | 22 | exp_job_names = [] 23 | exp_log_paths = [] 24 | exp_commands = [] 25 | 26 | def add_job(job_id, 27 | model, 28 | dataset, 29 | init_weights, 30 | ): 31 | job_name = f"exp-{model}-{dataset}-{init_weights}" 32 | commands = [] 33 | 34 | d = OrderedDict() 35 | if model == "R-50": 36 | if "pytorch" not in init_weights: 37 | config_file = os.path.join(config_path, "e2e_faster_rcnn_R_50_FPN_1x_multiscale_noClasses.yaml") 38 | else: 39 | config_file = os.path.join(config_path, "e2e_faster_rcnn_R_50_FPN_1x_multiscale_noClasses_fromPytorch.yaml") 40 | elif model == "R-101": 41 | if "pytorch" not in init_weights: 42 | config_file = os.path.join(config_path, "e2e_faster_rcnn_R_101_FPN_1x_multiscale_noClasses.yaml") 43 | else: 44 | config_file = os.path.join(config_path, "e2e_faster_rcnn_R_101_FPN_1x_multiscale_noClasses_fromPytorch.yaml") 45 | else: 46 | raise RuntimeError(f"Do not know config for model {model}") 47 | 48 | d["--validation_period"] = 5000 49 | d["--config-file"] = config_file 50 | 51 | if model == "R-50": 52 | if init_weights == "imagenet-repmet-pytorch": 53 | d["MODEL.WEIGHT"] = "../../../data/ImageNet-RepMet/pretrain/output/resnet50/model_best_maskrcnnbenchmark.pth.tar" 54 | elif init_weights == "imagenet-pytorch": 55 | d["MODEL.WEIGHT"] = "../../../models/resnet50-19c8e357.pth" 56 | elif init_weights == "imagenet-caffe": 57 | pass 58 | else: 59 | raise RuntimeError(f"Do not recognize weight initialization {init_weights}") 60 | elif model == "R-101": 61 | if init_weights == "imagenet-repmet-pytorch": 62 | 
d["MODEL.WEIGHT"] = "../../../data/ImageNet-RepMet/pretrain/output/resnet101/model_best_maskrcnnbenchmark.pth.tar" 63 | elif init_weights == "imagenet-pytorch": 64 | d["MODEL.WEIGHT"] = "../../../models/resnet101-5d3b4d8f.pth" 65 | elif init_weights == "imagenet-caffe": 66 | pass 67 | else: 68 | raise RuntimeError(f"Do not recognize weight initialization {init_weights}") 69 | else: 70 | raise RuntimeError(f"Do not know config for model {model}") 71 | 72 | if dataset == "imagenet-repmet": 73 | d["DATASETS.TRAIN"] = "[\\\"imagenet-repmet-train\\\"]" 74 | d["DATASETS.TEST"] = "[\\\"imagenet-repmet-val-5000\\\"]" # crop val set from 50k images to 5k GT boxes 75 | d["INPUT.MIN_SIZE_TRAIN"] = "[225,280,360,450,540,630,720]" 76 | d["INPUT.MAX_SIZE_TRAIN"] = 2048 77 | d["INPUT.MIN_SIZE_TEST"] = 450 78 | d["INPUT.MAX_SIZE_TEST"] = 1280 79 | else: 80 | raise RuntimeError(f"Unknown dataset {dataset}") 81 | 82 | log_folder = os.path.join(log_path, job_name) 83 | 84 | d["OUTPUT_DIR"] = log_folder 85 | 86 | commands.append(main_command + " " + launcher.parameters_to_str(d)) 87 | 88 | # testing 89 | d_testing = OrderedDict() 90 | d_testing["--test_weights"] = os.path.join(log_folder, "model_best.pth") 91 | d_testing.update(d) 92 | 93 | datasets_test = ["[\\\"imagenet-repmet-val-5000\\\"]"] 94 | scales_test = ["[180,225,288,360,432,504,576]"] 95 | 96 | for dataset, scales in zip(datasets_test, scales_test): 97 | d_testing_local = copy.deepcopy(d_testing) 98 | d_testing_local["DATASETS.TEST"] = dataset 99 | d_testing_local["TEST.BBOX_AUG.ENABLED"] = True 100 | d_testing_local["TEST.BBOX_AUG.SCALES"] = scales 101 | 102 | commands.append(main_command + " " + launcher.parameters_to_str(d_testing_local)) 103 | 104 | exp_job_names.append(job_name) 105 | exp_commands.append(commands) 106 | exp_log_paths.append(log_folder) 107 | 108 | 109 | job_id = 0 110 | dataset = "imagenet-repmet" 111 | # Train class-agnostic detectors for the detector-retrieval baseline 112 | for model in ["R-101"]:# ["R-50", "R-101"]: 113 | for init_weights in ["imagenet-repmet-pytorch"]: # ["imagenet-repmet-pytorch", "imagenet-pytorch", "imagenet-caffe"]: 114 | add_job(job_id, model, dataset, init_weights) 115 | job_id += 1 116 | 117 | for job_name, log_path, commands in zip(exp_job_names, exp_log_paths, exp_commands): 118 | launcher.add_job(job_name=job_name, 119 | log_path=log_path, 120 | commands=commands) 121 | launcher.launch_all_jobs(args) 122 | -------------------------------------------------------------------------------- /baselines/detector_retrieval/experiments/launcher_grozi_eval_collect.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | from os2d.utils.logger import extract_map_value_from_os2d_log 5 | 6 | 7 | if __name__ == "__main__": 8 | config_path = os.path.dirname(os.path.abspath(__file__)) 9 | log_path = os.path.abspath(os.path.join(config_path, "..", "output/grozi")) 10 | config_job_name = "eval_grozi" 11 | 12 | datasets = ["grozi-val-old-cl", "grozi-val-new-cl", "dairy", "paste-v", "paste-f"] 13 | methods = ["det-ret-baseline-init", "det-ret-baseline-train"] 14 | table = pd.DataFrame(columns=["method"] + datasets) 15 | retrieval_multiscale = "ms" 16 | 17 | for method in methods: 18 | row = {} 19 | row["method"] = method 20 | for eval_dataset in datasets: 21 | suffix = "initModel" if "init" in method else "bestModel" 22 | suffix += "_" + retrieval_multiscale 23 | 24 | result_file = os.path.join(f"{config_job_name}.{method}", 25 | 
f"eval_{eval_dataset}_{suffix}_out.txt") 26 | value = extract_map_value_from_os2d_log(os.path.join(log_path, result_file), 27 | eval_dataset) 28 | row[eval_dataset] = value 29 | table = table.append(row, ignore_index=True) 30 | 31 | with pd.option_context('display.max_rows', None, 'display.max_columns', None): 32 | print(table, sep='\n') 33 | -------------------------------------------------------------------------------- /baselines/detector_retrieval/experiments/launcher_imagenet_eval_collect.py: -------------------------------------------------------------------------------- 1 | import os 2 | import statistics 3 | import pandas as pd 4 | 5 | from os2d.utils.logger import extract_pattern_after_marked_line, numeric_const_pattern 6 | 7 | 8 | MISSING_VAL_CONSTANT = "None" 9 | 10 | 11 | def mAP_percent_to_points(v): 12 | if v is not None: 13 | return float(v)*100 14 | else: 15 | return MISSING_VAL_CONSTANT 16 | 17 | 18 | def extract_map_value_from_os2d_log(result_file, eval_dataset, metric_name="mAP@0.50"): 19 | dataset_search_pattern = "Evaluated on {0}" 20 | dataset_pattern = dataset_search_pattern.format(eval_dataset) 21 | eval_pattern = f"{metric_name}\s({numeric_const_pattern})" 22 | 23 | value = extract_pattern_after_marked_line(result_file, dataset_pattern, eval_pattern) 24 | return mAP_percent_to_points(value) 25 | 26 | 27 | if __name__ == "__main__": 28 | config_path = os.path.dirname(os.path.abspath(__file__)) 29 | config_job_name = "eval_imagenet_repmet" 30 | 31 | log_path = os.path.abspath(os.path.join(config_path, "..", "output/imagenet-repmet")) 32 | 33 | 34 | def get_result(sub_index, 35 | episodes, 36 | metric_names=["mAP@0.50"], 37 | folder_suffix="", 38 | result_suffix="out.txt"): 39 | # set output folder 40 | log_folder = f"{config_job_name}" 41 | if folder_suffix: 42 | log_folder += "." 
+ folder_suffix 43 | log_folder = os.path.join(log_path, log_folder) 44 | 45 | values = [] 46 | for episode in episodes: 47 | eval_dataset = f"imagenet-repmet-test-episode-{episode}" 48 | 49 | result_file = f"eval_epi{min(episodes)}-{max(episodes)}_{result_suffix}" 50 | result_file = os.path.join(log_folder, result_file) 51 | values_one_run = {} 52 | for m in metric_names: 53 | values_one_run[m] = extract_map_value_from_os2d_log(result_file, eval_dataset, metric_name=m) 54 | values.append(values_one_run) 55 | 56 | return values 57 | 58 | 59 | def collect_run_results(folder_suffix, result_suffix): 60 | num_episodes = 500 61 | episode_per_job = 50 62 | index = 0 63 | i_episode = 0 64 | 65 | metric_names = ["mAP@0.50", "AP_joint_classes@0.50"] 66 | computed_episodes_metric = {m:[] for m in metric_names} 67 | 68 | while i_episode < num_episodes: 69 | list_of_episodes = list(range(i_episode, min(i_episode + episode_per_job, num_episodes))) 70 | results = get_result(sub_index=index, 71 | episodes=list_of_episodes, 72 | metric_names=metric_names, 73 | folder_suffix=folder_suffix, 74 | result_suffix=result_suffix, 75 | ) 76 | 77 | for e, r in zip(list_of_episodes, results): 78 | for m in metric_names: 79 | if r[m] == MISSING_VAL_CONSTANT: 80 | print(f"Missing episode {e} from chunk {index}") 81 | else: 82 | computed_episodes_metric[m].append(r[m]) 83 | 84 | index += 1 85 | i_episode += episode_per_job 86 | 87 | for metric_name in metric_names: 88 | collected_metric = computed_episodes_metric[metric_name] 89 | average_val = sum(collected_metric) / len(collected_metric) 90 | max_val = max(collected_metric) 91 | min_val = min(collected_metric) 92 | std_val = statistics.stdev(collected_metric) 93 | print(f"{folder_suffix}: {len(collected_metric)} episodes; average {metric_name} = {average_val:0.2f}; max {metric_name} = {max_val:0.2f}; min {metric_name} = {min_val:0.2f}; std {metric_name} = {std_val:0.2f};") 94 | 95 | 96 | for init_weights in ["imagenet-repmet-pytorch"]: # ["imagenet-repmet-pytorch", "imagenet-pytorch", "imagenet-caffe"]: 97 | collect_run_results(folder_suffix=f"det-ret-baseline-train-resnet101-{init_weights}-imagenet-repmet", result_suffix="bestModel_ms_out.txt") 98 | -------------------------------------------------------------------------------- /baselines/detector_retrieval/experiments/launcher_instre_eval_collect.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | from os2d.utils.logger import extract_map_value_from_os2d_log 5 | 6 | 7 | if __name__ == "__main__": 8 | config_path = os.path.dirname(os.path.abspath(__file__)) 9 | log_path = os.path.abspath(os.path.join(config_path, "..", "output/instre")) 10 | config_job_name = "eval_instre" 11 | 12 | datasets = ["instre-s1", "instre-s2"] 13 | archs = ["resnet50", "resnet101"] 14 | method = "det-ret-baseline-train" 15 | retrieval_multiscale = "ms" 16 | table = pd.DataFrame(columns=["arch", "method"] + datasets) 17 | 18 | for arch in archs: 19 | row = {} 20 | row["arch"] = arch 21 | row["method"] = method 22 | for dataset in datasets: 23 | suffix = "initModel" if "init" in method else "bestModel" 24 | suffix += "_" + retrieval_multiscale 25 | 26 | eval_dataset = dataset + "-test" 27 | #eval_instre.det-ret-baseline-init-resnet101-instre-s1 28 | result_file = os.path.join(f"{config_job_name}.{method}-{arch}-{dataset}", 29 | # f"eval_{suffix}_out.txt") 30 | f"eval_{eval_dataset}_{suffix}_out.txt") 31 | value = 
extract_map_value_from_os2d_log(os.path.join(log_path, result_file), 32 | eval_dataset) 33 | row[dataset] = value 34 | table = table.append(row, ignore_index=True) 35 | 36 | with pd.option_context('display.max_rows', None, 'display.max_columns', None): 37 | print(table, sep='\n') 38 | -------------------------------------------------------------------------------- /baselines/detector_retrieval/main_detector_retrieval.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import argparse 4 | import numpy as np 5 | import warnings 6 | 7 | import torch 8 | 9 | from os2d.data.dataloader import build_eval_dataloaders_from_cfg 10 | from os2d.utils import set_random_seed, print_meters, get_data_path, setup_logger, mkdir 11 | from os2d.config import cfg 12 | 13 | from evaluate_detector_retrieval import evaluate, build_retrievalnet_from_options 14 | from utils_maskrcnn import build_maskrcnn_model 15 | 16 | 17 | def parse_opts(): 18 | parser = argparse.ArgumentParser(description="Evaluate detector-retrieval baseline") 19 | parser.add_argument( 20 | "--config-file", 21 | default="", 22 | metavar="FILE", 23 | help="path to config file", 24 | type=str, 25 | ) 26 | parser.add_argument("--output_dir", default=None, type=str, 27 | help="path where to save all the outputs") 28 | # retrieval opts 29 | # network 30 | parser.add_argument('--retrieval_network_path', type=str, 31 | help='network path, destination where network is saved') 32 | parser.add_argument('--retrieval_image_size', default=240, type=int, 33 | help='maximum size of longer image side used for testing (default: 240)') 34 | parser.add_argument('--retrieval_multiscale', action='store_true', 35 | help='use multiscale vectors for testing') 36 | parser.add_argument('--retrieval_whitening_path', default=None, type=str, 37 | help='path to add the whitening (default: None)') 38 | # maskrcnn opts 39 | # config 40 | parser.add_argument('--maskrcnn_config_file', type=str, 41 | help='network path, destination where network is saved') 42 | # weights 43 | parser.add_argument('--maskrcnn_weight_file', type=str, 44 | help='network path, destination where network is saved') 45 | parser.add_argument("--nms_iou_threshold_detector_score", default=0.3, type=float, 46 | help='first round of nms done w.r.t. the detector scores: IoU threshold') 47 | parser.add_argument("--nms_score_threshold_detector_score", default=0.1, type=float, 48 | help='first round of nms done w.r.t. 
the detector scores: score threshold') 49 | 50 | parser.add_argument( 51 | "opts", 52 | help="Modify config options using the command-line", 53 | default=None, 54 | nargs=argparse.REMAINDER, 55 | ) 56 | args = parser.parse_args() 57 | 58 | if args.config_file: 59 | cfg.merge_from_file(args.config_file) 60 | cfg.merge_from_list(args.opts) 61 | cfg.freeze() 62 | 63 | return cfg, args 64 | 65 | def init_logger(args, logger_prefix="detector-retrieval"): 66 | if args.output_dir: 67 | mkdir(args.output_dir) 68 | 69 | logger = setup_logger(logger_prefix, args.output_dir if args.output_dir else None) 70 | 71 | if args.config_file: 72 | with open(args.config_file, "r") as cf: 73 | config_str = "\n" + cf.read() 74 | logger.info(config_str) 75 | logger.info("Running with the default eval section:\n{}".format(cfg.eval)) 76 | else: 77 | logger.info("Launched with no OS2D config file") 78 | logger.info("Running args:\n{}".format(args)) 79 | return logger 80 | 81 | 82 | def main(): 83 | cfg, args = parse_opts() 84 | logger_prefix="detector-retrieval" 85 | logger = init_logger(args, logger_prefix) 86 | 87 | # set this to use faster convolutions 88 | if cfg.is_cuda: 89 | assert torch.cuda.is_available(), "Do not have available GPU, but cfg.is_cuda == 1" 90 | torch.backends.cudnn.benchmark = True 91 | 92 | # random seed 93 | set_random_seed(cfg.random_seed, cfg.is_cuda) 94 | 95 | # Load the detector 96 | maskrcnn_model, maskrcnn_config = build_maskrcnn_model(args.maskrcnn_config_file, args.maskrcnn_weight_file) 97 | 98 | # Load the retrieval network 99 | retrievalnet = build_retrievalnet_from_options(args, is_cuda=cfg.is_cuda) 100 | 101 | # load the dataset 102 | data_path = get_data_path() 103 | img_normalization = {"mean":cfg.model.normalization_mean, "std": cfg.model.normalization_std} # do not actually use this - will use normalization encoded in the config of maskrcnn-benchmark 104 | box_coder = None 105 | dataloaders_eval = build_eval_dataloaders_from_cfg(cfg, box_coder, img_normalization, 106 | data_path=data_path, 107 | logger_prefix=logger_prefix) 108 | 109 | # start evaluation 110 | for dataloader in dataloaders_eval: 111 | losses = evaluate(dataloader, maskrcnn_model, maskrcnn_config, retrievalnet, args, 112 | cfg_eval=cfg.eval, cfg_visualization=cfg.visualization.eval, is_cuda=cfg.is_cuda, 113 | logger_prefix=logger_prefix) 114 | 115 | 116 | if __name__ == "__main__": 117 | main() -------------------------------------------------------------------------------- /baselines/detector_retrieval/retrieval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aosokin/os2d/817ffb15a8cbc353e4f94debe419902d4dd83a22/baselines/detector_retrieval/retrieval/__init__.py -------------------------------------------------------------------------------- /baselines/detector_retrieval/retrieval/experiments/launcher_grozi.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | from collections import OrderedDict 4 | 5 | from os2d.utils import launcher as launcher 6 | 7 | 8 | if __name__ == "__main__": 9 | # load default launcher parameters 10 | parser = launcher.create_args_parser() 11 | args = parser.parse_args() 12 | 13 | script_path = os.path.dirname(os.path.abspath(__file__)) 14 | retrieval_path = os.path.join(script_path, "..") 15 | 16 | os2d_path = os.path.join(retrieval_path, "..", "..", "..") 17 | cirtorch_path = os.path.join(retrieval_path, "cnnimageretrieval-pytorch") 18 | 
detector_path = os.path.join(retrieval_path, "..", "detector") 19 | python_path = f"PYTHONPATH={os2d_path}:{cirtorch_path}" 20 | 21 | retrieval_train_launcher = f"{retrieval_path}/train.py" 22 | 23 | main_command = python_path + " " + f"python {retrieval_train_launcher}" 24 | log_folder_path = f"{retrieval_path}/output/grozi" 25 | main_command += " " + log_folder_path 26 | 27 | exp_commands = [] 28 | exp_job_names = [] 29 | exp_log_paths = [] 30 | 31 | def add_job(sub_index, 32 | training_dataset, 33 | arch, 34 | pool = "gem", 35 | test_whiten = False, 36 | local_whitening = False, 37 | regional = False, 38 | whitening = False, 39 | loss_margin = 0.85, 40 | image_size = 240, 41 | learning_rate = 1e-6, 42 | pretrained = True, 43 | loss = "contrastive", 44 | optimizer = "adam", 45 | weight_decay = 1e-4, 46 | neg_num = 5, 47 | query_size = 2000, 48 | pool_size = 20000, 49 | batch_size = 5): 50 | 51 | directory = "{}".format(training_dataset) 52 | directory += "_{}".format(arch) 53 | directory += "_{}".format(pool) 54 | if local_whitening: 55 | directory += "_lwhiten" 56 | if regional: 57 | directory += "_r" 58 | if whitening: 59 | directory += "_whiten" 60 | if not pretrained: 61 | directory += "_notpretrained" 62 | directory += "_{}_m{:.2f}".format(loss, loss_margin) 63 | directory += "_{}_lr{:.1e}_wd{:.1e}".format(optimizer, learning_rate, weight_decay) 64 | directory += "_nnum{}_qsize{}_psize{}".format(neg_num, query_size, pool_size) 65 | directory += "_bsize{}_imsize{}".format(batch_size, image_size) 66 | 67 | log_path = os.path.join(log_folder_path, directory) 68 | 69 | job_name = "ret-grozi-{0}.{1}".format(sub_index, directory) 70 | 71 | d = OrderedDict() 72 | d["--training-dataset"] = training_dataset 73 | if training_dataset == "grozi-train-retrieval": 74 | d["--test-datasets"] = "grozi-val-new-cl-retrieval,grozi-val-old-cl-retrieval" 75 | elif training_dataset == "grozi-train-retrieval-rndCropPerImage10": 76 | d["--test-datasets"] = "grozi-val-new-cl-retrieval-rndCropPerImage10,grozi-val-new-cl-retrieval-rndCropPerImage10" 77 | else: 78 | raise RuntimeError(f"Unknown training set {training_dataset}") 79 | 80 | if test_whiten: 81 | d["--test-whiten"] = training_dataset 82 | 83 | d["--arch"] = arch 84 | d["--pool"] = pool 85 | if local_whitening: 86 | d["--local-whitening"] = "" 87 | if regional: 88 | d["--regional"] = "" 89 | if whitening: 90 | d["--whitening"] = "" 91 | d["--loss-margin"] = loss_margin 92 | d["--image-size"] = image_size 93 | d["--learning-rate"] = learning_rate 94 | if not pretrained: 95 | d["--not-pretrained"] = "" 96 | d["--loss"] = loss 97 | d["--optimizer"] = optimizer 98 | d["--weight-decay"] = weight_decay 99 | d["--neg-num"] = neg_num 100 | d["--query-size"] = query_size 101 | d["--pool-size"] = pool_size 102 | d["--batch-size"] = batch_size 103 | 104 | commands = [] 105 | commands.append(main_command + " " + launcher.parameters_to_str(d)) 106 | 107 | exp_job_names.append(job_name) 108 | exp_log_paths.append(log_path) 109 | exp_commands.append(commands) 110 | 111 | 112 | job_index = 0 113 | pool = "gem" 114 | training_dataset = "grozi-train-retrieval-rndCropPerImage10" 115 | 116 | for arch in ["resnet50", "resnet101"]: 117 | for whitening in [True, False]: 118 | add_job(job_index, training_dataset=training_dataset, arch=arch, pool=pool, whitening=whitening) 119 | job_index += 1 120 | 121 | for job_name, log_path, commands in zip(exp_job_names, exp_log_paths, exp_commands): 122 | launcher.add_job(job_name=job_name, 123 | log_path=log_path, 124 | 
commands=commands) 125 | launcher.launch_all_jobs(args) 126 | -------------------------------------------------------------------------------- /baselines/detector_retrieval/retrieval/experiments/launcher_instre.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | from collections import OrderedDict 4 | 5 | from os2d.utils import launcher as launcher 6 | 7 | 8 | if __name__ == "__main__": 9 | # load default launcher parameters 10 | parser = launcher.create_args_parser() 11 | args = parser.parse_args() 12 | 13 | script_path = os.path.dirname(os.path.abspath(__file__)) 14 | retrieval_path = os.path.join(script_path, "..") 15 | 16 | os2d_path = os.path.join(retrieval_path, "..", "..", "..") 17 | cirtorch_path = os.path.join(retrieval_path, "cnnimageretrieval-pytorch") 18 | detector_path = os.path.join(retrieval_path, "..", "detector") 19 | python_path = f"PYTHONPATH={os2d_path}:{cirtorch_path}" 20 | 21 | retrieval_train_launcher = f"{retrieval_path}/train.py" 22 | full_eval_launcher = f"{retrieval_path}/../main_detector_retrieval.py" 23 | 24 | main_command = python_path + " " + f"python {retrieval_train_launcher}" 25 | log_folder_path = f"{retrieval_path}/output/instre" 26 | main_command += " " + log_folder_path 27 | 28 | exp_commands = [] 29 | exp_job_names = [] 30 | exp_log_paths = [] 31 | 32 | def add_job(sub_index, 33 | training_dataset, 34 | arch, 35 | pool = "gem", 36 | test_whiten = False, 37 | local_whitening = False, 38 | regional = False, 39 | whitening = False, 40 | loss_margin = 0.85, 41 | image_size = 240, 42 | learning_rate = 1e-6, 43 | pretrained = True, 44 | loss = "contrastive", 45 | optimizer = "adam", 46 | weight_decay = 1e-4, 47 | neg_num = 5, 48 | query_size = 2000, 49 | pool_size = 20000, 50 | batch_size = 5, 51 | ): 52 | 53 | directory = "{}".format(training_dataset) 54 | directory += "_{}".format(arch) 55 | directory += "_{}".format(pool) 56 | if local_whitening: 57 | directory += "_lwhiten" 58 | if regional: 59 | directory += "_r" 60 | if whitening: 61 | directory += "_whiten" 62 | if not pretrained: 63 | directory += "_notpretrained" 64 | directory += "_{}_m{:.2f}".format(loss, loss_margin) 65 | directory += "_{}_lr{:.1e}_wd{:.1e}".format(optimizer, learning_rate, weight_decay) 66 | directory += "_nnum{}_qsize{}_psize{}".format(neg_num, query_size, pool_size) 67 | directory += "_bsize{}_imsize{}".format(batch_size, image_size) 68 | 69 | log_path = os.path.join(log_folder_path, directory) 70 | 71 | job_name = "ret-instre-{0}.{1}".format(sub_index, directory) 72 | 73 | d = OrderedDict() 74 | d["--training-dataset"] = training_dataset 75 | if training_dataset == "instre-s1-train-retrieval": 76 | d["--test-datasets"] = "instre-s1-val-retrieval" 77 | elif training_dataset == "instre-s2-train-retrieval": 78 | d["--test-datasets"] = "instre-s2-val-retrieval" 79 | elif training_dataset == "instre-s1-train-retrieval-rndCropPerImage10": 80 | d["--test-datasets"] = "instre-s1-val-retrieval-rndCropPerImage10" 81 | elif training_dataset == "instre-s2-train-retrieval-rndCropPerImage10": 82 | d["--test-datasets"] = "instre-s2-val-retrieval-rndCropPerImage10" 83 | else: 84 | raise RuntimeError(f"Unknown training set {training_dataset}") 85 | 86 | if test_whiten: 87 | d["--test-whiten"] = training_dataset 88 | 89 | d["--arch"] = arch 90 | d["--pool"] = pool 91 | if local_whitening: 92 | d["--local-whitening"] = "" 93 | if regional: 94 | d["--regional"] = "" 95 | if whitening: 96 | d["--whitening"] = "" 97 | 
d["--loss-margin"] = loss_margin 98 | d["--image-size"] = image_size 99 | d["--learning-rate"] = learning_rate 100 | if not pretrained: 101 | d["--not-pretrained"] = "" 102 | d["--loss"] = loss 103 | d["--optimizer"] = optimizer 104 | d["--weight-decay"] = weight_decay 105 | d["--neg-num"] = neg_num 106 | d["--query-size"] = query_size 107 | d["--pool-size"] = pool_size 108 | d["--batch-size"] = batch_size 109 | 110 | commands = [] 111 | commands.append(main_command + " " + launcher.parameters_to_str(d)) 112 | 113 | exp_job_names.append(job_name) 114 | exp_log_paths.append(log_path) 115 | exp_commands.append(commands) 116 | 117 | 118 | job_index = 0 119 | 120 | training_datasets = ["instre-s1-train-retrieval-rndCropPerImage10", 121 | "instre-s2-train-retrieval-rndCropPerImage10"] 122 | archs = ["resnet50", "resnet101"] 123 | pool = "gem" 124 | whitening = True 125 | 126 | for training_dataset in training_datasets: 127 | for arch in archs: 128 | add_job(job_index, training_dataset=training_dataset, arch=arch, pool=pool, whitening=whitening) 129 | job_index += 1 130 | 131 | for job_name, log_path, commands in zip(exp_job_names, exp_log_paths, exp_commands): 132 | launcher.add_job(job_name=job_name, 133 | log_path=log_path, 134 | commands=commands) 135 | launcher.launch_all_jobs(args) 136 | -------------------------------------------------------------------------------- /baselines/detector_retrieval/retrieval/prepare_all_datasets.sh: -------------------------------------------------------------------------------- 1 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 2 | 3 | CIRTORCH_PATH=${SCRIPT_DIR}/cnnimageretrieval-pytorch 4 | OS2D_PATH=${SCRIPT_DIR}/../../.. 5 | export PYTHONPATH=${OS2D_PATH}:${CIRTORCH_PATH}:${PYTHONPATH} 6 | 7 | python prepare_dataset_retrieval.py --dataset-train grozi-train --dataset-train-scale 1280 --dataset-val grozi-val-new-cl --dataset-val-scale 1280 --datasets-test grozi-val-old-cl grozi-val-new-cl --datasets-test-scale 1280 8 | python prepare_dataset_retrieval.py --dataset-train grozi-train --dataset-train-scale 1280 --dataset-val grozi-val-new-cl --dataset-val-scale 1280 --datasets-test grozi-val-old-cl grozi-val-new-cl --datasets-test-scale 1280 --num-random-crops-per-image 10 9 | 10 | python prepare_dataset_retrieval.py --dataset-train instre-s1-train --dataset-train-scale 700 --dataset-val instre-s1-val --dataset-val-scale 700 --datasets-test instre-s1-val instre-s1-test --datasets-test-scale 700 11 | python prepare_dataset_retrieval.py --dataset-train instre-s1-train --dataset-train-scale 700 --dataset-val instre-s1-val --dataset-val-scale 700 --datasets-test instre-s1-val instre-s1-test --datasets-test-scale 700 --num-random-crops-per-image 10 12 | 13 | python prepare_dataset_retrieval.py --dataset-train instre-s1-train --dataset-train-scale 700 --dataset-val instre-s1-val --dataset-val-scale 700 --datasets-test instre-s1-val instre-s1-test --datasets-test-scale 700 14 | python prepare_dataset_retrieval.py --dataset-train instre-s1-train --dataset-train-scale 700 --dataset-val instre-s1-val --dataset-val-scale 700 --datasets-test instre-s1-val instre-s1-test --datasets-test-scale 700 --num-random-crops-per-image 10 15 | -------------------------------------------------------------------------------- /baselines/detector_retrieval/retrieval/prepare_datasets_imagenet.sh: -------------------------------------------------------------------------------- 1 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 2 | 3 | 
CIRTORCH_PATH=${SCRIPT_DIR}/cnnimageretrieval-pytorch 4 | OS2D_PATH=${SCRIPT_DIR}/../../.. 5 | export PYTHONPATH=${OS2D_PATH}:${CIRTORCH_PATH}:${PYTHONPATH} 6 | 7 | python prepare_dataset_retrieval.py --dataset-train imagenet-repmet-train --dataset-train-scale 600 --dataset-val imagenet-repmet-val-5000 --dataset-val-scale 600 --datasets-test imagenet-repmet-val-5000 --datasets-test-scale 600 --num-queries-image-to-image 10 8 | 9 | -------------------------------------------------------------------------------- /baselines/detector_retrieval/retrieval/retrieval_data.py: -------------------------------------------------------------------------------- 1 | # Code partially copied from https://github.com/filipradenovic/cnnimageretrieval-pytorch/blob/v1.1/cirtorch/datasets/traindataset.py 2 | 3 | import os 4 | import pickle 5 | import numpy as np 6 | from functools import lru_cache 7 | 8 | from cirtorch.datasets.traindataset import TuplesDataset as TuplesDatasetOriginal 9 | from cirtorch.utils.general import get_data_root 10 | from cirtorch.datasets.datahelpers import default_loader, cid2filename 11 | from cirtorch.datasets.testdataset import config_imname, config_qimname 12 | from cirtorch.utils.evaluate import compute_map 13 | 14 | 15 | @lru_cache() 16 | def loader_hashed(path): 17 | return default_loader(path) 18 | 19 | 20 | def configdataset(dataset, dir_main): 21 | """ 22 | Fucntion started from https://github.com/filipradenovic/cnnimageretrieval-pytorch/blob/v1.1/cirtorch/datasets/testdataset.py 23 | """ 24 | # loading imlist, qimlist, and gnd, in cfg as a dict 25 | gnd_fname = os.path.join(dir_main, dataset, 'gnd_{}.pkl'.format(dataset)) 26 | with open(gnd_fname, 'rb') as f: 27 | cfg = pickle.load(f) 28 | cfg['gnd_fname'] = gnd_fname 29 | 30 | cfg['ext'] = '.jpg' 31 | cfg['qext'] = '.jpg' 32 | cfg['dir_data'] = os.path.join(dir_main, dataset) 33 | cfg['dir_images'] = os.path.join(cfg['dir_data'], 'jpg') 34 | 35 | cfg['n'] = len(cfg['imlist']) 36 | cfg['nq'] = len(cfg['qimlist']) 37 | 38 | cfg['im_fname'] = config_imname 39 | cfg['qim_fname'] = config_qimname 40 | 41 | cfg['dataset'] = dataset 42 | 43 | return cfg 44 | 45 | 46 | class TuplesDataset(TuplesDatasetOriginal): 47 | """ 48 | Inheriting from TuplesDataset from 49 | https://github.com/filipradenovic/cnnimageretrieval-pytorch/blob/v1.1/cirtorch/datasets/traindataset.py 50 | Need to add my own data 51 | """ 52 | def __init__(self, name, mode, imsize=None, nnum=5, qsize=2000, poolsize=20000, transform=None, loader=loader_hashed): 53 | 54 | if not (mode == 'train' or mode == 'val'): 55 | raise RuntimeError("MODE should be either train or val, passed as string") 56 | 57 | # setting up paths 58 | data_root = get_data_root() 59 | db_root = os.path.join(data_root, 'train', name) 60 | ims_root = os.path.join(db_root, 'ims') 61 | 62 | # loading db 63 | db_fn = os.path.join(db_root, '{}.pkl'.format(name)) 64 | with open(db_fn, 'rb') as f: 65 | db = pickle.load(f)[mode] 66 | 67 | # setting fullpath for images 68 | self.images = [cid2filename(db['cids'][i], ims_root) for i in range(len(db['cids']))] 69 | 70 | # initializing tuples dataset 71 | self.name = name 72 | self.mode = mode 73 | self.imsize = imsize 74 | self.clusters = db['cluster'] 75 | self.qpool = db['qidxs'] 76 | self.ppool = db['pidxs'] 77 | 78 | ## If we want to keep only unique q-p pairs 79 | ## However, ordering of pairs will change, although that is not important 80 | # qpidxs = list(set([(self.qidxs[i], self.pidxs[i]) for i in range(len(self.qidxs))])) 81 | # self.qidxs = 
[qpidxs[i][0] for i in range(len(qpidxs))] 82 | # self.pidxs = [qpidxs[i][1] for i in range(len(qpidxs))] 83 | 84 | # size of training subset for an epoch 85 | self.nnum = nnum 86 | self.qsize = min(qsize, len(self.qpool)) 87 | self.poolsize = min(poolsize, len(self.images)) 88 | self.qidxs = None 89 | self.pidxs = None 90 | self.nidxs = None 91 | 92 | self.transform = transform 93 | self.loader = loader 94 | 95 | self.print_freq = 10 96 | 97 | 98 | def compute_map_and_print(dataset, ranks, gnd, kappas=[1, 5, 10]): 99 | """ 100 | Function started from https://github.com/filipradenovic/cnnimageretrieval-pytorch/blob/v1.1/cirtorch/utils/evaluate.py 101 | """ 102 | # new evaluation protocol 103 | if dataset.startswith('roxford5k') or dataset.startswith('rparis6k'): 104 | 105 | gnd_t = [] 106 | for i in range(len(gnd)): 107 | g = {} 108 | g['ok'] = np.concatenate([gnd[i]['easy']]) 109 | g['junk'] = np.concatenate([gnd[i]['junk'], gnd[i]['hard']]) 110 | gnd_t.append(g) 111 | mapE, apsE, mprE, prsE = compute_map(ranks, gnd_t, kappas) 112 | 113 | gnd_t = [] 114 | for i in range(len(gnd)): 115 | g = {} 116 | g['ok'] = np.concatenate([gnd[i]['easy'], gnd[i]['hard']]) 117 | g['junk'] = np.concatenate([gnd[i]['junk']]) 118 | gnd_t.append(g) 119 | mapM, apsM, mprM, prsM = compute_map(ranks, gnd_t, kappas) 120 | 121 | gnd_t = [] 122 | for i in range(len(gnd)): 123 | g = {} 124 | g['ok'] = np.concatenate([gnd[i]['hard']]) 125 | g['junk'] = np.concatenate([gnd[i]['junk'], gnd[i]['easy']]) 126 | gnd_t.append(g) 127 | mapH, apsH, mprH, prsH = compute_map(ranks, gnd_t, kappas) 128 | 129 | print('>> {}: mAP E: {}, M: {}, H: {}'.format(dataset, np.around(mapE*100, decimals=2), np.around(mapM*100, decimals=2), np.around(mapH*100, decimals=2))) 130 | print('>> {}: mP@k{} E: {}, M: {}, H: {}'.format(dataset, kappas, np.around(mprE*100, decimals=2), np.around(mprM*100, decimals=2), np.around(mprH*100, decimals=2))) 131 | return mapM 132 | else: 133 | # old evaluation potocol 134 | map, aps, _, _ = compute_map(ranks, gnd) 135 | print('>> {}: mAP {:.2f}'.format(dataset, np.around(map*100, decimals=2))) 136 | return map 137 | -------------------------------------------------------------------------------- /baselines/detector_retrieval/utils_maskrcnn.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch 3 | 4 | from maskrcnn_benchmark.config import cfg 5 | from maskrcnn_benchmark.modeling.detector import build_detection_model 6 | from maskrcnn_benchmark.utils.model_serialization import load_state_dict 7 | from maskrcnn_benchmark.structures.image_list import to_image_list 8 | from maskrcnn_benchmark.data.transforms import transforms as maskrcnn_transforms 9 | 10 | from os2d.data.dataset import build_dataset_by_name 11 | 12 | 13 | def build_maskrcnn_model(config_path, weight_path): 14 | logger = logging.getLogger("detector-retrieval.build_retrievalnet") 15 | logger.info("Building the maskrcnn-benchmark model...") 16 | # get the config file 17 | cfg.merge_from_file(config_path) 18 | 19 | # check the number of classes 20 | if cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES == 0: 21 | cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES = 2 22 | 23 | assert cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES == 2, "We need a one-class detector, but have {0} classes".format(cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES) 24 | 25 | # build the model 26 | model = build_detection_model(cfg) 27 | 28 | # load the weights 29 | logger.info("Loading weights from {}".format(weight_path)) 30 | loaded = torch.load(weight_path, 
map_location=torch.device("cpu")) 31 | if "model" not in loaded: 32 | loaded = dict(model=loaded) 33 | load_state_dict(model, loaded.pop("model")) 34 | 35 | return model, cfg 36 | 37 | 38 | def run_maskrcnn_on_images(model, cfg, batch_images): 39 | # apply maskrcnn normalization 40 | to_bgr255 = cfg.INPUT.TO_BGR255 41 | normalize_transform = maskrcnn_transforms.Normalize( 42 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255) 43 | device = batch_images.device 44 | batch_images = [normalize_transform(img.cpu(), None).to(device=device) for img in batch_images] 45 | 46 | # convert images to maskrcnn format 47 | batch_images = to_image_list(batch_images, size_divisible=cfg.DATALOADER.SIZE_DIVISIBILITY) 48 | 49 | # model to GPU and eval mode 50 | model.to(device) 51 | model.eval() 52 | 53 | # run 54 | with torch.no_grad(): 55 | output = model(batch_images) 56 | if device.type == "cuda": 57 | torch.cuda.empty_cache() 58 | 59 | bboxes_xyxy = [o.bbox for o in output] 60 | labels = [o.get_field("labels") for o in output] 61 | scores = [o.get_field("scores") for o in output] 62 | 63 | return bboxes_xyxy, labels, scores 64 | -------------------------------------------------------------------------------- /data/ImageNet-RepMet/pretrain/convert_resnet_pytorch_to_cirtorch.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from collections import OrderedDict 3 | 4 | import torch 5 | 6 | 7 | def convert_model(model_file): 8 | model_pth = torch.load(model_file, map_location=torch.device("cpu")) 9 | exts = [".pth.tar", ".pth"] 10 | ext = None 11 | for e in exts: 12 | if model_file.endswith(e): 13 | ext = e 14 | break 15 | assert ext is not None, "Can only parse models saved to one of {} formats".format(exts) 16 | model_name = model_file[:-len(ext)] 17 | target_path = model_name + "_cirtorch" + ext 18 | 19 | print("Converting", model_file, 20 | "to", target_path) 21 | 22 | if "state_dict" in model_pth: 23 | state_dict_pth = model_pth["state_dict"] 24 | else: 25 | state_dict_pth = model_pth 26 | 27 | # Create the pytorch state_dict 28 | state_dict_cirtorch = OrderedDict() 29 | 30 | # create a map of prefix renamings 31 | prefix_map = OrderedDict() 32 | prefix_map["conv1."] = "features.0." 33 | prefix_map["bn1."] = "features.1." 34 | prefix_map["layer1."] = "features.4." 35 | prefix_map["layer2."] = "features.5." 36 | prefix_map["layer3."] = "features.6." 37 | prefix_map["layer4."] = "features.7." 
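# The "features.N" targets above follow torchvision's ResNet child ordering as wrapped
# by cnnimageretrieval-pytorch into an nn.Sequential feature extractor:
# conv1 -> features.0, bn1 -> features.1, relu -> features.2, maxpool -> features.3,
# layer1..layer4 -> features.4..features.7; relu and maxpool carry no parameters,
# so they need no prefix_map entries.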
38 | 39 | # rename layers and add to the pytorch model 40 | num_added_tensors = 0 41 | for k, v in state_dict_pth.items(): 42 | # find good prefix 43 | if k.startswith("module."): 44 | k = k[len("module."):] 45 | prefix = None 46 | for p in prefix_map.keys(): 47 | if k.startswith(p): 48 | if prefix is None: 49 | prefix = p 50 | else: 51 | print("For layer {0} found two prefixes: {1} or {2}".format(k, prefix, p)) 52 | if prefix is None: 53 | print("For layer {0} did not find any matching prefix!".format(k)) 54 | else: 55 | new_name = prefix_map[prefix] + k[len(prefix):] 56 | # print("Renaming {0} to {1}".format(k, new_name)) 57 | state_dict_cirtorch[new_name] = v 58 | num_added_tensors += 1 59 | print("Converted {0} tensors".format(num_added_tensors)) 60 | 61 | # saving the model 62 | state_dict_cirtorch = {"state_dict": state_dict_cirtorch} 63 | torch.save(state_dict_cirtorch, target_path) 64 | 65 | 66 | if __name__ == "__main__": 67 | parser = argparse.ArgumentParser(description="Converting pytorch ResNets to Caffe2-cirtorch") 68 | parser.add_argument("model", help="Path to the model to convert, the result will be save to the same folder") 69 | args = parser.parse_args() 70 | 71 | convert_model(args.model) 72 | -------------------------------------------------------------------------------- /data/ImageNet-RepMet/pretrain/convert_resnet_pytorch_to_maskrcnnbenchmark.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import torch 4 | 5 | 6 | def convert_model(model_file): 7 | model_pth = torch.load(model_file, map_location=torch.device("cpu")) 8 | 9 | exts = [".pth.tar", ".pth"] 10 | ext = None 11 | for e in exts: 12 | if model_file.endswith(e): 13 | ext = e 14 | break 15 | assert ext is not None, "Can only parse models saved to one of {} formats".format(exts) 16 | model_name = model_file[:-len(ext)] 17 | target_path = model_name + "_maskrcnnbenchmark" + ext 18 | 19 | print("Converting", model_file, 20 | "to", target_path) 21 | 22 | if "state_dict" in model_pth: 23 | state_dict_pth = model_pth["state_dict"] 24 | else: 25 | state_dict_pth = model_pth 26 | 27 | # saving the model 28 | torch.save(state_dict_pth, target_path) 29 | 30 | 31 | if __name__ == "__main__": 32 | parser = argparse.ArgumentParser(description="Converting pytorch ResNets to the ones maskrcnn-benchmark can init from") 33 | parser.add_argument("model", help="Path to the model to convert, the result will be save to the same folder") 34 | args = parser.parse_args() 35 | 36 | convert_model(args.model) 37 | -------------------------------------------------------------------------------- /data/ImageNet-RepMet/pretrain/prepare_data_exclude_test_classes.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | 4 | 5 | def main(): 6 | cur_path = os.path.dirname(os.path.abspath(__file__)) 7 | imagenet_train_images = os.path.join(cur_path, "..", "ILSVRC", "Data", "CLS-LOC", "train") 8 | imagenet_val_images = os.path.join(cur_path, "..", "ILSVRC", "Data", "CLS-LOC", "val") 9 | target_path = os.path.join(cur_path, "imagenet-repmet") 10 | target_path_train = os.path.join(target_path, "train") 11 | target_path_val = os.path.join(target_path, "val") 12 | validation_annotation_file = os.path.join(cur_path, "imagenet-repmet", "val_classes.txt") 13 | os.makedirs(target_path_train) 14 | os.makedirs(target_path_val) 15 | 16 | # get test classes to exclude 17 | repmet_test_classes_path = os.path.join(cur_path, "..", 
"repmet_test_classes.txt") 18 | with open(repmet_test_classes_path, "r") as fid: 19 | repmet_test_classes = fid.readlines() 20 | classes_to_exclude = {} 21 | for cl in repmet_test_classes: 22 | classes_to_exclude[cl[:-1]] = 1 # cut off the EOL symbol 23 | 24 | # loop over all train classes 25 | all_class_folders = glob.glob(os.path.join(imagenet_train_images, "n*")) 26 | for class_folder in all_class_folders: 27 | class_name = os.path.basename(class_folder) 28 | if class_name not in classes_to_exclude: 29 | os.symlink(class_folder, os.path.join(target_path_train, class_name)) 30 | 31 | # move validation into labeled subfolders 32 | for class_folder in all_class_folders: 33 | class_name = os.path.basename(class_folder) 34 | if class_name not in classes_to_exclude: 35 | os.makedirs(os.path.join(target_path_val, class_name)) 36 | 37 | with open(validation_annotation_file, "r") as fid: 38 | validation_annotation_lines = fid.readlines() 39 | for line in validation_annotation_lines: 40 | file_name, class_name = line.split(" ") 41 | class_name = class_name[:-1] # chop off the EOL symbol 42 | if class_name not in classes_to_exclude: 43 | os.symlink(os.path.join(imagenet_val_images, file_name), 44 | os.path.join(target_path_val, class_name, file_name)) 45 | 46 | if __name__ == "__main__": 47 | main() 48 | -------------------------------------------------------------------------------- /data/ImageNet-RepMet/repmet_test_classes.txt: -------------------------------------------------------------------------------- 1 | n01440764 2 | n01514668 3 | n01514859 4 | n01518878 5 | n01530575 6 | n01531178 7 | n01532829 8 | n01534433 9 | n01537544 10 | n01558993 11 | n01560419 12 | n01580077 13 | n01582220 14 | n01601694 15 | n01608432 16 | n01614925 17 | n01616318 18 | n01622779 19 | n01629819 20 | n01630670 21 | n01631663 22 | n01632458 23 | n01632777 24 | n01641577 25 | n01644900 26 | n01665541 27 | n01667114 28 | n01667778 29 | n01669191 30 | n01675722 31 | n01685808 32 | n01687978 33 | n01688243 34 | n01689811 35 | n01692333 36 | n01694178 37 | n01695060 38 | n01697457 39 | n01728572 40 | n01728920 41 | n01729322 42 | n01729977 43 | n01734418 44 | n01735189 45 | n01737021 46 | n01739381 47 | n01740131 48 | n01742172 49 | n01744401 50 | n01748264 51 | n01749939 52 | n01753488 53 | n01755581 54 | n01756291 55 | n01774750 56 | n01795545 57 | n01796340 58 | n01797886 59 | n01798484 60 | n01806143 61 | n01806567 62 | n01807496 63 | n01817953 64 | n01818515 65 | n01819313 66 | n01820546 67 | n01824575 68 | n01828970 69 | n01829413 70 | n01833805 71 | n01843065 72 | n01843383 73 | n01847000 74 | n01855032 75 | n01855672 76 | n01860187 77 | n01871265 78 | n01872401 79 | n01873310 80 | n01877812 81 | n01882714 82 | n01883070 83 | n02002556 84 | n02002724 85 | n02006656 86 | n02009229 87 | n02009912 88 | n02011460 89 | n02012849 90 | n02013706 91 | n02018207 92 | n02018795 93 | n02025239 94 | n02027492 95 | n02028035 96 | n02033041 97 | n02037110 98 | n02051845 99 | n02056570 100 | n02058221 101 | n02085620 102 | n02086240 103 | n02086646 104 | n02087394 105 | n02088238 106 | n02088364 107 | n02088632 108 | n02089973 109 | n02090379 110 | n02090622 111 | n02091032 112 | n02091244 113 | n02093428 114 | n02093647 115 | n02093754 116 | n02093859 117 | n02093991 118 | n02094258 119 | n02095314 120 | n02095570 121 | n02096585 122 | n02097047 123 | n02097209 124 | n02097298 125 | n02097474 126 | n02097658 127 | n02099267 128 | n02099429 129 | n02099601 130 | n02099712 131 | n02099849 132 | n02100236 133 | n02100877 134 | 
n02101006 135 | n02101388 136 | n02102318 137 | n02102480 138 | n02102973 139 | n02104029 140 | n02105251 141 | n02105412 142 | n02105855 143 | n02106030 144 | n02106166 145 | n02106662 146 | n02107142 147 | n02107312 148 | n02107574 149 | n02107908 150 | n02108089 151 | n02108422 152 | n02108551 153 | n02109525 154 | n02109961 155 | n02110627 156 | n02110958 157 | n02111129 158 | n02111500 159 | n02111889 160 | n02112018 161 | n02112137 162 | n02112350 163 | n02113023 164 | n02113624 165 | n02113799 166 | n02114367 167 | n02114548 168 | n02115641 169 | n02115913 170 | n02116738 171 | n02120079 172 | n02123045 173 | n02127052 174 | n02128757 175 | n02129165 176 | n02130308 177 | n02133161 178 | n02134084 179 | n02134418 180 | n02174001 181 | n02276258 182 | n02325366 183 | n02326432 184 | n02328150 185 | n02342885 186 | n02361337 187 | n02363005 188 | n02395406 189 | n02397096 190 | n02398521 191 | n02403003 192 | n02408429 193 | n02410509 194 | n02437312 195 | n02437616 196 | n02443114 197 | n02444819 198 | n02454379 199 | n02480855 200 | n02483362 201 | n02483708 202 | n02486261 203 | n02486410 204 | n02487347 205 | n02488291 206 | n02490219 207 | n02492035 208 | n02492660 209 | n02493793 210 | n02497673 211 | n02504013 212 | n02510455 213 | n02514041 214 | n02641379 215 | -------------------------------------------------------------------------------- /data/dataset_scales_imagenet.txt: -------------------------------------------------------------------------------- 1 | 2020-05-27 17:11:34,745 get_dataset_scales.dataset INFO: Preparing the dataset from the RepMet format: version imagenet-repmet-val-5000, eval scale None, image caching False 2 | 2020-05-27 17:11:53,588 get_dataset_scales.dataset INFO: GT images are not provided 3 | 2020-05-27 17:14:09,180 get_dataset_scales.dataset INFO: Found 4942 data images 4 | 2020-05-27 17:14:09,183 get_dataset_scales.dataset INFO: Loaded dataset imagenet-repmet-val-5000 with 4942 images, 5000 boxes, 1 classes 5 | Reading images from one-shot-detection/data/ImageNet-RepMet/ILSVRC/Data/CLS-LOC 6 | Found 4942 images 7 | Found 5000 non-difficult objects 8 | Average size of object = 455.74 for image size = 1000 9 | Median = 412.86, q10 = 105.80, q90 = 882.72 10 | To get objects to size 240, images should be of size 581 11 | -------------------------------------------------------------------------------- /data/demo/class_image_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aosokin/os2d/817ffb15a8cbc353e4f94debe419902d4dd83a22/data/demo/class_image_0.jpg -------------------------------------------------------------------------------- /data/demo/class_image_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aosokin/os2d/817ffb15a8cbc353e4f94debe419902d4dd83a22/data/demo/class_image_1.jpg -------------------------------------------------------------------------------- /data/demo/input_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aosokin/os2d/817ffb15a8cbc353e4f94debe419902d4dd83a22/data/demo/input_image.jpg -------------------------------------------------------------------------------- /data/get_dataset_scales.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | from collections import OrderedDict 4 | from tqdm import tqdm 5 | 6 | from os2d.utils import get_data_path, 
setup_logger 7 | from os2d.data.dataset import build_dataset_by_name 8 | from os2d.structures.feature_map import FeatureMapSize 9 | 10 | 11 | DATASET_LIST = ["grozi-train", "grozi-val-new-cl", "dairy", "paste-v", "paste-f", 12 | "instre-s1-train", "instre-s1-val", 13 | "instre-s2-train", "instre-s2-val"] 14 | 15 | 16 | def get_image_sizes(dataset): 17 | print("Reading images from {}".format(dataset.image_path)) 18 | image_sizes_by_id = OrderedDict() 19 | images_in_dataset = dataset.gtboxframe.groupby(["imageid", "imagefilename"]).size().reset_index() 20 | for _, datum in tqdm(images_in_dataset.iterrows()): 21 | img = dataset._get_dataset_image_by_id(datum["imageid"]) 22 | im_size = FeatureMapSize(img=img) 23 | image_sizes_by_id[datum["imageid"]] = im_size 24 | print("Found {} images".format(len(image_sizes_by_id))) 25 | return image_sizes_by_id 26 | 27 | 28 | def compute_average_object_size(gtboxframe, image_sizes_by_id): 29 | object_sizes = [] 30 | for _, datum in gtboxframe.iterrows(): 31 | image_id = datum["imageid"] 32 | 33 | img_size = image_sizes_by_id[image_id] 34 | box_w = (datum['rx'] - datum['lx']) * img_size.w 35 | box_h = (datum['by'] - datum['ty']) * img_size.h 36 | box_size = math.sqrt(box_w * box_h) 37 | if not datum['difficult']: 38 | object_sizes.append(box_size) 39 | print("Found {} non-difficult objects".format(len(object_sizes))) 40 | object_sizes.sort() 41 | median = object_sizes[len(object_sizes) // 2] 42 | q90 = object_sizes[len(object_sizes) * 9 // 10] 43 | q10 = object_sizes[len(object_sizes) // 10] 44 | return sum(object_sizes) / len(object_sizes), median, q10, q90 45 | 46 | 47 | def main(): 48 | target_object_size = 240 49 | data_path = get_data_path() 50 | logger = setup_logger("get_dataset_scales", None) 51 | 52 | for name in DATASET_LIST: 53 | dataset = build_dataset_by_name(data_path, name, eval_scale=None, logger_prefix="get_dataset_scales") 54 | 55 | image_sizes_by_id = get_image_sizes(dataset) 56 | average_size, median, q10, q90 = compute_average_object_size(dataset.gtboxframe, image_sizes_by_id) 57 | print("Average size of object = {0:0.2f} for image size = {1}".format(average_size, dataset.image_size)) 58 | print("Median = {0:0.2f}, q10 = {1:0.2f}, q90 = {2:0.2f}".format(median, q10, q90)) 59 | print("To get objects to size {0}, images should be of size {1:d}".format(target_object_size, int(dataset.image_size * target_object_size / median))) 60 | 61 | 62 | if __name__ == "__main__": 63 | main() 64 | -------------------------------------------------------------------------------- /data/get_dataset_scales_imagenet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | from collections import OrderedDict 4 | from tqdm import tqdm 5 | 6 | from os2d.utils import get_data_path, setup_logger 7 | from os2d.data.dataset import build_dataset_by_name 8 | from os2d.structures.feature_map import FeatureMapSize 9 | 10 | 11 | DATASET_LIST = ["imagenet-repmet-val-5000"] 12 | 13 | 14 | def get_image_sizes(dataset): 15 | print("Reading images from {}".format(dataset.image_path)) 16 | image_sizes_by_id = OrderedDict() 17 | images_in_dataset = dataset.gtboxframe.groupby(["imageid", "imagefilename"]).size().reset_index() 18 | for _, datum in tqdm(images_in_dataset.iterrows()): 19 | img = dataset._get_dataset_image_by_id(datum["imageid"]) 20 | im_size = FeatureMapSize(img=img) 21 | image_sizes_by_id[datum["imageid"]] = im_size 22 | print("Found {} images".format(len(image_sizes_by_id))) 23 | return 
image_sizes_by_id 24 | 25 | 26 | def compute_average_object_size(gtboxframe, image_sizes_by_id): 27 | object_sizes = [] 28 | for _, datum in gtboxframe.iterrows(): 29 | image_id = datum["imageid"] 30 | 31 | img_size = image_sizes_by_id[image_id] 32 | box_w = (datum['rx'] - datum['lx']) * img_size.w 33 | box_h = (datum['by'] - datum['ty']) * img_size.h 34 | box_size = math.sqrt(box_w * box_h) 35 | if not datum['difficult']: 36 | object_sizes.append(box_size) 37 | print("Found {} non-difficult objects".format(len(object_sizes))) 38 | object_sizes.sort() 39 | median = object_sizes[len(object_sizes) // 2] 40 | q90 = object_sizes[len(object_sizes) * 9 // 10] 41 | q10 = object_sizes[len(object_sizes) // 10] 42 | return sum(object_sizes) / len(object_sizes), median, q10, q90 43 | 44 | 45 | def main(): 46 | target_object_size = 240 47 | data_path = get_data_path() 48 | logger = setup_logger("get_dataset_scales", None) 49 | 50 | for name in DATASET_LIST: 51 | dataset = build_dataset_by_name(data_path, name, eval_scale=None, logger_prefix="get_dataset_scales") 52 | 53 | image_sizes_by_id = get_image_sizes(dataset) 54 | average_size, median, q10, q90 = compute_average_object_size(dataset.gtboxframe, image_sizes_by_id) 55 | print("Average size of object = {0:0.2f} for image size = {1}".format(average_size, dataset.image_size)) 56 | print("Median = {0:0.2f}, q10 = {1:0.2f}, q90 = {2:0.2f}".format(median, q10, q90)) 57 | print("To get objects to size {0}, images should be of size {1:d}".format(target_object_size, int(dataset.image_size * target_object_size / median))) 58 | 59 | 60 | if __name__ == "__main__": 61 | main() 62 | -------------------------------------------------------------------------------- /demo-api.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This is a demo illustrating an application of the OS2D method on one image.\n", 8 | "This demo assumes the OS2D API is running at port 80 of your machine, please follow instructions from: [Run OS2D as Service](./FASTAPI.md)." 
9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "from PIL import Image, ImageDraw\n", 18 | "import base64\n", 19 | "import json\n", 20 | "import requests" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "## Running Docker container" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "!docker run -d --rm \\\n", 37 | " --name os2d \\\n", 38 | " -p 80:80 \\\n", 39 | " -v $(pwd):/workspace \\\n", 40 | " os2d:latest \\\n", 41 | " uvicorn app:app --port 80 --host 0.0.0.0" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "## Load images" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "input_image_path = 'data/demo/input_image.jpg'\n", 58 | "first_query_image_path = 'data/demo/class_image_0.jpg'\n", 59 | "second_query_image_path = 'data/demo/class_image_1.jpg'" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "with open(input_image_path, 'rb') as i, open(first_query_image_path, 'rb') as fq, open(second_query_image_path, 'rb') as sq:\n", 69 | " input_image = base64.b64encode(i.read()).decode('utf-8')\n", 70 | " first_query_image = base64.b64encode(fq.read()).decode('utf-8')\n", 71 | " second_query_image = base64.b64encode(sq.read()).decode('utf-8')" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "## Build request body" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "body = json.dumps({\n", 88 | " 'image': {'content': input_image},\n", 89 | " 'query': [\n", 90 | " {'content': first_query_image},\n", 91 | " {'content': second_query_image}\n", 92 | " ]\n", 93 | "})" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "## Send request POST" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "# http://0.0.0.0:80 -> Localhost port 80\n", 110 | "res = requests.post(\"http://0.0.0.0:80/detect-all-instances\", data=body)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "input_image = Image.open(input_image_path)\n", 120 | "im_w, im_h = input_image.size" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "boxes = [[(box[0] * im_w, box[1] * im_h), (box[2] * im_w, box[3] * im_h) ] for box in res.json()['bboxes']]" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "im = ImageDraw.Draw(input_image)\n", 139 | "for box in boxes:\n", 140 | " im.rectangle(box, outline='yellow', width=3)\n", 141 | "input_image" 142 | ] 143 | } 144 | ], 145 | "metadata": { 146 | "kernelspec": { 147 | "display_name": "Python 3.8.13 ('general-conda')", 148 | "language": "python", 149 | "name": "python3" 150 | }, 151 | "language_info": { 152 | "codemirror_mode": { 153 | "name": "ipython", 154 | "version": 3 155 
| }, 156 | "file_extension": ".py", 157 | "mimetype": "text/x-python", 158 | "name": "python", 159 | "nbconvert_exporter": "python", 160 | "pygments_lexer": "ipython3", 161 | "version": "3.8.13" 162 | }, 163 | "orig_nbformat": 4, 164 | "vscode": { 165 | "interpreter": { 166 | "hash": "a6c412e415f27236cb9d9dc68868e41cb65d54679c06807ebc8d62a20f3611d4" 167 | } 168 | } 169 | }, 170 | "nbformat": 4, 171 | "nbformat_minor": 2 172 | } 173 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.4' 2 | 3 | services: 4 | os2d: 5 | build: 6 | context: ./Docker/ 7 | args: 8 | USER_UID: ${USER_UID} 9 | USER_GID: ${USER_GID} 10 | OS2D_ROOT: ${WORKDIR} 11 | image: os2d:latest 12 | 13 | user: "${USER_UID}:${USER_GID}" 14 | ports: 15 | - "${CUSTOM_PORT}:${CUSTOM_PORT}" 16 | volumes: 17 | - .:/workspace 18 | command: uvicorn app:app --port ${CUSTOM_PORT} --host 0.0.0.0 -------------------------------------------------------------------------------- /experiments/README.md: -------------------------------------------------------------------------------- 1 | ## Experiments with the OS2D methods (retail and INSTRE datasets) 2 | 3 | ### Preparations 4 | ```bash 5 | # activate the env 6 | conda activate os2d 7 | # move to the root folder 8 | # set OS2D_ROOT, e.g., by OS2D_ROOT=`pwd` 9 | cd $OS2D_ROOT 10 | export PYTHONPATH=$OS2D_ROOT:$PYTHONPATH 11 | ``` 12 | 13 | ### Train models 14 | ```bash 15 | # to use one local GPU run 16 | python experiments/launcher_exp1.py 17 | python experiments/launcher_exp2.py 18 | python experiments/launcher_exp3_instre.py 19 | # note that the first call will process the INSTRE dataset and create the cache file; this might cause crashes if done by several processes in parallel, so use the --job-indices flag to run only some jobs first 20 | ``` 21 | 22 | ### View logged information 23 | ```bash 24 | # View all the saved logs in Visdom 25 | python os2d/utils/plot_visdom.py --log_paths output/exp1 26 | python os2d/utils/plot_visdom.py --log_paths output/exp2 27 | python os2d/utils/plot_visdom.py --log_paths output/exp3 28 | ``` 29 | 30 | ### Collect data for ablation tables 31 | ```bash 32 | # Table 1: 33 | python experiments/launcher_exp1_collect.py 34 | # Table 2: 35 | python experiments/launcher_exp2_collect.py 36 | ``` 37 | 38 | ### Evaluation on test sets 39 | ```bash 40 | # Retail product datasets: run eval 41 | python experiments/launcher_grozi_eval.py 42 | # Collect results (create a part of Table 3) 43 | python experiments/launcher_grozi_eval_collect.py 44 | 45 | # INSTRE datasets: run eval 46 | python experiments/launcher_instre_eval.py 47 | # Collect results (create a part of Table 4) 48 | python experiments/launcher_instre_eval_collect.py 49 | ``` 50 | -------------------------------------------------------------------------------- /experiments/README_ImageNet.md: -------------------------------------------------------------------------------- 1 | ## Experiments on the ImageNet dataset 2 | This file describes our experiments in the 1-shot setting of [Karlinsky et al., CVPR 2019](https://openaccess.thecvf.com/content_CVPR_2019/html/Karlinsky_RepMet_Representative-Based_Metric_Learning_for_Classification_and_Few-Shot_Object_Detection_CVPR_2019_paper.html) 3 | based on the ImageNet dataset. 4 | 5 | ### Installing the dataset 6 | The first step is to install the ImageNet dataset. 7 | Please follow [the official website](http://image-net.org/) for instructions.
8 | We used the training and validation subsets of ILSVRC2012. 9 | 10 | Create the symlink `$OS2D_ROOT/data/ImageNet-RepMet/ILSVRC` pointing to your ImageNet installation. 11 | Make sure you have paths `$OS2D_ROOT/data/ImageNet-RepMet/ILSVRC/Data/CLS-LOC` and `$OS2D_ROOT/data/ImageNet-RepMet/ILSVRC/Annotations/CLS-LOC` available. 12 | 13 | Download the episodic data of [Karlinsky et al. (RepMet)](https://github.com/jshtok/RepMet) released on [Google Drive](https://drive.google.com/drive/folders/1MZ6HWQpR_Oseo5_v5gmrlAyubrPL-ciO?usp=sharing) and put it to `$OS2D_ROOT/data/ImageNet-RepMet/RepMet_CVPR2019_data`. 14 | We need only two files `RepMet_CVPR2019_data/data/Imagenet_LOC/voc_inloc_roidb.pkl` and `RepMet_CVPR2019_data/data/Imagenet_LOC/episodes/epi_inloc_in_domain_1_5_10_500.pkl`, which can be downloaded with these commands: 15 | ```bash 16 | mkdir -p $OS2D_ROOT/data/ImageNet-RepMet/RepMet_CVPR2019_data/data/Imagenet_LOC/episodes 17 | $OS2D_ROOT/os2d/utils/wget_gdrive.sh $OS2D_ROOT/data/ImageNet-RepMet/RepMet_CVPR2019_data/data/Imagenet_LOC/voc_inloc_roidb.pkl 1VFQkO4WToV7OMggzu6F_sOuuHno_qEFE 18 | $OS2D_ROOT/os2d/utils/wget_gdrive.sh $OS2D_ROOT/data/ImageNet-RepMet/RepMet_CVPR2019_data/data/Imagenet_LOC/episodes/epi_inloc_in_domain_1_5_10_500.pkl 1yjBvPoVO-PAnTEXnpHAfTv5XQ1Xg1pJS 19 | ``` 20 | 21 | ### Train ResNet101 on data with RepMet test classes excluded 22 | Preparations: 23 | ```bash 24 | conda activate os2d 25 | export PYTHONPATH=$OS2D_ROOT:$PYTHONPATH 26 | cd $OS2D_ROOT/data/ImageNet-RepMet/pretrain 27 | ``` 28 | Prepare the dataset (the script will create subfolders of `$OS2D_ROOT/data/ImageNet-RepMet/pretrain/imagenet-repmet` with symlinks to the original ImageNet files): 29 | ```bash 30 | python prepare_data_exclude_test_classes.py 31 | ``` 32 | Train the model with [the script from the PyTorch examples](https://github.com/pytorch/examples/tree/master/imagenet): 33 | ```bash 34 | ARCH=resnet101 35 | mkdir -p output/${ARCH} 36 | cd output/${ARCH} 37 | python ../../train_imagenet.py -a ${ARCH} --dist-url 'tcp://127.0.0.1:23455' --dist-backend 'nccl' --multiprocessing-distributed --world-size 1 --rank 0 ../../imagenet-repmet 38 | ``` 39 | We trained on 4 V100 GPUs; the script ran for 90 epochs and obtained Acc@1 of 75.819 and Acc@5 of 92.735 (note that these numbers are not comparable with the standard ImageNet results because of the different set of classes).
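Before converting, you can optionally sanity-check the saved checkpoint. The snippet below is a minimal sketch and assumes `train_imagenet.py` keeps the checkpoint format of the standard PyTorch ImageNet example it is based on (a dict with `state_dict`, `best_acc1`, etc., and `module.`-prefixed parameter names coming from `DistributedDataParallel`):
```python
import torch

# Optional quick check of the best checkpoint written by train_imagenet.py
# (keys below assume the standard PyTorch ImageNet-example checkpoint format).
ckpt = torch.load("output/resnet101/model_best.pth.tar", map_location="cpu")
print(ckpt.get("best_acc1"))            # top-1 accuracy stored when the checkpoint was saved
print(len(ckpt["state_dict"]))          # number of parameter tensors
print(next(iter(ckpt["state_dict"])))   # e.g. "module.conv1.weight"; convert_resnet_pytorch_to_cirtorch.py strips "module."
```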
40 | 41 | Convert the trained model for further usage: 42 | ```bash 43 | python convert_resnet_pytorch_to_maskrcnnbenchmark.py output/${ARCH}/model_best.pth.tar 44 | python convert_resnet_pytorch_to_cirtorch.py output/${ARCH}/model_best.pth.tar 45 | ``` 46 | 47 | ### Train the detector of all classes 48 | ```bash 49 | conda activate os2d 50 | export PYTHONPATH=$OS2D_ROOT:$PYTHONPATH 51 | cd $OS2D_ROOT/baselines/detector_retrieval/detector 52 | python experiments/launcher_train_detector_imagenet.py 53 | ``` 54 | 55 | ### Train the retrieval system 56 | ```bash 57 | conda activate os2d 58 | export PYTHONPATH=$OS2D_ROOT:$PYTHONPATH 59 | cd $OS2D_ROOT/baselines/detector_retrieval/retrieval 60 | bash ./prepare_datasets_imagenet.sh 61 | python experiments/launcher_imagenet.py 62 | ``` 63 | 64 | ### Evaluate the detector-retrieval baseline 65 | ```bash 66 | conda activate os2d 67 | export PYTHONPATH=$OS2D_ROOT:$PYTHONPATH 68 | cd $OS2D_ROOT/baselines/detector_retrieval 69 | python experiments/launcher_imagenet_eval.py 70 | python experiments/launcher_imagenet_eval_collect.py 71 | ``` 72 | 73 | ### Evaluate the OS2D models 74 | ```bash 75 | conda activate os2d 76 | export PYTHONPATH=$OS2D_ROOT:$PYTHONPATH 77 | cd $OS2D_ROOT 78 | python experiments/launcher_imagenet_eval.py 79 | python experiments/launcher_imagenet_eval_collect.py 80 | ``` 81 | -------------------------------------------------------------------------------- /experiments/config_training.yml: -------------------------------------------------------------------------------- 1 | output: 2 | path: "" # Substitute "" 3 | save_iter: 0 4 | best_model: 5 | do_get_best_model: True 6 | dataset: "" # use the first validation dataset 7 | metric: "mAP@0.50" 8 | mode: "max" 9 | is_cuda: True 10 | random_seed: 0 11 | init: 12 | model: "" # Substitute "models/resnet50-19c8e357.pth" 13 | model: 14 | backbone_arch: "" # Substitute "ResNet50" or "ResNet101" 15 | use_inverse_geom_model: False # Substitute v1: False v2 : True 16 | use_simplified_affine_model: True # Substitute v1: True v2 : False 17 | train: 18 | dataset_name: "grozi-train" 19 | dataset_scale: 1280.0 20 | objective: 21 | class_objective: "RLL" 22 | loc_weight: 0.0 # Substitute v1: 0.2, v2: 0.0 23 | positive_iou_threshold: 0.5 24 | negative_iou_threshold: 0.1 25 | remap_classification_targets: True 26 | remap_classification_targets_iou_pos: 0.8 27 | remap_classification_targets_iou_neg: 0.4 28 | optim: 29 | anneal_lr: 30 | type: "MultiStepLR" 31 | milestones: [100000, 150000] 32 | gamma: 0.1 33 | 34 | model: 35 | freeze_bn: True 36 | freeze_bn_transform: True # Substitute v1: False, v2: True 37 | train_transform_on_negs: False 38 | eval: 39 | iter: 1000 40 | dataset_names: ("grozi-val-new-cl",) 41 | dataset_scales: (1280.0,) 42 | mAP_iou_thresholds: (0.5,) 43 | -------------------------------------------------------------------------------- /experiments/launcher_exp1_collect.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from os2d.utils.logger import extract_value_from_os2d_binary_log, mAP_percent_to_points 4 | 5 | 6 | if __name__ == "__main__": 7 | config_path = os.path.dirname(os.path.abspath(__file__)) 8 | config_job_name = "exp1" 9 | 10 | log_path = os.path.abspath(os.path.join(config_path, "..", "output/exp1")) 11 | 12 | 13 | def get_result(job_name, 14 | sub_index, 15 | backbone_arch, 16 | init_model_nickname, 17 | rand_seed, 18 | ): 19 | job_name = f"{config_job_name}.{sub_index}.{job_name}_seed{rand_seed}" 20 | 21 | 
log_folder = job_name + "_" + backbone_arch + "_init_" + init_model_nickname 22 | log_folder = os.path.join(log_path, log_folder) 23 | 24 | data_file = os.path.join(log_folder, "train_log.pkl") 25 | 26 | return mAP_percent_to_points(extract_value_from_os2d_binary_log(data_file, "mAP@0.50_grozi-val-new-cl", reduce="max")) 27 | 28 | 29 | results = [] 30 | random_seed = 0 31 | results.append(get_result(\ 32 | "lossCL", 0, "ResNet50", "imageNetCaffe2", random_seed, 33 | )) 34 | 35 | results.append(get_result(\ 36 | "lossRLL", 1, "ResNet50", "imageNetCaffe2", random_seed 37 | )) 38 | 39 | results.append(get_result(\ 40 | "lossRLL_remap", 2, "ResNet50", "imageNetCaffe2", random_seed 41 | )) 42 | 43 | results.append(get_result(\ 44 | "lossRLL_remap_mine", 3, "ResNet50", "imageNetCaffe2", random_seed 45 | )) 46 | 47 | results.append(get_result(\ 48 | "lossRLL_remap_invFullAffine", 4, "ResNet50", "imageNetCaffe2", random_seed 49 | )) 50 | 51 | results.append(get_result(\ 52 | "lossRLL_remap_mine_fullAffine", 5, "ResNet50", "imageNetCaffe2", random_seed 53 | )) 54 | 55 | results.append(get_result(\ 56 | "lossRLL_remap_invFullAffine_initTranform", 6, "ResNet50", "imageNetCaffe2", random_seed, 57 | )) 58 | 59 | results.append(get_result(\ 60 | "lossRLL_remap_invFullAffine_initTranform_zeroLocLoss", 7, "ResNet50", "imageNetCaffe2", random_seed 61 | )) 62 | 63 | results.append(get_result(\ 64 | "lossRLL_remap_invFullAffine_initTranform_zeroLocLoss_mine", 8, "ResNet50", "imageNetCaffe2", random_seed 65 | )) 66 | 67 | results.append(get_result(\ 68 | "lossCL_invFullAffine_initTranform_zeroLocLoss", 9, "ResNet50", "imageNetCaffe2", random_seed 69 | )) 70 | 71 | results.append(get_result(\ 72 | "lossCL_remap_invFullAffine_initTranform_zeroLocLoss", 10, "ResNet50", "imageNetCaffe2", random_seed 73 | )) 74 | 75 | results.append(get_result(\ 76 | "lossRLL_invFullAffine_initTranform_zeroLocLoss", 11, "ResNet50", "imageNetCaffe2", random_seed 77 | )) 78 | 79 | for r in results: 80 | print(r) 81 | -------------------------------------------------------------------------------- /experiments/launcher_exp2.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | from collections import OrderedDict 4 | 5 | from os2d.utils import launcher as launcher 6 | 7 | 8 | def load_yaml(config_file): 9 | with open(config_file, "r") as stream: 10 | config = yaml.safe_load(stream) 11 | return config 12 | 13 | 14 | if __name__ == "__main__": 15 | # load default launcher parameters 16 | parser = launcher.create_args_parser() 17 | args = parser.parse_args() 18 | 19 | main_command = "python main.py" 20 | 21 | config_path = os.path.dirname(os.path.abspath(__file__)) 22 | config_file = os.path.join(config_path, "config_training.yml") 23 | config = load_yaml(config_file) 24 | config_job_name = "exp2" 25 | 26 | log_path = os.path.abspath(os.path.join(config_path, "..", "output/exp2")) 27 | 28 | config_dict_v1 = OrderedDict() 29 | config_dict_v1["model.use_inverse_geom_model"] = False 30 | config_dict_v1["model.use_simplified_affine_model"] = True 31 | config_dict_v1["train.objective.loc_weight"] = 0.2 32 | config_dict_v1["train.model.freeze_bn_transform"] = False 33 | 34 | config_dict_v2 = OrderedDict() 35 | config_dict_v2["model.use_inverse_geom_model"] = True 36 | config_dict_v2["model.use_simplified_affine_model"] = False 37 | config_dict_v2["train.objective.loc_weight"] = 0.0 38 | config_dict_v2["train.model.freeze_bn_transform"] = True 39 | config_dict_v2["init.transform"] 
= "models/weakalign_resnet101_affine_tps.pth.tar" 40 | 41 | exp_job_names = [] 42 | exp_log_paths = [] 43 | exp_commands = [] 44 | 45 | 46 | def add_job(job_type, # "v1" or "v2" 47 | sub_index, 48 | backbone_arch, 49 | init_model_nickname, 50 | init_model_path, 51 | extra_params=None, 52 | ): 53 | job_name = f"{config_job_name}.{sub_index}.{job_type}_seed{config['random_seed']}" 54 | 55 | d = OrderedDict() 56 | 57 | d["--config-file"] = config_file 58 | 59 | if job_type == "v1": 60 | d.update(config_dict_v1) 61 | elif job_type == "v2": 62 | d.update(config_dict_v2) 63 | else: 64 | raise RuntimeError("Unknown job_type {0}".format(job_type)) 65 | 66 | d["model.backbone_arch"] = backbone_arch 67 | d["init.model"] = init_model_path 68 | 69 | log_folder = job_name + "_" + backbone_arch + "_init_" + init_model_nickname 70 | log_folder = os.path.join(log_path, log_folder) 71 | 72 | d["output.path"] = log_folder 73 | 74 | if extra_params: 75 | d.update(extra_params) 76 | 77 | commands = [main_command + " " + launcher.parameters_to_str(d)] 78 | 79 | exp_job_names.append(job_name) 80 | exp_commands.append(commands) 81 | exp_log_paths.append(log_folder) 82 | 83 | 84 | for job_type in ["v1", "v2"]: 85 | add_job(job_type, 0, "ResNet50", "fromScratch", 86 | "models/does_not_exist") 87 | add_job(job_type, 1, "ResNet50", "imageNetPth", 88 | "models/resnet50-19c8e357.pth") 89 | add_job(job_type, 2, "ResNet50", "imageNetCaffe2", 90 | "models/imagenet-caffe-resnet50-features-ac468af-converted.pth") 91 | add_job(job_type, 3, "ResNet50", "imageNetCaffe2GroupNorm", 92 | "models/resnet50_caffe2_groupnorm.pth", {"model.use_group_norm" : True}) 93 | add_job(job_type, 4, "ResNet50", "cocoMaskrcnnFpn", 94 | "models/maskrcnn-benchmark/e2e_mask_rcnn_R_50_FPN_1x_converted.pth") 95 | add_job(job_type, 5, "ResNet101", "imageNetPth", 96 | "models/resnet101-5d3b4d8f.pth") 97 | add_job(job_type, 6, "ResNet101", "imageNetCaffe2", 98 | "models/imagenet-caffe-resnet101-features-10a101d-converted.pth") 99 | add_job(job_type, 7, "ResNet101", "buildingsCirtorch", 100 | "models/gl18-tl-resnet101-gem-w-a4d43db-converted.pth") 101 | add_job(job_type, 8, "ResNet101", "cocoMaskrcnnFpn", 102 | "models/maskrcnn-benchmark/e2e_mask_rcnn_R_101_FPN_1x_converted.pth") 103 | add_job(job_type, 9, "ResNet101", "pascalWeakalign", 104 | "models/weakalign_resnet101_affine_tps.pth.tar") 105 | 106 | for job_name, log_path, commands in zip(exp_job_names, exp_log_paths, exp_commands): 107 | launcher.add_job(job_name=job_name, 108 | log_path=log_path, 109 | commands=commands) 110 | launcher.launch_all_jobs(args) 111 | -------------------------------------------------------------------------------- /experiments/launcher_exp2_collect.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | from os2d.utils.logger import extract_value_from_os2d_binary_log, mAP_percent_to_points 5 | 6 | 7 | if __name__ == "__main__": 8 | config_path = os.path.dirname(os.path.abspath(__file__)) 9 | config_job_name = "exp2" 10 | log_path = os.path.abspath(os.path.join(config_path, "..", "output/exp2")) 11 | 12 | 13 | def get_result(job_type, # "v1" or "v2" 14 | sub_index, 15 | backbone_arch, 16 | init_model_nickname, 17 | random_seed, 18 | ): 19 | job_name = f"{config_job_name}.{sub_index}.{job_type}_seed{random_seed}" 20 | 21 | log_folder = job_name + "_" + backbone_arch + "_init_" + init_model_nickname 22 | log_folder = os.path.join(log_path, log_folder) 23 | 24 | data_file = os.path.join(log_folder, 
"train_log.pkl") 25 | 26 | return mAP_percent_to_points(extract_value_from_os2d_binary_log(data_file, "mAP@0.50_grozi-val-new-cl", reduce="max")),\ 27 | mAP_percent_to_points(extract_value_from_os2d_binary_log(data_file, "mAP@0.50_grozi-val-new-cl", reduce="first")) 28 | 29 | 30 | table = pd.DataFrame(columns=["arch", "init", "v1-train", "v2-init", "v2-train"]) 31 | random_seed = 0 32 | 33 | for i, arch, init in zip(range(10), 34 | ["ResNet50"] * 5 + ["ResNet101"] * 5, 35 | ["fromScratch", "imageNetPth", "imageNetCaffe2", "imageNetCaffe2GroupNorm", "cocoMaskrcnnFpn", 36 | "imageNetPth", "imageNetCaffe2", "buildingsCirtorch", "cocoMaskrcnnFpn", "pascalWeakalign"] 37 | ): 38 | val_train_v1, val_init_v1 = get_result("v1", i, arch, init, random_seed) 39 | val_train_v2, val_init_v2 = get_result("v2", i, arch, init, random_seed) 40 | 41 | table = table.append({"arch":arch, "init":init, 42 | "v1-train":val_train_v1, "v2-init":val_init_v2, "v2-train":val_train_v2}, 43 | ignore_index=True) 44 | 45 | print(table, sep='\n') 46 | -------------------------------------------------------------------------------- /experiments/launcher_exp3_instre.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import copy 4 | from collections import OrderedDict 5 | 6 | from os2d.utils import launcher as launcher 7 | 8 | 9 | def load_yaml(config_file): 10 | with open(config_file, "r") as stream: 11 | config = yaml.safe_load(stream) 12 | return config 13 | 14 | 15 | if __name__ == "__main__": 16 | # load default launcher parameters 17 | parser = launcher.create_args_parser() 18 | args = parser.parse_args() 19 | 20 | main_command = "python main.py" 21 | 22 | config_path = os.path.dirname(os.path.abspath(__file__)) 23 | config_file = os.path.join(config_path, "config_training.yml") 24 | config = load_yaml(config_file) 25 | config_job_name = "exp3" 26 | 27 | log_path = os.path.abspath(os.path.join(config_path, "..", "output/exp3")) 28 | 29 | config_dict_v1 = OrderedDict() 30 | config_dict_v1["model.use_inverse_geom_model"] = False 31 | config_dict_v1["model.use_simplified_affine_model"] = True 32 | config_dict_v1["train.objective.loc_weight"] = 0.2 33 | config_dict_v1["train.model.freeze_bn_transform"] = False 34 | 35 | config_dict_v2 = OrderedDict() 36 | config_dict_v2["model.use_inverse_geom_model"] = True 37 | config_dict_v2["model.use_simplified_affine_model"] = False 38 | config_dict_v2["train.objective.loc_weight"] = 0.0 39 | config_dict_v2["train.model.freeze_bn_transform"] = True 40 | config_dict_v2["init.transform"] = "models/weakalign_resnet101_affine_tps.pth.tar" 41 | 42 | exp_job_names = [] 43 | exp_log_paths = [] 44 | exp_commands = [] 45 | 46 | def add_job(job_type, # "v1" or "v2" 47 | job_id, 48 | backbone_arch, 49 | init_model_nickname, 50 | init_model_path, 51 | extra_params=None, 52 | train_data="", 53 | train_data_scale=None): 54 | job_name = "{0}.{1}.{2}_{3}_seed{4}".format(config_job_name, job_id, job_type, train_data, config["random_seed"]) 55 | 56 | d = OrderedDict() 57 | d["--config-file"] = config_file 58 | 59 | if job_type == "v1": 60 | d.update(config_dict_v1) 61 | elif job_type == "v2": 62 | d.update(config_dict_v2) 63 | else: 64 | raise RuntimeError("Unknown job_type {0}".format(job_type)) 65 | 66 | d["train.dataset_name"] = "\"" + train_data + "\"" 67 | if train_data == "instre-s1-train": 68 | d["train.dataset_scale"] = 700.0 69 | main_val_dataset = "instre-s1-val" 70 | d["eval.dataset_scales"] = "[700.0]" 71 | elif train_data 
== "instre-s2-train": 72 | d["train.dataset_scale"] = 600.0 73 | main_val_dataset = "instre-s2-val" 74 | d["eval.dataset_scales"] = "[600.0]" 75 | else: 76 | raise RuntimeError(f"Unknown dataset {train_data}") 77 | 78 | d["output.best_model.dataset"] = main_val_dataset 79 | d["eval.dataset_names"] = f"[\\\"{main_val_dataset}\\\"]" 80 | 81 | d["eval.class_image_augmentation"] = "rotation90" 82 | d["eval.iter"] = 5000 83 | 84 | # extra augmentation for this run 85 | d["train.augment.mine_extra_class_images"] = True 86 | 87 | d["model.backbone_arch"] = backbone_arch 88 | d["init.model"] = init_model_path 89 | 90 | log_folder = job_name + "_" + backbone_arch + "_init_" + init_model_nickname 91 | log_folder = os.path.join(log_path, log_folder) 92 | 93 | d["output.path"] = log_folder 94 | 95 | if extra_params: 96 | d.update(extra_params) 97 | 98 | commands = [] 99 | commands.append(main_command + " " + launcher.parameters_to_str(d)) 100 | 101 | exp_job_names.append(job_name) 102 | exp_commands.append(commands) 103 | exp_log_paths.append(log_folder) 104 | 105 | 106 | for train_data in ["instre-s1-train", "instre-s2-train"]: 107 | for job_type in ["v1", "v2"]: 108 | add_job(job_type, "R50", "ResNet50", "imageNetCaffe2", 109 | "models/imagenet-caffe-resnet50-features-ac468af-converted.pth", 110 | train_data=train_data) 111 | add_job(job_type, "R101", "ResNet101", "imageNetCaffe2", 112 | "models/imagenet-caffe-resnet101-features-10a101d-converted.pth", 113 | train_data=train_data) 114 | 115 | 116 | for job_name, log_path, commands in zip(exp_job_names, exp_log_paths, exp_commands): 117 | launcher.add_job(job_name=job_name, 118 | log_path=log_path, 119 | commands=commands) 120 | launcher.launch_all_jobs(args) 121 | -------------------------------------------------------------------------------- /experiments/launcher_grozi_eval_collect.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | import pandas as pd 4 | 5 | from os2d.utils.logger import extract_map_value_from_os2d_log 6 | 7 | 8 | if __name__ == "__main__": 9 | config_path = os.path.dirname(os.path.abspath(__file__)) 10 | log_path = os.path.abspath(os.path.join(config_path, "..", "output/eval_grozi")) 11 | config_job_name = "eval_grozi" 12 | 13 | 14 | def get_result(sub_index, 15 | eval_dataset, 16 | folder_suffix="", 17 | result_suffix="out.txt"): 18 | d = OrderedDict() 19 | 20 | log_folder = f"{config_job_name}.{sub_index}" 21 | if folder_suffix: 22 | log_folder += "." 
+ folder_suffix 23 | log_folder = os.path.join(log_path, log_folder) 24 | 25 | # choose eval dataset 26 | if eval_dataset == "grozi-val-new-cl": 27 | d["eval.dataset_names"] = "\"[\\\"grozi-val-new-cl\\\"]\"" 28 | d["eval.dataset_scales"] = "[1280.0]" 29 | elif eval_dataset == "grozi-val-old-cl": 30 | d["eval.dataset_names"] = "\"[\\\"grozi-val-old-cl\\\"]\"" 31 | d["eval.dataset_scales"] = "[1280.0]" 32 | elif eval_dataset == "dairy": 33 | d["eval.dataset_names"] = "\"[\\\"dairy\\\"]\"" 34 | d["eval.dataset_scales"] = "[3500.0]" 35 | elif eval_dataset == "paste-v": 36 | d["eval.dataset_names"] = "\"[\\\"paste-v\\\"]\"" 37 | d["eval.dataset_scales"] = "[3500.0]" 38 | elif eval_dataset == "paste-f": 39 | d["eval.dataset_names"] = "\"[\\\"paste-f\\\"]\"" 40 | d["eval.dataset_scales"] = "[2000.0]" 41 | # eval with rotations 42 | d["eval.class_image_augmentation"] = "rotation90" 43 | else: 44 | raise RuntimeError(f"Unknown eval set {eval_dataset}") 45 | 46 | result_file = f"eval_{eval_dataset}_scale{d['eval.dataset_scales'][1:-1]}_{result_suffix}" 47 | result_file = os.path.join(log_folder, result_file) 48 | 49 | return extract_map_value_from_os2d_log(result_file, eval_dataset) 50 | 51 | 52 | datasets = ["grozi-val-old-cl", "grozi-val-new-cl", "dairy", "paste-v", "paste-f"] 53 | methods = ["V1-init", "V1-train", "V2-init", "V2-train"] 54 | ids = [3, 0, 2, 1] 55 | table = pd.DataFrame(columns=["method"] + datasets) 56 | 57 | for i, method in zip(ids, methods): 58 | row = {} 59 | row["method"] = method 60 | for eval_dataset in datasets: 61 | value = get_result(i, eval_dataset, 62 | folder_suffix=f"best_{method}") 63 | row[eval_dataset] = value 64 | table = table.append(row, ignore_index=True) 65 | 66 | print(table, sep='\n') 67 | -------------------------------------------------------------------------------- /experiments/launcher_imagenet_eval.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | 4 | from os2d.utils import launcher as launcher 5 | 6 | 7 | if __name__ == "__main__": 8 | # load default launcher parameters 9 | parser = launcher.create_args_parser() 10 | args = parser.parse_args() 11 | 12 | main_command = "python main.py" 13 | 14 | config_path = os.path.dirname(os.path.abspath(__file__)) 15 | config_file = os.path.join(config_path, "config_training.yml") 16 | config_job_name = "eval_imagenet" 17 | 18 | log_path = os.path.abspath(os.path.join(config_path, "..", "output/eval_imagenet")) 19 | 20 | exp_job_names = [] 21 | exp_log_paths = [] 22 | exp_commands = [] 23 | exp_log_file_prefix = [] 24 | 25 | 26 | def add_job(sub_index, 27 | backbone_arch, 28 | model_path, 29 | model_checkpoint, 30 | episodes, 31 | eval_scale, 32 | test_augmentation, 33 | folder_suffix="", 34 | extra_params=None): 35 | job_name = f"{config_job_name}.{sub_index}" 36 | commands = [] 37 | 38 | d = OrderedDict() 39 | d["--config-file"] = config_file 40 | 41 | d["model.use_inverse_geom_model"] = True 42 | d["model.use_simplified_affine_model"] = False 43 | d["model.backbone_arch"] = backbone_arch 44 | 45 | d["eval.dataset_scales"] = f"[{eval_scale}]" 46 | 47 | if test_augmentation: 48 | d["eval.class_image_augmentation"] = test_augmentation 49 | 50 | if extra_params: 51 | d.update(extra_params) 52 | 53 | # set output folder 54 | log_folder = f"{config_job_name}" 55 | if folder_suffix: 56 | log_folder += "."
+ folder_suffix 57 | log_folder = os.path.join(log_path, log_folder) 58 | 59 | d["train.do_training"] = False 60 | 61 | # choose init 62 | if "init.transform" in d: 63 | del d["init.transform"] 64 | if os.path.isfile(model_path): 65 | d["init.model"] = model_path 66 | else: 67 | d["init.model"] = os.path.join(model_path, model_checkpoint) 68 | 69 | for episode in episodes: 70 | d["eval.dataset_names"] = f"[\\\"imagenet-repmet-test-episode-{episode}\\\"]" 71 | 72 | commands.append(main_command + " " + launcher.parameters_to_str(d)) 73 | 74 | exp_job_names.append(job_name) 75 | exp_commands.append(commands) 76 | exp_log_paths.append(log_folder) 77 | exp_log_file_prefix.append(f"eval_scale{d['eval.dataset_scales'][1:-1]}_epi{min(episodes)}-{max(episodes)}_") 78 | 79 | 80 | test_augmentation = "horflip" #"horflip_rotation90" 81 | 82 | num_episodes = 500 83 | episode_per_job = 50 84 | 85 | scales_to_test = [250] 86 | 87 | for eval_scale in scales_to_test: 88 | index = 0 89 | i_episode = 0 90 | while i_episode < num_episodes: 91 | list_of_episodes = list(range(i_episode, min(i_episode + episode_per_job, num_episodes))) 92 | add_job(sub_index=index, 93 | backbone_arch="ResNet50", 94 | model_path="models", 95 | model_checkpoint="os2d_v2-init.pth", 96 | episodes=list_of_episodes, 97 | eval_scale=eval_scale, 98 | test_augmentation=test_augmentation, 99 | folder_suffix=f"model_v2-init_scale_{int(eval_scale)}_aug_horFlip", 100 | extra_params=None) 101 | index += 1 102 | i_episode += episode_per_job 103 | 104 | for eval_scale in scales_to_test: 105 | index = 0 106 | i_episode = 0 107 | while i_episode < num_episodes: 108 | list_of_episodes = list(range(i_episode, min(i_episode + episode_per_job, num_episodes))) 109 | add_job(sub_index=index, 110 | backbone_arch="ResNet50", 111 | model_path="output/exp2/exp2.2.v1_seed0_ResNet50_init_imageNetCaffe2", 112 | model_checkpoint="checkpoint_iter_0.pth", 113 | episodes=list_of_episodes, 114 | eval_scale=eval_scale, 115 | test_augmentation=test_augmentation, 116 | folder_suffix=f"model_v1-init_scale_{int(eval_scale)}_aug_horFlip", 117 | extra_params=None) 118 | index += 1 119 | i_episode += episode_per_job 120 | 121 | for job_name, log_path, commands, log_file_prefix in zip(exp_job_names, exp_log_paths, exp_commands, exp_log_file_prefix): 122 | launcher.add_job(job_name=job_name, 123 | log_path=log_path, 124 | commands=commands, 125 | log_file_prefix=log_file_prefix) 126 | launcher.launch_all_jobs(args) 127 | -------------------------------------------------------------------------------- /experiments/launcher_imagenet_eval_collect.py: -------------------------------------------------------------------------------- 1 | import os 2 | import statistics 3 | import pandas as pd 4 | 5 | from os2d.utils.logger import extract_pattern_after_marked_line, numeric_const_pattern 6 | 7 | 8 | MISSING_VAL_CONSTANT = "None" 9 | 10 | 11 | def mAP_percent_to_points(v): 12 | if v is not None: 13 | return float(v)*100 14 | else: 15 | return MISSING_VAL_CONSTANT 16 | 17 | 18 | def extract_map_value_from_os2d_log(result_file, eval_dataset, metric_name="mAP@0.50"): 19 | dataset_search_pattern = "Evaluated on {0}" 20 | dataset_pattern = dataset_search_pattern.format(eval_dataset) 21 | eval_pattern = f"{metric_name}\s({numeric_const_pattern})" 22 | 23 | value = extract_pattern_after_marked_line(result_file, dataset_pattern, eval_pattern) 24 | return mAP_percent_to_points(value) 25 | 26 | 27 | if __name__ == "__main__": 28 | config_path = os.path.dirname(os.path.abspath(__file__)) 29 | 
config_job_name = "eval_imagenet" 30 | 31 | log_path = os.path.abspath(os.path.join(config_path, "..", "output/eval_imagenet")) 32 | 33 | 34 | def get_result(sub_index, 35 | episodes, 36 | eval_scale, 37 | metric_names=["mAP@0.50"], 38 | folder_suffix="", 39 | result_suffix="out.txt"): 40 | # set output folder 41 | log_folder = f"{config_job_name}" 42 | if folder_suffix: 43 | log_folder += "." + folder_suffix 44 | log_folder = os.path.join(log_path, log_folder) 45 | 46 | values = [] 47 | for episode in episodes: 48 | eval_dataset = f"imagenet-repmet-test-episode-{episode}" 49 | 50 | result_file = f"eval_scale{eval_scale}_epi{min(episodes)}-{max(episodes)}_{result_suffix}" 51 | result_file = os.path.join(log_folder, result_file) 52 | values_one_run = {} 53 | for m in metric_names: 54 | values_one_run[m] = extract_map_value_from_os2d_log(result_file, eval_dataset, metric_name=m) 55 | values.append(values_one_run) 56 | 57 | return values 58 | 59 | 60 | def collect_run_results(eval_scale, folder_suffix, result_suffix="out.txt"): 61 | num_episodes = 500 62 | episode_per_job = 50 63 | index = 0 64 | i_episode = 0 65 | 66 | metric_names = ["mAP@0.50", "AP_joint_classes@0.50"] 67 | computed_episodes_metric = {m:[] for m in metric_names} 68 | 69 | while i_episode < num_episodes: 70 | list_of_episodes = list(range(i_episode, min(i_episode + episode_per_job, num_episodes))) 71 | results = get_result(sub_index=index, 72 | episodes=list_of_episodes, 73 | eval_scale=eval_scale, 74 | metric_names=metric_names, 75 | folder_suffix=folder_suffix, 76 | result_suffix=result_suffix, 77 | ) 78 | 79 | for e, r in zip(list_of_episodes, results): 80 | for m in metric_names: 81 | if r[m] == MISSING_VAL_CONSTANT: 82 | print(f"Missing episode {e} from chunk {index}") 83 | else: 84 | computed_episodes_metric[m].append(r[m]) 85 | 86 | index += 1 87 | i_episode += episode_per_job 88 | 89 | for metric_name in metric_names: 90 | collected_metric = computed_episodes_metric[metric_name] 91 | average_val = sum(collected_metric) / len(collected_metric) 92 | max_val = max(collected_metric) 93 | min_val = min(collected_metric) 94 | std_val = statistics.stdev(collected_metric) 95 | print(f"{folder_suffix}: {len(collected_metric)} episodes; average {metric_name} = {average_val:0.2f}; max {metric_name} = {max_val:0.2f}; min {metric_name} = {min_val:0.2f}; std {metric_name} = {std_val:0.2f};") 96 | 97 | 98 | scales_to_test = [250] 99 | 100 | for eval_scale in scales_to_test: 101 | collect_run_results(eval_scale=eval_scale, folder_suffix=f"model_v2-init_scale_{int(eval_scale)}_aug_horFlip") 102 | 103 | for eval_scale in scales_to_test: 104 | collect_run_results(eval_scale=eval_scale, folder_suffix=f"model_v1-init_scale_{int(eval_scale)}_aug_horFlip") 105 | -------------------------------------------------------------------------------- /experiments/launcher_instre_eval.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import copy 4 | from collections import OrderedDict 5 | 6 | from os2d.utils import launcher as launcher 7 | 8 | 9 | def load_yaml(config_file): 10 | with open(config_file, "r") as stream: 11 | config = yaml.safe_load(stream) 12 | return config 13 | 14 | 15 | if __name__ == "__main__": 16 | # load default launcher parameters 17 | parser = launcher.create_args_parser() 18 | args = parser.parse_args() 19 | 20 | main_command = "python main.py" 21 | 22 | config_path = os.path.dirname(os.path.abspath(__file__)) 23 | config_file = os.path.join(config_path, 
"config_training.yml") 24 | config = load_yaml(config_file) 25 | config_job_name = "eval_instre" 26 | 27 | log_path = os.path.abspath(os.path.join(config_path, "..", "output/eval_instre")) 28 | 29 | config_dict_v1 = OrderedDict() 30 | config_dict_v1["model.use_inverse_geom_model"] = False 31 | config_dict_v1["model.use_simplified_affine_model"] = True 32 | config_dict_v1["train.objective.loc_weight"] = 0.2 33 | config_dict_v1["train.model.freeze_bn_transform"] = False 34 | 35 | config_dict_v2 = OrderedDict() 36 | config_dict_v2["model.use_inverse_geom_model"] = True 37 | config_dict_v2["model.use_simplified_affine_model"] = False 38 | config_dict_v2["train.objective.loc_weight"] = 0.0 39 | config_dict_v2["train.model.freeze_bn_transform"] = True 40 | config_dict_v2["init.transform"] = "models/weakalign_resnet101_affine_tps.pth.tar" 41 | 42 | exp_job_names = [] 43 | exp_log_paths = [] 44 | exp_commands = [] 45 | exp_log_file_prefix = [] 46 | 47 | 48 | def add_job(sub_index, 49 | job_type, # "v1" or "v2" 50 | backbone_arch, 51 | eval_dataset, 52 | model_path, 53 | model_checkpoint, 54 | folder_suffix="", 55 | extra_params=None): 56 | job_name = f"{config_job_name}.{sub_index}.{eval_dataset}" 57 | commands = [] 58 | 59 | d = OrderedDict() 60 | d["--config-file"] = config_file 61 | 62 | if job_type == "v1": 63 | d.update(config_dict_v1) 64 | elif job_type == "v2": 65 | d.update(config_dict_v2) 66 | else: 67 | raise RuntimeError("Unknown job_type {0}".format(job_type)) 68 | 69 | train_data = eval_dataset + "-train" 70 | d["train.dataset_name"] = "\"" + train_data + "\"" 71 | if train_data == "instre-s1-train": 72 | d["train.dataset_scale"] = 700.0 73 | main_val_dataset = "instre-s1-val" 74 | d["eval.dataset_scales"] = "[700.0]" 75 | elif train_data == "instre-s2-train": 76 | d["train.dataset_scale"] = 600.0 77 | main_val_dataset = "instre-s2-val" 78 | d["eval.dataset_scales"] = "[600.0]" 79 | else: 80 | raise RuntimeError(f"Unknown dataset {train_data}") 81 | 82 | d["output.best_model.dataset"] = main_val_dataset 83 | d["eval.dataset_names"] = f"[\\\"{main_val_dataset}\\\"]" 84 | 85 | d["eval.class_image_augmentation"] = "rotation90" 86 | d["eval.iter"] = 5000 87 | 88 | # extra augmentation for this run 89 | d["train.augment.mine_extra_class_images"] = True 90 | 91 | d["model.backbone_arch"] = backbone_arch 92 | 93 | if extra_params: 94 | d.update(extra_params) 95 | 96 | # set output folder 97 | log_folder = f"{config_job_name}" 98 | if folder_suffix: 99 | log_folder += "." 
+ folder_suffix 100 | log_folder = os.path.join(log_path, log_folder) 101 | 102 | d["train.do_training"] = False 103 | if train_data == "instre-s1-train": 104 | d["eval.dataset_names"] = "[\\\"instre-s1-test\\\"]" 105 | d["eval.dataset_scales"] = "[700.0]" 106 | elif train_data == "instre-s2-train": 107 | d["eval.dataset_names"] = "[\\\"instre-s2-test\\\"]" 108 | d["eval.dataset_scales"] = "[600.0]" 109 | else: 110 | raise RuntimeError(f"Unknown dataset {train_data}") 111 | 112 | d["eval.class_image_augmentation"] = "rotation90" 113 | 114 | # choose init 115 | if "init.transform" in d: 116 | del d["init.transform"] 117 | if os.path.isfile(model_path): 118 | d["init.model"] = model_path 119 | else: 120 | d["init.model"] = os.path.join(model_path, model_checkpoint) 121 | 122 | commands.append(main_command + " " + launcher.parameters_to_str(d)) 123 | 124 | exp_job_names.append(job_name) 125 | exp_commands.append(commands) 126 | exp_log_paths.append(log_folder) 127 | exp_log_file_prefix.append(f"eval_{eval_dataset}_scale{d['eval.dataset_scales'][1:-1]}_") 128 | 129 | 130 | index = 0 131 | for dataset in ["instre-s1", "instre-s2"]: 132 | for job_type in ["v1", "v2"]: 133 | for train_type in ["train", "init"]: 134 | 135 | if train_type == "train": 136 | model_checkpoint = f"checkpoint_best_model_{dataset}-val_mAP@0.50.pth" 137 | else: 138 | model_checkpoint = f"checkpoint_iter_0.pth" 139 | 140 | for num_layers in [50, 101]: 141 | add_job(index, job_type, f"ResNet{num_layers}", dataset, 142 | model_path=f"output/exp3/exp3.R{num_layers}.{job_type}_{dataset}-train_seed0_ResNet{num_layers}_init_imageNetCaffe2", 143 | model_checkpoint=model_checkpoint, 144 | folder_suffix=f"{dataset}_{job_type}-{train_type}_ResNet{num_layers}_imageNetCaffe2") 145 | index += 1 146 | 147 | 148 | for job_name, log_path, commands, log_file_prefix in zip(exp_job_names, exp_log_paths, exp_commands, exp_log_file_prefix): 149 | launcher.add_job(job_name=job_name, 150 | log_path=log_path, 151 | commands=commands, 152 | log_file_prefix=log_file_prefix) 153 | launcher.launch_all_jobs(args) 154 | -------------------------------------------------------------------------------- /experiments/launcher_instre_eval_collect.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | import pandas as pd 4 | 5 | from os2d.utils.logger import extract_map_value_from_os2d_log 6 | 7 | 8 | if __name__ == "__main__": 9 | config_path = os.path.dirname(os.path.abspath(__file__)) 10 | config_job_name = "eval_instre" 11 | log_path = os.path.abspath(os.path.join(config_path, "..", "output/eval_instre")) 12 | 13 | def get_result(eval_dataset, 14 | folder_suffix="", 15 | result_suffix="out.txt"): 16 | d = OrderedDict() 17 | if "instre-s1" in eval_dataset: 18 | d["eval.dataset_scales"] = "[700.0]" 19 | folder_dataset_pattern = "instre-s1" 20 | elif "instre-s2" in eval_dataset: 21 | d["eval.dataset_scales"] = "[600.0]" 22 | folder_dataset_pattern = "instre-s2" 23 | else: 24 | raise RuntimeError(f"Unknown dataset {eval_dataset}") 25 | 26 | log_folder = f"{config_job_name}" 27 | if folder_suffix: 28 | log_folder += "." 
+ folder_suffix 29 | log_folder = os.path.join(log_path, log_folder) 30 | 31 | result_file = f"eval_{folder_dataset_pattern}_scale{d['eval.dataset_scales'][1:-1]}_{result_suffix}" 32 | result_file = os.path.join(log_folder, result_file) 33 | 34 | return extract_map_value_from_os2d_log(result_file, eval_dataset) 35 | 36 | 37 | datasets = ["instre-s1", "instre-s2"] 38 | init = "imageNetCaffe2" 39 | table = pd.DataFrame(columns=["arch", "init", "type", "trained"] + datasets) 40 | 41 | for arch in ["ResNet50", "ResNet101"]: 42 | for job_type in ["v1", "v2"]: 43 | for train_type in ["train", "init"]: 44 | d = {} 45 | d["arch"] = arch 46 | d["init"] = init 47 | d["type"] = job_type 48 | d["trained"] = train_type 49 | 50 | for dataset in datasets: 51 | folder_suffix = f"{dataset}_{job_type}-{train_type}_{arch}_{init}" 52 | 53 | eval_dataset = dataset + "-test" 54 | val = get_result(eval_dataset, folder_suffix=folder_suffix) 55 | d[dataset] = val 56 | 57 | table = table.append(d, ignore_index=True) 58 | 59 | print(table, sep='\n') 60 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | import torch 5 | 6 | from os2d.modeling.model import build_os2d_from_config 7 | 8 | from os2d.data.dataloader import build_eval_dataloaders_from_cfg, build_train_dataloader_from_config 9 | from os2d.engine.train import trainval_loop 10 | from os2d.utils import set_random_seed, get_trainable_parameters, mkdir, save_config, setup_logger, get_data_path 11 | from os2d.engine.optimization import create_optimizer 12 | from os2d.config import cfg 13 | 14 | 15 | def parse_opts(): 16 | parser = argparse.ArgumentParser(description="Training and evaluation of the OS2D model") 17 | parser.add_argument( 18 | "--config-file", 19 | default="", 20 | metavar="FILE", 21 | help="path to config file", 22 | type=str, 23 | ) 24 | parser.add_argument( 25 | "opts", 26 | help="Modify config options using the command-line", 27 | default=None, 28 | nargs=argparse.REMAINDER, 29 | ) 30 | args = parser.parse_args() 31 | 32 | if args.config_file: 33 | cfg.merge_from_file(args.config_file) 34 | cfg.merge_from_list(args.opts) 35 | cfg.freeze() 36 | 37 | return cfg, args.config_file 38 | 39 | 40 | def init_logger(cfg, config_file): 41 | output_dir = cfg.output.path 42 | if output_dir: 43 | mkdir(output_dir) 44 | 45 | logger = setup_logger("OS2D", output_dir if cfg.output.save_log_to_file else None) 46 | 47 | if config_file: 48 | logger.info("Loaded configuration file {}".format(config_file)) 49 | with open(config_file, "r") as cf: 50 | config_str = "\n" + cf.read() 51 | logger.info(config_str) 52 | else: 53 | logger.info("Config file was not provided") 54 | 55 | logger.info("Running with config:\n{}".format(cfg)) 56 | 57 | # save config file only when training (to run multiple evaluations in the same folder) 58 | if output_dir and cfg.train.do_training: 59 | output_config_path = os.path.join(output_dir, "config.yml") 60 | logger.info("Saving config into: {}".format(output_config_path)) 61 | # save overloaded model config in the output directory 62 | save_config(cfg, output_config_path) 63 | 64 | 65 | def main(): 66 | cfg, config_file = parse_opts() 67 | init_logger(cfg, config_file) 68 | 69 | # set this to use faster convolutions 70 | if cfg.is_cuda: 71 | assert torch.cuda.is_available(), "Do not have available GPU, but cfg.is_cuda == 1" 72 | torch.backends.cudnn.benchmark = True 73 | 74 | 
# random seed 75 | set_random_seed(cfg.random_seed, cfg.is_cuda) 76 | 77 | # Model 78 | net, box_coder, criterion, img_normalization, optimizer_state = build_os2d_from_config(cfg) 79 | 80 | # Optimizer 81 | parameters = get_trainable_parameters(net) 82 | optimizer = create_optimizer(parameters, cfg.train.optim, optimizer_state) 83 | 84 | # load the dataset 85 | data_path = get_data_path() 86 | dataloader_train, datasets_train_for_eval = build_train_dataloader_from_config(cfg, box_coder, img_normalization, 87 | data_path=data_path) 88 | 89 | dataloaders_eval = build_eval_dataloaders_from_cfg(cfg, box_coder, img_normalization, 90 | datasets_for_eval=datasets_train_for_eval, 91 | data_path=data_path) 92 | 93 | # start training (validation is inside) 94 | trainval_loop(dataloader_train, net, cfg, criterion, optimizer, dataloaders_eval=dataloaders_eval) 95 | 96 | 97 | if __name__ == "__main__": 98 | main() 99 | -------------------------------------------------------------------------------- /models/README.md: -------------------------------------------------------------------------------- 1 | ## Pretrained models 2 | 3 | ### The semantic alignment model trained on [PF-PASCAL dataset](https://www.di.ens.fr/willow/research/proposalflow/) 4 | The semantic alignment model of [Rocco et al.](https://github.com/ignacio-rocco/weakalign) contains the weights of both the feature extractor and the transformation network. The model can be downloaded as follows: 5 | ```bash 6 | cd $OS2D_ROOT/models 7 | wget http://www.di.ens.fr/willow/research/weakalign/trained_models/weakalign_resnet101_affine_tps.pth.tar 8 | ``` 9 | 10 | ### Models trained on [ImageNet](http://www.image-net.org/) for classification in PyTorch 11 | The standard PyTorch models can be downloaded as follows (links from [torchvision](https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py)): 12 | ```bash 13 | cd $OS2D_ROOT/models 14 | wget https://download.pytorch.org/models/resnet50-19c8e357.pth 15 | wget https://download.pytorch.org/models/resnet101-5d3b4d8f.pth 16 | ``` 17 | 18 | ### Models trained on [ImageNet](http://www.image-net.org/) for classification in Caffe2 19 | Some projects have reported that specific weights of ResNets originally trained in Caffe2 work better, e.g., in image retrieval. We use these weights ported to PyTorch by [Radenović F. et al.](https://github.com/filipradenovic/cnnimageretrieval-pytorch). The models can be downloaded as follows (links from [here](https://github.com/filipradenovic/cnnimageretrieval-pytorch/blob/master/cirtorch/networks/imageretrievalnet.py)): 20 | ```bash 21 | cd $OS2D_ROOT/models 22 | wget http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/imagenet/imagenet-caffe-resnet50-features-ac468af.pth 23 | wget http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/imagenet/imagenet-caffe-resnet101-features-10a101d.pth 24 | ``` 25 | These models are in a slightly different format from standard PyTorch, so we need to do some weight surgery to convert them. 26 | ```bash 27 | conda activate os2d 28 | python convert_resnet_caffe2_cirtorch_to_pytorch.py imagenet-caffe-resnet50-features-ac468af.pth 29 | python convert_resnet_caffe2_cirtorch_to_pytorch.py imagenet-caffe-resnet101-features-10a101d.pth 30 | ``` 31 | This should produce the files `imagenet-caffe-resnet50-features-ac468af-converted.pth` and `imagenet-caffe-resnet101-features-10a101d-converted.pth`.
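To sanity-check a converted checkpoint, a minimal sketch along the following lines should work (this snippet is an illustration and not part of the repository; the converted files contain only the convolutional trunk, so the classifier weights are expected to be reported as missing):
```python
import torch
from torchvision.models.resnet import resnet50

# tensors renamed by convert_resnet_caffe2_cirtorch_to_pytorch.py follow the torchvision layout
state_dict = torch.load("imagenet-caffe-resnet50-features-ac468af-converted.pth", map_location="cpu")

model = resnet50()
# strict=False because fc.* (and possibly BatchNorm bookkeeping buffers) are absent from the converted file
missing, unexpected = model.load_state_dict(state_dict, strict=False)
print("missing keys:", missing)        # expect fc.* and similar entries the backbone-only file does not provide
print("unexpected keys:", unexpected)  # should be empty if the renaming matched
```
An empty `unexpected` list is the signal that the prefix renaming in the conversion script matched the torchvision layout; `strict=False` only makes the check tolerant to the keys a backbone-only checkpoint cannot provide.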
32 | 33 | 34 | ### Model with GroupNorm instead of BatchNorm trained on [ImageNet](http://www.image-net.org/) for classification in Caffe2 35 | [Group normalization](https://arxiv.org/abs/1803.08494) has been reported to work better than BatchNorm when the batch size is small. We have tried using ResNet-50 with GroupNorm. Download the model with 32 groups: 36 | ```bash 37 | cd $OS2D_ROOT/models 38 | wget https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl 39 | ``` 40 | Converting the model: 41 | ```bash 42 | conda activate os2d 43 | python convert_resnet_caffe2_groupnorm_to_pytorch.py R-50-GN.pkl --num_layers 50 44 | ``` 45 | This should produce the file `resnet50_caffe2_groupnorm.pth`. 46 | 47 | ### Models trained on [COCO](http://cocodataset.org/) for object detection 48 | We have tried to initialize our models from the weights of detection models trained in the [maskrcnn-benchmark framework](https://github.com/facebookresearch/maskrcnn-benchmark). Download the models and their configs: 49 | ```bash 50 | cd $OS2D_ROOT/models 51 | wget -P maskrcnn-benchmark https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_R_50_FPN_1x.pth 52 | wget -P maskrcnn-benchmark https://raw.githubusercontent.com/facebookresearch/maskrcnn-benchmark/master/configs/e2e_mask_rcnn_R_50_FPN_1x.yaml 53 | wget -P maskrcnn-benchmark https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_R_101_FPN_1x.pth 54 | wget -P maskrcnn-benchmark https://raw.githubusercontent.com/facebookresearch/maskrcnn-benchmark/master/configs/e2e_mask_rcnn_R_101_FPN_1x.yaml 55 | ``` 56 | To convert the models, one needs to have [maskrcnn-benchmark installed](../baselines/detector_retrieval/INSTALL.md) (we used v0.1) or just have their [default config](https://github.com/facebookresearch/maskrcnn-benchmark/tree/master/maskrcnn_benchmark/config). Scripts for conversion: 57 | ```bash 58 | conda activate os2d 59 | python convert_resnet_maskrcnnbenchmark_to_pytorch.py maskrcnn-benchmark/e2e_mask_rcnn_R_50_FPN_1x.pth maskrcnn-benchmark/e2e_mask_rcnn_R_50_FPN_1x.yaml 60 | python convert_resnet_maskrcnnbenchmark_to_pytorch.py maskrcnn-benchmark/e2e_mask_rcnn_R_101_FPN_1x.pth maskrcnn-benchmark/e2e_mask_rcnn_R_101_FPN_1x.yaml 61 | ``` 62 | This should produce the files `maskrcnn-benchmark/e2e_mask_rcnn_R_50_FPN_1x_converted.pth` and `maskrcnn-benchmark/e2e_mask_rcnn_R_101_FPN_1x_converted.pth`. 63 | 64 | ### Model trained on [google-landmarks-2018](https://www.kaggle.com/google/google-landmarks-dataset) for image retrieval 65 | We have tried to initialize from a model trained for large-scale image retrieval in the [cnnimageretrieval-pytorch](https://github.com/filipradenovic/cnnimageretrieval-pytorch) project.
Download the model: 66 | ```bash 67 | cd $OS2D_ROOT/models 68 | wget http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/gl18/gl18-tl-resnet101-gem-w-a4d43db.pth 69 | ``` 70 | Scripts for conversion: 71 | ```bash 72 | conda activate os2d 73 | python convert_resnet_cirtorch_to_pytorch.py gl18-tl-resnet101-gem-w-a4d43db.pth 74 | ``` -------------------------------------------------------------------------------- /models/convert_resnet_caffe2_cirtorch_to_pytorch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from collections import OrderedDict 4 | 5 | import torch 6 | 7 | 8 | def convert_model(model_file): 9 | model_caffe2 = torch.load(model_file) 10 | model_name, ext = os.path.splitext(model_file) 11 | target_path = model_name + "-converted" + ext 12 | 13 | print("Converting", model_file, 14 | "to", target_path) 15 | 16 | # Create the pytorch model 17 | model_pth = OrderedDict() 18 | 19 | # create a map of prefix renamings 20 | prefix_map = OrderedDict() 21 | prefix_map["0."] = "conv1." 22 | prefix_map["1."] = "bn1." 23 | prefix_map["4."] = "layer1." 24 | prefix_map["5."] = "layer2." 25 | prefix_map["6."] = "layer3." 26 | prefix_map["7."] = "layer4." 27 | 28 | # rename layers and add to the pytorch model 29 | num_added_tensors = 0 30 | for k, v in model_caffe2.items(): 31 | # find good prefix 32 | prefix = None 33 | for p in prefix_map.keys(): 34 | if k.startswith(p): 35 | if prefix is None: 36 | prefix = p 37 | else: 38 | print("For layer {0} found two prefixes: {1} or {2}".format(k, prefix, p)) 39 | if prefix is None: 40 | print("For layer {0} did not find any matching prefix!".format(k)) 41 | else: 42 | new_name = prefix_map[prefix] + k[len(prefix):] 43 | # print("Renaming {0} to {1}".format(k, new_name)) 44 | model_pth[new_name] = v 45 | num_added_tensors += 1 46 | print("Converted {0} tensors".format(num_added_tensors)) 47 | 48 | # saving the model 49 | torch.save(model_pth, target_path) 50 | 51 | 52 | if __name__ == "__main__": 53 | parser = argparse.ArgumentParser(description="Converting Caffe2-cirtorch ResNets to pytorch") 54 | parser.add_argument("model", help="Path to the model to convert, the result will be saved to the same folder") 55 | args = parser.parse_args() 56 | 57 | convert_model(args.model) 58 | -------------------------------------------------------------------------------- /models/convert_resnet_caffe2_groupnorm_to_pytorch.py: -------------------------------------------------------------------------------- 1 | """Models that can be converted: 2 | ResNet-50-GN: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl 3 | ResNet-101-GN: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47592356/R-101-GN.pkl 4 | 5 | Code is based on this script: 6 | https://github.com/ruotianluo/pytorch-resnet/blob/master/convert_gn.py 7 | """ 8 | 9 | import os 10 | import argparse 11 | import pickle 12 | import numpy as np 13 | 14 | import torch 15 | import torchvision.models.resnet as resnet 16 | 17 | 18 | def load_caffe2_model(path): 19 | with open(path, 'rb') as fp: 20 | src_blobs = pickle.load(fp, encoding='latin1') 21 | if 'blobs' in src_blobs: 22 | src_blobs = src_blobs['blobs'] 23 | pretrained_state_dict = src_blobs 24 | return pretrained_state_dict 25 | 26 | 27 | def detectron_weight_mapping(self): 28 | mapping_to_detectron = { 29 | 'conv1.weight': 'conv1_w', 30 | 'bn1.weight': 'conv1_gn_s', 31 | 'bn1.bias': 'conv1_gn_b' 32 | } 33 | 34 | for res_id in range(1, 5): 35 |
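        # torchvision stages layer1..layer4 correspond to Detectron blob prefixes res2..res5; the +1 shift is applied inside residual_stage_detectron_mapping below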
stage_name = 'layer%d' % res_id 36 | mapping = residual_stage_detectron_mapping( 37 | getattr(self, stage_name), res_id) 38 | mapping_to_detectron.update(mapping) 39 | 40 | return mapping_to_detectron 41 | 42 | 43 | def residual_stage_detectron_mapping(module_ref, res_id): 44 | """Construct weight mapping relation for a residual stage with `num_blocks` of 45 | residual blocks given the stage id: `res_id` 46 | """ 47 | pth_norm_suffix = '_bn' 48 | norm_suffix = '_gn' 49 | mapping_to_detectron = {} 50 | for blk_id in range(len(module_ref)): 51 | detectron_prefix = 'res%d_%d' % (res_id+1, blk_id) 52 | my_prefix = 'layer%s.%d' % (res_id, blk_id) 53 | 54 | # residual branch (if downsample is not None) 55 | if getattr(module_ref[blk_id], 'downsample'): 56 | dtt_bp = detectron_prefix + '_branch1' # short for "detectron_branch_prefix" 57 | mapping_to_detectron[my_prefix 58 | + '.downsample.0.weight'] = dtt_bp + '_w' 59 | mapping_to_detectron[my_prefix 60 | + '.downsample.1.weight'] = dtt_bp + norm_suffix + '_s' 61 | mapping_to_detectron[my_prefix 62 | + '.downsample.1.bias'] = dtt_bp + norm_suffix + '_b' 63 | 64 | # conv branch 65 | for i, c in zip([1, 2, 3], ['a', 'b', 'c']): 66 | dtt_bp = detectron_prefix + '_branch2' + c 67 | mapping_to_detectron[my_prefix 68 | + '.conv%d.weight' % i] = dtt_bp + '_w' 69 | mapping_to_detectron[my_prefix 70 | + '.' + pth_norm_suffix[1:] + '%d.weight' % i] = dtt_bp + norm_suffix + '_s' 71 | mapping_to_detectron[my_prefix 72 | + '.' + pth_norm_suffix[1:] + '%d.bias' % i] = dtt_bp + norm_suffix + '_b' 73 | 74 | return mapping_to_detectron 75 | 76 | 77 | def convert_model(path, num_layers=50, num_groups=32): 78 | target_path = "resnet{}_caffe2_groupnorm.pth".format(num_layers) 79 | 80 | print("Converting ResNet-{0}-GN from {1} to {2}".format(num_layers, path, target_path)) 81 | 82 | # load Caffe2 model 83 | model_caffe2 = load_caffe2_model(path) 84 | 85 | # create pytorch model 86 | norm_layer = lambda width: torch.nn.GroupNorm(num_groups, width) 87 | model_pth = getattr(resnet, 'resnet{}'.format(num_layers))(norm_layer=norm_layer) 88 | model_pth.eval() 89 | model_pth_state_dict = model_pth.state_dict() 90 | 91 | name_mapping = detectron_weight_mapping(model_pth) 92 | name_mapping.update({ 93 | 'fc.weight': 'pred_w', 94 | 'fc.bias': 'pred_b' 95 | }) 96 | 97 | assert set(model_pth_state_dict.keys()) == set(name_mapping.keys()) 98 | assert set(model_caffe2.keys()) == set(name_mapping.values()) 99 | 100 | num_added_tensors = 0 101 | for k, v in name_mapping.items(): 102 | if isinstance(v, str): # maybe a str, None or True 103 | assert(model_pth_state_dict[k].shape == torch.Tensor(model_caffe2[v]).shape) 104 | model_pth_state_dict[k].copy_(torch.Tensor(model_caffe2[v])) 105 | if k == 'conv1.weight': 106 | tmp = model_pth_state_dict[k] 107 | # BGR to RGB 108 | tmp = tmp[:, [2, 1, 0]].numpy() 109 | # renormalize 110 | tmp *= 255.0 111 | tmp *= np.array([0.229, 0.224, 0.225])[np.newaxis, :, np.newaxis, np.newaxis] 112 | 113 | model_pth_state_dict[k].copy_(torch.from_numpy(tmp)) 114 | num_added_tensors += 1 115 | 116 | torch.save(model_pth_state_dict, target_path) 117 | print("Converted {0} tensors".format(num_added_tensors)) 118 | 119 | 120 | if __name__ == "__main__": 121 | parser = argparse.ArgumentParser(description="Converting Caffe2-GroupNorm ResNets to pytorch") 122 | parser.add_argument("model", help="Path to the model to convert, the result will be save to the same folder") 123 | parser.add_argument("--num_layers", default=50, type=int, help="Number of residual 
blocks in ResNet: X from ResNet-X") 124 | parser.add_argument("--num_groups", default=32, type=int, help="Number of groups in gorup norm") 125 | args = parser.parse_args() 126 | 127 | convert_model(args.model, args.num_layers, args.num_groups) 128 | -------------------------------------------------------------------------------- /models/convert_resnet_cirtorch_to_pytorch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from collections import OrderedDict 4 | 5 | import torch 6 | 7 | 8 | def convert_model(model_file): 9 | model_cirtorch = torch.load(model_file) 10 | model_name, ext = os.path.splitext(model_file) 11 | target_path = model_name + "-converted" + ext 12 | 13 | print("Converting", model_file, 14 | "to", target_path) 15 | 16 | state_dict_cirtorch = model_cirtorch["state_dict"] 17 | 18 | # Create the pytorch state_dict 19 | model_pth = OrderedDict() 20 | 21 | # create a map of prefix renamings 22 | prefix_map = OrderedDict() 23 | prefix_map["features.0."] = "conv1." 24 | prefix_map["features.1."] = "bn1." 25 | prefix_map["features.4."] = "layer1." 26 | prefix_map["features.5."] = "layer2." 27 | prefix_map["features.6."] = "layer3." 28 | prefix_map["features.7."] = "layer4." 29 | 30 | # rename layers and add to the pytorch model 31 | num_added_tensors = 0 32 | for k, v in state_dict_cirtorch.items(): 33 | # find good prefix 34 | prefix = None 35 | for p in prefix_map.keys(): 36 | if k.startswith(p): 37 | if prefix is None: 38 | prefix = p 39 | else: 40 | print("For layer {0} found two prefixes: {1} or {2}".format(k, prefix, p)) 41 | if prefix is None: 42 | print("For layer {0} did not find any matching prefix!".format(k)) 43 | else: 44 | new_name = prefix_map[prefix] + k[len(prefix):] 45 | # print("Renaming {0} to {1}".format(k, new_name)) 46 | model_pth[new_name] = v 47 | num_added_tensors += 1 48 | print("Converted {0} tensors".format(num_added_tensors)) 49 | 50 | # saving the model 51 | torch.save(model_pth, target_path) 52 | 53 | 54 | if __name__ == "__main__": 55 | parser = argparse.ArgumentParser(description="Converting Caffe2-cirtorch ResNets to pytorch") 56 | parser.add_argument("model", help="Path to the model to convert, the result will be save to the same folder") 57 | args = parser.parse_args() 58 | 59 | convert_model(args.model) 60 | -------------------------------------------------------------------------------- /models/convert_resnet_maskrcnnbenchmark_to_pytorch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from collections import OrderedDict 4 | 5 | import torch 6 | from torchvision.models.resnet import resnet50, resnet101 7 | 8 | from maskrcnn_benchmark.config import cfg as maskrcnn_cfg 9 | 10 | 11 | def convert_model(maskrcnn_weight_path, maskrcnn_config_path): 12 | model_name, ext = os.path.splitext(maskrcnn_weight_path) 13 | target_path = model_name + "_converted" + ext 14 | 15 | print("Converting", maskrcnn_weight_path, 16 | "with config", maskrcnn_config_path, 17 | "to", target_path) 18 | 19 | if "R_50" in maskrcnn_weight_path: 20 | target_net = "resnet50" 21 | model = resnet50().state_dict() 22 | elif "R_101" in maskrcnn_weight_path: 23 | target_net = "resnet101" 24 | model = resnet101().state_dict() 25 | else: 26 | raise RuntimeError("Could not recognize architecture from file name {0}".format(maskrcnn_weight_path)) 27 | 28 | maskrcnn_model = torch.load(maskrcnn_weight_path) 29 | maskrcnn_model = 
maskrcnn_model["model"] 30 | maskrcnn_cfg.merge_from_file(maskrcnn_config_path) 31 | 32 | # create a map of prefix renamings 33 | prefix_map = OrderedDict() 34 | prefix_map["conv1."] = "module.backbone.body.stem.conv1." 35 | prefix_map["bn1."] = "module.backbone.body.stem.bn1." 36 | prefix_map["layer1."] = "module.backbone.body.layer1." 37 | prefix_map["layer2."] = "module.backbone.body.layer2." 38 | prefix_map["layer3."] = "module.backbone.body.layer3." 39 | prefix_map["layer4."] = None 40 | prefix_map["fc."] = None 41 | 42 | new_model = OrderedDict() 43 | num_added_tensors = 0 44 | for k, v in model.items(): 45 | found = False 46 | for prefix in prefix_map.keys(): 47 | if k.startswith(prefix): 48 | found = prefix 49 | if not found: 50 | print("Layer {0} was not found in the prefix map".format(k)) 51 | continue 52 | 53 | if prefix_map[found] is None: 54 | # chop off these 55 | continue 56 | 57 | if k.endswith("num_batches_tracked"): 58 | # skip these parameters 59 | continue 60 | 61 | layer_to_init_from = prefix_map[found] + k[len(found):] 62 | if layer_to_init_from not in maskrcnn_model: 63 | print("Layer {0} to init {1} was not found in the maskrcnn model".format(layer_to_init_from, k)) 64 | 65 | assert maskrcnn_model[layer_to_init_from].size() == v.size(), "Size {0} of the source {1} does not match size {2} of target {3}".format(maskrcnn_model[layer_to_init_from].size(), layer_to_init_from, v.size(), k ) 66 | 67 | new_model[k] = maskrcnn_model[layer_to_init_from].cpu() 68 | num_added_tensors += 1 69 | print("Converted {0} tensors".format(num_added_tensors)) 70 | 71 | # adjust the first layer convolution 72 | assert new_model['conv1.weight'].size(1) == 3, "the first layer is of the wrong size: {}".format(new_model['conv1.weight'].size()) 73 | w = new_model['conv1.weight'] 74 | 75 | # deal with different normalization and BGR 76 | # their normalization 77 | # maskrcnn_cfg.INPUT.PIXEL_MEAN - can't deal mean mean easily 78 | # maskrcnn_cfg.INPUT.PIXEL_STD / 255 79 | 80 | for c in range(3): 81 | w[:,c] = w[:,c] * 255.0 / maskrcnn_cfg.INPUT.PIXEL_STD[c] 82 | 83 | # deal with BGR 84 | if maskrcnn_cfg.INPUT.TO_BGR255: 85 | # remap the first layer from BGR to RGB 86 | w = torch.stack([w[:,2], w[:,1], w[:,0]], 1) 87 | 88 | # pytorch normalization: 89 | normalization = {} 90 | normalization['mean'] = (0.485, 0.456, 0.406) 91 | normalization['std'] = (0.229, 0.224, 0.225) 92 | 93 | for c in range(3): 94 | w[:,c] = w[:,c] * normalization['std'][c] 95 | 96 | new_model['conv1.weight'] = w 97 | 98 | print("saving model to {0}".format(target_path)) 99 | torch.save(new_model, target_path) 100 | 101 | 102 | if __name__ == "__main__": 103 | parser = argparse.ArgumentParser(description="Converting maskrcnn-benchmark ResNets to pytorch") 104 | parser.add_argument("model", help="Path to the model to convert, the result will be save to the same folder") 105 | parser.add_argument("config", help="Path to the config file corresponding to the model") 106 | args = parser.parse_args() 107 | 108 | convert_model(args.model, args.config) 109 | -------------------------------------------------------------------------------- /os2d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aosokin/os2d/817ffb15a8cbc353e4f94debe419902d4dd83a22/os2d/__init__.py -------------------------------------------------------------------------------- /os2d/data/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aosokin/os2d/817ffb15a8cbc353e4f94debe419902d4dd83a22/os2d/data/__init__.py -------------------------------------------------------------------------------- /os2d/engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aosokin/os2d/817ffb15a8cbc353e4f94debe419902d4dd83a22/os2d/engine/__init__.py -------------------------------------------------------------------------------- /os2d/engine/augmentation.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from os2d.structures.transforms import random_distort, crop 4 | 5 | 6 | class DataAugmentation(): 7 | """ 8 | Class stores the parameters of all the data augmentations 9 | """ 10 | def __init__(self, random_flip_batches, 11 | random_crop_size, 12 | random_crop_scale, 13 | jitter_aspect_ratio, 14 | scale_jitter, 15 | random_color_distortion, 16 | random_crop_label_images, 17 | min_box_coverage): 18 | # random crop size is (width, height) 19 | self.batch_random_hflip = random_flip_batches 20 | self.batch_random_vflip = random_flip_batches 21 | 22 | # color distortions 23 | self.do_random_color = random_color_distortion 24 | self.brightness_delta = 32/255. 25 | self.contrast_delta = 0.5 26 | self.saturation_delta = 0.5 27 | self.hue_delta = 0.1 28 | 29 | self.scale_jitter = scale_jitter 30 | self.jitter_aspect_ratio = jitter_aspect_ratio 31 | 32 | # random crop parameters 33 | self.do_random_crop = True if random_crop_size is not None else False 34 | if self.do_random_crop: 35 | self.random_crop_size = random_crop_size 36 | self.random_crop_scale = random_crop_scale 37 | self.random_interpolation = True 38 | self.coverage_keep_threshold = 0.7 39 | self.coverage_remove_threshold = 0.3 40 | self.max_trial = 100 41 | self.min_box_coverage = min_box_coverage # need this to help random crops contain at least one object 42 | 43 | # random crops of label images 44 | self.do_random_crop_label_images = random_crop_label_images 45 | 46 | def random_distort(self, img): 47 | if self.do_random_color: 48 | img = random_distort(img, 49 | brightness_delta=self.brightness_delta, 50 | contrast_delta=self.contrast_delta, 51 | saturation_delta=self.saturation_delta, 52 | hue_delta=self.hue_delta) 53 | return img 54 | 55 | def random_crop(self, img, boxes=None, transform_list=None): 56 | if not self.do_random_crop: 57 | raise(RuntimeError("Random crop data augmentation is not initialized")) 58 | return self.crop_image(img, crop_position=None, 59 | boxes=boxes, transform_list=transform_list, 60 | random_crop_size=self.random_crop_size) 61 | 62 | def crop_image(self, img, crop_position, boxes=None, transform_list=None, random_crop_size=None): 63 | img, boxes, mask_cutoff_boxes, mask_difficult_boxes = \ 64 | crop(img, 65 | crop_position=crop_position, 66 | random_crop_size=random_crop_size, 67 | random_crop_scale=self.random_crop_scale, 68 | crop_size=self.random_crop_size, 69 | scale_jitter=self.scale_jitter, 70 | jitter_aspect_ratio=self.jitter_aspect_ratio, 71 | coverage_keep_threshold=self.coverage_keep_threshold, 72 | coverage_remove_threshold=self.coverage_remove_threshold, 73 | max_trial=self.max_trial, 74 | min_box_coverage=self.min_box_coverage, 75 | boxes=boxes, transform_list=transform_list) 76 | return img, boxes, mask_cutoff_boxes, mask_difficult_boxes 77 | 78 | def random_crop_label_image(self, img): 79 | if self.do_random_crop_label_images: 80 | ar = img.size[0] / img.size[1] 81 | 
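            # jitter the aspect ratio of the label-image crop: sample a ratio between ar * jitter_aspect_ratio and ar / jitter_aspect_ratio, then size the crop to still fit inside the image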
new_ar = random.uniform(ar * self.jitter_aspect_ratio, ar / self.jitter_aspect_ratio) 82 | w = int( min(img.size[0], img.size[1] * new_ar) ) 83 | h = int( min(img.size[0] / new_ar, img.size[1]) ) 84 | random_crop_size = (w, h) 85 | img = self.crop_image(img, None, random_crop_size=random_crop_size)[0] 86 | return img 87 | -------------------------------------------------------------------------------- /os2d/engine/optimization.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from statistics import median 3 | 4 | from torch import optim 5 | 6 | from os2d.utils import ceildiv 7 | 8 | 9 | def create_optimizer(parameters, cfg, optimizer_state=None): 10 | lr = cfg.lr 11 | optim_method = cfg.optim_method.casefold() 12 | if optim_method == "sgd": 13 | optimizer = optim.SGD(parameters, lr=lr, weight_decay=cfg.weight_decay, momentum=cfg.sgd_momentum) 14 | elif optim_method == "adagrad": 15 | optimizer = optim.Adagrad(parameters, lr=lr, weight_decay=cfg.weight_decay) 16 | elif optim_method == "adadelta": 17 | optimizer = optim.Adadelta(parameters, lr=lr, weight_decay=cfg.weight_decay) 18 | elif optim_method == "adam": 19 | optimizer = optim.Adam(parameters, lr=lr, weight_decay=cfg.weight_decay) 20 | elif optim_method == "adamax": 21 | optimizer = optim.Adamax(parameters, lr=lr, weight_decay=cfg.weight_decay) 22 | elif optim_method == "asgd": 23 | optimizer = optim.ASGD(parameters, lr=lr, t0=5000, weight_decay=cfg.weight_decay) 24 | elif optim_method == "rmsprop": 25 | optimizer = optim.RMSprop(parameters, lr=lr, weight_decay=cfg.weight_decay) 26 | elif optim_method == "rprop": 27 | optimizer = optim.Rprop(parameters, lr=lr) 28 | else: 29 | raise RuntimeError("Invalid optim method: " + cfg.optim_method) 30 | 31 | if optimizer_state is not None: 32 | optimizer.load_state_dict(optimizer_state) 33 | set_learning_rate(optimizer, cfg.lr) 34 | 35 | return optimizer 36 | 37 | 38 | def set_learning_rate(optimizer, learning_rate): 39 | logger = logging.getLogger("OS2D") 40 | 41 | for p in optimizer.param_groups: 42 | if "lr" in p: 43 | if p["lr"] != learning_rate: 44 | logger.info("Changing learning rate from {} to {}".format(p["lr"], learning_rate)) 45 | p["lr"] = learning_rate 46 | 47 | 48 | def get_learning_rate(optimizer): 49 | for p in optimizer.param_groups: 50 | if "lr" in p: 51 | return p["lr"] 52 | 53 | def setup_lr(optimizer, full_log, cfg, eval_iter): 54 | # annealing learning rate 55 | if cfg.type.lower() == "none": 56 | lr_scheduler = None 57 | elif cfg.type.lower() == "MultiStepLR".lower(): 58 | lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, 59 | milestones=[ceildiv(m, eval_iter) for m in cfg.milestones], 60 | gamma=cfg.gamma) 61 | elif cfg.type.lower() == "ReduceLROnPlateau".lower(): 62 | lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau( 63 | optimizer, verbose=True, factor=cfg.reduce_factor, min_lr=cfg.min_value, 64 | threshold=cfg.quantity_epsilon, threshold_mode="rel", mode=cfg.quantity_mode, 65 | patience=ceildiv(cfg.patience, eval_iter), cooldown=ceildiv(cfg.cooldown, eval_iter)) 66 | 67 | # create a function and a closure 68 | averaging_buffer_max_length = ceildiv(cfg.quantity_smoothness, eval_iter) 69 | if averaging_buffer_max_length <= 1: 70 | averaging_buffer_max_length = 1 71 | averaging_buffer = [] 72 | else: 73 | raise RuntimeError(f"Unknown annel_lr type: {cfg.type}") 74 | 75 | 76 | def anneal_lr_func(i_iter, anneal_now=True): 77 | if cfg.type.lower() == "none": 78 | pass 79 | elif cfg.type.lower() == 
"MultiStepLR".lower(): 80 | lr_scheduler.step() 81 | elif cfg.type.lower() == "ReduceLROnPlateau".lower(): 82 | value_to_monitor = full_log[cfg.quantity_to_monitor][-1] 83 | averaging_buffer.append(value_to_monitor) 84 | if len(averaging_buffer) > averaging_buffer_max_length: 85 | averaging_buffer.pop(0) 86 | averaged_value = median(averaging_buffer) 87 | counter = len(full_log[cfg.quantity_to_monitor]) 88 | if anneal_now: 89 | lr_scheduler.step(averaged_value) 90 | else: 91 | raise RuntimeError(f"Unknown annel_lr type: {cfg.type}") 92 | return get_learning_rate(optimizer) 93 | 94 | return lr_scheduler, anneal_lr_func 95 | -------------------------------------------------------------------------------- /os2d/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aosokin/os2d/817ffb15a8cbc353e4f94debe419902d4dd83a22/os2d/modeling/__init__.py -------------------------------------------------------------------------------- /os2d/modeling/feature_extractor.py: -------------------------------------------------------------------------------- 1 | from itertools import chain 2 | 3 | import torch 4 | 5 | from torchvision.models.resnet import ResNet, resnet50, resnet101 6 | 7 | from os2d.structures.feature_map import FeatureMapSize 8 | 9 | 10 | GROUPNORM_NUMGROUPS = 32 11 | 12 | 13 | def build_feature_extractor(backbone_arch, use_group_norm=False): 14 | if backbone_arch.lower() == "resnet50": 15 | net = resnet50_c4(use_group_norm=use_group_norm) 16 | elif backbone_arch.lower() == "resnet101": 17 | net = resnet101_c4(use_group_norm=use_group_norm) 18 | else: 19 | raise(RuntimeError("Unknown backbone arch: {0}".format(backbone_arch))) 20 | return net 21 | 22 | 23 | class ResNetFeatureExtractor(ResNet): 24 | """ 25 | This class implements the feature extractor based on the ResNet backbone 26 | """ 27 | def __init__(self, resnet_full, level, 28 | feature_map_stride, feature_map_receptive_field): 29 | """ 30 | Args: 31 | resnet_full - a resnet model: an instance of the ResNet class from torchvision 32 | https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py 33 | level (int) - level at which to create the feature extractor, can be 1, 2, 3, 4, 5 34 | feature_map_stride (FeatureMapSize) - the stride of the feature map, should be set manually 35 | feature_map_receptive_field (FeatureMapSize) - the effective receptive field of the feature map, should be set manually 36 | """ 37 | self.__dict__ = resnet_full.__dict__.copy() 38 | self.feature_map_receptive_field = feature_map_receptive_field 39 | self.feature_map_stride = feature_map_stride 40 | self._feature_level = level 41 | 42 | # remove unused layers to free memory 43 | delattr(self, "fc") 44 | delattr(self, "avgpool") 45 | 46 | assert level in [1, 2, 3, 4, 5], "Feature level should be one of 1, 2, 3, 4, 5" 47 | # level == 5 - use all blocks 48 | # note that for level == 4, self.layer4 is chopped off 49 | # inconsistency in numbers comes from the inconsistency between layer names in the ResNet paper and torchvision ResNet code 50 | self.resnet_blocks = [self.layer1, self.layer2, self.layer3, self.layer4] 51 | layer_names = ["layer1", "layer2", "layer3", "layer4"] 52 | 53 | self.resnet_blocks = self.resnet_blocks[:level-1] 54 | for name in layer_names[level-1:]: 55 | delattr(self, name) 56 | 57 | def forward(self, x): 58 | x = self.conv1(x) 59 | x = self.bn1(x) 60 | x = self.relu(x) 61 | x = self.maxpool(x) 62 | 63 | for layer in self.resnet_blocks: 64 | x 
= layer(x) 65 | return x 66 | 67 | def freeze_bn(self): 68 | # Freeze BatchNorm layers 69 | for layer in self.modules(): 70 | if isinstance(layer, torch.nn.BatchNorm2d): 71 | layer.eval() 72 | 73 | def freeze_blocks(self, num_blocks=0): 74 | # join conv1 and bn1 into one block 75 | layer0 = [torch.nn.ModuleList([self.conv1, self.bn1])] 76 | 77 | num_remaining_blocks = num_blocks 78 | blocks = chain(layer0, chain.from_iterable(self.resnet_blocks)) 79 | for b in blocks: 80 | if num_remaining_blocks > 0: 81 | self.freeze_layer_parameters(b) 82 | num_remaining_blocks -= 1 83 | 84 | @staticmethod 85 | def freeze_layer_parameters(layer): 86 | for p in layer.parameters(): 87 | p.requires_grad = False 88 | 89 | def get_num_blocks_in_feature_extractor(self): 90 | # one block - self.conv1 + self.bn1 91 | # the following blocks: self.layer1, self.layer2, self.layer3, until cut off 92 | num_blocks = 1 + sum(len(b) for b in self.resnet_blocks) 93 | return num_blocks 94 | 95 | 96 | def get_norm_layer(use_group_norm): 97 | if use_group_norm: 98 | return lambda width: torch.nn.GroupNorm(GROUPNORM_NUMGROUPS, width) 99 | else: 100 | return torch.nn.BatchNorm2d 101 | 102 | 103 | def _resnet_fe(resnet, level, use_group_norm, feature_map_stride, feature_map_receptive_field): 104 | return ResNetFeatureExtractor(resnet(norm_layer=get_norm_layer(use_group_norm)), level, 105 | feature_map_stride, feature_map_receptive_field) 106 | 107 | 108 | def resnet50_c4(use_group_norm=False): 109 | """ 110 | Constructs the ResNet50 C4 feature extractor (R-50-C4 in maskrcnn-benchmark) 111 | Args: 112 | use_group_norm (bool) - if True use torch.nn.GroupNorm with GROUPNORM_NUMGROUPS groups as normalization layers, 113 | otherwise use torch.nn.BatchNorm2d 114 | """ 115 | return _resnet_fe(resnet50, 4, use_group_norm, 116 | feature_map_stride=FeatureMapSize(h=16, w=16), 117 | feature_map_receptive_field=FeatureMapSize(h=16, w=16)) 118 | 119 | 120 | def resnet101_c4(use_group_norm=False): 121 | """ 122 | Constructs the ResNet101 C4 feature extractor 123 | Args: 124 | use_group_norm (bool) - if True use torch.nn.GroupNorm with GROUPNORM_NUMGROUPS groups as normalization layers, 125 | otherwise use torch.nn.BatchNorm2d 126 | """ 127 | return _resnet_fe(resnet101, 4, use_group_norm, 128 | feature_map_stride=FeatureMapSize(h=16, w=16), 129 | feature_map_receptive_field=FeatureMapSize(h=16, w=16)) 130 | 131 | -------------------------------------------------------------------------------- /os2d/structures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aosokin/os2d/817ffb15a8cbc353e4f94debe419902d4dd83a22/os2d/structures/__init__.py -------------------------------------------------------------------------------- /os2d/structures/feature_map.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | from torch import Tensor 3 | 4 | 5 | class FeatureMapSize(object): 6 | """ 7 | This class represents the spatial dimensions of feature maps and images. 8 | This class is used to avoid W,H vs H,W format confusion. 9 | This class is immutable. 
10 | 11 | For PIL.Image.Image, FeatureMapSize is w, h 12 | For torch.tensor, FeatureMapSize is size(-1), size(-2) 13 | """ 14 | w = None 15 | h = None 16 | def __init__(self, img=None, w=None, h=None): 17 | if w is not None and h is not None: 18 | pass 19 | elif isinstance(img, Image.Image): 20 | w, h = img.size 21 | elif isinstance(img, Tensor): 22 | w = img.size(-1) 23 | h = img.size(-2) 24 | else: 25 | raise RuntimeError("Cannot initialize FeatureMapSize") 26 | super(FeatureMapSize, self).__setattr__("w", w) 27 | super(FeatureMapSize, self).__setattr__("h", h) 28 | 29 | def __setattr__(self, *args): 30 | raise AttributeError("Attributes of FeatureMapSize cannot be changed") 31 | 32 | def __delattr__(self, *args): 33 | raise AttributeError("Attributes of FeatureMapSize cannot be deleted") 34 | 35 | def __repr__(self): 36 | return "{c}(w={w}, h={h})".format(c=FeatureMapSize.__name__, 37 | w=self.w, h=self.h) 38 | 39 | def __eq__(self, othr): 40 | return (isinstance(othr, type(self)) 41 | and (self.w, self.h) == (othr.w, othr.h)) 42 | 43 | def __hash__(self): 44 | return hash((self.w, self.h)) 45 | -------------------------------------------------------------------------------- /os2d/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import * 2 | from .logger import * 3 | -------------------------------------------------------------------------------- /os2d/utils/plot_visdom.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import pickle 4 | import glob 5 | 6 | from visdom import Visdom 7 | import numpy as np 8 | 9 | 10 | parser = argparse.ArgumentParser() 11 | 12 | parser.add_argument("--log_path", default=None, type=str, help="Folder to search for logs") 13 | parser.add_argument("--log_names", default=[], nargs="+", type=str, help="Plot logs from these folder") 14 | 15 | opt = parser.parse_args() 16 | 17 | if opt.log_path is not None and not os.path.isdir(opt.log_path): 18 | raise RuntimeError("Log path %s does not exist" % opt.log_path) 19 | 20 | viz = Visdom() 21 | viz_plots = {} 22 | x_name_all = ["iter", "time"] 23 | 24 | 25 | def vizualize_log(log_path): 26 | # read the log_file 27 | log_file = os.path.join(log_path, "train_log.pkl") 28 | if not os.path.isfile(log_file): 29 | print("WARNING: Could not find file %s" % log_file) 30 | return 31 | logs = pickle.load(open(log_file, "rb")) 32 | 33 | for x_name in x_name_all: 34 | if x_name in logs: 35 | for y_name, y_data in logs.items(): 36 | if not y_name in x_name_all: 37 | x_data = logs[x_name] 38 | plot_key = (y_name, x_name) 39 | 40 | plot_opts = dict( 41 | markers=False, 42 | xlabel=x_name, 43 | ylabel=y_name, 44 | title="{0} vs. {1}".format(y_name, x_name), 45 | showlegend=True 46 | ) 47 | X = np.array(x_data).flatten() 48 | Y = np.array(y_data).flatten() 49 | 50 | # sync lengths 51 | length = min(X.size, Y.size) 52 | X = X[:length] 53 | Y = Y[:length] 54 | 55 | mask_non_nan = np.logical_not(np.isnan(Y)) 56 | X = X[mask_non_nan] 57 | Y = Y[mask_non_nan] 58 | 59 | viz_plots[plot_key] = "{0} vs. 
{1}".format(y_name, x_name) 60 | line_name = os.path.basename(os.path.normpath(log_path)) 61 | viz.line(X=None, Y=None, win=viz_plots[plot_key], name=line_name, update="remove") 62 | viz.line( 63 | X=X, 64 | Y=Y, 65 | win=viz_plots[plot_key], 66 | name=line_name, 67 | opts=plot_opts, 68 | update="append" 69 | ) 70 | 71 | log_path = opt.log_path if opt.log_path else "" 72 | if len(opt.log_names) == 0: 73 | print("--log_names was not specified, scanning folder %s" % opt.log_path) 74 | log_names = sorted(glob.glob( os.path.join(log_path, "*"))) 75 | else: 76 | log_names = [os.path.join(log_path, name) for name in opt.log_names] 77 | 78 | 79 | n = len(log_names) 80 | for i_log, path in enumerate(log_names): 81 | try: 82 | vizualize_log(path) 83 | print("Plot %d of %d: %s" % (i_log, n, path)) 84 | except (KeyboardInterrupt, SystemExit): 85 | raise 86 | except BaseException as e: 87 | print("Failed to plot from %s. Error: %s" % (path, str(e))) 88 | -------------------------------------------------------------------------------- /os2d/utils/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import random 4 | import errno 5 | from PIL import Image 6 | from io import BytesIO 7 | import base64 8 | import numpy as np 9 | 10 | import torch 11 | 12 | 13 | def get_data_path(): 14 | data_path = os.path.join(os.path.dirname(__file__), "..", "..", "data") 15 | data_path = os.path.expanduser(os.path.abspath(data_path)) 16 | return data_path 17 | 18 | 19 | def get_trainable_parameters(model): 20 | return filter(lambda p: p.requires_grad, model.parameters()) 21 | 22 | 23 | def count_model_parameters(net): 24 | num_params = 0 25 | num_param_groups = 0 26 | for p in get_trainable_parameters(net): 27 | num_param_groups += 1 28 | num_params += p.numel() 29 | return num_params, num_param_groups 30 | 31 | 32 | def get_image_size_after_resize_preserving_aspect_ratio(h, w, target_size): 33 | aspect_ratio_h_to_w = float(h) / w 34 | w = int(target_size / math.sqrt(aspect_ratio_h_to_w)) 35 | h = int(target_size * math.sqrt(aspect_ratio_h_to_w)) 36 | h, w = (1 if s <= 0 else s for s in (h, w)) # filter out crazy one pixel images 37 | return h, w 38 | 39 | 40 | def masked_select_or_fill_constant(a, mask, constant=0): 41 | constant_tensor = torch.tensor([constant], dtype=a.dtype, device=a.device) 42 | return torch.where(mask, a, constant_tensor) 43 | 44 | 45 | def set_random_seed(random_seed, cuda=False): 46 | random.seed(random_seed) 47 | np.random.seed(random_seed) 48 | torch.manual_seed(random_seed) 49 | if cuda: 50 | torch.cuda.manual_seed_all(random_seed) 51 | 52 | 53 | def mkdir(path): 54 | """From https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/utils/miscellaneous.py 55 | """ 56 | try: 57 | os.makedirs(path) 58 | except OSError as e: 59 | if e.errno != errno.EEXIST: 60 | raise 61 | 62 | 63 | def read_image(image_path): 64 | with open(image_path, "rb") as f: 65 | img = Image.open(f) 66 | if img.mode != "RGB": 67 | img = img.convert("RGB") 68 | img.load() 69 | return img 70 | 71 | 72 | def ceildiv(a, b): 73 | return -(-a // b) 74 | 75 | def decode_base64_to_image(base64_str): 76 | return Image.open(BytesIO(base64.b64decode(base64_str))) 77 | -------------------------------------------------------------------------------- /os2d/utils/wget_gdrive.sh: -------------------------------------------------------------------------------- 1 | # Command from here: 
https://medium.com/@acpanjan/download-google-drive-files-using-wget-3c2c025a8b99 2 | 3 | TARGET_PATH=$1 4 | FILEID=$2 5 | 6 | wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id='${FILEID} -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=${FILEID}" -O ${TARGET_PATH} && rm -rf /tmp/cookies.txt 7 | --------------------------------------------------------------------------------
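For reference, the script takes two positional arguments, the destination path and the Google Drive file id, i.e. it is invoked as `bash os2d/utils/wget_gdrive.sh <TARGET_PATH> <GDRIVE_FILE_ID>` (both values are placeholders here). The inner wget fetches the download-warning page while saving the session cookies, sed scrapes the confirmation token out of that page, and the outer wget repeats the request with the token and cookies attached, which is what lets large files bypass Google Drive's virus-scan prompt.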