├── .flake8 ├── .gitignore ├── wheat ├── config.py ├── eval.py ├── train.py ├── metrics.py ├── dataset.py └── model.py ├── LICENSE ├── README.md ├── scripts └── resize_images.py └── notebooks └── gwd-resize-images-bboxes.ipynb /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | max-complexity = 10 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | cache 3 | efficientdet-pytorch 4 | export 5 | code-dataset 6 | -------------------------------------------------------------------------------- /wheat/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | 4 | @dataclass 5 | class WheatConfig: 6 | base_dir: str 7 | epochs: int 8 | arch: str 9 | image_size: int 10 | batch_size: int 11 | base_lr: float = 2e-4 12 | num_workers: int = 4 13 | seed: int = 999 14 | amp_backend: str = "native" 15 | amp_level: str = "O2" # only effective if amp_backend == apex 16 | precision: int = 32 17 | gradient_clip_val: float = 10 18 | grad_accu: int = 1 19 | cutout: bool = False 20 | mixup_alpha: float = -1 21 | no_op_ratio: float = 0.2 22 | no_op_warmup_steps: int = 100 23 | mosaic_p: float = -1 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 CeShine Lee 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyTorch EfficientDet Solution for Global Wheat Detection Challenge 2 | 3 | 1. [Training notebook (on Kaggle)](https://www.kaggle.com/ceshine/wheat-detection-training-efficientdet-public?scriptVersionId=67789208&select=wheatdet.pth) 4 | 2. Inference notebook: [Single model](https://www.kaggle.com/ceshine/effdet-wheat-head-detection-inference-public?scriptVersionId=67809685); [Ensemble](https://www.kaggle.com/ceshine/effdet-wheat-head-detection-inference-public/output?scriptVersionId=67812782) 5 | 6 | See [wheat/config.py](wheat/config.py) for hyper-parameters and system configurations. 
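Both CLI entry points (`wheat/train.py` and `wheat/eval.py`) simply build a `WheatConfig` from their command-line arguments, wrap it with `OmegaConf.structured`, and pass it on. Below is a minimal sketch of driving a training run programmatically instead; the values are placeholders, it keeps the full-precision/native-AMP defaults rather than the Apex O2 setup the CLI switches on, and it assumes a CUDA GPU since the trainer and the prefetching loader are GPU-only.

```python
from omegaconf import OmegaConf

from wheat.config import WheatConfig
from wheat.train import train

# Fields not set here fall back to the dataclass defaults in wheat/config.py.
config = WheatConfig(
    base_dir="data/512",          # directory produced by scripts/resize_images.py
    epochs=10,
    arch="tf_efficientdet_d3",
    image_size=512,
    batch_size=8,
)

# Same conversion wheat/train.py performs before calling train().
train(OmegaConf.structured(config), fold=0)
```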
7 | 8 | The best mAP score I'm able to get is 0.6167 (Private) / 0.7084 (Public) with a D4 model trained on 768x768 resolution (using a single P100 GPU). 9 | 10 | ## Requirements 11 | 12 | 1. torch>=1.7.0 13 | 1. pytorch-lightning>=1.3.6 14 | 1. pytorch-lightning-spells==0.0.3 15 | 1. efficientdet-pytorch==0.2.4 16 | 17 | Note: You'll need to use [my fork of efficientdet-pytorch](https://github.com/ceshine/efficientdet-pytorch) to use the O2 level of Apex AMP. 18 | 19 | ## Instructions 20 | 21 | Resizing images: 22 | 23 | ```bash 24 | python scripts/resize_images.py 512 --root data/ 25 | ``` 26 | 27 | Training (pass `--help` for more information): 28 | 29 | ```bash 30 | python -m wheat.train data/512 --epochs 10 --grad-accu 4 --batch-size 8 --arch tf_efficientdet_d3 --fold 0 --mixup 24 --mosaic-p 0.5 31 | ``` 32 | 33 | Evaluation (pass `--help` for more information): 34 | 35 | ```bash 36 | python -m wheat.eval data/512 export/tf_efficientdet_d3-mosaic-mixup-fold0.pth --batch-size 8 --arch tf_efficientdet_d3 --fold 0 37 | ``` 38 | -------------------------------------------------------------------------------- /wheat/eval.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | from datetime import datetime 4 | from pathlib import Path 5 | 6 | import torch 7 | import typer 8 | import numpy as np 9 | import pandas as pd 10 | from omegaconf import OmegaConf, DictConfig 11 | from pytorch_lightning import Trainer, seed_everything 12 | from pytorch_lightning.loggers import TensorBoardLogger 13 | from pytorch_lightning_spells.loggers import ScreenLogger 14 | from pytorch_lightning_spells.callbacks import ( 15 | Callback, 16 | RandomAugmentationChoiceCallback, 17 | ) 18 | from pytorch_lightning.callbacks import ( 19 | LearningRateMonitor, 20 | ModelCheckpoint, 21 | ) 22 | 23 | from .config import WheatConfig 24 | from .model import WheatModel 25 | 26 | from sklearn.model_selection import StratifiedKFold 27 | 28 | 29 | def eval(model_path: str, config: DictConfig, fold: int = 0): 30 | seed_everything(int(config.seed)) 31 | base_path = Path(config.base_dir) 32 | df_train = pd.read_csv(str(base_path / "train.csv")) 33 | bboxes = np.stack(df_train["bbox"].apply(lambda x: np.fromstring(x[1:-1], sep=","))) 34 | 35 | for i, col in enumerate(["x", "y", "w", "h"]): 36 | df_train[col] = bboxes[:, i] 37 | df_train["area"] = df_train["w"] * df_train["h"] 38 | df_train["x2"] = df_train["x"] + df_train["w"] 39 | df_train["y2"] = df_train["y"] + df_train["h"] 40 | 41 | skf = StratifiedKFold(n_splits=5, random_state=88, shuffle=True) 42 | # one row for one image 43 | df_trunc = df_train[["image_id", "source"]].drop_duplicates().reset_index(drop=True) 44 | for fold_idx, (_, valid_index) in enumerate( 45 | skf.split(df_trunc, y=df_trunc["source"]) 46 | ): 47 | df_trunc.loc[valid_index, "fold"] = fold_idx 48 | # Add fold column back 49 | df_train = df_train.merge(df_trunc, on=["image_id", "source"]) 50 | 51 | # create model for one fold 52 | model = WheatModel( 53 | config, 54 | df_train, 55 | fold=fold, 56 | half=( 57 | config.precision == 16 58 | and config.amp_backend == "apex" 59 | and config.amp_level == "O2" 60 | ), 61 | ) 62 | model.model.load_state_dict(torch.load(model_path)["states"]) 63 | trainer = Trainer( 64 | amp_backend=config.amp_backend, 65 | amp_level=config.amp_level, 66 | precision=config.precision, 67 | gpus=1, 68 | ) 69 | trainer.validate(model) 70 | 71 | 72 | def main( 73 | base_dir: str, 74 | model_path: str, 75 | grad_accu: int = 
1, 76 | arch: str = "tf_efficientdet_d3", 77 | batch_size: int = 8, 78 | fold: int = 0, 79 | mixup: float = -1, 80 | cutout: bool = False, 81 | mosaic_p: float = -1, 82 | ): 83 | config = WheatConfig( 84 | base_dir=base_dir, 85 | epochs=0, 86 | image_size=int(Path(base_dir).name), 87 | arch=arch, 88 | grad_accu=grad_accu, 89 | batch_size=batch_size, 90 | precision=16, 91 | amp_backend="apex", 92 | amp_level="O2", 93 | cutout=cutout, 94 | mixup_alpha=mixup, 95 | mosaic_p=mosaic_p, 96 | ) 97 | assert not (cutout is True and mixup > 0), "Can only enable one of MixUp and CutOut" 98 | omega_conf = OmegaConf.structured(config) 99 | eval(model_path, omega_conf, fold=fold) 100 | 101 | 102 | if __name__ == "__main__": 103 | typer.run(main) 104 | -------------------------------------------------------------------------------- /scripts/resize_images.py: -------------------------------------------------------------------------------- 1 | """Batch resizing the training data 2 | 3 | Source: https://www.kaggle.com/phunghieu/gwd-resize-images-bboxes 4 | """ 5 | 6 | import json 7 | from pathlib import Path 8 | 9 | import typer 10 | import numpy as np 11 | import pandas as pd 12 | import cv2 13 | import albumentations as A 14 | from tqdm import tqdm 15 | 16 | 17 | def load_dataframe(csv_path: Path, image_dir: Path) -> pd.DataFrame: 18 | df = pd.read_csv(csv_path) 19 | 20 | # Merge all bboxes of each corresponding image 21 | # Format: [[x1 y1 w1 h1], [x2 y2 w2 h2], [x3 y3 w3 h3], ...] 22 | df.bbox = df.bbox.apply(lambda x: " ".join(np.array(json.loads(x), dtype=str))) 23 | df.bbox = df.groupby(["image_id"]).bbox.transform(lambda x: "|".join(x)) 24 | df.drop_duplicates(inplace=True, ignore_index=True) 25 | df.bbox = df.bbox.apply( 26 | lambda x: np.array( 27 | [item.split(" ") for item in x.split("|")], dtype=np.float32 28 | ).tolist() 29 | ) 30 | 31 | # Create a path to each image 32 | df["image_path"] = df.image_id.apply(lambda x: str(image_dir / (x + ".jpg"))) 33 | 34 | return df 35 | 36 | 37 | def load_image(image_path: str) -> np.ndarray: 38 | image = cv2.imread(image_path, cv2.IMREAD_COLOR) 39 | return image 40 | 41 | 42 | def fix_out_of_range(bbox: list, max_size: int = 1024) -> list: 43 | bbox[2] = min(bbox[2], max_size - bbox[0]) 44 | bbox[3] = min(bbox[3], max_size - bbox[1]) 45 | return bbox 46 | 47 | 48 | def main(image_size: int, root: str = "data/"): 49 | root_path = Path(root) 50 | train_dir = root_path / "train" 51 | target_dir = root_path / f"{image_size}" 52 | (target_dir / "train").mkdir(parents=True) 53 | 54 | df = load_dataframe(root_path / "train.csv", train_dir) 55 | 56 | transform = A.Compose( 57 | [ 58 | A.Resize(height=image_size, width=image_size, interpolation=cv2.INTER_AREA), 59 | ], 60 | p=1.0, 61 | bbox_params=A.BboxParams( 62 | format="coco", min_area=0, min_visibility=0, label_fields=["labels"] 63 | ), 64 | ) 65 | 66 | list_of_image_ids = [] 67 | list_of_bboxes = [] 68 | list_of_sources = [] 69 | 70 | for _, row in tqdm(df.iterrows(), total=df.shape[0]): 71 | image = load_image(row.image_path) 72 | bboxes = row.bbox 73 | 74 | # Fix "out-of-range" bboxes 75 | bboxes = [fix_out_of_range(bbox) for bbox in bboxes] 76 | 77 | result = transform(image=image, bboxes=bboxes, labels=np.ones(len(bboxes))) 78 | new_image = result["image"] 79 | new_bboxes = np.array(result["bboxes"]).tolist() 80 | 81 | # Save new image 82 | cv2.imwrite(str(target_dir / "train" / (row.image_id + ".jpg")), new_image) 83 | 84 | for new_bbox in new_bboxes: 85 | list_of_image_ids.append(row.image_id) 86 | 
list_of_bboxes.append(new_bbox) 87 | list_of_sources.append(row.source) 88 | 89 | new_data_dict = { 90 | "image_id": list_of_image_ids, 91 | "width": [image_size] * len(list_of_image_ids), 92 | "height": [image_size] * len(list_of_image_ids), 93 | "bbox": list_of_bboxes, 94 | "source": list_of_sources, 95 | } 96 | new_df = pd.DataFrame(new_data_dict) 97 | new_df.to_csv(target_dir / "train.csv", index=False) 98 | 99 | 100 | if __name__ == "__main__": 101 | typer.run(main) 102 | -------------------------------------------------------------------------------- /wheat/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | from datetime import datetime 4 | from pathlib import Path 5 | 6 | import torch 7 | import typer 8 | import numpy as np 9 | import pandas as pd 10 | from omegaconf import OmegaConf, DictConfig 11 | from pytorch_lightning import Trainer, seed_everything 12 | from pytorch_lightning.loggers import TensorBoardLogger 13 | from pytorch_lightning_spells.loggers import ScreenLogger 14 | from pytorch_lightning_spells.callbacks import ( 15 | Callback, 16 | RandomAugmentationChoiceCallback, 17 | ) 18 | from pytorch_lightning.callbacks import ( 19 | LearningRateMonitor, 20 | ModelCheckpoint, 21 | ) 22 | 23 | from .config import WheatConfig 24 | from .model import WheatModel 25 | 26 | from sklearn.model_selection import StratifiedKFold 27 | 28 | 29 | class MixUpDetectionCallback(Callback): 30 | def __init__(self, alpha: float = 0.4): 31 | super().__init__() 32 | self.alpha = alpha 33 | 34 | def on_train_batch_start( 35 | self, trainer, pl_module, batch, batch_idx, dataloader_idx 36 | ): 37 | old_batch = batch 38 | batch, targets = batch 39 | lambd = np.clip(np.random.beta(self.alpha, self.alpha, 1), 0.35, 0.65) 40 | lambd_ = torch.tensor(max(lambd, 1 - lambd), device=batch.device).float() 41 | # Combine input batch 42 | new_batch = batch * lambd_ + batch.flip(0) * (1 - lambd_) 43 | # Combine targets 44 | assert isinstance(targets, dict) 45 | for col in ("bbox", "cls"): 46 | targets[col] = torch.cat([targets[col], targets[col].flip(0)], dim=1) 47 | 48 | old_batch[0] = new_batch 49 | old_batch[1] = targets 50 | 51 | 52 | def train(config: DictConfig, fold: int = 0): 53 | seed_everything(int(config.seed)) 54 | base_path = Path(config.base_dir) 55 | df_train = pd.read_csv(str(base_path / "train.csv")) 56 | bboxes = np.stack(df_train["bbox"].apply(lambda x: np.fromstring(x[1:-1], sep=","))) 57 | 58 | for i, col in enumerate(["x", "y", "w", "h"]): 59 | df_train[col] = bboxes[:, i] 60 | df_train["area"] = df_train["w"] * df_train["h"] 61 | df_train["x2"] = df_train["x"] + df_train["w"] 62 | df_train["y2"] = df_train["y"] + df_train["h"] 63 | 64 | skf = StratifiedKFold(n_splits=5, random_state=88, shuffle=True) 65 | # one row for one image 66 | df_trunc = df_train[["image_id", "source"]].drop_duplicates().reset_index(drop=True) 67 | for fold_idx, (_, valid_index) in enumerate( 68 | skf.split(df_trunc, y=df_trunc["source"]) 69 | ): 70 | df_trunc.loc[valid_index, "fold"] = fold_idx 71 | # Add fold column back 72 | df_train = df_train.merge(df_trunc, on=["image_id", "source"]) 73 | 74 | # create model for one fold 75 | model = WheatModel( 76 | config, 77 | df_train, 78 | fold=fold, 79 | half=( 80 | config.precision == 16 81 | and config.amp_backend == "apex" 82 | and config.amp_level == "O2" 83 | ), 84 | ) 85 | checkpoints = ModelCheckpoint( 86 | dirpath="cache/checkpoints/", 87 | monitor="val_MAP", 88 | mode="max", 89 | 
filename="{step:06d}-{val_loss:.4f}", 90 | save_top_k=1, 91 | save_last=False, 92 | ) 93 | callbacks = [LearningRateMonitor(logging_interval="step"), checkpoints] 94 | if config.mixup_alpha > 0: 95 | callbacks.append( 96 | RandomAugmentationChoiceCallback( 97 | [MixUpDetectionCallback(config.mixup_alpha)], 98 | p=[1.0], 99 | no_op_warmup=config.no_op_warmup_steps, 100 | no_op_prob=config.no_op_ratio, 101 | ) 102 | ) 103 | trainer = Trainer( 104 | amp_backend=config.amp_backend, 105 | amp_level=config.amp_level, 106 | precision=config.precision, 107 | gpus=1, 108 | callbacks=callbacks, 109 | # val_check_interval=0.5, 110 | gradient_clip_val=config.gradient_clip_val, 111 | logger=[ 112 | TensorBoardLogger( 113 | "cache/tb_logs", 114 | name="wheat", 115 | version=f"fold-{fold}-{datetime.now():%Y%m%dT%H%M}", 116 | ), 117 | ScreenLogger(), 118 | ], 119 | accumulate_grad_batches=config.grad_accu, 120 | # fast_dev_run=True, 121 | max_epochs=config.epochs, 122 | ) 123 | 124 | trainer.fit(model) 125 | 126 | print(checkpoints.best_model_path, checkpoints.best_model_score) 127 | pl_module = WheatModel.load_from_checkpoint( 128 | checkpoints.best_model_path, 129 | config=copy.deepcopy(config), 130 | df=df_train, 131 | fold=fold, 132 | half=False, 133 | ) 134 | torch.save( 135 | {"states": pl_module.model.state_dict(), "arch": config.arch}, "wheatdet.pth" 136 | ) 137 | 138 | 139 | def main( 140 | base_dir: str, 141 | epochs: int = 2, 142 | grad_accu: int = 1, 143 | arch: str = "tf_efficientdet_d3", 144 | batch_size: int = 8, 145 | fold: int = 0, 146 | mixup: float = -1, 147 | cutout: bool = False, 148 | mosaic_p: float = -1, 149 | ): 150 | config = WheatConfig( 151 | base_dir=base_dir, 152 | epochs=epochs, 153 | image_size=int(Path(base_dir).name), 154 | arch=arch, 155 | grad_accu=grad_accu, 156 | batch_size=batch_size, 157 | precision=16, 158 | amp_backend="apex", 159 | amp_level="O2", 160 | cutout=cutout, 161 | mixup_alpha=mixup, 162 | mosaic_p=mosaic_p, 163 | ) 164 | assert not (cutout is True and mixup > 0), "Can only enable one of MixUp and CutOut" 165 | if os.environ.get("SEED"): 166 | config.seed = int(os.environ["SEED"]) 167 | omega_conf = OmegaConf.structured(config) 168 | with open("train_config.yaml", "w") as fout: 169 | OmegaConf.save(config=omega_conf, f=fout) 170 | train(omega_conf, fold=fold) 171 | 172 | 173 | if __name__ == "__main__": 174 | typer.run(main) 175 | -------------------------------------------------------------------------------- /wheat/metrics.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adapted from https://www.kaggle.com/pestipeti/competition-metric-details-script 3 | """ 4 | from typing import Optional, Any, Iterable, Callable, Dict 5 | 6 | import torch 7 | import numpy as np 8 | from numba import jit 9 | 10 | from torchmetrics.metric import Metric 11 | 12 | 13 | @jit(nopython=True) 14 | def calculate_iou(gt, pr, form="pascal_voc") -> float: 15 | """Calculates the Intersection over Union. 
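Boxes are treated as inclusive pixel ranges, which is why the overlap and area computations below add 1 to each width and height.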
16 | 17 | Args: 18 | gt: (np.ndarray[Union[int, float]]) coordinates of the ground-truth box 19 | pr: (np.ndarray[Union[int, float]]) coordinates of the predicted box 20 | form: (str) gt/pred coordinates format 21 | - pascal_voc: [xmin, ymin, xmax, ymax] 22 | - coco: [xmin, ymin, w, h] 23 | Returns: 24 | (float) Intersection over union (0.0 <= iou <= 1.0) 25 | """ 26 | if form == "coco": 27 | gt = gt.copy() 28 | pr = pr.copy() 29 | 30 | gt[2] = gt[0] + gt[2] 31 | gt[3] = gt[1] + gt[3] 32 | pr[2] = pr[0] + pr[2] 33 | pr[3] = pr[1] + pr[3] 34 | 35 | # Calculate overlap area 36 | dx = min(gt[2], pr[2]) - max(gt[0], pr[0]) + 1 37 | 38 | if dx < 0: 39 | return 0.0 40 | 41 | dy = min(gt[3], pr[3]) - max(gt[1], pr[1]) + 1 42 | 43 | if dy < 0: 44 | return 0.0 45 | 46 | overlap_area = dx * dy 47 | 48 | # Calculate union area 49 | union_area = ( 50 | (gt[2] - gt[0] + 1) * (gt[3] - gt[1] + 1) 51 | + (pr[2] - pr[0] + 1) * (pr[3] - pr[1] + 1) 52 | - overlap_area 53 | ) 54 | 55 | return overlap_area / union_area 56 | 57 | 58 | @jit(nopython=True) 59 | def find_best_match( 60 | gts, pred, pred_idx, threshold=0.5, form="pascal_voc", ious=None 61 | ) -> int: 62 | """Returns the index of the 'best match' between the 63 | ground-truth boxes and the prediction. The 'best match' 64 | is the highest IoU. (0.0 IoUs are ignored). 65 | 66 | Args: 67 | gts: (List[List[Union[int, float]]]) Coordinates of the available ground-truth boxes 68 | pred: (List[Union[int, float]]) Coordinates of the predicted box 69 | pred_idx: (int) Index of the current predicted box 70 | threshold: (float) Threshold 71 | form: (str) Format of the coordinates 72 | ious: (np.ndarray) len(gts) x len(preds) matrix for storing calculated ious. 73 | 74 | Return: 75 | (int) Index of the best match GT box (-1 if no match above threshold) 76 | """ 77 | best_match_iou = -np.inf 78 | best_match_idx = -1 79 | 80 | for gt_idx in range(len(gts)): 81 | 82 | if gts[gt_idx][0] < 0: 83 | # Already matched GT-box 84 | continue 85 | 86 | iou = -1 if ious is None else ious[gt_idx][pred_idx] 87 | 88 | if iou < 0: 89 | iou = calculate_iou(gts[gt_idx], pred, form=form) 90 | if ious is not None: 91 | ious[gt_idx][pred_idx] = iou 92 | 93 | if iou < threshold: 94 | continue 95 | 96 | if iou > best_match_iou: 97 | best_match_iou = iou 98 | best_match_idx = gt_idx 99 | return best_match_idx 100 | 101 | 102 | @jit(nopython=True) 103 | def calculate_precision(gts, preds, threshold=0.5, form="coco", ious=None) -> float: 104 | """Calculates precision for GT - prediction pairs at one threshold. 105 | 106 | Args: 107 | gts: (List[List[Union[int, float]]]) Coordinates of the available ground-truth boxes 108 | preds: (List[List[Union[int, float]]]) Coordinates of the predicted boxes, 109 | sorted by confidence value (descending) 110 | threshold: (float) Threshold 111 | form: (str) Format of the coordinates 112 | ious: (np.ndarray) len(gts) x len(preds) matrix for storing calculated ious. 113 | 114 | Return: 115 | (float) Precision 116 | """ 117 | n = len(preds) 118 | tp = 0 119 | fp = 0 120 | 121 | # for pred_idx, pred in enumerate(preds_sorted): 122 | for pred_idx in range(n): 123 | 124 | best_match_gt_idx = find_best_match( 125 | gts, preds[pred_idx], pred_idx, threshold=threshold, form=form, ious=ious 126 | ) 127 | 128 | if best_match_gt_idx >= 0: 129 | # True positive: The predicted box matches a gt box with an IoU above the threshold. 
130 | tp += 1 131 | # Remove the matched GT box 132 | gts[best_match_gt_idx] = -1 133 | 134 | else: 135 | # No match 136 | # False positive: indicates a predicted box had no associated gt box. 137 | fp += 1 138 | 139 | # False negative: indicates a gt box had no associated predicted box. 140 | fn = (gts.sum(axis=1) > 0).sum() 141 | 142 | return tp / (tp + fp + fn) 143 | 144 | 145 | @jit(nopython=True) 146 | def calculate_image_precision(gts, preds, thresholds=(0.5,), form="coco") -> float: 147 | """Calculates image precision. 148 | 149 | Args: 150 | gts: (List[List[Union[int, float]]]) Coordinates of the available ground-truth boxes 151 | preds: (List[List[Union[int, float]]]) Coordinates of the predicted boxes, 152 | sorted by confidence value (descending) 153 | thresholds: (float) Different thresholds 154 | form: (str) Format of the coordinates 155 | 156 | Return: 157 | (float) Precision 158 | """ 159 | n_threshold = len(thresholds) 160 | image_precision = 0.0 161 | 162 | ious = np.ones((len(gts), len(preds))) * -1 163 | # ious = None 164 | 165 | for threshold in thresholds: 166 | precision_at_threshold = calculate_precision( 167 | gts.copy(), preds, threshold=threshold, form=form, ious=ious 168 | ) 169 | image_precision += precision_at_threshold / n_threshold 170 | 171 | return image_precision 172 | 173 | 174 | class mAP(Metric): 175 | def __init__( 176 | self, 177 | thresholds: Iterable[float], 178 | confidence_threshold: float, 179 | form: str, 180 | compute_on_step: bool = True, 181 | dist_sync_on_step: bool = False, 182 | process_group: Optional[Any] = None, 183 | dist_sync_fn: Callable = None, 184 | ) -> None: 185 | super().__init__( 186 | compute_on_step=compute_on_step, 187 | dist_sync_on_step=dist_sync_on_step, 188 | process_group=process_group, 189 | dist_sync_fn=dist_sync_fn, 190 | ) 191 | 192 | self.thresholds = thresholds 193 | self.confidence_threshold = confidence_threshold 194 | self.form = form 195 | 196 | self.add_state( 197 | "mAP_sum", 198 | default=torch.tensor(0.0, dtype=torch.float64), 199 | dist_reduce_fx="sum", 200 | ) 201 | self.add_state( 202 | "total", 203 | default=torch.tensor(0.0, dtype=torch.float64), 204 | dist_reduce_fx="sum", 205 | ) 206 | 207 | def update(self, detections: torch.Tensor, targets: torch.Tensor) -> None: # type: ignore 208 | for i in range(len(detections)): 209 | local_detections = detections[i].cpu().numpy() 210 | local_detections = local_detections[ 211 | local_detections[:, 4] > self.confidence_threshold 212 | ] 213 | # print(targets[i][:2]) 214 | map = calculate_image_precision( 215 | targets[i].cpu().numpy(), 216 | local_detections[:, :4], 217 | self.thresholds, 218 | self.form, 219 | ) 220 | # Update states 221 | self.mAP_sum += map 222 | self.total += 1 223 | 224 | def compute(self) -> torch.Tensor: 225 | return self.mAP_sum / self.total 226 | 227 | @property 228 | def is_differentiable(self) -> bool: 229 | return False 230 | -------------------------------------------------------------------------------- /wheat/dataset.py: -------------------------------------------------------------------------------- 1 | import enum 2 | import random 3 | from pathlib import Path 4 | 5 | import torch 6 | import numpy as np 7 | from PIL import Image 8 | from torch.utils.data import Dataset 9 | import albumentations as A 10 | 11 | 12 | class Mode(enum.Enum): 13 | train = 0 14 | validation = 1 15 | test = 2 16 | 17 | 18 | class WheatDataset(Dataset): 19 | """ 20 | [reference](https://www.kaggle.com/dangnam739/faster-rcnn-global-wheat-detection) 21 | 
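When `df` is provided, it is expected to hold one row per bounding box with the columns `image_id`, `width`, `height`, `x`, `y`, `w`, `h`, `x2`, and `y2`, as prepared by `wheat.train` / `wheat.eval`. Without a dataframe the dataset runs in test mode and simply globs `image_dir` for `*.jpg` files.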
""" 22 | 23 | def __init__( 24 | self, 25 | image_dir: str, 26 | df=None, 27 | mode: Mode = Mode.train, 28 | transforms=None, 29 | min_box_edge=-1, 30 | mosaic_p: float = -1, 31 | ): 32 | super().__init__() 33 | if df is not None: 34 | self.df = df.copy() 35 | self.image_ids = df["image_id"].unique() 36 | assert self.df["width"].nunique() == 1 37 | assert self.df["height"].nunique() == 1 38 | assert self.df["width"].values[0] == self.df["height"].values[0] 39 | self.image_size = self.df["width"].values[0] 40 | self.min_box_edge = min_box_edge 41 | else: 42 | # test case 43 | self.df = None 44 | self.image_ids = [p.stem for p in Path(image_dir).glob("*.jpg")] 45 | # TODO: set image size automatically 46 | self.image_size = 384 47 | self.mosaic_p = mosaic_p 48 | self.image_dir = image_dir 49 | self.transforms = transforms 50 | self.mode = mode 51 | 52 | def _load_image(self, image_id): 53 | image = Image.open(f"{self.image_dir}/{image_id}.jpg").convert("RGB") 54 | return np.array(image) 55 | 56 | def _load_bbox(self, image_id): 57 | records = self.df[self.df["image_id"] == image_id] 58 | if self.min_box_edge > 0: 59 | records = records[ 60 | (records.w >= self.min_box_edge) & (records.h >= self.min_box_edge) 61 | ] 62 | boxes = records[["x", "y", "x2", "y2"]].values 63 | return boxes 64 | 65 | def __getitem__(self, index: int): 66 | if self.mode in (Mode.test, Mode.validation) or ( 67 | self.mode is Mode.train and (random.random() > self.mosaic_p) 68 | ): 69 | image_id = self.image_ids[index] 70 | image = self._load_image(image_id) 71 | if self.mode in (Mode.train, Mode.validation): 72 | boxes = self._load_bbox(image_id) 73 | else: 74 | boxes = (np.asarray([[0, 0, 0, 0]], dtype=np.float32),) 75 | else: 76 | image, boxes = self._load_mosaic(index) 77 | 78 | target = {} 79 | target["bbox"] = boxes 80 | target["cls"] = np.ones((len(boxes),), dtype=np.int64) 81 | # These are needed as well by the efficientdet model. 82 | target["img_size"] = (self.image_size, self.image_size) 83 | target["img_scale"] = 1.0 84 | 85 | if self.transforms: 86 | sample = {"image": image, "bboxes": target["bbox"], "labels": target["cls"]} 87 | sample = self.transforms(**sample) 88 | if len(sample["bboxes"]) > 0: 89 | # apply augmentation on the fly 90 | target["cls"] = np.asarray(sample["labels"]) 91 | target["bbox"] = np.asarray(sample["bboxes"]) 92 | else: 93 | target["cls"] = np.empty(0, dtype=int) 94 | target["bbox"] = np.empty((0, 4), dtype=np.float32) 95 | image = sample["image"].transpose(2, 0, 1) 96 | else: 97 | image = image.transpose(2, 0, 1) 98 | # convert to yxyx format 99 | target["bbox"] = target["bbox"][:, [1, 0, 3, 2]] 100 | return image, target 101 | 102 | def _load_mosaic(self, index): 103 | """ 104 | Adapted from: 105 | 1. https://github.com/ultralytics/yolov5/blob/831773f5a23926658ee76459ce37550643432123/utils/datasets.py#L529 106 | 2. 
https://www.kaggle.com/shonenkov/training-efficientdet 107 | """ 108 | w, h = self.image_size, self.image_size 109 | border_size = self.image_size // 2 110 | min_visibility = self.transforms.processors["bboxes"].params.min_visibility 111 | 112 | xc, yc = [ 113 | int(random.uniform(border_size // 2, self.image_size - border_size // 2)) 114 | for _ in range(2) 115 | ] # center x, y 116 | indexes = [index] + np.random.choice( 117 | range(len(self)), 3, replace=False 118 | ).tolist() 119 | 120 | result_image = np.full((self.image_size, self.image_size, 3), 0, dtype=np.uint8) 121 | result_boxes = [] 122 | 123 | for i, index in enumerate(indexes): 124 | image = self._load_image(self.image_ids[index]) 125 | boxes = self._load_bbox(self.image_ids[index]) 126 | if i == 0: # top left 127 | x1a, y1a, x2a, y2a = ( 128 | max(xc - w, 0), 129 | max(yc - h, 0), 130 | xc, 131 | yc, 132 | ) # xmin, ymin, xmax, ymax (large image) 133 | x1b, y1b, x2b, y2b = ( 134 | w - (x2a - x1a), 135 | h - (y2a - y1a), 136 | w, 137 | h, 138 | ) # xmin, ymin, xmax, ymax (small image) 139 | elif i == 1: # top right 140 | x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, w), yc 141 | x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h 142 | elif i == 2: # bottom left 143 | x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(h, yc + h) 144 | x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, max(xc, w), min(y2a - y1a, h) 145 | elif i == 3: # bottom right 146 | x1a, y1a, x2a, y2a = xc, yc, min(xc + w, w), min(h, yc + h) 147 | x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) 148 | result_image[y1a:y2a, x1a:x2a] = image[y1b:y2b, x1b:x2b] 149 | padw = x1a - x1b 150 | padh = y1a - y1b 151 | 152 | boxes[:, 0] += padw 153 | boxes[:, 1] += padh 154 | boxes[:, 2] += padw 155 | boxes[:, 3] += padh 156 | 157 | area_before = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) 158 | # Filter non-boxes 159 | np.clip(boxes, 0, self.image_size, out=boxes) 160 | area_after = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) 161 | visibility = area_after / area_before 162 | 163 | boxes = boxes[np.where(visibility > min_visibility)] 164 | result_boxes.append(boxes) 165 | 166 | result_boxes = np.concatenate(result_boxes, 0) 167 | return result_image, result_boxes 168 | 169 | def __len__(self) -> int: 170 | return len(self.image_ids) 171 | 172 | 173 | def get_train_transforms(image_size: int, cutout: bool = False): 174 | transforms = [ 175 | A.RandomSizedCrop( 176 | (int(image_size * 0.8), image_size), 177 | image_size, 178 | image_size, 179 | p=0.5, 180 | ), 181 | A.OneOf( 182 | [ 183 | A.HueSaturationValue( 184 | hue_shift_limit=0.2, 185 | sat_shift_limit=0.2, 186 | val_shift_limit=0.2, 187 | p=0.9, 188 | ), 189 | A.RandomBrightnessContrast( 190 | brightness_limit=0.2, contrast_limit=0.2, p=0.9 191 | ), 192 | ], 193 | p=0.9, 194 | ), 195 | # A.ToGray(p=0.01), 196 | A.HorizontalFlip(p=0.5), 197 | A.VerticalFlip(p=0.5), 198 | ] 199 | if cutout: 200 | size = int(image_size * 0.1) 201 | transforms.append( 202 | A.Cutout( 203 | num_holes=8, max_h_size=size, max_w_size=size, fill_value=0, p=0.5 204 | ), 205 | ) 206 | print(transforms) 207 | return A.Compose( 208 | transforms, 209 | p=1.0, 210 | bbox_params=A.BboxParams( 211 | format="pascal_voc", 212 | min_area=0, 213 | min_visibility=0.3, 214 | label_fields=["labels"], 215 | ), 216 | ) 217 | -------------------------------------------------------------------------------- /wheat/model.py: -------------------------------------------------------------------------------- 1 | 
import math 2 | from pathlib import Path 3 | from typing import Tuple, Union, Dict, List 4 | 5 | import torch 6 | import numpy as np 7 | import pandas as pd 8 | from omegaconf import DictConfig 9 | 10 | from effdet import get_efficientdet_config, EfficientDet 11 | from effdet.bench import DetBenchTrain, DetBenchPredict 12 | from effdet.helpers import load_pretrained 13 | from effdet.data.loader import DetectionFastCollate 14 | 15 | import pytorch_lightning_spells as pls 16 | from pytorch_lightning_spells import BaseModule 17 | from torch.utils.data import DataLoader 18 | 19 | from .dataset import WheatDataset, get_train_transforms, Mode 20 | from .metrics import mAP 21 | 22 | TARGET_TYPE = Dict[str, torch.Tensor] 23 | 24 | 25 | class DetectionModule(BaseModule): 26 | def validation_step_end(self, outputs): 27 | """This method logs the validation loss and metrics for you. 28 | 29 | The output from `.validation_step()` method must contains these three entries: 30 | 31 | 1. loss: the validation loss. 32 | 2. pred: the predicted labels or values. 33 | 3. target: the ground truth lables or values. 34 | 35 | Args: 36 | outputs (Dict): the output from `.validation_step()` method. 37 | """ 38 | self.log("val_loss", outputs["loss"].mean()) 39 | for name, metric in self.metrics: 40 | metric(outputs["detections"], outputs["targets"]) 41 | self.log("val_" + name, metric) 42 | 43 | 44 | def get_train_efficientdet( 45 | model_name: str = "tf_efficientdet_d5", 46 | image_size: Tuple[int, int] = (384, 384), 47 | mode: str = "train", 48 | pretrained: bool = True, 49 | ): 50 | config = get_efficientdet_config(model_name) 51 | config.image_size = image_size 52 | net = EfficientDet(config, pretrained_backbone=False) 53 | # load pretrained 54 | if pretrained: 55 | load_pretrained(net, config.url) 56 | net.reset_head(num_classes=1) 57 | if mode == "train": 58 | return DetBenchTrain(net, create_labeler=True) 59 | else: 60 | return DetBenchPredict(net) 61 | 62 | 63 | def collate_fn(batch): 64 | return tuple(zip(*batch)) 65 | 66 | 67 | IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406) 68 | IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225) 69 | 70 | 71 | class PrefetchLoader: 72 | def __init__( 73 | self, 74 | loader, 75 | mean=IMAGENET_DEFAULT_MEAN, 76 | std=IMAGENET_DEFAULT_STD, 77 | half: bool = False, 78 | ): 79 | self.half = half 80 | self.loader = loader 81 | self.mean = torch.tensor([x * 255 for x in mean]).cuda().view(1, 3, 1, 1) 82 | self.std = torch.tensor([x * 255 for x in std]).cuda().view(1, 3, 1, 1) 83 | 84 | def __iter__(self): 85 | stream = torch.cuda.Stream() 86 | first = True 87 | 88 | for next_input, next_target in self.loader: 89 | with torch.cuda.stream(stream): 90 | next_input = next_input.cuda(non_blocking=True) 91 | next_input = next_input.float().sub_(self.mean).div_(self.std) 92 | next_target = { 93 | k: v.cuda(non_blocking=True) for k, v in next_target.items() 94 | } 95 | if self.half: 96 | next_input = next_input.half() 97 | 98 | if not first: 99 | yield [input, target] 100 | else: 101 | first = False 102 | 103 | torch.cuda.current_stream().wait_stream(stream) 104 | input = next_input 105 | target = next_target 106 | 107 | yield [input, target] 108 | 109 | def __len__(self): 110 | return len(self.loader) 111 | 112 | @property 113 | def sampler(self): 114 | return self.loader.sampler 115 | 116 | @property 117 | def dataset(self): 118 | return self.loader.dataset 119 | 120 | 121 | class WheatModel(DetectionModule): 122 | def __init__( 123 | self, 124 | config: DictConfig, 125 | df: pd.DataFrame, 
126 | fold: int, 127 | half: bool = False, 128 | ): 129 | super().__init__() 130 | self.df = df 131 | self.config = config 132 | self.train_df = self.df.loc[lambda df: df["fold"] != fold] 133 | self.valid_df = self.df.loc[lambda df: df["fold"] == fold] 134 | self.image_dir = str(Path(config.base_dir) / "train") 135 | self.model = get_train_efficientdet( 136 | config.arch, image_size=(config.image_size, config.image_size) 137 | ) 138 | self.min_box_edge = 10 / (1024 / config.image_size) 139 | self.num_workers = config.num_workers 140 | self.batch_size = config.batch_size 141 | self.metrics = [ 142 | ( 143 | "MAP", 144 | mAP( 145 | thresholds=np.arange(0.5, 0.76, 0.05), 146 | form="pascal_voc", 147 | confidence_threshold=0.4, 148 | ), 149 | ) 150 | # ("acc", RetrievalMAP(compute_on_step=False)), 151 | ] 152 | train_transforms = get_train_transforms(config.image_size, cutout=config.cutout) 153 | self.train_dataset = WheatDataset( 154 | df=self.train_df, 155 | image_dir=self.image_dir, 156 | transforms=train_transforms, 157 | min_box_edge=self.min_box_edge, 158 | mode=Mode.train, 159 | mosaic_p=self.config.mosaic_p, 160 | ) 161 | # valid_transforms = get_valid_transforms() 162 | self.valid_dataset = WheatDataset( 163 | df=self.valid_df, 164 | image_dir=self.image_dir, 165 | transforms=None, 166 | mode=Mode.validation, 167 | ) 168 | self.grad_accu = config.grad_accu 169 | self.epochs = config.epochs 170 | self.half = half 171 | print("# of train images:", len(self.train_dataset)) 172 | print("# of valid images:", len(self.valid_dataset)) 173 | 174 | def forward(self, image, target): 175 | return self.model(image, target) 176 | 177 | def training_step(self, batch, batch_idx): 178 | images, targets = batch 179 | losses_dict = self.forward(images, targets) 180 | 181 | return { 182 | "loss": losses_dict["loss"], 183 | "log": batch_idx % self.trainer.accumulate_grad_batches == 0, 184 | } 185 | 186 | def validation_step(self, batch, batch_idx): 187 | images, targets = batch 188 | losses_dict = self.model(images, targets) 189 | loss_val = losses_dict["loss"] 190 | detections = losses_dict["detections"] 191 | # Back to xyxy form 192 | bbox = targets["bbox"][:, :, [1, 0, 3, 2]] 193 | return {"loss": loss_val, "detections": detections, "targets": bbox} 194 | 195 | def configure_optimizers(self): 196 | steps_per_epochs = math.floor( 197 | len(self.train_dataset) 198 | / self.batch_size 199 | / self.grad_accu # / self.num_gpus # dpp mode 200 | ) 201 | print("Steps per epochs:", steps_per_epochs) 202 | n_steps = steps_per_epochs * self.epochs 203 | lr_durations = [int(n_steps * 0.05), int(np.ceil(n_steps * 0.95)) + 1] 204 | break_points = [0] + list(np.cumsum(lr_durations))[:-1] 205 | optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.config.base_lr) 206 | scheduler = { 207 | "scheduler": pls.lr_schedulers.MultiStageScheduler( 208 | [ 209 | pls.lr_schedulers.LinearLR(optimizer, 0.01, lr_durations[0]), 210 | pls.lr_schedulers.CosineAnnealingLR(optimizer, lr_durations[1]), 211 | ], 212 | start_at_epochs=break_points, 213 | ), 214 | "interval": "step", 215 | "frequency": 1, 216 | "strict": True, 217 | } 218 | return {"optimizer": optimizer, "lr_scheduler": scheduler} 219 | 220 | def train_dataloader(self): 221 | loader = DataLoader( 222 | self.train_dataset, 223 | batch_size=self.batch_size, 224 | shuffle=True, 225 | pin_memory=False, 226 | drop_last=True, 227 | collate_fn=DetectionFastCollate(anchor_labeler=None), 228 | num_workers=self.num_workers, 229 | ) 230 | return PrefetchLoader(loader, 
half=self.half) 231 | 232 | def val_dataloader(self): 233 | valid_dataloader = DataLoader( 234 | self.valid_dataset, 235 | batch_size=self.batch_size, 236 | pin_memory=False, 237 | shuffle=False, 238 | collate_fn=DetectionFastCollate(anchor_labeler=None), 239 | num_workers=self.num_workers, 240 | ) 241 | 242 | # iou_types = ["bbox"] 243 | 244 | return PrefetchLoader(valid_dataloader, half=self.half) 245 | -------------------------------------------------------------------------------- /notebooks/gwd-resize-images-bboxes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "source": [ 8 | "\n", 9 | "# Table of Contents\n", 10 | "1. [Import libraries](#import_libraries)\n", 11 | "1. [Configure hyper-parameters](#configure_hyper_parameters)\n", 12 | "1. [Define helper-functions](#define_helper_functions)\n", 13 | "1. [Resize images and corresponding bboxes](#resize_images_and_corresponding_bboxes)\n", 14 | "1. [Save and compress the results](#save_and_compress_the_result)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "source": [ 22 | "\n", 23 | "# Import libraries\n", 24 | "[Bach to Table of Contents](#toc)" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 1, 30 | "metadata": { 31 | "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", 32 | "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5" 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "import pathlib\n", 37 | "from pathlib import Path\n", 38 | "import json\n", 39 | "\n", 40 | "import numpy as np\n", 41 | "import pandas as pd\n", 42 | "import cv2\n", 43 | "import albumentations as A\n", 44 | "from tqdm import tqdm" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "source": [ 52 | "\n", 53 | "# Configure hyper-parameters\n", 54 | "[Bach to Table of Contents](#toc)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 2, 60 | "metadata": { 61 | "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", 62 | "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a" 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "ROOT = Path('/kaggle/input/global-wheat-detection/')\n", 67 | "TRAIN_DIR = ROOT / 'train'\n", 68 | "TEST_DIR = ROOT / 'test'\n", 69 | "\n", 70 | "WORKING_DIR = Path('/kaggle/working/')\n", 71 | "\n", 72 | "IMG_SIZE = 224" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "source": [ 80 | "\n", 81 | "# Define helper-functions\n", 82 | "[Bach to Table of Contents](#toc)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 3, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "def load_dataframe(csv_path: pathlib.PosixPath, image_dir: pathlib.PosixPath) -> pd.DataFrame:\n", 92 | " df = pd.read_csv(csv_path)\n", 93 | " \n", 94 | " # Merge all bboxes of each corresponding image\n", 95 | " # Format: [[x1 y1 w1 h1], [x2 y2 w2 h2], [x3 y3 w3 h3], ...]\n", 96 | " df.bbox = df.bbox.apply(lambda x: ' '.join(np.array(json.loads(x), dtype=str)))\n", 97 | " df.bbox = df.groupby(['image_id']).bbox.transform(lambda x: '|'.join(x))\n", 98 | " df.drop_duplicates(inplace=True, ignore_index=True)\n", 99 | " df.bbox = df.bbox.apply(lambda x: np.array([item.split(' ') for item in x.split('|')], dtype=np.float32).tolist())\n", 100 | " \n", 101 | " # Create a path to each 
image\n", 102 | " df['image_path'] = df.image_id.apply(lambda x: str(image_dir / (x + '.jpg')))\n", 103 | " \n", 104 | " return df\n", 105 | "\n", 106 | "def load_image(image_path: str) -> np.array:\n", 107 | " image = cv2.imread(image_path, cv2.IMREAD_COLOR)\n", 108 | "\n", 109 | " return image\n", 110 | "\n", 111 | "def fix_out_of_range(bbox: list, max_size: int = 1024) -> list:\n", 112 | " bbox[2] = min(bbox[2], max_size - bbox[0])\n", 113 | " bbox[3] = min(bbox[3], max_size - bbox[1])\n", 114 | "\n", 115 | " return bbox" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 4, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "df = load_dataframe(ROOT / 'train.csv', TRAIN_DIR)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 5, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "text/html": [ 135 | "
Rendered `text/html` output (a preview of `df`):

|      | image_id  | width | height | bbox | source | image_path |
|------|-----------|-------|--------|------|--------|------------|
| 0    | b6ab77fd7 | 1024  | 1024   | [[834.0, 222.0, 56.0, 36.0], [226.0, 548.0, 13... | usask_1   | /kaggle/input/global-wheat-detection/train/b6a... |
| 1    | b53afdf5c | 1024  | 1024   | [[988.0, 781.0, 36.0, 96.0], [331.0, 863.0, 70... | usask_1   | /kaggle/input/global-wheat-detection/train/b53... |
| 2    | 7b72ea0fb | 1024  | 1024   | [[332.0, 662.0, 113.0, 50.0], [285.0, 755.0, 3... | usask_1   | /kaggle/input/global-wheat-detection/train/7b7... |
| 3    | 91c9d9c38 | 1024  | 1024   | [[124.0, 273.0, 59.0, 73.0], [688.0, 939.0, 61... | usask_1   | /kaggle/input/global-wheat-detection/train/91c... |
| 4    | 41c0123cc | 1024  | 1024   | [[0.0, 669.0, 73.0, 111.0], [572.0, 757.0, 110... | usask_1   | /kaggle/input/global-wheat-detection/train/41c... |
| ...  | ...       | ...   | ...    | ...  | ...    | ...        |
| 3368 | 990c1777d | 1024  | 1024   | [[120.0, 97.0, 92.0, 89.0], [491.0, 312.0, 174... | arvalis_2 | /kaggle/input/global-wheat-detection/train/990... |
| 3369 | bce2fdc4d | 1024  | 1024   | [[59.0, 0.0, 133.0, 42.0], [742.0, 839.0, 115.... | arvalis_2 | /kaggle/input/global-wheat-detection/train/bce... |
| 3370 | a5c8d5f5c | 1024  | 1024   | [[619.0, 194.0, 113.0, 90.0], [53.0, 430.0, 14... | arvalis_2 | /kaggle/input/global-wheat-detection/train/a5c... |
| 3371 | e6b5e296d | 1024  | 1024   | [[940.0, 462.0, 84.0, 85.0], [532.0, 613.0, 10... | arvalis_2 | /kaggle/input/global-wheat-detection/train/e6b... |
| 3372 | 5e0747034 | 1024  | 1024   | [[273.0, 284.0, 113.0, 92.0], [494.0, 125.0, 1... | arvalis_2 | /kaggle/input/global-wheat-detection/train/5e0... |

3373 rows × 6 columns