├── .flake8 ├── .gitignore ├── wheat ├── config.py ├── eval.py ├── train.py ├── metrics.py ├── dataset.py └── model.py ├── LICENSE ├── README.md ├── scripts └── resize_images.py └── notebooks └── gwd-resize-images-bboxes.ipynb /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | max-complexity = 10 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | cache 3 | efficientdet-pytorch 4 | export 5 | code-dataset 6 | -------------------------------------------------------------------------------- /wheat/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | 4 | @dataclass 5 | class WheatConfig: 6 | base_dir: str 7 | epochs: int 8 | arch: str 9 | image_size: int 10 | batch_size: int 11 | base_lr: float = 2e-4 12 | num_workers: int = 4 13 | seed: int = 999 14 | amp_backend: str = "native" 15 | amp_level: str = "O2" # only effective if amp_backend == apex 16 | precision: int = 32 17 | gradient_clip_val: float = 10 18 | grad_accu: int = 1 19 | cutout: bool = False 20 | mixup_alpha: float = -1 21 | no_op_ratio: float = 0.2 22 | no_op_warmup_steps: int = 100 23 | mosaic_p: float = -1 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 CeShine Lee 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyTorch EfficientDet Solution for Global Wheat Detection Challenge 2 | 3 | 1. [Training notebook (on Kaggle)](https://www.kaggle.com/ceshine/wheat-detection-training-efficientdet-public?scriptVersionId=67789208&select=wheatdet.pth) 4 | 2. Inference notebook: [Single model](https://www.kaggle.com/ceshine/effdet-wheat-head-detection-inference-public?scriptVersionId=67809685); [Ensemble](https://www.kaggle.com/ceshine/effdet-wheat-head-detection-inference-public/output?scriptVersionId=67812782) 5 | 6 | See [wheat/config.py](wheat/config.py) for hyper-parameters and system configurations. 
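Both CLI entry points (`wheat/train.py` and `wheat/eval.py`) simply build a `WheatConfig` from their command-line arguments, wrap it with `OmegaConf.structured`, and pass it on. Below is a minimal sketch of driving a training run programmatically instead; the values are placeholders, it keeps the full-precision/native-AMP defaults rather than the Apex O2 setup the CLI switches on, and it assumes a CUDA GPU since the trainer and the prefetching loader are GPU-only.

```python
from omegaconf import OmegaConf

from wheat.config import WheatConfig
from wheat.train import train

# Fields not set here fall back to the dataclass defaults in wheat/config.py.
config = WheatConfig(
    base_dir="data/512",          # directory produced by scripts/resize_images.py
    epochs=10,
    arch="tf_efficientdet_d3",
    image_size=512,
    batch_size=8,
)

# Same conversion wheat/train.py performs before calling train().
train(OmegaConf.structured(config), fold=0)
```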
7 | 8 | The best mAP score I'm able to get is 0.6167 (Private) / 0.7084 (Public) with a D4 model trained on 768x768 resolution (using a single P100 GPU). 9 | 10 | ## Requirements 11 | 12 | 1. torch>=1.7.0 13 | 1. pytorch-lightning>=1.3.6 14 | 1. pytorch-lightning-spells==0.0.3 15 | 1. efficientdet-pytorch==0.2.4 16 | 17 | Note: You'll need to use [my fork of efficientdet-pytorch](https://github.com/ceshine/efficientdet-pytorch) to use the O2 level of Apex AMP. 18 | 19 | ## Instructions 20 | 21 | Resizing images: 22 | 23 | ```bash 24 | python scripts/resize_images.py 512 --root data/ 25 | ``` 26 | 27 | Training (pass `--help` for more information): 28 | 29 | ```bash 30 | python -m wheat.train data/512 --epochs 10 --grad-accu 4 --batch-size 8 --arch tf_efficientdet_d3 --fold 0 --mixup 24 --mosaic-p 0.5 31 | ``` 32 | 33 | Evaluation (pass `--help` for more information): 34 | 35 | ```bash 36 | python -m wheat.eval data/512 export/tf_efficientdet_d3-mosaic-mixup-fold0.pth --batch-size 8 --arch tf_efficientdet_d3 --fold 0 37 | ``` 38 | -------------------------------------------------------------------------------- /wheat/eval.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | from datetime import datetime 4 | from pathlib import Path 5 | 6 | import torch 7 | import typer 8 | import numpy as np 9 | import pandas as pd 10 | from omegaconf import OmegaConf, DictConfig 11 | from pytorch_lightning import Trainer, seed_everything 12 | from pytorch_lightning.loggers import TensorBoardLogger 13 | from pytorch_lightning_spells.loggers import ScreenLogger 14 | from pytorch_lightning_spells.callbacks import ( 15 | Callback, 16 | RandomAugmentationChoiceCallback, 17 | ) 18 | from pytorch_lightning.callbacks import ( 19 | LearningRateMonitor, 20 | ModelCheckpoint, 21 | ) 22 | 23 | from .config import WheatConfig 24 | from .model import WheatModel 25 | 26 | from sklearn.model_selection import StratifiedKFold 27 | 28 | 29 | def eval(model_path: str, config: DictConfig, fold: int = 0): 30 | seed_everything(int(config.seed)) 31 | base_path = Path(config.base_dir) 32 | df_train = pd.read_csv(str(base_path / "train.csv")) 33 | bboxes = np.stack(df_train["bbox"].apply(lambda x: np.fromstring(x[1:-1], sep=","))) 34 | 35 | for i, col in enumerate(["x", "y", "w", "h"]): 36 | df_train[col] = bboxes[:, i] 37 | df_train["area"] = df_train["w"] * df_train["h"] 38 | df_train["x2"] = df_train["x"] + df_train["w"] 39 | df_train["y2"] = df_train["y"] + df_train["h"] 40 | 41 | skf = StratifiedKFold(n_splits=5, random_state=88, shuffle=True) 42 | # one row for one image 43 | df_trunc = df_train[["image_id", "source"]].drop_duplicates().reset_index(drop=True) 44 | for fold_idx, (_, valid_index) in enumerate( 45 | skf.split(df_trunc, y=df_trunc["source"]) 46 | ): 47 | df_trunc.loc[valid_index, "fold"] = fold_idx 48 | # Add fold column back 49 | df_train = df_train.merge(df_trunc, on=["image_id", "source"]) 50 | 51 | # create model for one fold 52 | model = WheatModel( 53 | config, 54 | df_train, 55 | fold=fold, 56 | half=( 57 | config.precision == 16 58 | and config.amp_backend == "apex" 59 | and config.amp_level == "O2" 60 | ), 61 | ) 62 | model.model.load_state_dict(torch.load(model_path)["states"]) 63 | trainer = Trainer( 64 | amp_backend=config.amp_backend, 65 | amp_level=config.amp_level, 66 | precision=config.precision, 67 | gpus=1, 68 | ) 69 | trainer.validate(model) 70 | 71 | 72 | def main( 73 | base_dir: str, 74 | model_path: str, 75 | grad_accu: int = 
1, 76 | arch: str = "tf_efficientdet_d3", 77 | batch_size: int = 8, 78 | fold: int = 0, 79 | mixup: float = -1, 80 | cutout: bool = False, 81 | mosaic_p: float = -1, 82 | ): 83 | config = WheatConfig( 84 | base_dir=base_dir, 85 | epochs=0, 86 | image_size=int(Path(base_dir).name), 87 | arch=arch, 88 | grad_accu=grad_accu, 89 | batch_size=batch_size, 90 | precision=16, 91 | amp_backend="apex", 92 | amp_level="O2", 93 | cutout=cutout, 94 | mixup_alpha=mixup, 95 | mosaic_p=mosaic_p, 96 | ) 97 | assert not (cutout is True and mixup > 0), "Can only enable one of MixUp and CutOut" 98 | omega_conf = OmegaConf.structured(config) 99 | eval(model_path, omega_conf, fold=fold) 100 | 101 | 102 | if __name__ == "__main__": 103 | typer.run(main) 104 | -------------------------------------------------------------------------------- /scripts/resize_images.py: -------------------------------------------------------------------------------- 1 | """Batch resizing the training data 2 | 3 | Source: https://www.kaggle.com/phunghieu/gwd-resize-images-bboxes 4 | """ 5 | 6 | import json 7 | from pathlib import Path 8 | 9 | import typer 10 | import numpy as np 11 | import pandas as pd 12 | import cv2 13 | import albumentations as A 14 | from tqdm import tqdm 15 | 16 | 17 | def load_dataframe(csv_path: Path, image_dir: Path) -> pd.DataFrame: 18 | df = pd.read_csv(csv_path) 19 | 20 | # Merge all bboxes of each corresponding image 21 | # Format: [[x1 y1 w1 h1], [x2 y2 w2 h2], [x3 y3 w3 h3], ...] 22 | df.bbox = df.bbox.apply(lambda x: " ".join(np.array(json.loads(x), dtype=str))) 23 | df.bbox = df.groupby(["image_id"]).bbox.transform(lambda x: "|".join(x)) 24 | df.drop_duplicates(inplace=True, ignore_index=True) 25 | df.bbox = df.bbox.apply( 26 | lambda x: np.array( 27 | [item.split(" ") for item in x.split("|")], dtype=np.float32 28 | ).tolist() 29 | ) 30 | 31 | # Create a path to each image 32 | df["image_path"] = df.image_id.apply(lambda x: str(image_dir / (x + ".jpg"))) 33 | 34 | return df 35 | 36 | 37 | def load_image(image_path: str) -> np.ndarray: 38 | image = cv2.imread(image_path, cv2.IMREAD_COLOR) 39 | return image 40 | 41 | 42 | def fix_out_of_range(bbox: list, max_size: int = 1024) -> list: 43 | bbox[2] = min(bbox[2], max_size - bbox[0]) 44 | bbox[3] = min(bbox[3], max_size - bbox[1]) 45 | return bbox 46 | 47 | 48 | def main(image_size: int, root: str = "data/"): 49 | root_path = Path(root) 50 | train_dir = root_path / "train" 51 | target_dir = root_path / f"{image_size}" 52 | (target_dir / "train").mkdir(parents=True) 53 | 54 | df = load_dataframe(root_path / "train.csv", train_dir) 55 | 56 | transform = A.Compose( 57 | [ 58 | A.Resize(height=image_size, width=image_size, interpolation=cv2.INTER_AREA), 59 | ], 60 | p=1.0, 61 | bbox_params=A.BboxParams( 62 | format="coco", min_area=0, min_visibility=0, label_fields=["labels"] 63 | ), 64 | ) 65 | 66 | list_of_image_ids = [] 67 | list_of_bboxes = [] 68 | list_of_sources = [] 69 | 70 | for _, row in tqdm(df.iterrows(), total=df.shape[0]): 71 | image = load_image(row.image_path) 72 | bboxes = row.bbox 73 | 74 | # Fix "out-of-range" bboxes 75 | bboxes = [fix_out_of_range(bbox) for bbox in bboxes] 76 | 77 | result = transform(image=image, bboxes=bboxes, labels=np.ones(len(bboxes))) 78 | new_image = result["image"] 79 | new_bboxes = np.array(result["bboxes"]).tolist() 80 | 81 | # Save new image 82 | cv2.imwrite(str(target_dir / "train" / (row.image_id + ".jpg")), new_image) 83 | 84 | for new_bbox in new_bboxes: 85 | list_of_image_ids.append(row.image_id) 86 | 
list_of_bboxes.append(new_bbox) 87 | list_of_sources.append(row.source) 88 | 89 | new_data_dict = { 90 | "image_id": list_of_image_ids, 91 | "width": [image_size] * len(list_of_image_ids), 92 | "height": [image_size] * len(list_of_image_ids), 93 | "bbox": list_of_bboxes, 94 | "source": list_of_sources, 95 | } 96 | new_df = pd.DataFrame(new_data_dict) 97 | new_df.to_csv(target_dir / "train.csv", index=False) 98 | 99 | 100 | if __name__ == "__main__": 101 | typer.run(main) 102 | -------------------------------------------------------------------------------- /wheat/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | from datetime import datetime 4 | from pathlib import Path 5 | 6 | import torch 7 | import typer 8 | import numpy as np 9 | import pandas as pd 10 | from omegaconf import OmegaConf, DictConfig 11 | from pytorch_lightning import Trainer, seed_everything 12 | from pytorch_lightning.loggers import TensorBoardLogger 13 | from pytorch_lightning_spells.loggers import ScreenLogger 14 | from pytorch_lightning_spells.callbacks import ( 15 | Callback, 16 | RandomAugmentationChoiceCallback, 17 | ) 18 | from pytorch_lightning.callbacks import ( 19 | LearningRateMonitor, 20 | ModelCheckpoint, 21 | ) 22 | 23 | from .config import WheatConfig 24 | from .model import WheatModel 25 | 26 | from sklearn.model_selection import StratifiedKFold 27 | 28 | 29 | class MixUpDetectionCallback(Callback): 30 | def __init__(self, alpha: float = 0.4): 31 | super().__init__() 32 | self.alpha = alpha 33 | 34 | def on_train_batch_start( 35 | self, trainer, pl_module, batch, batch_idx, dataloader_idx 36 | ): 37 | old_batch = batch 38 | batch, targets = batch 39 | lambd = np.clip(np.random.beta(self.alpha, self.alpha, 1), 0.35, 0.65) 40 | lambd_ = torch.tensor(max(lambd, 1 - lambd), device=batch.device).float() 41 | # Combine input batch 42 | new_batch = batch * lambd_ + batch.flip(0) * (1 - lambd_) 43 | # Combine targets 44 | assert isinstance(targets, dict) 45 | for col in ("bbox", "cls"): 46 | targets[col] = torch.cat([targets[col], targets[col].flip(0)], dim=1) 47 | 48 | old_batch[0] = new_batch 49 | old_batch[1] = targets 50 | 51 | 52 | def train(config: DictConfig, fold: int = 0): 53 | seed_everything(int(config.seed)) 54 | base_path = Path(config.base_dir) 55 | df_train = pd.read_csv(str(base_path / "train.csv")) 56 | bboxes = np.stack(df_train["bbox"].apply(lambda x: np.fromstring(x[1:-1], sep=","))) 57 | 58 | for i, col in enumerate(["x", "y", "w", "h"]): 59 | df_train[col] = bboxes[:, i] 60 | df_train["area"] = df_train["w"] * df_train["h"] 61 | df_train["x2"] = df_train["x"] + df_train["w"] 62 | df_train["y2"] = df_train["y"] + df_train["h"] 63 | 64 | skf = StratifiedKFold(n_splits=5, random_state=88, shuffle=True) 65 | # one row for one image 66 | df_trunc = df_train[["image_id", "source"]].drop_duplicates().reset_index(drop=True) 67 | for fold_idx, (_, valid_index) in enumerate( 68 | skf.split(df_trunc, y=df_trunc["source"]) 69 | ): 70 | df_trunc.loc[valid_index, "fold"] = fold_idx 71 | # Add fold column back 72 | df_train = df_train.merge(df_trunc, on=["image_id", "source"]) 73 | 74 | # create model for one fold 75 | model = WheatModel( 76 | config, 77 | df_train, 78 | fold=fold, 79 | half=( 80 | config.precision == 16 81 | and config.amp_backend == "apex" 82 | and config.amp_level == "O2" 83 | ), 84 | ) 85 | checkpoints = ModelCheckpoint( 86 | dirpath="cache/checkpoints/", 87 | monitor="val_MAP", 88 | mode="max", 89 | 
filename="{step:06d}-{val_loss:.4f}", 90 | save_top_k=1, 91 | save_last=False, 92 | ) 93 | callbacks = [LearningRateMonitor(logging_interval="step"), checkpoints] 94 | if config.mixup_alpha > 0: 95 | callbacks.append( 96 | RandomAugmentationChoiceCallback( 97 | [MixUpDetectionCallback(config.mixup_alpha)], 98 | p=[1.0], 99 | no_op_warmup=config.no_op_warmup_steps, 100 | no_op_prob=config.no_op_ratio, 101 | ) 102 | ) 103 | trainer = Trainer( 104 | amp_backend=config.amp_backend, 105 | amp_level=config.amp_level, 106 | precision=config.precision, 107 | gpus=1, 108 | callbacks=callbacks, 109 | # val_check_interval=0.5, 110 | gradient_clip_val=config.gradient_clip_val, 111 | logger=[ 112 | TensorBoardLogger( 113 | "cache/tb_logs", 114 | name="wheat", 115 | version=f"fold-{fold}-{datetime.now():%Y%m%dT%H%M}", 116 | ), 117 | ScreenLogger(), 118 | ], 119 | accumulate_grad_batches=config.grad_accu, 120 | # fast_dev_run=True, 121 | max_epochs=config.epochs, 122 | ) 123 | 124 | trainer.fit(model) 125 | 126 | print(checkpoints.best_model_path, checkpoints.best_model_score) 127 | pl_module = WheatModel.load_from_checkpoint( 128 | checkpoints.best_model_path, 129 | config=copy.deepcopy(config), 130 | df=df_train, 131 | fold=fold, 132 | half=False, 133 | ) 134 | torch.save( 135 | {"states": pl_module.model.state_dict(), "arch": config.arch}, "wheatdet.pth" 136 | ) 137 | 138 | 139 | def main( 140 | base_dir: str, 141 | epochs: int = 2, 142 | grad_accu: int = 1, 143 | arch: str = "tf_efficientdet_d3", 144 | batch_size: int = 8, 145 | fold: int = 0, 146 | mixup: float = -1, 147 | cutout: bool = False, 148 | mosaic_p: float = -1, 149 | ): 150 | config = WheatConfig( 151 | base_dir=base_dir, 152 | epochs=epochs, 153 | image_size=int(Path(base_dir).name), 154 | arch=arch, 155 | grad_accu=grad_accu, 156 | batch_size=batch_size, 157 | precision=16, 158 | amp_backend="apex", 159 | amp_level="O2", 160 | cutout=cutout, 161 | mixup_alpha=mixup, 162 | mosaic_p=mosaic_p, 163 | ) 164 | assert not (cutout is True and mixup > 0), "Can only enable one of MixUp and CutOut" 165 | if os.environ.get("SEED"): 166 | config.seed = int(os.environ["SEED"]) 167 | omega_conf = OmegaConf.structured(config) 168 | with open("train_config.yaml", "w") as fout: 169 | OmegaConf.save(config=omega_conf, f=fout) 170 | train(omega_conf, fold=fold) 171 | 172 | 173 | if __name__ == "__main__": 174 | typer.run(main) 175 | -------------------------------------------------------------------------------- /wheat/metrics.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adapted from https://www.kaggle.com/pestipeti/competition-metric-details-script 3 | """ 4 | from typing import Optional, Any, Iterable, Callable, Dict 5 | 6 | import torch 7 | import numpy as np 8 | from numba import jit 9 | 10 | from torchmetrics.metric import Metric 11 | 12 | 13 | @jit(nopython=True) 14 | def calculate_iou(gt, pr, form="pascal_voc") -> float: 15 | """Calculates the Intersection over Union. 
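Boxes are treated as inclusive pixel ranges, which is why the overlap and area computations below add 1 to each width and height.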
16 | 17 | Args: 18 | gt: (np.ndarray[Union[int, float]]) coordinates of the ground-truth box 19 | pr: (np.ndarray[Union[int, float]]) coordinates of the predicted box 20 | form: (str) gt/pred coordinates format 21 | - pascal_voc: [xmin, ymin, xmax, ymax] 22 | - coco: [xmin, ymin, w, h] 23 | Returns: 24 | (float) Intersection over union (0.0 <= iou <= 1.0) 25 | """ 26 | if form == "coco": 27 | gt = gt.copy() 28 | pr = pr.copy() 29 | 30 | gt[2] = gt[0] + gt[2] 31 | gt[3] = gt[1] + gt[3] 32 | pr[2] = pr[0] + pr[2] 33 | pr[3] = pr[1] + pr[3] 34 | 35 | # Calculate overlap area 36 | dx = min(gt[2], pr[2]) - max(gt[0], pr[0]) + 1 37 | 38 | if dx < 0: 39 | return 0.0 40 | 41 | dy = min(gt[3], pr[3]) - max(gt[1], pr[1]) + 1 42 | 43 | if dy < 0: 44 | return 0.0 45 | 46 | overlap_area = dx * dy 47 | 48 | # Calculate union area 49 | union_area = ( 50 | (gt[2] - gt[0] + 1) * (gt[3] - gt[1] + 1) 51 | + (pr[2] - pr[0] + 1) * (pr[3] - pr[1] + 1) 52 | - overlap_area 53 | ) 54 | 55 | return overlap_area / union_area 56 | 57 | 58 | @jit(nopython=True) 59 | def find_best_match( 60 | gts, pred, pred_idx, threshold=0.5, form="pascal_voc", ious=None 61 | ) -> int: 62 | """Returns the index of the 'best match' between the 63 | ground-truth boxes and the prediction. The 'best match' 64 | is the highest IoU. (0.0 IoUs are ignored). 65 | 66 | Args: 67 | gts: (List[List[Union[int, float]]]) Coordinates of the available ground-truth boxes 68 | pred: (List[Union[int, float]]) Coordinates of the predicted box 69 | pred_idx: (int) Index of the current predicted box 70 | threshold: (float) Threshold 71 | form: (str) Format of the coordinates 72 | ious: (np.ndarray) len(gts) x len(preds) matrix for storing calculated ious. 73 | 74 | Return: 75 | (int) Index of the best match GT box (-1 if no match above threshold) 76 | """ 77 | best_match_iou = -np.inf 78 | best_match_idx = -1 79 | 80 | for gt_idx in range(len(gts)): 81 | 82 | if gts[gt_idx][0] < 0: 83 | # Already matched GT-box 84 | continue 85 | 86 | iou = -1 if ious is None else ious[gt_idx][pred_idx] 87 | 88 | if iou < 0: 89 | iou = calculate_iou(gts[gt_idx], pred, form=form) 90 | if ious is not None: 91 | ious[gt_idx][pred_idx] = iou 92 | 93 | if iou < threshold: 94 | continue 95 | 96 | if iou > best_match_iou: 97 | best_match_iou = iou 98 | best_match_idx = gt_idx 99 | return best_match_idx 100 | 101 | 102 | @jit(nopython=True) 103 | def calculate_precision(gts, preds, threshold=0.5, form="coco", ious=None) -> float: 104 | """Calculates precision for GT - prediction pairs at one threshold. 105 | 106 | Args: 107 | gts: (List[List[Union[int, float]]]) Coordinates of the available ground-truth boxes 108 | preds: (List[List[Union[int, float]]]) Coordinates of the predicted boxes, 109 | sorted by confidence value (descending) 110 | threshold: (float) Threshold 111 | form: (str) Format of the coordinates 112 | ious: (np.ndarray) len(gts) x len(preds) matrix for storing calculated ious. 113 | 114 | Return: 115 | (float) Precision 116 | """ 117 | n = len(preds) 118 | tp = 0 119 | fp = 0 120 | 121 | # for pred_idx, pred in enumerate(preds_sorted): 122 | for pred_idx in range(n): 123 | 124 | best_match_gt_idx = find_best_match( 125 | gts, preds[pred_idx], pred_idx, threshold=threshold, form=form, ious=ious 126 | ) 127 | 128 | if best_match_gt_idx >= 0: 129 | # True positive: The predicted box matches a gt box with an IoU above the threshold. 
130 | tp += 1 131 | # Remove the matched GT box 132 | gts[best_match_gt_idx] = -1 133 | 134 | else: 135 | # No match 136 | # False positive: indicates a predicted box had no associated gt box. 137 | fp += 1 138 | 139 | # False negative: indicates a gt box had no associated predicted box. 140 | fn = (gts.sum(axis=1) > 0).sum() 141 | 142 | return tp / (tp + fp + fn) 143 | 144 | 145 | @jit(nopython=True) 146 | def calculate_image_precision(gts, preds, thresholds=(0.5,), form="coco") -> float: 147 | """Calculates image precision. 148 | 149 | Args: 150 | gts: (List[List[Union[int, float]]]) Coordinates of the available ground-truth boxes 151 | preds: (List[List[Union[int, float]]]) Coordinates of the predicted boxes, 152 | sorted by confidence value (descending) 153 | thresholds: (float) Different thresholds 154 | form: (str) Format of the coordinates 155 | 156 | Return: 157 | (float) Precision 158 | """ 159 | n_threshold = len(thresholds) 160 | image_precision = 0.0 161 | 162 | ious = np.ones((len(gts), len(preds))) * -1 163 | # ious = None 164 | 165 | for threshold in thresholds: 166 | precision_at_threshold = calculate_precision( 167 | gts.copy(), preds, threshold=threshold, form=form, ious=ious 168 | ) 169 | image_precision += precision_at_threshold / n_threshold 170 | 171 | return image_precision 172 | 173 | 174 | class mAP(Metric): 175 | def __init__( 176 | self, 177 | thresholds: Iterable[float], 178 | confidence_threshold: float, 179 | form: str, 180 | compute_on_step: bool = True, 181 | dist_sync_on_step: bool = False, 182 | process_group: Optional[Any] = None, 183 | dist_sync_fn: Callable = None, 184 | ) -> None: 185 | super().__init__( 186 | compute_on_step=compute_on_step, 187 | dist_sync_on_step=dist_sync_on_step, 188 | process_group=process_group, 189 | dist_sync_fn=dist_sync_fn, 190 | ) 191 | 192 | self.thresholds = thresholds 193 | self.confidence_threshold = confidence_threshold 194 | self.form = form 195 | 196 | self.add_state( 197 | "mAP_sum", 198 | default=torch.tensor(0.0, dtype=torch.float64), 199 | dist_reduce_fx="sum", 200 | ) 201 | self.add_state( 202 | "total", 203 | default=torch.tensor(0.0, dtype=torch.float64), 204 | dist_reduce_fx="sum", 205 | ) 206 | 207 | def update(self, detections: torch.Tensor, targets: torch.Tensor) -> None: # type: ignore 208 | for i in range(len(detections)): 209 | local_detections = detections[i].cpu().numpy() 210 | local_detections = local_detections[ 211 | local_detections[:, 4] > self.confidence_threshold 212 | ] 213 | # print(targets[i][:2]) 214 | map = calculate_image_precision( 215 | targets[i].cpu().numpy(), 216 | local_detections[:, :4], 217 | self.thresholds, 218 | self.form, 219 | ) 220 | # Update states 221 | self.mAP_sum += map 222 | self.total += 1 223 | 224 | def compute(self) -> torch.Tensor: 225 | return self.mAP_sum / self.total 226 | 227 | @property 228 | def is_differentiable(self) -> bool: 229 | return False 230 | -------------------------------------------------------------------------------- /wheat/dataset.py: -------------------------------------------------------------------------------- 1 | import enum 2 | import random 3 | from pathlib import Path 4 | 5 | import torch 6 | import numpy as np 7 | from PIL import Image 8 | from torch.utils.data import Dataset 9 | import albumentations as A 10 | 11 | 12 | class Mode(enum.Enum): 13 | train = 0 14 | validation = 1 15 | test = 2 16 | 17 | 18 | class WheatDataset(Dataset): 19 | """ 20 | [reference](https://www.kaggle.com/dangnam739/faster-rcnn-global-wheat-detection) 21 | 
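When `df` is provided, it is expected to hold one row per bounding box with the columns `image_id`, `width`, `height`, `x`, `y`, `w`, `h`, `x2`, and `y2`, as prepared by `wheat.train` / `wheat.eval`. Without a dataframe the dataset runs in test mode and simply globs `image_dir` for `*.jpg` files.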
""" 22 | 23 | def __init__( 24 | self, 25 | image_dir: str, 26 | df=None, 27 | mode: Mode = Mode.train, 28 | transforms=None, 29 | min_box_edge=-1, 30 | mosaic_p: float = -1, 31 | ): 32 | super().__init__() 33 | if df is not None: 34 | self.df = df.copy() 35 | self.image_ids = df["image_id"].unique() 36 | assert self.df["width"].nunique() == 1 37 | assert self.df["height"].nunique() == 1 38 | assert self.df["width"].values[0] == self.df["height"].values[0] 39 | self.image_size = self.df["width"].values[0] 40 | self.min_box_edge = min_box_edge 41 | else: 42 | # test case 43 | self.df = None 44 | self.image_ids = [p.stem for p in Path(image_dir).glob("*.jpg")] 45 | # TODO: set image size automatically 46 | self.image_size = 384 47 | self.mosaic_p = mosaic_p 48 | self.image_dir = image_dir 49 | self.transforms = transforms 50 | self.mode = mode 51 | 52 | def _load_image(self, image_id): 53 | image = Image.open(f"{self.image_dir}/{image_id}.jpg").convert("RGB") 54 | return np.array(image) 55 | 56 | def _load_bbox(self, image_id): 57 | records = self.df[self.df["image_id"] == image_id] 58 | if self.min_box_edge > 0: 59 | records = records[ 60 | (records.w >= self.min_box_edge) & (records.h >= self.min_box_edge) 61 | ] 62 | boxes = records[["x", "y", "x2", "y2"]].values 63 | return boxes 64 | 65 | def __getitem__(self, index: int): 66 | if self.mode in (Mode.test, Mode.validation) or ( 67 | self.mode is Mode.train and (random.random() > self.mosaic_p) 68 | ): 69 | image_id = self.image_ids[index] 70 | image = self._load_image(image_id) 71 | if self.mode in (Mode.train, Mode.validation): 72 | boxes = self._load_bbox(image_id) 73 | else: 74 | boxes = (np.asarray([[0, 0, 0, 0]], dtype=np.float32),) 75 | else: 76 | image, boxes = self._load_mosaic(index) 77 | 78 | target = {} 79 | target["bbox"] = boxes 80 | target["cls"] = np.ones((len(boxes),), dtype=np.int64) 81 | # These are needed as well by the efficientdet model. 82 | target["img_size"] = (self.image_size, self.image_size) 83 | target["img_scale"] = 1.0 84 | 85 | if self.transforms: 86 | sample = {"image": image, "bboxes": target["bbox"], "labels": target["cls"]} 87 | sample = self.transforms(**sample) 88 | if len(sample["bboxes"]) > 0: 89 | # apply augmentation on the fly 90 | target["cls"] = np.asarray(sample["labels"]) 91 | target["bbox"] = np.asarray(sample["bboxes"]) 92 | else: 93 | target["cls"] = np.empty(0, dtype=int) 94 | target["bbox"] = np.empty((0, 4), dtype=np.float32) 95 | image = sample["image"].transpose(2, 0, 1) 96 | else: 97 | image = image.transpose(2, 0, 1) 98 | # convert to yxyx format 99 | target["bbox"] = target["bbox"][:, [1, 0, 3, 2]] 100 | return image, target 101 | 102 | def _load_mosaic(self, index): 103 | """ 104 | Adapted from: 105 | 1. https://github.com/ultralytics/yolov5/blob/831773f5a23926658ee76459ce37550643432123/utils/datasets.py#L529 106 | 2. 
https://www.kaggle.com/shonenkov/training-efficientdet 107 | """ 108 | w, h = self.image_size, self.image_size 109 | border_size = self.image_size // 2 110 | min_visibility = self.transforms.processors["bboxes"].params.min_visibility 111 | 112 | xc, yc = [ 113 | int(random.uniform(border_size // 2, self.image_size - border_size // 2)) 114 | for _ in range(2) 115 | ] # center x, y 116 | indexes = [index] + np.random.choice( 117 | range(len(self)), 3, replace=False 118 | ).tolist() 119 | 120 | result_image = np.full((self.image_size, self.image_size, 3), 0, dtype=np.uint8) 121 | result_boxes = [] 122 | 123 | for i, index in enumerate(indexes): 124 | image = self._load_image(self.image_ids[index]) 125 | boxes = self._load_bbox(self.image_ids[index]) 126 | if i == 0: # top left 127 | x1a, y1a, x2a, y2a = ( 128 | max(xc - w, 0), 129 | max(yc - h, 0), 130 | xc, 131 | yc, 132 | ) # xmin, ymin, xmax, ymax (large image) 133 | x1b, y1b, x2b, y2b = ( 134 | w - (x2a - x1a), 135 | h - (y2a - y1a), 136 | w, 137 | h, 138 | ) # xmin, ymin, xmax, ymax (small image) 139 | elif i == 1: # top right 140 | x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, w), yc 141 | x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h 142 | elif i == 2: # bottom left 143 | x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(h, yc + h) 144 | x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, max(xc, w), min(y2a - y1a, h) 145 | elif i == 3: # bottom right 146 | x1a, y1a, x2a, y2a = xc, yc, min(xc + w, w), min(h, yc + h) 147 | x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) 148 | result_image[y1a:y2a, x1a:x2a] = image[y1b:y2b, x1b:x2b] 149 | padw = x1a - x1b 150 | padh = y1a - y1b 151 | 152 | boxes[:, 0] += padw 153 | boxes[:, 1] += padh 154 | boxes[:, 2] += padw 155 | boxes[:, 3] += padh 156 | 157 | area_before = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) 158 | # Filter non-boxes 159 | np.clip(boxes, 0, self.image_size, out=boxes) 160 | area_after = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) 161 | visibility = area_after / area_before 162 | 163 | boxes = boxes[np.where(visibility > min_visibility)] 164 | result_boxes.append(boxes) 165 | 166 | result_boxes = np.concatenate(result_boxes, 0) 167 | return result_image, result_boxes 168 | 169 | def __len__(self) -> int: 170 | return len(self.image_ids) 171 | 172 | 173 | def get_train_transforms(image_size: int, cutout: bool = False): 174 | transforms = [ 175 | A.RandomSizedCrop( 176 | (int(image_size * 0.8), image_size), 177 | image_size, 178 | image_size, 179 | p=0.5, 180 | ), 181 | A.OneOf( 182 | [ 183 | A.HueSaturationValue( 184 | hue_shift_limit=0.2, 185 | sat_shift_limit=0.2, 186 | val_shift_limit=0.2, 187 | p=0.9, 188 | ), 189 | A.RandomBrightnessContrast( 190 | brightness_limit=0.2, contrast_limit=0.2, p=0.9 191 | ), 192 | ], 193 | p=0.9, 194 | ), 195 | # A.ToGray(p=0.01), 196 | A.HorizontalFlip(p=0.5), 197 | A.VerticalFlip(p=0.5), 198 | ] 199 | if cutout: 200 | size = int(image_size * 0.1) 201 | transforms.append( 202 | A.Cutout( 203 | num_holes=8, max_h_size=size, max_w_size=size, fill_value=0, p=0.5 204 | ), 205 | ) 206 | print(transforms) 207 | return A.Compose( 208 | transforms, 209 | p=1.0, 210 | bbox_params=A.BboxParams( 211 | format="pascal_voc", 212 | min_area=0, 213 | min_visibility=0.3, 214 | label_fields=["labels"], 215 | ), 216 | ) 217 | -------------------------------------------------------------------------------- /wheat/model.py: -------------------------------------------------------------------------------- 1 | 
import math 2 | from pathlib import Path 3 | from typing import Tuple, Union, Dict, List 4 | 5 | import torch 6 | import numpy as np 7 | import pandas as pd 8 | from omegaconf import DictConfig 9 | 10 | from effdet import get_efficientdet_config, EfficientDet 11 | from effdet.bench import DetBenchTrain, DetBenchPredict 12 | from effdet.helpers import load_pretrained 13 | from effdet.data.loader import DetectionFastCollate 14 | 15 | import pytorch_lightning_spells as pls 16 | from pytorch_lightning_spells import BaseModule 17 | from torch.utils.data import DataLoader 18 | 19 | from .dataset import WheatDataset, get_train_transforms, Mode 20 | from .metrics import mAP 21 | 22 | TARGET_TYPE = Dict[str, torch.Tensor] 23 | 24 | 25 | class DetectionModule(BaseModule): 26 | def validation_step_end(self, outputs): 27 | """This method logs the validation loss and metrics for you. 28 | 29 | The output from `.validation_step()` method must contains these three entries: 30 | 31 | 1. loss: the validation loss. 32 | 2. pred: the predicted labels or values. 33 | 3. target: the ground truth lables or values. 34 | 35 | Args: 36 | outputs (Dict): the output from `.validation_step()` method. 37 | """ 38 | self.log("val_loss", outputs["loss"].mean()) 39 | for name, metric in self.metrics: 40 | metric(outputs["detections"], outputs["targets"]) 41 | self.log("val_" + name, metric) 42 | 43 | 44 | def get_train_efficientdet( 45 | model_name: str = "tf_efficientdet_d5", 46 | image_size: Tuple[int, int] = (384, 384), 47 | mode: str = "train", 48 | pretrained: bool = True, 49 | ): 50 | config = get_efficientdet_config(model_name) 51 | config.image_size = image_size 52 | net = EfficientDet(config, pretrained_backbone=False) 53 | # load pretrained 54 | if pretrained: 55 | load_pretrained(net, config.url) 56 | net.reset_head(num_classes=1) 57 | if mode == "train": 58 | return DetBenchTrain(net, create_labeler=True) 59 | else: 60 | return DetBenchPredict(net) 61 | 62 | 63 | def collate_fn(batch): 64 | return tuple(zip(*batch)) 65 | 66 | 67 | IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406) 68 | IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225) 69 | 70 | 71 | class PrefetchLoader: 72 | def __init__( 73 | self, 74 | loader, 75 | mean=IMAGENET_DEFAULT_MEAN, 76 | std=IMAGENET_DEFAULT_STD, 77 | half: bool = False, 78 | ): 79 | self.half = half 80 | self.loader = loader 81 | self.mean = torch.tensor([x * 255 for x in mean]).cuda().view(1, 3, 1, 1) 82 | self.std = torch.tensor([x * 255 for x in std]).cuda().view(1, 3, 1, 1) 83 | 84 | def __iter__(self): 85 | stream = torch.cuda.Stream() 86 | first = True 87 | 88 | for next_input, next_target in self.loader: 89 | with torch.cuda.stream(stream): 90 | next_input = next_input.cuda(non_blocking=True) 91 | next_input = next_input.float().sub_(self.mean).div_(self.std) 92 | next_target = { 93 | k: v.cuda(non_blocking=True) for k, v in next_target.items() 94 | } 95 | if self.half: 96 | next_input = next_input.half() 97 | 98 | if not first: 99 | yield [input, target] 100 | else: 101 | first = False 102 | 103 | torch.cuda.current_stream().wait_stream(stream) 104 | input = next_input 105 | target = next_target 106 | 107 | yield [input, target] 108 | 109 | def __len__(self): 110 | return len(self.loader) 111 | 112 | @property 113 | def sampler(self): 114 | return self.loader.sampler 115 | 116 | @property 117 | def dataset(self): 118 | return self.loader.dataset 119 | 120 | 121 | class WheatModel(DetectionModule): 122 | def __init__( 123 | self, 124 | config: DictConfig, 125 | df: pd.DataFrame, 
126 | fold: int, 127 | half: bool = False, 128 | ): 129 | super().__init__() 130 | self.df = df 131 | self.config = config 132 | self.train_df = self.df.loc[lambda df: df["fold"] != fold] 133 | self.valid_df = self.df.loc[lambda df: df["fold"] == fold] 134 | self.image_dir = str(Path(config.base_dir) / "train") 135 | self.model = get_train_efficientdet( 136 | config.arch, image_size=(config.image_size, config.image_size) 137 | ) 138 | self.min_box_edge = 10 / (1024 / config.image_size) 139 | self.num_workers = config.num_workers 140 | self.batch_size = config.batch_size 141 | self.metrics = [ 142 | ( 143 | "MAP", 144 | mAP( 145 | thresholds=np.arange(0.5, 0.76, 0.05), 146 | form="pascal_voc", 147 | confidence_threshold=0.4, 148 | ), 149 | ) 150 | # ("acc", RetrievalMAP(compute_on_step=False)), 151 | ] 152 | train_transforms = get_train_transforms(config.image_size, cutout=config.cutout) 153 | self.train_dataset = WheatDataset( 154 | df=self.train_df, 155 | image_dir=self.image_dir, 156 | transforms=train_transforms, 157 | min_box_edge=self.min_box_edge, 158 | mode=Mode.train, 159 | mosaic_p=self.config.mosaic_p, 160 | ) 161 | # valid_transforms = get_valid_transforms() 162 | self.valid_dataset = WheatDataset( 163 | df=self.valid_df, 164 | image_dir=self.image_dir, 165 | transforms=None, 166 | mode=Mode.validation, 167 | ) 168 | self.grad_accu = config.grad_accu 169 | self.epochs = config.epochs 170 | self.half = half 171 | print("# of train images:", len(self.train_dataset)) 172 | print("# of valid images:", len(self.valid_dataset)) 173 | 174 | def forward(self, image, target): 175 | return self.model(image, target) 176 | 177 | def training_step(self, batch, batch_idx): 178 | images, targets = batch 179 | losses_dict = self.forward(images, targets) 180 | 181 | return { 182 | "loss": losses_dict["loss"], 183 | "log": batch_idx % self.trainer.accumulate_grad_batches == 0, 184 | } 185 | 186 | def validation_step(self, batch, batch_idx): 187 | images, targets = batch 188 | losses_dict = self.model(images, targets) 189 | loss_val = losses_dict["loss"] 190 | detections = losses_dict["detections"] 191 | # Back to xyxy form 192 | bbox = targets["bbox"][:, :, [1, 0, 3, 2]] 193 | return {"loss": loss_val, "detections": detections, "targets": bbox} 194 | 195 | def configure_optimizers(self): 196 | steps_per_epochs = math.floor( 197 | len(self.train_dataset) 198 | / self.batch_size 199 | / self.grad_accu # / self.num_gpus # dpp mode 200 | ) 201 | print("Steps per epochs:", steps_per_epochs) 202 | n_steps = steps_per_epochs * self.epochs 203 | lr_durations = [int(n_steps * 0.05), int(np.ceil(n_steps * 0.95)) + 1] 204 | break_points = [0] + list(np.cumsum(lr_durations))[:-1] 205 | optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.config.base_lr) 206 | scheduler = { 207 | "scheduler": pls.lr_schedulers.MultiStageScheduler( 208 | [ 209 | pls.lr_schedulers.LinearLR(optimizer, 0.01, lr_durations[0]), 210 | pls.lr_schedulers.CosineAnnealingLR(optimizer, lr_durations[1]), 211 | ], 212 | start_at_epochs=break_points, 213 | ), 214 | "interval": "step", 215 | "frequency": 1, 216 | "strict": True, 217 | } 218 | return {"optimizer": optimizer, "lr_scheduler": scheduler} 219 | 220 | def train_dataloader(self): 221 | loader = DataLoader( 222 | self.train_dataset, 223 | batch_size=self.batch_size, 224 | shuffle=True, 225 | pin_memory=False, 226 | drop_last=True, 227 | collate_fn=DetectionFastCollate(anchor_labeler=None), 228 | num_workers=self.num_workers, 229 | ) 230 | return PrefetchLoader(loader, 
half=self.half) 231 | 232 | def val_dataloader(self): 233 | valid_dataloader = DataLoader( 234 | self.valid_dataset, 235 | batch_size=self.batch_size, 236 | pin_memory=False, 237 | shuffle=False, 238 | collate_fn=DetectionFastCollate(anchor_labeler=None), 239 | num_workers=self.num_workers, 240 | ) 241 | 242 | # iou_types = ["bbox"] 243 | 244 | return PrefetchLoader(valid_dataloader, half=self.half) 245 | -------------------------------------------------------------------------------- /notebooks/gwd-resize-images-bboxes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "source": [ 8 | "\n", 9 | "# Table of Contents\n", 10 | "1. [Import libraries](#import_libraries)\n", 11 | "1. [Configure hyper-parameters](#configure_hyper_parameters)\n", 12 | "1. [Define helper-functions](#define_helper_functions)\n", 13 | "1. [Resize images and corresponding bboxes](#resize_images_and_corresponding_bboxes)\n", 14 | "1. [Save and compress the results](#save_and_compress_the_result)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "source": [ 22 | "\n", 23 | "# Import libraries\n", 24 | "[Bach to Table of Contents](#toc)" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 1, 30 | "metadata": { 31 | "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", 32 | "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5" 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "import pathlib\n", 37 | "from pathlib import Path\n", 38 | "import json\n", 39 | "\n", 40 | "import numpy as np\n", 41 | "import pandas as pd\n", 42 | "import cv2\n", 43 | "import albumentations as A\n", 44 | "from tqdm import tqdm" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "source": [ 52 | "\n", 53 | "# Configure hyper-parameters\n", 54 | "[Bach to Table of Contents](#toc)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 2, 60 | "metadata": { 61 | "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", 62 | "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a" 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "ROOT = Path('/kaggle/input/global-wheat-detection/')\n", 67 | "TRAIN_DIR = ROOT / 'train'\n", 68 | "TEST_DIR = ROOT / 'test'\n", 69 | "\n", 70 | "WORKING_DIR = Path('/kaggle/working/')\n", 71 | "\n", 72 | "IMG_SIZE = 224" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "source": [ 80 | "\n", 81 | "# Define helper-functions\n", 82 | "[Bach to Table of Contents](#toc)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 3, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "def load_dataframe(csv_path: pathlib.PosixPath, image_dir: pathlib.PosixPath) -> pd.DataFrame:\n", 92 | " df = pd.read_csv(csv_path)\n", 93 | " \n", 94 | " # Merge all bboxes of each corresponding image\n", 95 | " # Format: [[x1 y1 w1 h1], [x2 y2 w2 h2], [x3 y3 w3 h3], ...]\n", 96 | " df.bbox = df.bbox.apply(lambda x: ' '.join(np.array(json.loads(x), dtype=str)))\n", 97 | " df.bbox = df.groupby(['image_id']).bbox.transform(lambda x: '|'.join(x))\n", 98 | " df.drop_duplicates(inplace=True, ignore_index=True)\n", 99 | " df.bbox = df.bbox.apply(lambda x: np.array([item.split(' ') for item in x.split('|')], dtype=np.float32).tolist())\n", 100 | " \n", 101 | " # Create a path to each 
image\n", 102 | " df['image_path'] = df.image_id.apply(lambda x: str(image_dir / (x + '.jpg')))\n", 103 | " \n", 104 | " return df\n", 105 | "\n", 106 | "def load_image(image_path: str) -> np.array:\n", 107 | " image = cv2.imread(image_path, cv2.IMREAD_COLOR)\n", 108 | "\n", 109 | " return image\n", 110 | "\n", 111 | "def fix_out_of_range(bbox: list, max_size: int = 1024) -> list:\n", 112 | " bbox[2] = min(bbox[2], max_size - bbox[0])\n", 113 | " bbox[3] = min(bbox[3], max_size - bbox[1])\n", 114 | "\n", 115 | " return bbox" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 4, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "df = load_dataframe(ROOT / 'train.csv', TRAIN_DIR)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 5, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "text/html": [ 135 | "
Rendered `text/html` output (a preview of `df`):

|      | image_id  | width | height | bbox | source | image_path |
|------|-----------|-------|--------|------|--------|------------|
| 0    | b6ab77fd7 | 1024  | 1024   | [[834.0, 222.0, 56.0, 36.0], [226.0, 548.0, 13... | usask_1   | /kaggle/input/global-wheat-detection/train/b6a... |
| 1    | b53afdf5c | 1024  | 1024   | [[988.0, 781.0, 36.0, 96.0], [331.0, 863.0, 70... | usask_1   | /kaggle/input/global-wheat-detection/train/b53... |
| 2    | 7b72ea0fb | 1024  | 1024   | [[332.0, 662.0, 113.0, 50.0], [285.0, 755.0, 3... | usask_1   | /kaggle/input/global-wheat-detection/train/7b7... |
| 3    | 91c9d9c38 | 1024  | 1024   | [[124.0, 273.0, 59.0, 73.0], [688.0, 939.0, 61... | usask_1   | /kaggle/input/global-wheat-detection/train/91c... |
| 4    | 41c0123cc | 1024  | 1024   | [[0.0, 669.0, 73.0, 111.0], [572.0, 757.0, 110... | usask_1   | /kaggle/input/global-wheat-detection/train/41c... |
| ...  | ...       | ...   | ...    | ...  | ...    | ...        |
| 3368 | 990c1777d | 1024  | 1024   | [[120.0, 97.0, 92.0, 89.0], [491.0, 312.0, 174... | arvalis_2 | /kaggle/input/global-wheat-detection/train/990... |
| 3369 | bce2fdc4d | 1024  | 1024   | [[59.0, 0.0, 133.0, 42.0], [742.0, 839.0, 115.... | arvalis_2 | /kaggle/input/global-wheat-detection/train/bce... |
| 3370 | a5c8d5f5c | 1024  | 1024   | [[619.0, 194.0, 113.0, 90.0], [53.0, 430.0, 14... | arvalis_2 | /kaggle/input/global-wheat-detection/train/a5c... |
| 3371 | e6b5e296d | 1024  | 1024   | [[940.0, 462.0, 84.0, 85.0], [532.0, 613.0, 10... | arvalis_2 | /kaggle/input/global-wheat-detection/train/e6b... |
| 3372 | 5e0747034 | 1024  | 1024   | [[273.0, 284.0, 113.0, 92.0], [494.0, 125.0, 1... | arvalis_2 | /kaggle/input/global-wheat-detection/train/5e0... |

3373 rows × 6 columns