├── synthetic_deforestation_dataset.xlsx ├── README.md ├── generate_synthetic_deforestation_dataset.py └── file /synthetic_deforestation_dataset.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nelvinebi/Deforestation-Detection-Using-Satellite-Imagery-Deep-Learning/HEAD/synthetic_deforestation_dataset.xlsx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Deforestation Detection Using Satellite Imagery (Deep Learning, Synthetic Data) 2 | 3 | This project simulates deforestation detection using synthetic satellite-like data and applies deep learning methods to classify land cover changes. By generating synthetic RED and NIR reflectance bands, the project computes NDVI before and after disturbance to label samples as deforested or non-deforested. 4 | 5 | Features 6 | 7 | Generates synthetic tabular dataset with >100 samples. 8 | 9 | Includes before/after reflectance bands (RED, NIR). 10 | 11 | Computes NDVI before, NDVI after, and NDVI difference. 12 | 13 | Labels data automatically as deforested (1) or not deforested (0). 14 | 15 | Saves dataset in both Excel (.xlsx) and CSV (.csv) formats. 16 | 17 | Ready for use in machine learning or deep learning workflows. 18 | 19 | Installation 20 | 21 | Clone this repository and install dependencies: 22 | 23 | git clone https://github.com/yourusername/Deforestation-Detection-Using-Satellite-Imagery-Deep-Learning.git 24 | cd Deforestation-Detection-Using-Satellite-Imagery-Deep-Learning 25 | pip install -r requirements.txt 26 | 27 | 28 | Dependencies: 29 | 30 | Python 3.8+ 31 | 32 | NumPy 33 | 34 | Pandas 35 | 36 | OpenPyXL (for Excel export) 37 | 38 | Usage 39 | 40 | Generate the dataset by running: 41 | 42 | python generate_synthetic_deforestation_dataset.py --n 300 --seed 42 --out outputs 43 | 44 | 45 | Arguments: 46 | 47 | --n: Number of samples (default: 300, must be >100). 48 | 49 | --seed: Random seed for reproducibility (default: 42). 50 | 51 | --out: Output folder (default: outputs). 52 | 53 | Example Output 54 | 55 | outputs/synthetic_deforestation_dataset.xlsx 56 | 57 | outputs/synthetic_deforestation_dataset.csv 58 | 59 | Dataset Columns 60 | 61 | sample_id 62 | 63 | RED_before, NIR_before 64 | 65 | RED_after, NIR_after 66 | 67 | NDVI_before, NDVI_after, NDVI_diff 68 | 69 | label_deforested (1 = deforested, 0 = not deforested) 70 | 71 | Applications 72 | 73 | Benchmarking deforestation detection models. 74 | 75 | Experimenting with vegetation indices (NDVI). 76 | 77 | Testing deep learning pipelines without requiring real imagery. 78 | 79 | Author 80 | 81 | Agbozu Ebingiye Nelvin 82 | 83 | License 84 | 85 | This project is released under the MIT License. See LICENSE for details. 86 | -------------------------------------------------------------------------------- /generate_synthetic_deforestation_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | generate_synthetic_deforestation_dataset.py 4 | ------------------------------------------- 5 | Create a tabular synthetic dataset for deforestation detection from 6 | "before/after" RED & NIR reflectances. Exports Excel (.xlsx) and CSV. 7 | 8 | Usage: 9 | python generate_synthetic_deforestation_dataset.py --n 300 --seed 42 --out outputs 10 | 11 | Columns: 12 | sample_id, RED_before, NIR_before, RED_after, NIR_after, 13 | NDVI_before, NDVI_after, NDVI_diff, label_deforested (0/1) 14 | """ 15 | 16 | import os 17 | import argparse 18 | import numpy as np 19 | import pandas as pd 20 | 21 | def generate_deforestation_dataset(n_samples: int = 300, seed: int = 42) -> pd.DataFrame: 22 | rng = np.random.default_rng(seed) 23 | eps = 1e-6 24 | records = [] 25 | 26 | for i in range(n_samples): 27 | deforested = rng.random() < 0.5 28 | 29 | # vegetation baseline (0.3..0.9) 30 | veg = rng.uniform(0.3, 0.9) 31 | 32 | # "Before" reflectance (higher NIR, lower RED with more vegetation) 33 | RED_b = 0.25 + 0.25 * (1 - veg) + rng.normal(0, 0.02) 34 | NIR_b = 0.55 + 0.40 * veg + rng.normal(0, 0.02) 35 | 36 | # "After": deforestation => RED increases, NIR decreases 37 | if deforested: 38 | RED_a = RED_b + 0.20 + rng.normal(0, 0.02) 39 | NIR_a = NIR_b - 0.30 + rng.normal(0, 0.02) 40 | else: 41 | RED_a = RED_b + rng.normal(0, 0.02) 42 | NIR_a = NIR_b + rng.normal(0, 0.02) 43 | 44 | NDVI_b = (NIR_b - RED_b) / (NIR_b + RED_b + eps) 45 | NDVI_a = (NIR_a - RED_a) / (NIR_a + RED_a + eps) 46 | NDVI_diff = NDVI_b - NDVI_a # positive when vegetation decreases 47 | 48 | records.append({ 49 | "sample_id": i, 50 | "RED_before": RED_b, 51 | "NIR_before": NIR_b, 52 | "RED_after": RED_a, 53 | "NIR_after": NIR_a, 54 | "NDVI_before": NDVI_b, 55 | "NDVI_after": NDVI_a, 56 | "NDVI_diff": NDVI_diff, 57 | "label_deforested": int(deforested), 58 | }) 59 | 60 | return pd.DataFrame(records) 61 | 62 | def main(): 63 | parser = argparse.ArgumentParser(description="Generate a synthetic deforestation dataset and save to Excel/CSV.") 64 | parser.add_argument("--n", type=int, default=300, help="Number of samples (>100).") 65 | parser.add_argument("--seed", type=int, default=42, help="Random seed.") 66 | parser.add_argument("--out", type=str, default="outputs", help="Output directory.") 67 | args = parser.parse_args() 68 | 69 | if args.n <= 100: 70 | raise ValueError("Please set --n > 100 to satisfy minimum dataset size.") 71 | 72 | os.makedirs(args.out, exist_ok=True) 73 | 74 | df = generate_deforestation_dataset(n_samples=args.n, seed=args.seed) 75 | 76 | xlsx_path = os.path.join(args.out, "synthetic_deforestation_dataset.xlsx") 77 | csv_path = os.path.join(args.out, "synthetic_deforestation_dataset.csv") 78 | 79 | # Save both formats 80 | df.to_excel(xlsx_path, index=False) 81 | df.to_csv(csv_path, index=False) 82 | 83 | print(f"Saved Excel -> {xlsx_path}") 84 | print(f"Saved CSV -> {csv_path}") 85 | print(f"Rows: {len(df)} | Columns: {len(df.columns)}") 86 | 87 | if __name__ == "__main__": 88 | main() 89 | -------------------------------------------------------------------------------- /file: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Deforestation-Detection-Using-Satellite-Imagery-Deep-Learning (Synthetic) 4 | ------------------------------------------------------------------------- 5 | 6 | Generates synthetic "before" and "after" reflectance patches, simulates 7 | deforestation as drops in NDVI (via RED↑, NIR↓ in affected areas), trains a 8 | CNN to classify patches (deforested vs. not), and evaluates performance. 9 | 10 | Requirements: 11 | - Python 3.8+ 12 | - numpy, matplotlib, scikit-learn, torch, torchvision (for transforms only) 13 | 14 | Usage: 15 | python deforestation_synthetic_cnn.py --n 1200 --size 32 --epochs 10 --batch 64 --out outputs 16 | """ 17 | 18 | import os 19 | import math 20 | import argparse 21 | import random 22 | from dataclasses import dataclass 23 | 24 | import numpy as np 25 | import matplotlib.pyplot as plt 26 | 27 | import torch 28 | import torch.nn as nn 29 | import torch.optim as optim 30 | from torch.utils.data import Dataset, DataLoader, random_split 31 | from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix 32 | 33 | 34 | # ----------------------------- 35 | # Utilities & Reproducibility 36 | # ----------------------------- 37 | def set_seed(seed: int = 42): 38 | random.seed(seed) 39 | np.random.seed(seed) 40 | torch.manual_seed(seed) 41 | torch.cuda.manual_seed_all(seed) 42 | 43 | 44 | def ensure_dir(p: str): 45 | os.makedirs(p, exist_ok=True) 46 | 47 | 48 | # ----------------------------- 49 | # Synthetic Data Generation 50 | # ----------------------------- 51 | @dataclass 52 | class SynthConfig: 53 | patch_size: int = 32 54 | n_samples: int = 1200 # > 100 by default 55 | pos_ratio: float = 0.5 # ~50% deforested 56 | noise_sigma: float = 0.04 57 | seed: int = 42 58 | 59 | 60 | def _smooth_noise(img: np.ndarray, k: int = 5) -> np.ndarray: 61 | """Very light smoothing using a box filter without scipy.""" 62 | assert k % 2 == 1, "kernel size should be odd" 63 | pad = k // 2 64 | padded = np.pad(img, ((pad, pad), (pad, pad)), mode="reflect") 65 | out = np.zeros_like(img) 66 | for i in range(img.shape[0]): 67 | for j in range(img.shape[1]): 68 | out[i, j] = padded[i:i + k, j:j + k].mean() 69 | return out 70 | 71 | 72 | def _random_shape_mask(h: int, w: int) -> np.ndarray: 73 | """Create a random rectangular or circular mask for deforestation.""" 74 | mask = np.zeros((h, w), dtype=np.float32) 75 | if random.random() < 0.5: 76 | # rectangle 77 | rh, rw = random.randint(h//5, h//2), random.randint(w//5, w//2) 78 | r0 = random.randint(0, h - rh) 79 | c0 = random.randint(0, w - rw) 80 | mask[r0:r0 + rh, c0:c0 + rw] = 1.0 81 | else: 82 | # circle 83 | r = random.randint(min(h, w)//6, min(h, w)//3) 84 | cy, cx = random.randint(r, h - r - 1), random.randint(r, w - r - 1) 85 | yy, xx = np.ogrid[:h, :w] 86 | mask[(yy - cy)**2 + (xx - cx)**2 <= r**2] = 1.0 87 | # smooth edges a bit 88 | mask = _smooth_noise(mask, k=3) 89 | mask = np.clip(mask, 0.0, 1.0) 90 | return mask 91 | 92 | 93 | def make_one_sample(ps: int, noise_sigma: float, deforested: bool) -> tuple[np.ndarray, int]: 94 | """ 95 | Returns: 96 | X: (C=5, H, W) channels = [RED_b, NIR_b, RED_a, NIR_a, NDVI_diff] 97 | y: 0/1 98 | """ 99 | # Base vegetation field (0..1) via smoothed uniform noise 100 | base = np.random.rand(ps, ps).astype(np.float32) 101 | base = _smooth_noise(base, k=7) 102 | base = (base - base.min()) / (base.max() - base.min() + 1e-6) 103 | 104 | # Simulate "before" reflectances influenced by vegetation 105 | # vegetation -> high NIR, lower RED 106 | RED_b = np.clip(0.25 + 0.25*(1.0 - base) + noise_sigma*np.random.randn(ps, ps), 0, 1) 107 | NIR_b = np.clip(0.55 + 0.40*base + noise_sigma*np.random.randn(ps, ps), 0, 1) 108 | 109 | # After: start from before 110 | RED_a = RED_b.copy() 111 | NIR_a = NIR_b.copy() 112 | 113 | if deforested: 114 | mask = _random_shape_mask(ps, ps) 115 | # Deforestation effect: RED goes up, NIR goes down inside mask 116 | RED_a = np.clip(RED_a + 0.25*mask + noise_sigma*np.random.randn(ps, ps), 0, 1) 117 | NIR_a = np.clip(NIR_a - 0.35*mask + noise_sigma*np.random.randn(ps, ps), 0, 1) 118 | else: 119 | # Small natural variability 120 | RED_a = np.clip(RED_a + noise_sigma*np.random.randn(ps, ps), 0, 1) 121 | NIR_a = np.clip(NIR_a + noise_sigma*np.random.randn(ps, ps), 0, 1) 122 | 123 | # NDVI before/after and diff 124 | eps = 1e-6 125 | NDVI_b = (NIR_b - RED_b) / (NIR_b + RED_b + eps) 126 | NDVI_a = (NIR_a - RED_a) / (NIR_a + RED_a + eps) 127 | NDVI_diff = NDVI_b - NDVI_a # positive when vegetation decreases 128 | 129 | # Stack channels (5, H, W) 130 | X = np.stack([RED_b, NIR_b, RED_a, NIR_a, NDVI_diff], axis=0).astype(np.float32) 131 | y = int(deforested) 132 | return X, y 133 | 134 | 135 | def make_dataset(cfg: SynthConfig) -> tuple[np.ndarray, np.ndarray]: 136 | n_pos = int(cfg.n_samples * cfg.pos_ratio) 137 | n_neg = cfg.n_samples - n_pos 138 | data = [] 139 | labels = [] 140 | 141 | for _ in range(n_pos): 142 | X, y = make_one_sample(cfg.patch_size, cfg.noise_sigma, True) 143 | data.append(X); labels.append(y) 144 | for _ in range(n_neg): 145 | X, y = make_one_sample(cfg.patch_size, cfg.noise_sigma, False) 146 | data.append(X); labels.append(y) 147 | 148 | data = np.stack(data, axis=0) 149 | labels = np.array(labels, dtype=np.int64) 150 | # shuffle 151 | idx = np.random.permutation(len(labels)) 152 | return data[idx], labels[idx] 153 | 154 | 155 | class DeforestationDataset(Dataset): 156 | def __init__(self, X: np.ndarray, y: np.ndarray): 157 | self.X = torch.from_numpy(X) 158 | self.y = torch.from_numpy(y) 159 | 160 | def __len__(self): 161 | return len(self.y) 162 | 163 | def __getitem__(self, i): 164 | return self.X[i], self.y[i] 165 | 166 | 167 | # ----------------------------- 168 | # Model 169 | # ----------------------------- 170 | class SmallCNN(nn.Module): 171 | def __init__(self, in_ch: int = 5, n_classes: int = 1): 172 | super().__init__() 173 | self.net = nn.Sequential( 174 | nn.Conv2d(in_ch, 16, 3, padding=1), 175 | nn.BatchNorm2d(16), 176 | nn.ReLU(inplace=True), 177 | nn.MaxPool2d(2), # 16x16 178 | 179 | nn.Conv2d(16, 32, 3, padding=1), 180 | nn.BatchNorm2d(32), 181 | nn.ReLU(inplace=True), 182 | nn.MaxPool2d(2), # 8x8 183 | 184 | nn.Conv2d(32, 64, 3, padding=1), 185 | nn.BatchNorm2d(64), 186 | nn.ReLU(inplace=True), 187 | nn.AdaptiveAvgPool2d(1), # 64x1x1 188 | ) 189 | self.head = nn.Linear(64, n_classes) # binary -> 1 logit 190 | 191 | def forward(self, x): 192 | x = self.net(x) 193 | x = x.flatten(1) 194 | return self.head(x).squeeze(1) 195 | 196 | 197 | # ----------------------------- 198 | # Training / Evaluation 199 | # ----------------------------- 200 | def train_one_epoch(model, loader, criterion, optimizer, device): 201 | model.train() 202 | total, total_loss = 0, 0.0 203 | for X, y in loader: 204 | X, y = X.to(device), y.to(device).float() 205 | optimizer.zero_grad() 206 | logits = model(X) 207 | loss = criterion(logits, y) 208 | loss.backward() 209 | optimizer.step() 210 | total += y.size(0) 211 | total_loss += loss.item() * y.size(0) 212 | return total_loss / total 213 | 214 | 215 | @torch.no_grad() 216 | def evaluate(model, loader, device): 217 | model.eval() 218 | all_y, all_p, all_s = [], [], [] 219 | for X, y in loader: 220 | X = X.to(device) 221 | logits = model(X) 222 | probs = torch.sigmoid(logits).cpu().numpy() 223 | preds = (probs >= 0.5).astype(int) 224 | all_p.append(preds) 225 | all_s.append(probs) 226 | all_y.append(y.numpy()) 227 | y_true = np.concatenate(all_y) 228 | y_pred = np.concatenate(all_p) 229 | scores = np.concatenate(all_s) 230 | acc = accuracy_score(y_true, y_pred) 231 | prec, rec, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="binary", zero_division=0) 232 | cm = confusion_matrix(y_true, y_pred) 233 | return acc, prec, rec, f1, cm, y_true, y_pred, scores 234 | 235 | 236 | def plot_examples(X, y_true, y_pred, scores, outdir, k=6): 237 | ensure_dir(outdir) 238 | k = min(k, len(y_true)) 239 | idx = np.random.choice(len(y_true), k, replace=False) 240 | fig, axs = plt.subplots(k, 3, figsize=(9, 3*k)) 241 | 242 | for row, i in enumerate(idx): 243 | # visualize NDVI before and after + diff 244 | RED_b, NIR_b, RED_a, NIR_a, NDVI_diff = X[i] 245 | eps = 1e-6 246 | NDVI_b = (NIR_b - RED_b) / (NIR_b + RED_b + eps) 247 | NDVI_a = (NIR_a - RED_a) / (NIR_a + RED_a + eps) 248 | 249 | axs[row, 0].imshow(NDVI_b, vmin=-1, vmax=1) 250 | axs[row, 0].set_title("NDVI Before") 251 | axs[row, 1].imshow(NDVI_a, vmin=-1, vmax=1) 252 | axs[row, 1].set_title("NDVI After") 253 | axs[row, 2].imshow(NDVI_diff, vmin=-1, vmax=1) 254 | axs[row, 2].set_title(f"NDVI Diff\nTrue:{y_true[i]} Pred:{y_pred[i]} p={scores[i]:.2f}") 255 | for c in range(3): 256 | axs[row, c].axis('off') 257 | 258 | plt.tight_layout() 259 | fig.savefig(os.path.join(outdir, "examples.png"), dpi=180) 260 | plt.close(fig) 261 | 262 | 263 | # ----------------------------- 264 | # Main 265 | # ----------------------------- 266 | def main(): 267 | parser = argparse.ArgumentParser() 268 | parser.add_argument("--n", type=int, default=1200, help="Number of samples (>100).") 269 | parser.add_argument("--size", type=int, default=32, help="Patch size (pixels).") 270 | parser.add_argument("--epochs", type=int, default=10, help="Training epochs.") 271 | parser.add_argument("--batch", type=int, default=64, help="Batch size.") 272 | parser.add_argument("--lr", type=float, default=1e-3, help="Learning rate.") 273 | parser.add_argument("--seed", type=int, default=42, help="Random seed.") 274 | parser.add_argument("--out", type=str, default="outputs", help="Output directory.") 275 | args = parser.parse_args() 276 | 277 | if args.n <= 100: 278 | raise ValueError("Please set --n > 100 for sufficient data points.") 279 | 280 | set_seed(args.seed) 281 | ensure_dir(args.out) 282 | 283 | # Create synthetic dataset 284 | cfg = SynthConfig(patch_size=args.size, n_samples=args.n, seed=args.seed) 285 | X, y = make_dataset(cfg) # X: (N, 5, H, W), y: (N,) 286 | 287 | # Split train/val 288 | n_total = len(y) 289 | n_train = int(0.8 * n_total) 290 | n_val = n_total - n_train 291 | dataset = DeforestationDataset(X, y) 292 | train_ds, val_ds = random_split(dataset, [n_train, n_val], 293 | generator=torch.Generator().manual_seed(args.seed)) 294 | 295 | train_loader = DataLoader(train_ds, batch_size=args.batch, shuffle=True, num_workers=0) 296 | val_loader = DataLoader(val_ds, batch_size=args.batch, shuffle=False, num_workers=0) 297 | 298 | # Model 299 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 300 | model = SmallCNN(in_ch=5).to(device) 301 | criterion = nn.BCEWithLogitsLoss() 302 | optimizer = optim.Adam(model.parameters(), lr=args.lr) 303 | 304 | # Train 305 | best_f1, best_state = -1, None 306 | for ep in range(1, args.epochs + 1): 307 | tr_loss = train_one_epoch(model, train_loader, criterion, optimizer, device) 308 | acc, prec, rec, f1, cm, *_ = evaluate(model, val_loader, device) 309 | print(f"Epoch {ep:02d} | loss {tr_loss:.4f} | val acc {acc:.3f} | P {prec:.3f} R {rec:.3f} F1 {f1:.3f}") 310 | if f1 > best_f1: 311 | best_f1, best_state = f1, {k: v.cpu() for k, v in model.state_dict().items()} 312 | 313 | # Save best model 314 | if best_state is not None: 315 | model.load_state_dict(best_state) 316 | model_path = os.path.join(args.out, "deforestation_cnn.pt") 317 | torch.save(model.state_dict(), model_path) 318 | print(f"Saved model -> {model_path}") 319 | 320 | # Final eval & artifacts 321 | acc, prec, rec, f1, cm, y_true, y_pred, scores = evaluate(model, val_loader, device) 322 | print("Confusion matrix:\n", cm) 323 | 324 | # Save metrics 325 | with open(os.path.join(args.out, "metrics.txt"), "w") as f: 326 | f.write(f"ACC={acc:.4f}\nPREC={prec:.4f}\nREC={rec:.4f}\nF1={f1:.4f}\n") 327 | f.write(f"CM=\n{cm}\n") 328 | 329 | # Plot example predictions 330 | plot_examples(X[val_loader.dataset.indices], y_true, y_pred, scores, 331 | outdir=args.out, k=8) 332 | 333 | print("Done. Artifacts saved in:", args.out) 334 | 335 | 336 | if __name__ == "__main__": 337 | main() 338 | --------------------------------------------------------------------------------