├── synthetic_deforestation_dataset.xlsx
├── README.md
├── generate_synthetic_deforestation_dataset.py
└── file


/synthetic_deforestation_dataset.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nelvinebi/Deforestation-Detection-Using-Satellite-Imagery-Deep-Learning/HEAD/synthetic_deforestation_dataset.xlsx


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Deforestation Detection Using Satellite Imagery (Deep Learning, Synthetic Data)
 2 | 
 3 | This project simulates deforestation detection using synthetic satellite-like data and applies deep learning methods to classify land cover changes. By generating synthetic RED and NIR reflectance bands, the project computes NDVI before and after disturbance to label samples as deforested or non-deforested.
 4 | 
 5 | Features
 6 | 
 7 | Generates synthetic tabular dataset with >100 samples.
 8 | 
 9 | Includes before/after reflectance bands (RED, NIR).
10 | 
11 | Computes NDVI before, NDVI after, and NDVI difference.
12 | 
13 | Labels data automatically as deforested (1) or not deforested (0).
14 | 
15 | Saves dataset in both Excel (.xlsx) and CSV (.csv) formats.
16 | 
17 | Ready for use in machine learning or deep learning workflows.
18 | 
19 | Installation
20 | 
21 | Clone this repository and install dependencies:
22 | 
 23 | git clone https://github.com/Nelvinebi/Deforestation-Detection-Using-Satellite-Imagery-Deep-Learning.git
24 | cd Deforestation-Detection-Using-Satellite-Imagery-Deep-Learning
 25 | pip install numpy pandas openpyxl
26 | 
27 | 
28 | Dependencies:
29 | 
30 | Python 3.8+
31 | 
32 | NumPy
33 | 
34 | Pandas
35 | 
36 | OpenPyXL (for Excel export)
37 | 
38 | Usage
39 | 
40 | Generate the dataset by running:
41 | 
42 | python generate_synthetic_deforestation_dataset.py --n 300 --seed 42 --out outputs
43 | 
44 | 
45 | Arguments:
46 | 
47 | --n: Number of samples (default: 300, must be >100).
48 | 
49 | --seed: Random seed for reproducibility (default: 42).
50 | 
51 | --out: Output folder (default: outputs).
52 | 
53 | Example Output
54 | 
55 | outputs/synthetic_deforestation_dataset.xlsx
56 | 
57 | outputs/synthetic_deforestation_dataset.csv
58 | 
59 | Dataset Columns
60 | 
61 | sample_id
62 | 
63 | RED_before, NIR_before
64 | 
65 | RED_after, NIR_after
66 | 
67 | NDVI_before, NDVI_after, NDVI_diff
68 | 
69 | label_deforested (1 = deforested, 0 = not deforested)
70 | 
71 | Applications
72 | 
73 | Benchmarking deforestation detection models.
74 | 
75 | Experimenting with vegetation indices (NDVI).
76 | 
77 | Testing deep learning pipelines without requiring real imagery.
78 | 
79 | Author
80 | 
81 | Agbozu Ebingiye Nelvin
82 | 
83 | License
84 | 
85 | This project is released under the MIT License. See LICENSE for details.
86 | 


--------------------------------------------------------------------------------
/generate_synthetic_deforestation_dataset.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """
 3 | generate_synthetic_deforestation_dataset.py
 4 | -------------------------------------------
 5 | Create a tabular synthetic dataset for deforestation detection from
 6 | "before/after" RED & NIR reflectances. Exports Excel (.xlsx) and CSV.
 7 | 
 8 | Usage:
 9 |   python generate_synthetic_deforestation_dataset.py --n 300 --seed 42 --out outputs
10 | 
11 | Columns:
12 |   sample_id, RED_before, NIR_before, RED_after, NIR_after,
13 |   NDVI_before, NDVI_after, NDVI_diff, label_deforested (0/1)
14 | """
15 | 
16 | import os
17 | import argparse
18 | import numpy as np
19 | import pandas as pd
20 | 
def generate_deforestation_dataset(n_samples: int = 300, seed: int = 42) -> pd.DataFrame:
    """Build a tabular synthetic before/after reflectance dataset.

    Each row simulates one location: a vegetation level is drawn, RED/NIR
    "before" reflectances are derived from it, and the "after" reflectances
    are either shifted (deforested: RED up, NIR down) or only jittered.

    Args:
        n_samples: Number of rows to generate. Zero (or a negative value)
            yields an empty frame that still carries the full column set.
        seed: Seed for ``np.random.default_rng``; the same seed reproduces
            the same rows.

    Returns:
        DataFrame with columns ``sample_id``, ``RED_before``, ``NIR_before``,
        ``RED_after``, ``NIR_after``, ``NDVI_before``, ``NDVI_after``,
        ``NDVI_diff`` and ``label_deforested`` (0/1).
    """
    # Explicit schema so an empty result still has the documented columns.
    columns = [
        "sample_id",
        "RED_before",
        "NIR_before",
        "RED_after",
        "NIR_after",
        "NDVI_before",
        "NDVI_after",
        "NDVI_diff",
        "label_deforested",
    ]

    rng = np.random.default_rng(seed)
    eps = 1e-6  # keeps the NDVI denominator away from zero
    records = []

    # NOTE: the order of rng draws below determines the generated values for
    # a given seed; keep it stable.
    for i in range(n_samples):
        deforested = rng.random() < 0.5

        # vegetation baseline (0.3..0.9)
        veg = rng.uniform(0.3, 0.9)

        # "Before" reflectance (higher NIR, lower RED with more vegetation)
        RED_b = 0.25 + 0.25 * (1 - veg) + rng.normal(0, 0.02)
        NIR_b = 0.55 + 0.40 * veg + rng.normal(0, 0.02)

        # "After": deforestation => RED increases, NIR decreases
        if deforested:
            RED_a = RED_b + 0.20 + rng.normal(0, 0.02)
            NIR_a = NIR_b - 0.30 + rng.normal(0, 0.02)
        else:
            RED_a = RED_b + rng.normal(0, 0.02)
            NIR_a = NIR_b + rng.normal(0, 0.02)

        NDVI_b = (NIR_b - RED_b) / (NIR_b + RED_b + eps)
        NDVI_a = (NIR_a - RED_a) / (NIR_a + RED_a + eps)
        NDVI_diff = NDVI_b - NDVI_a  # positive when vegetation decreases

        records.append({
            "sample_id": i,
            "RED_before": RED_b,
            "NIR_before": NIR_b,
            "RED_after": RED_a,
            "NIR_after": NIR_a,
            "NDVI_before": NDVI_b,
            "NDVI_after": NDVI_a,
            "NDVI_diff": NDVI_diff,
            "label_deforested": int(deforested),
        })

    return pd.DataFrame(records, columns=columns)
61 | 
def main():
    """Command-line entry point: generate the dataset and write it as Excel and CSV.

    Parses ``--n``, ``--seed`` and ``--out``; raises ``ValueError`` when
    ``--n`` is not greater than 100; creates the output directory if needed
    and prints the written paths plus the row/column counts.
    """
    cli = argparse.ArgumentParser(description="Generate a synthetic deforestation dataset and save to Excel/CSV.")
    cli.add_argument("--n", type=int, default=300, help="Number of samples (>100).")
    cli.add_argument("--seed", type=int, default=42, help="Random seed.")
    cli.add_argument("--out", type=str, default="outputs", help="Output directory.")
    opts = cli.parse_args()

    # Enforce the minimum dataset size before touching the filesystem.
    if opts.n <= 100:
        raise ValueError("Please set --n > 100 to satisfy minimum dataset size.")

    out_dir = opts.out
    os.makedirs(out_dir, exist_ok=True)

    frame = generate_deforestation_dataset(n_samples=opts.n, seed=opts.seed)

    excel_target = os.path.join(out_dir, "synthetic_deforestation_dataset.xlsx")
    csv_target = os.path.join(out_dir, "synthetic_deforestation_dataset.csv")

    # Write both formats without the pandas index column.
    frame.to_excel(excel_target, index=False)
    frame.to_csv(csv_target, index=False)

    n_rows, n_cols = len(frame), len(frame.columns)
    print(f"Saved Excel -> {excel_target}")
    print(f"Saved CSV   -> {csv_target}")
    print(f"Rows: {n_rows} | Columns: {n_cols}")
86 | 
87 | if __name__ == "__main__":
88 |     main()
89 | 


--------------------------------------------------------------------------------
/file:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """
  3 | Deforestation-Detection-Using-Satellite-Imagery-Deep-Learning (Synthetic)
  4 | -------------------------------------------------------------------------
  5 | 
  6 | Generates synthetic "before" and "after" reflectance patches, simulates
  7 | deforestation as drops in NDVI (via RED↑, NIR↓ in affected areas), trains a
  8 | CNN to classify patches (deforested vs. not), and evaluates performance.
  9 | 
 10 | Requirements:
 11 |   - Python 3.8+
 12 |   - numpy, matplotlib, scikit-learn, torch
 13 | 
 14 | Usage:
 15 |   python deforestation_synthetic_cnn.py --n 1200 --size 32 --epochs 10 --batch 64 --out outputs
 16 | """
 17 | 
 18 | import os
 19 | import math
 20 | import argparse
 21 | import random
 22 | from dataclasses import dataclass
 23 | 
 24 | import numpy as np
 25 | import matplotlib.pyplot as plt
 26 | 
 27 | import torch
 28 | import torch.nn as nn
 29 | import torch.optim as optim
 30 | from torch.utils.data import Dataset, DataLoader, random_split
 31 | from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
 32 | 
 33 | 
 34 | # -----------------------------
 35 | # Utilities & Reproducibility
 36 | # -----------------------------
 37 | def set_seed(seed: int = 42):
 38 |     random.seed(seed)
 39 |     np.random.seed(seed)
 40 |     torch.manual_seed(seed)
 41 |     torch.cuda.manual_seed_all(seed)
 42 | 
 43 | 
def ensure_dir(p: str) -> None:
    """Create directory ``p`` (including missing parents); no error if it already exists."""
    os.makedirs(p, exist_ok=True)
 46 | 
 47 | 
 48 | # -----------------------------
 49 | # Synthetic Data Generation
 50 | # -----------------------------
@dataclass
class SynthConfig:
    """Settings for synthetic patch generation, passed to ``make_dataset``."""
    patch_size: int = 32     # side length of each square patch, in pixels
    n_samples: int = 1200    # total number of patches; > 100 by default
    pos_ratio: float = 0.5   # fraction of patches generated as deforested (~50%)
    noise_sigma: float = 0.04  # scale applied to the standard-normal pixel noise
    seed: int = 42           # stored for reference; make_dataset does not read it
 58 | 
 59 | 
 60 | def _smooth_noise(img: np.ndarray, k: int = 5) -> np.ndarray:
 61 |     """Very light smoothing using a box filter without scipy."""
 62 |     assert k % 2 == 1, "kernel size should be odd"
 63 |     pad = k // 2
 64 |     padded = np.pad(img, ((pad, pad), (pad, pad)), mode="reflect")
 65 |     out = np.zeros_like(img)
 66 |     for i in range(img.shape[0]):
 67 |         for j in range(img.shape[1]):
 68 |             out[i, j] = padded[i:i + k, j:j + k].mean()
 69 |     return out
 70 | 
 71 | 
 72 | def _random_shape_mask(h: int, w: int) -> np.ndarray:
 73 |     """Create a random rectangular or circular mask for deforestation."""
 74 |     mask = np.zeros((h, w), dtype=np.float32)
 75 |     if random.random() < 0.5:
 76 |         # rectangle
 77 |         rh, rw = random.randint(h//5, h//2), random.randint(w//5, w//2)
 78 |         r0 = random.randint(0, h - rh)
 79 |         c0 = random.randint(0, w - rw)
 80 |         mask[r0:r0 + rh, c0:c0 + rw] = 1.0
 81 |     else:
 82 |         # circle
 83 |         r = random.randint(min(h, w)//6, min(h, w)//3)
 84 |         cy, cx = random.randint(r, h - r - 1), random.randint(r, w - r - 1)
 85 |         yy, xx = np.ogrid[:h, :w]
 86 |         mask[(yy - cy)**2 + (xx - cx)**2 <= r**2] = 1.0
 87 |     # smooth edges a bit
 88 |     mask = _smooth_noise(mask, k=3)
 89 |     mask = np.clip(mask, 0.0, 1.0)
 90 |     return mask
 91 | 
 92 | 
 93 | def make_one_sample(ps: int, noise_sigma: float, deforested: bool) -> tuple[np.ndarray, int]:
 94 |     """
 95 |     Returns:
 96 |       X: (C=5, H, W) channels = [RED_b, NIR_b, RED_a, NIR_a, NDVI_diff]
 97 |       y: 0/1
 98 |     """
 99 |     # Base vegetation field (0..1) via smoothed uniform noise
100 |     base = np.random.rand(ps, ps).astype(np.float32)
101 |     base = _smooth_noise(base, k=7)
102 |     base = (base - base.min()) / (base.max() - base.min() + 1e-6)
103 | 
104 |     # Simulate "before" reflectances influenced by vegetation
105 |     # vegetation -> high NIR, lower RED
106 |     RED_b = np.clip(0.25 + 0.25*(1.0 - base) + noise_sigma*np.random.randn(ps, ps), 0, 1)
107 |     NIR_b = np.clip(0.55 + 0.40*base + noise_sigma*np.random.randn(ps, ps), 0, 1)
108 | 
109 |     # After: start from before
110 |     RED_a = RED_b.copy()
111 |     NIR_a = NIR_b.copy()
112 | 
113 |     if deforested:
114 |         mask = _random_shape_mask(ps, ps)
115 |         # Deforestation effect: RED goes up, NIR goes down inside mask
116 |         RED_a = np.clip(RED_a + 0.25*mask + noise_sigma*np.random.randn(ps, ps), 0, 1)
117 |         NIR_a = np.clip(NIR_a - 0.35*mask + noise_sigma*np.random.randn(ps, ps), 0, 1)
118 |     else:
119 |         # Small natural variability
120 |         RED_a = np.clip(RED_a + noise_sigma*np.random.randn(ps, ps), 0, 1)
121 |         NIR_a = np.clip(NIR_a + noise_sigma*np.random.randn(ps, ps), 0, 1)
122 | 
123 |     # NDVI before/after and diff
124 |     eps = 1e-6
125 |     NDVI_b = (NIR_b - RED_b) / (NIR_b + RED_b + eps)
126 |     NDVI_a = (NIR_a - RED_a) / (NIR_a + RED_a + eps)
127 |     NDVI_diff = NDVI_b - NDVI_a  # positive when vegetation decreases
128 | 
129 |     # Stack channels (5, H, W)
130 |     X = np.stack([RED_b, NIR_b, RED_a, NIR_a, NDVI_diff], axis=0).astype(np.float32)
131 |     y = int(deforested)
132 |     return X, y
133 | 
134 | 
def make_dataset(cfg: SynthConfig) -> "tuple[np.ndarray, np.ndarray]":
    """Generate a shuffled set of synthetic patches and labels.

    ``int(cfg.n_samples * cfg.pos_ratio)`` deforested samples are generated
    first, then the remaining non-deforested ones, and finally both arrays
    are shuffled with one shared permutation.

    The return annotation is quoted so the module still imports on
    Python 3.8, where ``tuple[...]`` cannot be subscripted at runtime.

    NOTE: ``cfg.seed`` is not read here. Randomness comes from the global
    ``random`` / ``np.random`` state, so seed those beforehand (e.g. via
    ``set_seed``) for reproducible output.

    Args:
        cfg: Generation settings (patch size, sample count, positive ratio,
            noise scale).

    Returns:
        ``(data, labels)``: the per-sample arrays from ``make_one_sample``
        stacked along a new leading axis, and an int64 label vector in the
        same shuffled order.
    """
    n_pos = int(cfg.n_samples * cfg.pos_ratio)
    n_neg = cfg.n_samples - n_pos

    # Positives first, then negatives: this fixes the order of RNG draws.
    flags = [True] * n_pos + [False] * n_neg
    data = []
    labels = []
    for is_deforested in flags:
        X, y = make_one_sample(cfg.patch_size, cfg.noise_sigma, is_deforested)
        data.append(X)
        labels.append(y)

    data = np.stack(data, axis=0)
    labels = np.array(labels, dtype=np.int64)
    # shuffle samples and labels with the same permutation
    idx = np.random.permutation(len(labels))
    return data[idx], labels[idx]
153 | 
154 | 
class DeforestationDataset(Dataset):
    """Torch ``Dataset`` wrapping in-memory sample and label arrays."""

    def __init__(self, X: np.ndarray, y: np.ndarray):
        # torch.from_numpy shares memory with the source arrays (no copy).
        self.X = torch.from_numpy(X)
        self.y = torch.from_numpy(y)

    def __len__(self) -> int:
        """Return the number of samples (length of the label tensor)."""
        return len(self.y)

    def __getitem__(self, i):
        """Return the ``(sample, label)`` pair at index ``i``."""
        return self.X[i], self.y[i]
165 | 
166 | 
167 | # -----------------------------
168 | # Model
169 | # -----------------------------
class SmallCNN(nn.Module):
    """Three conv/batch-norm/ReLU stages followed by a linear head.

    The first two stages are each followed by 2x2 max pooling; the third is
    followed by global average pooling, giving a 64-dim feature vector.
    """

    def __init__(self, in_ch: int = 5, n_classes: int = 1):
        super().__init__()

        def conv_stage(c_in, c_out):
            # 3x3 convolution (padding keeps spatial size) + batch norm + ReLU
            return [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ]

        # Layer order (and therefore state_dict keys net.0 .. net.11) is fixed.
        layers = [
            *conv_stage(in_ch, 16),
            nn.MaxPool2d(2),  # halves H and W (32 -> 16 for 32x32 input)
            *conv_stage(16, 32),
            nn.MaxPool2d(2),  # halves again (16 -> 8 for 32x32 input)
            *conv_stage(32, 64),
            nn.AdaptiveAvgPool2d(1),  # 64x1x1
        ]
        self.net = nn.Sequential(*layers)
        self.head = nn.Linear(64, n_classes)  # binary -> 1 logit

    def forward(self, x):
        """Return logits; the class dim is squeezed away when ``n_classes == 1``."""
        features = self.net(x).flatten(1)
        logits = self.head(features)
        return logits.squeeze(1)
195 | 
196 | 
197 | # -----------------------------
198 | # Training / Evaluation
199 | # -----------------------------
def train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader`` and return the mean loss.

    Each batch loss is weighted by its batch size, so the result is the
    per-sample average over the epoch. Labels are cast to float before being
    handed to ``criterion``.
    """
    model.train()
    seen = 0
    loss_sum = 0.0
    for inputs, targets in loader:
        inputs = inputs.to(device)
        targets = targets.to(device).float()

        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()

        batch_size = targets.size(0)
        seen += batch_size
        loss_sum += batch_loss.item() * batch_size
    return loss_sum / seen
213 | 
214 | 
@torch.no_grad()
def evaluate(model, loader, device):
    """Score ``model`` on ``loader`` for binary classification.

    Logits are passed through a sigmoid and thresholded at 0.5. Labels are
    converted with ``y.numpy()``, so the loader is expected to yield CPU
    tensors.

    Returns:
        ``(acc, prec, rec, f1, cm, y_true, y_pred, scores)`` where ``cm`` is
        always a 2x2 confusion matrix over labels ``[0, 1]`` and ``scores``
        are the sigmoid probabilities.
    """
    model.eval()
    all_y, all_p, all_s = [], [], []
    for X, y in loader:
        X = X.to(device)
        logits = model(X)
        probs = torch.sigmoid(logits).cpu().numpy()
        preds = (probs >= 0.5).astype(int)
        all_p.append(preds)
        all_s.append(probs)
        all_y.append(y.numpy())
    y_true = np.concatenate(all_y)
    y_pred = np.concatenate(all_p)
    scores = np.concatenate(all_s)
    acc = accuracy_score(y_true, y_pred)
    prec, rec, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="binary", zero_division=0)
    # Pin the label set so the matrix stays 2x2 even when only one class
    # appears in both y_true and y_pred.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    return acc, prec, rec, f1, cm, y_true, y_pred, scores
234 | 
235 | 
def plot_examples(X, y_true, y_pred, scores, outdir, k=6):
    """Save a grid of randomly chosen examples to ``<outdir>/examples.png``.

    Each row shows NDVI before, NDVI after and the NDVI difference for one
    sample, titled with its true label, predicted label and score.

    Args:
        X: Samples indexed like ``y_true``; each ``X[i]`` unpacks into the
            five channels ``RED_b, NIR_b, RED_a, NIR_a, NDVI_diff``.
        y_true: True labels.
        y_pred: Predicted labels.
        scores: Predicted probabilities.
        outdir: Output directory (created if missing).
        k: Maximum number of rows; capped at ``len(y_true)``.
    """
    ensure_dir(outdir)
    k = min(k, len(y_true))
    if k <= 0:
        # Nothing to draw; a 0-row subplot grid would raise.
        return
    idx = np.random.choice(len(y_true), k, replace=False)
    # squeeze=False keeps axs 2-D even when k == 1, so axs[row, col] is valid.
    fig, axs = plt.subplots(k, 3, figsize=(9, 3*k), squeeze=False)

    for row, i in enumerate(idx):
        # visualize NDVI before and after + diff
        RED_b, NIR_b, RED_a, NIR_a, NDVI_diff = X[i]
        eps = 1e-6
        NDVI_b = (NIR_b - RED_b) / (NIR_b + RED_b + eps)
        NDVI_a = (NIR_a - RED_a) / (NIR_a + RED_a + eps)

        axs[row, 0].imshow(NDVI_b, vmin=-1, vmax=1)
        axs[row, 0].set_title("NDVI Before")
        axs[row, 1].imshow(NDVI_a, vmin=-1, vmax=1)
        axs[row, 1].set_title("NDVI After")
        axs[row, 2].imshow(NDVI_diff, vmin=-1, vmax=1)
        axs[row, 2].set_title(f"NDVI Diff\nTrue:{y_true[i]} Pred:{y_pred[i]} p={scores[i]:.2f}")
        for c in range(3):
            axs[row, c].axis('off')

    plt.tight_layout()
    fig.savefig(os.path.join(outdir, "examples.png"), dpi=180)
    plt.close(fig)
261 | 
262 | 
263 | # -----------------------------
264 | # Main
265 | # -----------------------------
def main():
    """Command-line entry point: generate data, train the CNN, save artifacts.

    Steps: parse arguments, seed the RNGs, build the synthetic dataset, split
    80/20 into train/validation, train for ``--epochs`` epochs while keeping
    the weights with the best validation F1, then write the model
    (``deforestation_cnn.pt``), ``metrics.txt`` and ``examples.png`` to the
    output directory.

    Raises:
        ValueError: If ``--n`` is not greater than 100.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--n", type=int, default=1200, help="Number of samples (>100).")
    parser.add_argument("--size", type=int, default=32, help="Patch size (pixels).")
    parser.add_argument("--epochs", type=int, default=10, help="Training epochs.")
    parser.add_argument("--batch", type=int, default=64, help="Batch size.")
    parser.add_argument("--lr", type=float, default=1e-3, help="Learning rate.")
    parser.add_argument("--seed", type=int, default=42, help="Random seed.")
    parser.add_argument("--out", type=str, default="outputs", help="Output directory.")
    args = parser.parse_args()

    if args.n <= 100:
        raise ValueError("Please set --n > 100 for sufficient data points.")

    set_seed(args.seed)
    ensure_dir(args.out)

    # Create synthetic dataset
    cfg = SynthConfig(patch_size=args.size, n_samples=args.n, seed=args.seed)
    X, y = make_dataset(cfg)  # X: (N, 5, H, W), y: (N,)

    # Split train/val
    n_total = len(y)
    n_train = int(0.8 * n_total)
    n_val = n_total - n_train
    dataset = DeforestationDataset(X, y)
    train_ds, val_ds = random_split(dataset, [n_train, n_val],
                                    generator=torch.Generator().manual_seed(args.seed))

    train_loader = DataLoader(train_ds, batch_size=args.batch, shuffle=True, num_workers=0)
    val_loader = DataLoader(val_ds, batch_size=args.batch, shuffle=False, num_workers=0)

    # Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = SmallCNN(in_ch=5).to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Train
    best_f1, best_state = -1, None
    for ep in range(1, args.epochs + 1):
        tr_loss = train_one_epoch(model, train_loader, criterion, optimizer, device)
        acc, prec, rec, f1, cm, *_ = evaluate(model, val_loader, device)
        print(f"Epoch {ep:02d} | loss {tr_loss:.4f} | val acc {acc:.3f} | P {prec:.3f} R {rec:.3f} F1 {f1:.3f}")
        if f1 > best_f1:
            # clone() is required: .cpu() returns the very same tensor when the
            # model already lives on the CPU, so without a copy the snapshot
            # would keep changing as training continues.
            best_f1 = f1
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}

    # Save best model
    if best_state is not None:
        model.load_state_dict(best_state)
    model_path = os.path.join(args.out, "deforestation_cnn.pt")
    torch.save(model.state_dict(), model_path)
    print(f"Saved model -> {model_path}")

    # Final eval & artifacts
    acc, prec, rec, f1, cm, y_true, y_pred, scores = evaluate(model, val_loader, device)
    print("Confusion matrix:\n", cm)

    # Save metrics
    with open(os.path.join(args.out, "metrics.txt"), "w") as f:
        f.write(f"ACC={acc:.4f}\nPREC={prec:.4f}\nREC={rec:.4f}\nF1={f1:.4f}\n")
        f.write(f"CM=\n{cm}\n")

    # Plot example predictions. val_loader is not shuffled, so indexing X with
    # the validation subset's indices lines up with y_true / y_pred / scores.
    plot_examples(X[val_loader.dataset.indices], y_true, y_pred, scores,
                  outdir=args.out, k=8)

    print("Done. Artifacts saved in:", args.out)
334 | 
335 | 
336 | if __name__ == "__main__":
337 |     main()
338 | 


--------------------------------------------------------------------------------