├── files
│   ├── lena.bmp
│   ├── three.png
│   ├── example1.png
│   ├── example2.png
│   └── lasso_problem.png
├── README.md
└── fused_lasso.py

/files/lena.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ptyshevs/fused_lasso/HEAD/files/lena.bmp
--------------------------------------------------------------------------------
/files/three.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ptyshevs/fused_lasso/HEAD/files/three.png
--------------------------------------------------------------------------------
/files/example1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ptyshevs/fused_lasso/HEAD/files/example1.png
--------------------------------------------------------------------------------
/files/example2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ptyshevs/fused_lasso/HEAD/files/example2.png
--------------------------------------------------------------------------------
/files/lasso_problem.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ptyshevs/fused_lasso/HEAD/files/lasso_problem.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## 2D Fused LASSO for grayscale image restoration

### Problem

![img](files/lasso_problem.png)

Fused LASSO is a variation of MSE + L1 regularization: each weight (which corresponds to a pixel brightness value) is penalized for differing from its neighbouring pixels. The L1 norm is not differentiable, but it is convex, so a subgradient can be used instead, which corresponds to sign(w). Apart from this, the training process is not much different from regular Gradient Descent. To be fancy, three optimization algorithms are implemented: vanilla Gradient Descent, GD with Momentum, and Nesterov-corrected momentum (NAG).

### Examples

Gaussian noise N(0, 20) is added to each of the examples, then the model is trained and the result is compared with both the original and the noisy image. The metric used for evaluation is a modification of the R^2 coefficient, where the variance is replaced with an estimate of the Gaussian noise standard deviation. It can be interpreted as the amount of variation of the original image "preserved" in the denoised version.

![example1](files/example1.png)

![example2](files/example2.png)

### Interface

Along with a research notebook, the `fused_lasso.py` file is provided, containing a model class with an sklearn-like API; a usage sketch follows.
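
A minimal usage sketch (the toy image and noise settings below are illustrative; only the constructor arguments and the `fit_transform` call come from `fused_lasso.py`):

```python
import numpy as np
from fused_lasso import FusedLASSO

# Toy example: a bright square on a dark background, corrupted by N(0, 20) noise
rng = np.random.default_rng(0)
original = np.zeros((28, 28))
original[8:20, 8:20] = 200.0
noisy = original + rng.normal(0, 20, size=original.shape)

# fit_transform fits one weight per pixel and returns them reshaped to the image
model = FusedLASSO(optimizer='nesterov', alpha=0.01, l=0.3, n_iter=1000)
denoised = model.fit_transform(noisy)
```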

## Acknowledgements

- Ryan Tibshirani's [course](https://www.stat.cmu.edu/~ryantibs/convexopt/) on convex optimization
- Sebastian Ruder's [blog post](http://ruder.io/optimizing-gradient-descent/) on Gradient Descent optimization algorithms
- Yann LeCun's [MNIST dataset](http://yann.lecun.com/exdb/mnist/)
--------------------------------------------------------------------------------
/fused_lasso.py:
--------------------------------------------------------------------------------
import numpy as np


class FusedLASSO:

    def __init__(self, optimizer='vanilla', alpha=0.01, l=0.3, momentum=0.9, n_iter=1000, verbose=False):
        optimizers = {'vanilla': self._vanilla_gd,
                      'momentum': self._momentum_gd,
                      'nesterov': self._nesterov_gd}
        if optimizer not in optimizers:
            raise ValueError(f"{optimizer} is not recognized. Use one of {list(optimizers.keys())}")
        self.optimizer = optimizers[optimizer]
        self.alpha = alpha        # scales the data-fidelity (MSE) gradient
        self.l = l                # scales the fusion (L1) subgradient
        self.momentum = momentum  # momentum coefficient for 'momentum' and 'nesterov'
        self.n_iter = n_iter
        self.verbose = verbose
        self._weights = None

    def fit(self, X):
        img_shape = X.shape
        X = X.ravel()
        # Small random initialization of the per-pixel weights
        self._weights = np.random.normal(size=X.shape) / 100
        self._velocity = np.zeros_like(self._weights)
        adjacency_matrix = self._build_adjacency_matrix(img_shape)
        for it in range(self.n_iter):
            self._weights -= self.optimizer(X, adjacency_matrix)
            if self.verbose and it > 0 and it % 100 == 0:
                print(f"{it}: MSE = {self._mse_loss(X)}")

    def fit_transform(self, X):
        self.fit(X)
        return self._weights.reshape(X.shape)


    def _build_adjacency_matrix(self, shape):
        """
        Adjacency matrix contains the indices of the four neighbour pixels
        (up, down, left, right) for every pixel of the raveled image.
        Border pixels point to themselves, so their sign() term vanishes.
        """
        rows, cols = shape
        a = np.zeros((rows * cols, 4), dtype=int)
        for i in range(rows):
            for j in range(cols):
                # Row-major index of pixel (i, j); the stride is the number
                # of columns, so this also works for non-square images
                ind = i * cols + j
                a[ind, 0] = ind if (i - 1) < 0 else cols * (i - 1) + j
                a[ind, 1] = ind if (i + 1) >= rows else cols * (i + 1) + j
                a[ind, 2] = ind if (j - 1) < 0 else cols * i + (j - 1)
                a[ind, 3] = ind if (j + 1) >= cols else cols * i + (j + 1)
        return a


    def _vanilla_gd(self, X, adj):
        return self._loss_grad(X, self._weights, adj)

    def _momentum_gd(self, X, adj):
        # Accumulate an exponentially decaying sum of past gradients
        self._velocity = self.momentum * self._velocity
        self._velocity += self._loss_grad(X, self._weights, adj)
        return self._velocity

    def _nesterov_gd(self, X, adj):
        # Evaluate the gradient at the look-ahead position w - momentum * v
        self._velocity = self.momentum * self._velocity
        self._velocity += self._loss_grad(X, self._weights - self.momentum * self._velocity, adj)
        return self._velocity

    def _mse_loss(self, X):
        return np.mean((X - self._weights) ** 2)

    def _mse_grad(self, X, W):
        return W - X

    def _l1_subgrad(self, W, indices):
        """
        L1 is not differentiable, but it's convex, hence we can use a
        subgradient: sign(w_i - w_j), summed over the four neighbours j.
        """
        subgrad = np.zeros_like(W)
        for col in range(indices.shape[1]):
            subgrad += np.sign(W - W[indices[:, col]])
        return subgrad

    def _loss_grad(self, X, W, adj):
        # fit() subtracts this value directly, so alpha and l jointly act as
        # the step sizes for the two terms of the objective
        return self.alpha * self._mse_grad(X, W) + self.l * self._l1_subgrad(W, adj)
--------------------------------------------------------------------------------
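
The evaluation metric is described in the README only in words. Below is a hedged sketch of one plausible reading (the function name `noise_r2` and the exact formula are assumptions, not code from this repository): R^2 with the total-variance term replaced by the injected noise variance.

```python
import numpy as np

def noise_r2(original, denoised, noise_std=20.0):
    """Assumed reading of the README's metric: R^2 with the variance replaced
    by the (known) noise variance, i.e. 1 - SS_res / (n * noise_std ** 2).
    A value near 1 means the residual error is small relative to the injected
    noise level. This formula is an interpretation, not taken from the repo."""
    ss_res = np.sum((original - denoised) ** 2)
    return 1 - ss_res / (original.size * noise_std ** 2)
```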