├── comp_graph.png ├── grad ├── __init__.py ├── variable.py └── gate.py ├── README.md └── nn.ipynb /comp_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlisaLC/GraDino/HEAD/comp_graph.png -------------------------------------------------------------------------------- /grad/__init__.py: -------------------------------------------------------------------------------- 1 | from . import variable as vp 2 | from .variable import Variable as vn 3 | 4 | class no_grad: 5 | def __init__(self): 6 | pass 7 | 8 | def __enter__(self): 9 | vp.is_grad_enabled = False 10 | 11 | def __exit__(self, exc_type, exc_value, traceback): 12 | vp.is_grad_enabled = True -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GraDino 2 | an autograd package in python. GraDino variables can be used in lists, tuples or even `numpy` arrays! 3 | gradients are calculated using backpropagation. examples of linear regression and an XOR neural network are in the `nn.ipynb` notebook. 4 | ## Basic Usage 5 | a `Variable` can be defined using a `float` or `int` datatype. every operation on a `Variable` results in another `Variable`. finally, when the `backward` method is called on a `Variable`, every `Variable`'s gradient is calculated using backpropagation and stored in its `grad` attribute. 6 | ```python 7 | import grad 8 | from grad.variable import Variable as vn 9 | 10 | x = vn(1.0, requires_grad=True) 11 | y = vn(2.0, requires_grad=True) 12 | z = (x - y) ** 2 13 | z.backward() 14 | print(x.grad, y.grad) # -2.0 2.0 15 | ``` 16 | 17 | ## Numpy Arrays 18 | an array of autograd variables can be defined using the `array` function in the `grad.variable` module. it works with lists and numpy arrays. to extract the gradients we can use the `array_grad` function. 
19 | ```python 20 | import numpy as np 21 | from grad import variable as vp 22 | 23 | x = vp.array(np.random.randn(3, 10)) 24 | y = vp.array(np.random.randn(10, 3)) 25 | z = np.mean((x - y.T) ** 2) 26 | z.backward() 27 | print(vp.array_grad(x)) 28 | print(vp.array_grad(y)) 29 | ``` 30 | ## Gradient Calculation Techniques 31 | * if we want to define a variable that does not need backpropagation, we can set its `requires_grad` attribute to `False`. 32 | * to zero the calculated gradients after applying optimization in each iteration, the `zero_grad` method should be called on each `Variable`. to apply this to an array we can use the `array_zero_grad` function. 33 | * to disable the computation graph during optimization, `with grad.no_grad():` can be used. 34 | * `zero_grad` can be called on the final result to reset the gradients recursively. 35 | ```python 36 | import grad 37 | from grad.variable import Variable as vn 38 | 39 | x = vn(1.0, requires_grad=True) 40 | y = vn(2.0, requires_grad=True) 41 | z = (x - y) ** 2 42 | z.backward() 43 | print(x.grad, y.grad) # -2.0 2.0 44 | z.zero_grad() 45 | print(x.grad, y.grad) # 0 0 46 | ``` 47 | ## Computational Graph Drawing 48 | to draw the computational graph of a `Variable` we can use its `draw_graph` method. it uses the `graphviz` library to draw the graph. the graph is returned as a `graphviz.Digraph` object. 49 | ```python 50 | import grad 51 | from grad.variable import Variable as vn 52 | 53 | x = vn(1.0, requires_grad=True) 54 | y = vn(2.0, requires_grad=True) 55 | z = (x - y) ** 2 56 | z.backward() 57 | g = z.draw_graph() 58 | g.view() 59 | ``` 60 |

61 | 62 |

63 | 64 | ## Higher Order Derivatives 65 | to calculate higher order derivatives we can call the `backward` method multiple times. the gradients are accumulated in the `grad` attribute. the `make_graph` argument of `backward` must be set to `True` so that the gradients themselves are recorded in a computational graph. after each `backward` we call `zero_grad` to reset the gradients. 66 | ```python 67 | import grad 68 | from grad.variable import Variable as vn 69 | 70 | x = vn(17.0, requires_grad=True) 71 | y = vn(13.0, requires_grad=True) 72 | z = (x - y) ** 2 73 | z.backward(make_graph=True) 74 | print(x.grad, y.grad) # 8.0 -8.0 75 | x_grad = x.grad # 2 * (x - y) 76 | z.zero_grad() 77 | x_grad.backward() 78 | print(x.grad, y.grad) # 2.0 -2.0 79 | ``` 80 | when `make_graph` is set to `True`, every gradient stored in `grad` is itself a `Variable` that carries its own computational graph, so we can call `backward` on it again or use its `draw_graph` method to draw that graph. -------------------------------------------------------------------------------- /grad/variable.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from . 
# NOTE: in the repository dump this line completes the `from .` that ends the
# previous dump line, i.e. the real statement is `from . import gate as g`.
import gate as g
import graphviz

# Module-wide switch; while False, new Variables are created without a
# producing gate and backward()/zero_grad() return immediately.
is_grad_enabled = True


class Variable:
    """A scalar number that remembers the gate which produced it.

    Arithmetic on a ``Variable`` returns a new ``Variable`` whose ``gate``
    keeps references to the operands. ``backward`` follows those references
    and accumulates derivatives into the ``grad`` attribute of every operand
    that has ``requires_grad`` set.
    """

    def __new__(cls, data=None, gate=None, requires_grad=True):
        """Return ``data`` itself when it is already a ``Variable``.

        The original constructor tried to achieve this with ``self = data``,
        which only rebinds a local name and left the new object without any
        attributes. Returning the existing object from ``__new__`` makes
        ``Variable(v) is v`` hold.
        """
        if isinstance(data, Variable):
            return data
        return super().__new__(cls)

    def __init__(self, data, gate=None, requires_grad=True):
        """Wrap an int or float (Python or NumPy scalar).

        Args:
            data: the number to wrap, or an existing ``Variable``.
            gate: the gate that computed ``data``; ignored (replaced by an
                identity gate) when omitted or when gradients are disabled.
            requires_grad: whether ``backward`` should accumulate into and
                propagate through this variable.

        Raises:
            TypeError: if ``data`` is neither an integer nor a float.
        """
        if isinstance(data, Variable):
            # __new__ handed back `data` itself; Python still calls __init__
            # on it, so leave its state untouched.
            return
        if isinstance(data, (int, np.integer)):
            data = int(data)
        elif isinstance(data, (float, np.floating)):
            data = float(data)
        else:
            raise TypeError('expected int or float, got ' +
                            str(type(data)) + " instead")
        self.data = data
        # Accumulated derivative; becomes a Variable after backward(make_graph=True).
        self.grad = 0
        if gate is None or not is_grad_enabled:
            self.gate = g.Identity()
        else:
            self.gate = gate
        self.requires_grad = requires_grad
        # True once this variable's node has been emitted by draw_graph.
        self.is_graph = False

    def __repr__(self):
        return f"{self.data}"

    def __int__(self):
        return int(self.data)

    def __float__(self):
        return float(self.data)

    def __str__(self):
        return str(self.data)

    def __format__(self, *args, **kwargs):
        # Delegate so format specs such as '.4g' apply to the wrapped number.
        return self.data.__format__(*args, **kwargs)

    def backward(self, grad=None, make_graph=False):
        """Accumulate ``grad`` into ``self.grad`` and pass it to the gate.

        Does nothing when ``requires_grad`` is False or gradients are
        globally disabled. When ``grad`` is omitted the seed is 1; with
        ``make_graph=True`` the seed is a ``Variable`` so the derivatives are
        themselves recorded and can be differentiated again.
        """
        if not self.requires_grad or not is_grad_enabled:
            return
        if grad is None:
            grad = Variable(1, requires_grad=False) if make_graph else 1
        self.grad += grad
        self.gate.backward(grad=grad, make_graph=make_graph)

    def draw_graph(self, graph=None):
        """Add this variable and its producing gate to a graphviz digraph.

        A new ``graphviz.Digraph`` is created when ``graph`` is None. The
        (possibly new) graph is returned.
        """
        if graph is None:
            graph = graphviz.Digraph()
        node_id = str(id(self))
        if not self.is_graph:
            is_leaf = self.gate is not None and self.gate.name == 'identity'
            if is_leaf and self.requires_grad:
                graph.node(node_id, f'{self.data:.4g}',
                           style='filled', fillcolor='lightblue')
            else:
                graph.node(node_id, f'{self.data:.4g}')
            self.is_graph = True
        if self.gate is not None and self.gate.name != 'identity':
            gate_id = str(id(self.gate))
            graph.node(gate_id, self.gate.name,
                       style='filled', fillcolor='lightgreen')
            label = f'{self.grad:.4g}' if self.requires_grad else None
            graph.edge(gate_id, node_id, label=label)
            self.gate.draw_graph(graph)
        return graph

    def clear_graph(self):
        """Reset the drawn marker on this variable and, via its gate, its inputs."""
        self.is_graph = False
        self.gate.clear_graph()

    def zero_grad(self):
        """Reset ``grad`` to 0 here and, via the gate, on the inputs."""
        if not self.requires_grad or not is_grad_enabled:
            return
        self.grad = 0
        self.gate.zero_grad()

    @staticmethod
    def _lift(other):
        """Return ``other`` as a Variable, or None if it is not a number."""
        if isinstance(other, Variable):
            return other
        try:
            return Variable(other, requires_grad=False)
        except TypeError:
            return None

    # Comparisons look at the wrapped numbers only. Unsupported operands yield
    # NotImplemented so Python can fall back (e.g. `v == None` is False)
    # instead of raising from the constructor.
    def __eq__(self, other):
        other = Variable._lift(other)
        if other is None:
            return NotImplemented
        return self.data == other.data

    def __lt__(self, other):
        other = Variable._lift(other)
        if other is None:
            return NotImplemented
        return self.data < other.data

    def __gt__(self, other):
        other = Variable._lift(other)
        if other is None:
            return NotImplemented
        return self.data > other.data

    def __le__(self, other):
        other = Variable._lift(other)
        if other is None:
            return NotImplemented
        return self.data <= other.data

    def __ge__(self, other):
        other = Variable._lift(other)
        if other is None:
            return NotImplemented
        return self.data >= other.data

    def __ne__(self, other):
        other = Variable._lift(other)
        if other is None:
            return NotImplemented
        return self.data != other.data

    def _unary(self, gate_cls):
        """Apply a one-input gate to this variable."""
        gate = gate_cls()
        return Variable(gate(self), gate=gate)

    def _binary(self, other, gate_cls):
        """Apply a two-input gate to ``(self, other)``.

        Returns NotImplemented for operands that cannot be wrapped, which
        lets the other operand's reflected method run.
        """
        other = Variable._lift(other)
        if other is None:
            return NotImplemented
        gate = gate_cls()
        return Variable(gate(self, other), gate=gate)

    def __pos__(self):
        return self

    def __neg__(self):
        return self._unary(g.Neg)

    def __abs__(self):
        return self._unary(g.Abs)

    def __add__(self, other):
        return self._binary(other, g.Add)

    def __radd__(self, other):
        return self.__add__(other)

    def __sub__(self, other):
        # Subtraction is recorded as self + (-other).
        other = Variable._lift(other)
        if other is None:
            return NotImplemented
        negate = g.Neg()
        negated = Variable(negate(other), gate=negate)
        add = g.Add()
        return Variable(add(self, negated), gate=add)

    def __rsub__(self, other):
        other = Variable._lift(other)
        if other is None:
            return NotImplemented
        return other - self

    def __mul__(self, other):
        return self._binary(other, g.Mul)

    def __rmul__(self, other):
        return self.__mul__(other)

    def __truediv__(self, other):
        return self._binary(other, g.Div)

    def __rtruediv__(self, other):
        other = Variable._lift(other)
        if other is None:
            return NotImplemented
        return other / self

    def __pow__(self, other):
        return self._binary(other, g.Pow)

    def __rpow__(self, other):
        other = Variable._lift(other)
        if other is None:
            return NotImplemented
        return other ** self

    def sqrt(self):
        return self ** 0.5

    def sin(self):
        return self._unary(g.Sin)

    def arcsin(self):
        return self._unary(g.Asin)

    def sinh(self):
        return self._unary(g.Sinh)

    def arcsinh(self):
        return self._unary(g.Asinh)

    def cos(self):
        return self._unary(g.Cos)

    def arccos(self):
        return self._unary(g.Acos)

    def cosh(self):
        return self._unary(g.Cosh)

    def arccosh(self):
        return self._unary(g.Acosh)

    def tan(self):
        return self._unary(g.Tan)

    def arctan(self):
        return self._unary(g.Atan)

    def tanh(self):
        return self._unary(g.Tanh)

    def arctanh(self):
        return self._unary(g.Atanh)

    def exp(self):
        return self._unary(g.Exp)

    def log(self):
        return self._unary(g.Log)

    def conjugate(self):
        return self


def array(data, requires_grad=True):
    """Recursively wrap numbers in ``data`` as ``Variable`` objects.

    Lists and tuples come back as lists, ndarrays as ndarrays built from the
    wrapped elements, existing Variables unchanged, and anything else is
    passed to the ``Variable`` constructor.
    """
    if isinstance(data, Variable):
        return data
    if isinstance(data, (list, tuple)):
        return [array(item, requires_grad=requires_grad) for item in data]
    if isinstance(data, np.ndarray):
        if data.ndim == 0:
            # 0-d arrays cannot be iterated; unwrap the single element.
            return Variable(data.item(), requires_grad=requires_grad)
        return np.array([array(item, requires_grad=requires_grad)
                         for item in data])
    return Variable(data, requires_grad=requires_grad)


def array_grad(data):
    """Return the ``grad`` values of ``data`` in the same nested layout.

    Elements that are not Variables, lists, tuples or ndarrays are returned
    as they are.
    """
    if isinstance(data, Variable):
        return data.grad
    if isinstance(data, (list, tuple)):
        return [array_grad(item) for item in data]
    if isinstance(data, np.ndarray):
        return np.array([array_grad(item) for item in data])
    return data


def array_zero_grad(data):
    """Call ``zero_grad`` on every Variable nested inside ``data``."""
    if isinstance(data, Variable):
        data.zero_grad()
    elif isinstance(data, (list, tuple, np.ndarray)):
        for item in data:
            array_zero_grad(item)


# --------------------------------------------------------------------------------
# /grad/gate.py:
# --------------------------------------------------------------------------------
import numpy as np
import graphviz
class Gate:
    """Base class for an operation node in the computation graph.

    A gate remembers the variables it was applied to in ``vars``. ``forward``
    computes the raw result and ``backward`` hands each input its share of
    the incoming gradient.
    """

    def __init__(self, name):
        self.name = name
        # Input variables of the most recent forward() call.
        self.vars = []

    def __repr__(self):
        return f"Gate({self.name})"

    def forward(self, *args, **kwds):
        # Accept any arguments so calling an unimplemented gate reports
        # NotImplementedError rather than a signature TypeError.
        raise NotImplementedError

    def backward(self, grad, make_graph=False):
        raise NotImplementedError

    def __call__(self, *args, **kwds):
        return self.forward(*args, **kwds)

    def _operands(self, make_graph):
        """Return the inputs as Variables when building a graph, else as raw numbers."""
        if make_graph:
            return list(self.vars)
        return [var.data for var in self.vars]

    def draw_graph(self, graph):
        """Draw every input of this gate and the edge from it to the gate.

        Each input's node is emitted once (tracked by ``is_graph``), the edge
        into this gate is always emitted, and the input's own producer is
        only descended into on the first visit so shared sub-graphs are not
        drawn repeatedly.
        """
        gate_id = str(id(self))
        for var in self.vars:
            var_id = str(id(var))
            first_visit = not var.is_graph
            if first_visit:
                is_leaf = var.gate is not None and var.gate.name == 'identity'
                if is_leaf and var.requires_grad:
                    graph.node(var_id, f'{var.data:.4g}',
                               style='filled', fillcolor='lightblue')
                else:
                    graph.node(var_id, f'{var.data:.4g}')
                var.is_graph = True
            label = f'{var.grad:.4g}' if var.requires_grad else None
            graph.edge(var_id, gate_id, label=label)
            if first_visit:
                var.draw_graph(graph)

    def clear_graph(self):
        """Reset the drawn marker on every input."""
        for var in self.vars:
            var.clear_graph()

    def zero_grad(self):
        """Reset the gradient of every input."""
        for var in self.vars:
            var.zero_grad()


class Identity(Gate):
    """Gate of a leaf variable: no inputs, nothing to propagate."""

    def __init__(self):
        super().__init__("identity")

    def forward(self, x):
        return x

    def backward(self, grad, make_graph=False):
        return grad


class Add(Gate):
    """x + y; both inputs receive the incoming gradient unchanged."""

    def __init__(self):
        super().__init__("add")

    def forward(self, x, y):
        self.vars = [x, y]
        return x.data + y.data

    def backward(self, grad, make_graph=False):
        for var in self.vars:
            if var.requires_grad:
                var.backward(grad, make_graph=make_graph)


class Mul(Gate):
    """x * y; each input receives grad times the other input."""

    def __init__(self):
        super().__init__("mul")

    def forward(self, x, y):
        self.vars = [x, y]
        return x.data * y.data

    def backward(self, grad, make_graph=False):
        left, right = self.vars
        a, b = self._operands(make_graph)
        if left.requires_grad:
            left.backward(grad * b, make_graph=make_graph)
        if right.requires_grad:
            right.backward(grad * a, make_graph=make_graph)


class Div(Gate):
    """x / y; d/dx = 1 / y and d/dy = -x / y**2."""

    def __init__(self):
        super().__init__("div")

    def forward(self, x, y):
        self.vars = [x, y]
        return x.data / y.data

    def backward(self, grad, make_graph=False):
        numerator, denominator = self.vars
        a, b = self._operands(make_graph)
        if numerator.requires_grad:
            numerator.backward(grad / b, make_graph=make_graph)
        if denominator.requires_grad:
            denominator.backward(-grad * a / b ** 2, make_graph=make_graph)


class Pow(Gate):
    """x ** y; d/dx = y * x**(y - 1) and d/dy = x**y * log(x)."""

    def __init__(self):
        super().__init__("pow")

    def forward(self, x, y):
        self.vars = [x, y]
        return x.data ** y.data

    def backward(self, grad, make_graph=False):
        base, exponent = self.vars
        a, b = self._operands(make_graph)
        if base.requires_grad:
            base.backward(grad * b * a ** (b - 1), make_graph=make_graph)
        if exponent.requires_grad:
            exponent.backward(grad * a ** b * np.log(a),
                              make_graph=make_graph)


class _UnaryGate(Gate):
    """Shared plumbing for gates with a single input.

    Subclasses set ``gate_name`` and implement ``_apply`` (the function on
    the raw number) and ``_local_grad`` (the gradient sent to the input).
    """

    gate_name = None

    def __init__(self):
        super().__init__(self.gate_name)

    def forward(self, x):
        self.vars = [x]
        return self._apply(x.data)

    def backward(self, grad, make_graph=False):
        var = self.vars[0]
        if not var.requires_grad:
            # Skip the derivative entirely for constants; the input would
            # discard it anyway and evaluating it may fail (e.g. log of 0).
            return
        operand = var if make_graph else var.data
        var.backward(self._local_grad(grad, operand), make_graph=make_graph)

    def _apply(self, value):
        raise NotImplementedError

    def _local_grad(self, grad, x):
        raise NotImplementedError


class Neg(_UnaryGate):
    gate_name = "neg"

    def _apply(self, value):
        return -value

    def _local_grad(self, grad, x):
        return -grad


class Abs(_UnaryGate):
    gate_name = "abs"

    def _apply(self, value):
        return abs(value)

    def _local_grad(self, grad, x):
        # The sign is always taken from the raw number; zero counts as positive.
        return grad * (-1 if self.vars[0].data < 0 else 1)


class Sin(_UnaryGate):
    gate_name = "sin"

    def _apply(self, value):
        return np.sin(value)

    def _local_grad(self, grad, x):
        return grad * np.cos(x)


class Asin(_UnaryGate):
    gate_name = "asin"

    def _apply(self, value):
        return np.arcsin(value)

    def _local_grad(self, grad, x):
        return grad / np.sqrt(1 - x ** 2)


class Sinh(_UnaryGate):
    gate_name = "sinh"

    def _apply(self, value):
        return np.sinh(value)

    def _local_grad(self, grad, x):
        return grad * np.cosh(x)


class Asinh(_UnaryGate):
    gate_name = "asinh"

    def _apply(self, value):
        return np.arcsinh(value)

    def _local_grad(self, grad, x):
        return grad / np.sqrt(1 + x ** 2)


class Cos(_UnaryGate):
    gate_name = "cos"

    def _apply(self, value):
        return np.cos(value)

    def _local_grad(self, grad, x):
        return -grad * np.sin(x)


class Acos(_UnaryGate):
    gate_name = "acos"

    def _apply(self, value):
        return np.arccos(value)

    def _local_grad(self, grad, x):
        return -grad / np.sqrt(1 - x ** 2)


class Cosh(_UnaryGate):
    gate_name = "cosh"

    def _apply(self, value):
        return np.cosh(value)

    def _local_grad(self, grad, x):
        return grad * np.sinh(x)


class Acosh(_UnaryGate):
    gate_name = "acosh"

    def _apply(self, value):
        return np.arccosh(value)

    def _local_grad(self, grad, x):
        return grad / np.sqrt(x ** 2 - 1)


class Tan(_UnaryGate):
    gate_name = "tan"

    def _apply(self, value):
        return np.tan(value)

    def _local_grad(self, grad, x):
        return grad / np.cos(x) ** 2


class Atan(_UnaryGate):
    gate_name = "atan"

    def _apply(self, value):
        return np.arctan(value)

    def _local_grad(self, grad, x):
        return grad / (1 + x ** 2)


class Tanh(_UnaryGate):
    gate_name = "tanh"

    def _apply(self, value):
        return np.tanh(value)

    def _local_grad(self, grad, x):
        return grad * (1 - np.tanh(x) ** 2)


class Atanh(_UnaryGate):
    gate_name = "atanh"

    def _apply(self, value):
        return np.arctanh(value)

    def _local_grad(self, grad, x):
        return grad / (1 - x ** 2)


class Exp(_UnaryGate):
    gate_name = "exp"

    def _apply(self, value):
        return np.exp(value)

    def _local_grad(self, grad, x):
        return grad * np.exp(x)


class Log(_UnaryGate):
    gate_name = "log"

    def _apply(self, value):
        return np.log(value)

    def _local_grad(self, grad, x):
        return grad / x


# -------------------------------------------------------------------------------- /nn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import grad\n", 11 | "from grad import variable as vp\n", 12 | "from grad.variable import Variable as vn" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "name": "stdout", 22 | "output_type": "stream", 23 | "text": [ 24 | "8.0 -8.0\n", 25 | "2.0 -2.0\n" 26 | ] 27 | }, 28 | { 29 |
"data": { 30 | "text/plain": [ 31 | "'x_grad.pdf'" 32 | ] 33 | }, 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "output_type": "execute_result" 37 | } 38 | ], 39 | "source": [ 40 | "x = vn(17.0, requires_grad=True)\n", 41 | "y = vn(13.0, requires_grad=True)\n", 42 | "z = (x - y) ** 2\n", 43 | "z.backward(make_graph=True)\n", 44 | "print(x.grad, y.grad)\n", 45 | "z.draw_graph().render('z')\n", 46 | "z.clear_graph()\n", 47 | "x_grad = x.grad\n", 48 | "z.zero_grad()\n", 49 | "x_grad.backward()\n", 50 | "print(x.grad, y.grad)\n", 51 | "x_grad.draw_graph().render('x_grad')" 52 | ] 53 | }, 54 | { 55 | "attachments": {}, 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "## Linear Regression" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 3, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stderr", 69 | "output_type": "stream", 70 | "text": [ 71 | "100%|██████████| 300/300 [00:00<00:00, 2153.92it/s]\n" 72 | ] 73 | }, 74 | { 75 | "data": { 76 | "image/png": "", 77 | "text/plain": [ 78 | "
" 79 | ] 80 | }, 81 | "metadata": {}, 82 | "output_type": "display_data" 83 | }, 84 | { 85 | "name": "stdout", 86 | "output_type": "stream", 87 | "text": [ 88 | "[[16.999999999999996 12.999999999999998 18.999999999999996\n", 89 | " 22.999999999999996]]\n" 90 | ] 91 | } 92 | ], 93 | "source": [ 94 | "import matplotlib.pyplot as plt\n", 95 | "from tqdm import tqdm\n", 96 | "x = vn(2)\n", 97 | "y = vn(3)\n", 98 | "z = vn(4)\n", 99 | "a = np.array([[x, y, z]])\n", 100 | "W = vp.array(np.random.randn(4, 3))\n", 101 | "Y = vp.array(np.array([17, 13, 19, 23]), requires_grad=False)\n", 102 | "losses = []\n", 103 | "for i in tqdm(range(300)):\n", 104 | " b = a @ W.T\n", 105 | " loss = np.mean((Y - b) ** 2)\n", 106 | " losses.append(loss)\n", 107 | " loss.backward()\n", 108 | " with grad.no_grad():\n", 109 | " W = W - 0.01 * vp.array_grad(W)\n", 110 | " if i != 299:\n", 111 | " loss.zero_grad()\n", 112 | "loss.draw_graph().view()\n", 113 | "plt.plot(losses)\n", 114 | "plt.yscale('log')\n", 115 | "plt.show()\n", 116 | "print(a @ W.T)\n" 117 | ] 118 | }, 119 | { 120 | "attachments": {}, 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "## XOR Neural Net" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 4, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "name": "stderr", 134 | "output_type": "stream", 135 | "text": [ 136 | "100%|██████████| 500/500 [02:13<00:00, 3.74it/s]\n" 137 | ] 138 | }, 139 | { 140 | "name": "stdout", 141 | "output_type": "stream", 142 | "text": [ 143 | "[[-0.9984565423358374]\n", 144 | " [0.9974001865275044]\n", 145 | " [0.9977095500300808]\n", 146 | " [-0.9985410651493936]]\n" 147 | ] 148 | }, 149 | { 150 | "data": { 151 | "image/png": "", 152 | "text/plain": [ 153 | "
" 154 | ] 155 | }, 156 | "metadata": {}, 157 | "output_type": "display_data" 158 | } 159 | ], 160 | "source": [ 161 | "x = vp.array(np.array([[-1, -1], [-1, 1], [1, -1], [1, 1]]), requires_grad=False)\n", 162 | "y = vp.array(np.array([[-1], [1], [1], [-1]]), requires_grad=False)\n", 163 | "W_1 = vp.array(np.random.uniform(-1./np.sqrt(2),1./np.sqrt(2),(64, 2)))\n", 164 | "b_1 = vp.array(np.zeros(64))\n", 165 | "W_2 = vp.array(np.random.uniform(-1./np.sqrt(64),1./np.sqrt(64),(16, 64)))\n", 166 | "b_2 = vp.array(np.zeros(16))\n", 167 | "W_3 = vp.array(np.random.uniform(-1./np.sqrt(16),1./np.sqrt(16),(1, 16)))\n", 168 | "b_3 = vp.array(np.zeros(1))\n", 169 | "losses = []\n", 170 | "lr = 0.1\n", 171 | "for i in tqdm(range(500)):\n", 172 | " f_1 = np.tanh(x @ W_1.T + b_1)\n", 173 | " f_2 = np.tanh(f_1 @ W_2.T + b_2)\n", 174 | " z_3 = f_2 @ W_3.T + b_3\n", 175 | " loss = np.mean((z_3 - y) ** 2)\n", 176 | " losses.append(loss)\n", 177 | " loss.backward()\n", 178 | " with grad.no_grad():\n", 179 | " W_1 = W_1 - lr * vp.array_grad(W_1)\n", 180 | " b_1 = b_1 - lr * vp.array_grad(b_1)\n", 181 | " W_2 = W_2 - lr * vp.array_grad(W_2)\n", 182 | " b_2 = b_2 - lr * vp.array_grad(b_2)\n", 183 | " W_3 = W_3 - lr * vp.array_grad(W_3)\n", 184 | " b_3 = b_3 - lr * vp.array_grad(b_3)\n", 185 | " loss.zero_grad()\n", 186 | " lr *= 0.99\n", 187 | "print(z_3)\n", 188 | "plt.plot(losses)\n", 189 | "plt.yscale('log')" 190 | ] 191 | } 192 | ], 193 | "metadata": { 194 | "kernelspec": { 195 | "display_name": "Python 3", 196 | "language": "python", 197 | "name": "python3" 198 | }, 199 | "language_info": { 200 | "codemirror_mode": { 201 | "name": "ipython", 202 | "version": 3 203 | }, 204 | "file_extension": ".py", 205 | "mimetype": "text/x-python", 206 | "name": "python", 207 | "nbconvert_exporter": "python", 208 | "pygments_lexer": "ipython3", 209 | "version": "3.9.13" 210 | }, 211 | "orig_nbformat": 4 212 | }, 213 | "nbformat": 4, 214 | "nbformat_minor": 2 215 | } 216 | 
--------------------------------------------------------------------------------