├── minitorch ├── autograd │ ├── __init__.py │ ├── edge.py │ ├── engine.py │ ├── functional.py │ └── node.py ├── nn │ ├── modules │ │ ├── __init__.py │ │ ├── activation.py │ │ ├── linear.py │ │ ├── loss.py │ │ └── module.py │ ├── __init__.py │ └── parameter.py ├── optim │ ├── __init__.py │ ├── optimizer.py │ └── sgd.py ├── __init__.py └── tensor.py ├── tests ├── test_nn │ ├── __init__.py │ ├── test_linear.py │ ├── test_loss.py │ └── test_activation.py ├── test_autograd │ ├── __init__.py │ ├── test_neg.py │ ├── test_pow.py │ ├── test_relu.py │ ├── test_exp.py │ ├── test_sum.py │ ├── test_mean.py │ ├── test_matmul.py │ ├── test_add.py │ ├── test_mul.py │ ├── test_sub.py │ └── test_div.py └── test_suite.py ├── requirements.txt ├── setup.py ├── examples ├── example.py ├── neural_network.py └── train.py ├── .flake8 ├── .gitignore └── README.md /minitorch/autograd/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /minitorch/nn/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /minitorch/optim/__init__.py: -------------------------------------------------------------------------------- 1 | from .sgd import * 2 | -------------------------------------------------------------------------------- /tests/test_nn/__init__.py: -------------------------------------------------------------------------------- 1 | from .test_linear import TestLinear 2 | from .test_loss import TestLoss 3 | from .test_activation import TestActivation -------------------------------------------------------------------------------- /minitorch/__init__.py: -------------------------------------------------------------------------------- 1 | from .tensor import Tensor, rand 2 | from .autograd.functional import * 3 | -------------------------------------------------------------------------------- /minitorch/autograd/edge.py: -------------------------------------------------------------------------------- 1 | class Edge: 2 | 3 | def __init__(self, node): 4 | self.node = node 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | mypy==0.800 2 | mypy-extensions==0.4.3 3 | numpy==1.19.5 4 | typed-ast==1.4.2 5 | typing-extensions==3.7.4.3 6 | -------------------------------------------------------------------------------- /minitorch/nn/__init__.py: -------------------------------------------------------------------------------- 1 | from .modules.activation import * 2 | from .modules.linear import * 3 | from .modules.loss import * 4 | from .modules.module import * 5 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | 4 | setup( 5 | name="minitorch", 6 | version="1.0", 7 | author="zhouzaida", 8 | description="a minimal neural network library", 9 | packages=["minitorch"], 10 | ) 11 | -------------------------------------------------------------------------------- /minitorch/optim/optimizer.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class Optimizer(metaclass=ABCMeta): 5 | """Base class for all optimizers.""" 6 | 7 | @abstractmethod 8 | def step(self): 9 | pass 10 |
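The `Optimizer` base class above only fixes the `step()` contract; concrete optimizers decide how to apply `param.grad`. As a hedged illustration (the class below is a sketch and not part of minitorch; the name `SGDWithMomentum` and its `momentum`/`velocities` attributes are assumptions), a momentum variant following the same conventions as `SGD` in `minitorch/optim/sgd.py` could look like:

```python
from minitorch.optim.optimizer import Optimizer


class SGDWithMomentum(Optimizer):
    """Hypothetical momentum variant of SGD; illustrative only."""

    def __init__(self, params, lr, momentum=0.9):
        if lr < 0.0:
            raise ValueError("Invalid learning rate: {}".format(lr))
        self.params = list(params)
        self.lr = lr
        self.momentum = momentum
        # one velocity buffer per parameter; numpy broadcasting grows it on first use
        self.velocities = [0.0 for _ in self.params]

    def step(self):
        for i, param in enumerate(self.params):
            # v <- momentum * v + grad, then param <- param - lr * v
            self.velocities[i] = self.momentum * self.velocities[i] + param.grad.data
            param.data = param.data - self.lr * self.velocities[i]
```

It would be used exactly like `SGD`: construct it from `model.parameters()` and a learning rate, then call `step()` after `loss.backward()`.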
-------------------------------------------------------------------------------- /examples/example.py: -------------------------------------------------------------------------------- 1 | from minitorch import Tensor 2 | 3 | 4 | t1 = Tensor(4.0) 5 | t2 = Tensor(3.0, requires_grad=True) 6 | t3 = -t2 7 | t4 = t1 * t3 8 | t4.backward() 9 | print(f"t1 grad: {t1.grad}") # t1 grad: None 10 | print(f"t2 grad: {t2.grad}") # t2 grad: Tensor(-4.0, requires_grad=False) 11 | -------------------------------------------------------------------------------- /minitorch/nn/parameter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from minitorch import Tensor 4 | 5 | 6 | class Parameter(Tensor): 7 | """A kind of Tensor that is to be considered a module parameter.""" 8 | 9 | def __init__(self, *shape) -> None: 10 | data = np.random.randn(*shape) 11 | super().__init__(data=data, requires_grad=True) 12 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | select = B,C,E,F,P,T4,W,B9 3 | max-line-length = 120 4 | max-doc-length = 120 5 | # C408 ignored because we like the dict keyword argument syntax 6 | # E501 is not flexible enough, we're using B950 instead 7 | ignore = 8 | E203,E305,E402,E501,E721,E741,F403,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303 9 | exclude = .git,__pycache__,docs/source/conf.py,old,build,dist -------------------------------------------------------------------------------- /minitorch/nn/modules/activation.py: -------------------------------------------------------------------------------- 1 | from minitorch import Tensor 2 | import minitorch.autograd.functional as F 3 | from .module import Module 4 | 5 | 6 | class Sigmoid(Module): 7 | def forward(self, input: Tensor) -> Tensor: 8 | return 1 / (1 + F.exp(-input)) 9 | 10 | 11 | class ReLU(Module): 12 | def forward(self, input: Tensor) -> Tensor: 13 | return F.relu(input) 14 | -------------------------------------------------------------------------------- /tests/test_autograd/__init__.py: -------------------------------------------------------------------------------- 1 | from .test_add import TestAdd 2 | from .test_div import TestDiv 3 | from .test_exp import TestExp 4 | from .test_matmul import TestMatmul 5 | from .test_mean import TestMean 6 | from .test_mul import TestMul 7 | from .test_neg import TestNeg 8 | from .test_pow import TestPow 9 | from .test_relu import TestReLU 10 | from .test_sub import TestSub 11 | from .test_sum import TestSum 12 | -------------------------------------------------------------------------------- /tests/test_nn/test_linear.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | import minitorch.nn as nn 5 | 6 | 7 | class TestLinear(TestCase): 8 | 9 | def test_linear(self): 10 | input = Tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]) 11 | target = Tensor([[6, 7, 8, 9, 10], [1, 2, 3, 4, 5]]) 12 | loss = nn.MSELoss() 13 | output = loss(input, target) 14 | self.assertEqual(output.data.tolist(), 25.)
15 | -------------------------------------------------------------------------------- /tests/test_nn/test_loss.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | import minitorch.nn as nn 5 | 6 | 7 | class TestLoss(TestCase): 8 | 9 | def test_mse_loss(self): 10 | input = Tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]) 11 | target = Tensor([[6, 7, 8, 9, 10], [1, 2, 3, 4, 5]]) 12 | loss = nn.MSELoss() 13 | output = loss(input, target) 14 | self.assertEqual(output.data.tolist(), 25.) 15 | -------------------------------------------------------------------------------- /minitorch/optim/sgd.py: -------------------------------------------------------------------------------- 1 | from .optimizer import Optimizer 2 | 3 | 4 | class SGD(Optimizer): 5 | """Implements stochastic gradient descent""" 6 | 7 | def __init__(self, params, lr): 8 | if lr < 0.0: 9 | raise ValueError("Invalid learning rate: {}".format(lr)) 10 | self.params = list(params) 11 | self.lr = lr 12 | 13 | def step(self): 14 | for param in self.params: 15 | param.data = param.data - self.lr * param.grad.data 16 | -------------------------------------------------------------------------------- /tests/test_nn/test_activation.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | import minitorch.nn as nn 5 | 6 | 7 | class TestActivation(TestCase): 8 | 9 | def test_sigmoid(self): 10 | input = Tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]) 11 | target = Tensor([[6, 7, 8, 9, 10], [1, 2, 3, 4, 5]]) 12 | loss = nn.MSELoss() 13 | output = loss(input, target) 14 | self.assertEqual(output.data.tolist(), 25.) 15 | 16 | def test_relu(self): 17 | ... 
18 | -------------------------------------------------------------------------------- /tests/test_suite.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import test_autograd 4 | import test_nn 5 | 6 | 7 | if __name__ == '__main__': 8 | suite = unittest.TestSuite() 9 | suite.addTests(unittest.TestLoader().loadTestsFromModule(test_autograd)) 10 | suite.addTests(unittest.TestLoader().loadTestsFromModule(test_nn)) 11 | 12 | # with open('UnittestTextReport.txt', 'a') as f: 13 | # runner = unittest.TextTestRunner(stream=f, verbosity=2) 14 | runner = unittest.TextTestRunner(verbosity=2) 15 | runner.run(suite) 16 | -------------------------------------------------------------------------------- /tests/test_autograd/test_neg.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | 5 | 6 | class TestNeg(TestCase): 7 | 8 | def test_neg(self): 9 | # scalar neg 10 | t1 = Tensor(1.0) 11 | t2 = -t1 12 | self.assertEqual(t2.data.tolist(), -1.0) 13 | 14 | t1 = Tensor(2.0, requires_grad=True) 15 | t2 = -t1 16 | t2.backward() 17 | self.assertEqual(t1.grad.data.tolist(), -1.0) 18 | 19 | # vector neg 20 | t1 = Tensor([1.0, 2.0]) 21 | t2 = -t1 22 | self.assertEqual(t2.data.tolist(), [-1.0, -2.0]) 23 | 24 | t1 = Tensor([1.0, 2.0], requires_grad=True) 25 | t2 = -t1 26 | t2.backward(Tensor([1.0, 1.0])) 27 | self.assertEqual(t1.grad.data.tolist(), [-1.0, -1.0]) 28 | -------------------------------------------------------------------------------- /tests/test_autograd/test_pow.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | 5 | 6 | class TestPow(TestCase): 7 | 8 | def test_pow(self): 9 | # scalar pow 10 | t1 = Tensor(2.0) 11 | t2 = t1 ** 3 12 | self.assertEqual(t2.data.tolist(), 8.0) 13 | 14 | t1 = Tensor(2.0, requires_grad=True) 15 | t2 = t1 ** 3 16 | t2.backward() 17 | self.assertEqual(t1.grad.data.tolist(), 12.0) 18 | 19 | # vector pow 20 | t1 = Tensor([1.0, 2.0]) 21 | t2 = t1 ** 3 22 | self.assertEqual(t2.data.tolist(), [1.0, 8.0]) 23 | 24 | t1 = Tensor([1.0, 2.0], requires_grad=True) 25 | t2 = t1 ** 3 26 | t2.backward(Tensor([1.0, 1.0])) 27 | self.assertEqual(t1.grad.data.tolist(), [3.0, 12.0]) 28 | -------------------------------------------------------------------------------- /tests/test_autograd/test_relu.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | 5 | 6 | class TestReLU(TestCase): 7 | 8 | def test_relu(self): 9 | # scalar relu 10 | t1 = Tensor(2.0) 11 | t2 = t1.relu() 12 | self.assertEqual(t2.data.tolist(), 2.0) 13 | 14 | t1 = Tensor(2.0, requires_grad=True) 15 | t2 = t1.relu() 16 | t2.backward() 17 | self.assertEqual(t1.grad.data.tolist(), 1.0) 18 | 19 | # vector relu 20 | t1 = Tensor([-1.0, 2.0]) 21 | t2 = t1.relu() 22 | self.assertEqual(t2.data.tolist(), [0, 2.0]) 23 | 24 | t1 = Tensor([-1.0, 2.0], requires_grad=True) 25 | t2 = t1.relu() 26 | t2.backward(Tensor([1.0, 1.0])) 27 | self.assertEqual(t1.grad.data.tolist(), [0, 1.0]) 28 | -------------------------------------------------------------------------------- /minitorch/nn/modules/linear.py: -------------------------------------------------------------------------------- 1 | from minitorch import Tensor 2 | from .module import Module 3 | from ..parameter import 
Parameter 4 | 5 | 6 | class Linear(Module): 7 | """Applies a linear transformation to the incoming data: y = xA + b""" 8 | 9 | def __init__(self, in_features: int, out_features: int, bias: bool = True) -> None: 10 | super().__init__() 11 | self.in_features = in_features 12 | self.out_features = out_features 13 | self.weight = Parameter(out_features, in_features) 14 | if bias: 15 | self.bias = Parameter(out_features) 16 | else: 17 | self.bias = None 18 | 19 | def forward(self, input: Tensor) -> Tensor: 20 | output = input @ self.weight.t() 21 | if self.bias is not None: 22 | output = output + self.bias 23 | return output 24 | -------------------------------------------------------------------------------- /tests/test_autograd/test_exp.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | import numpy as np 4 | 5 | from minitorch import Tensor 6 | 7 | 8 | class TestExp(TestCase): 9 | 10 | def test_exp(self): 11 | # scalar exp 12 | t1 = Tensor(2.0) 13 | t2 = t1.exp() 14 | np.testing.assert_allclose(t2.data, np.exp(2)) 15 | 16 | t1 = Tensor(2.0, requires_grad=True) 17 | t2 = t1.exp() 18 | t2.backward() 19 | np.testing.assert_allclose(t1.grad.data, np.exp(2)) 20 | 21 | # vector exp 22 | t1 = Tensor([1.0, 2.0]) 23 | t2 = t1.exp() 24 | np.testing.assert_allclose(t2.data, np.exp([1, 2])) 25 | 26 | t1 = Tensor([1.0, 2.0], requires_grad=True) 27 | t2 = t1.exp() 28 | t2.backward(Tensor([1.0, 1.0])) 29 | np.testing.assert_allclose(t1.grad.data, np.exp([1, 2])) 30 | -------------------------------------------------------------------------------- /examples/neural_network.py: -------------------------------------------------------------------------------- 1 | import minitorch 2 | import minitorch.nn as nn 3 | 4 | 5 | input = minitorch.rand(2, 3) 6 | linear = nn.Linear(3, 5, bias=True) 7 | output = linear(input) 8 | print(f"output: {output}") 9 | 10 | 11 | class Model(nn.Module): 12 | 13 | def __init__(self): 14 | super().__init__() 15 | self.linear_1 = nn.Linear(3, 5, bias=True) 16 | self.linear_2 = nn.Linear(5, 6) 17 | 18 | def forward(self, input): 19 | output = self.linear_1(input) 20 | output = self.linear_2(output) 21 | return output 22 | 23 | input = minitorch.rand(2, 3) 24 | model = Model() 25 | output = model(input) 26 | print(f"output: {output}") 27 | 28 | for name, parameter in model.named_parameters(): 29 | print(f"{name}: {parameter}") 30 | 31 | for name, module in model.named_modules(prefix='model'): 32 | print(f"{name}: {module}") 33 | -------------------------------------------------------------------------------- /minitorch/nn/modules/loss.py: -------------------------------------------------------------------------------- 1 | from minitorch import Tensor 2 | from .module import Module 3 | 4 | 5 | class MSELoss(Module): 6 | """Creates a criterion that measures the mean squared error (squared L2 norm) between 7 | each element in the input x and target y. 
8 | """ 9 | 10 | def __init__(self, reduction: str = 'mean'): 11 | self.reduction = reduction 12 | 13 | def forward(self, input: Tensor, target: Tensor) -> Tensor: 14 | result = (input - target) ** 2 15 | if self.reduction is None: 16 | return result 17 | elif self.reduction == 'mean': 18 | return result.mean() 19 | elif self.reduction == 'sum': 20 | return result.sum() 21 | else: 22 | raise ValueError("reduction should be one of the 'none,mean,sum', " 23 | f"rather than {self.reduction}") 24 | 25 | 26 | class CrossEntropyLoss(Module): 27 | pass 28 | -------------------------------------------------------------------------------- /tests/test_autograd/test_sum.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | 5 | 6 | class TestSum(TestCase): 7 | 8 | def test_sum(self): 9 | t1 = Tensor([1., 2., 3.]) 10 | t2 = t1.sum() 11 | self.assertEqual(t2.data.tolist(), 6.) 12 | 13 | # (3,) -> () 14 | t1 = Tensor([1., 2., 3.], requires_grad=True) 15 | t2 = t1.sum() 16 | t2.backward() 17 | self.assertEqual(t1.grad.data.tolist(), [1., 1., 1.]) 18 | 19 | # (2, 3) -> (3, ) 20 | t1 = Tensor([[1., 2., 3.], [4., 5., 6.]], requires_grad=True) 21 | t2 = t1.sum(axis=0) 22 | t2.backward(Tensor([1., 1., 1.])) 23 | self.assertEqual(t1.grad.data.tolist(), [[1., 1., 1.], [1., 1., 1.]]) 24 | 25 | # (2, 3) -> (2, ) 26 | t1 = Tensor([[1., 2., 3.], [4., 5., 6.]], requires_grad=True) 27 | t2 = t1.sum(axis=1) 28 | t2.backward(Tensor([1., 1.])) 29 | self.assertEqual(t1.grad.data.tolist(), [[1., 1., 1.], [1., 1., 1.]]) 30 | 31 | # (2, 3) -> (,) 32 | t1 = Tensor([[1., 2., 3.], [4., 5., 6.]], requires_grad=True) 33 | t2 = t1.sum() 34 | t2.backward(Tensor(1.0)) 35 | self.assertEqual(t1.grad.data.tolist(), [[1., 1., 1.], [1., 1., 1.]]) 36 | -------------------------------------------------------------------------------- /tests/test_autograd/test_mean.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | 5 | 6 | class TestMean(TestCase): 7 | 8 | def test_mean(self): 9 | t1 = Tensor([1., 2., 3.]) 10 | t2 = t1.mean() 11 | self.assertEqual(t2.data.tolist(), 2.) 
12 | 13 | # (3,) -> () 14 | t1 = Tensor([1., 2., 3., 4.], requires_grad=True) 15 | t2 = t1.mean() 16 | t2.backward() 17 | self.assertEqual(t1.grad.data.tolist(), [1/4., 1/4, 1/4, 1/4]) 18 | 19 | # (2, 3) -> (3, ) 20 | t1 = Tensor([[1., 2., 3.], [4., 5., 6.]], requires_grad=True) 21 | t2 = t1.mean(axis=0) 22 | t2.backward(Tensor([1., 1., 1.])) 23 | self.assertEqual(t1.grad.data.tolist(), [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]]) 24 | 25 | # (2, 3) -> (2, ) 26 | t1 = Tensor([[1., 2., 3., 4.], [4., 5., 6.,7.]], requires_grad=True) 27 | t2 = t1.mean(axis=1) 28 | t2.backward(Tensor([1., 1.])) 29 | self.assertEqual(t1.grad.data.tolist(), [[1/4., 1/4, 1/4, 1/4], [1/4., 1/4, 1/4, 1/4]]) 30 | 31 | # (2, 3) -> (,) 32 | t1 = Tensor([[1., 2., 3.], [4., 5., 6.]], requires_grad=True) 33 | t2 = t1.mean() 34 | t2.backward(Tensor(1.0)) 35 | self.assertEqual(t1.grad.data.tolist(), [[1/6, 1/6, 1/6], [1/6, 1/6, 1/6]]) 36 | -------------------------------------------------------------------------------- /examples/train.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from minitorch import Tensor 4 | import minitorch.nn as nn 5 | import minitorch.optim as optim 6 | 7 | 8 | class Model(nn.Module): 9 | 10 | def __init__(self, in_features=3): 11 | super().__init__() 12 | self.linear1 = nn.Linear(in_features, 5, bias=True) 13 | self.relu1 = nn.ReLU() 14 | self.linear2 = nn.Linear(5, 1, bias=True) 15 | 16 | def forward(self, input): 17 | output = self.linear1(input) 18 | output = self.relu1(output) 19 | output = self.linear2(output) 20 | return output 21 | 22 | 23 | def train(model, x, y, epoch=30): # TODO 24 | optimizer = optim.SGD(model.parameters(), lr=0.1) 25 | mse_loss = nn.MSELoss() 26 | for i in range(1, epoch + 1): 27 | model.zero_grad() 28 | output = model(x) 29 | loss = mse_loss(output, y) 30 | print(f"train: epoch {i}, loss {loss}") 31 | loss.backward() 32 | optimizer.step() 33 | 34 | 35 | def test(model, x, y): 36 | output = model(x) 37 | mse_loss = nn.MSELoss() 38 | loss = mse_loss(output, y) 39 | print(f"test: loss {loss}") 40 | 41 | 42 | def main(): 43 | coef = Tensor(np.array([1, 3, 2])) 44 | x_train = Tensor(np.random.rand(100, 3)) 45 | y_train = x_train @ coef + 5 46 | x_test = Tensor(np.random.rand(20, 3)) 47 | y_test = x_test @ coef + 5 48 | model = Model() 49 | train(model, x_train, y_train) 50 | test(model, x_test, y_test) 51 | 52 | 53 | if __name__ == '__main__': 54 | main() 55 | -------------------------------------------------------------------------------- /tests/test_autograd/test_matmul.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | 5 | 6 | class TestMatmul(TestCase): 7 | 8 | def test_matmul(self): 9 | t1 = Tensor([1.0, 2.0]) 10 | t2 = Tensor([2.0, 3.0]) 11 | t3 = t1 @ t2 12 | self.assertEqual(t3.data.tolist(), 8.0) 13 | 14 | t1 = Tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]]) # 3 * 2 15 | t2 = Tensor([[2.0, 3.0], [3.0, 4.0]]) # 2 * 2 16 | t3 = t1 @ t2 # 3 * 2 17 | self.assertEqual(t3.data.tolist(), [[8.0, 11.], [13., 18.0], [18.0, 25.0]]) 18 | 19 | t1 = Tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]], requires_grad=True) # 3 * 2 20 | t2 = Tensor([[2.0, 3.0], [3.0, 4.0]]) # 2 * 2 21 | t3 = t1 @ t2 22 | t3.backward(Tensor([[1.0, 2.0], [3.0, 4.0], [3.0, 5.0]])) 23 | self.assertEqual(t1.grad.data.tolist(), [[8.0, 11.0], [18.0, 25.0], [21.0, 29.0]]) 24 | 25 | t1 = Tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]]) # 3 * 2 26 | t2 = 
Tensor([[2.0, 3.0], [3.0, 4.0]], requires_grad=True) # 2 * 2 27 | t3 = t1 @ t2 28 | t3.backward(Tensor([[1.0, 2.0], [3.0, 4.0], [3.0, 5.0]])) 29 | self.assertEqual(t2.grad.data.tolist(), [[16.0, 25.0], [23.0, 36.0]]) 30 | 31 | t1 = Tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]], requires_grad=True) # 3 * 2 32 | t2 = Tensor([[2.0, 3.0], [3.0, 4.0]], requires_grad=True) # 2 * 2 33 | t3 = t1 @ t2 34 | t3.backward(Tensor([[1.0, 2.0], [3.0, 4.0], [3.0, 5.0]])) 35 | self.assertEqual(t1.grad.data.tolist(), [[8.0, 11.0], [18.0, 25.0], [21.0, 29.0]]) 36 | self.assertEqual(t2.grad.data.tolist(), [[16.0, 25.0], [23.0, 36.0]]) 37 | -------------------------------------------------------------------------------- /minitorch/autograd/engine.py: -------------------------------------------------------------------------------- 1 | """Execute danamic computational graph 2 | 3 | Topological Sorting 4 | """ 5 | 6 | from collections import defaultdict, deque 7 | 8 | from minitorch import Tensor 9 | from .node import Node 10 | 11 | 12 | class NodeTask: 13 | def __init__(self, node: Node, grad_input: Tensor): 14 | self.node = node 15 | self.grad_input = grad_input 16 | 17 | def update_grad_input(self, grad_input: Tensor): 18 | self.grad_input += grad_input 19 | 20 | 21 | class Engine: 22 | 23 | def execute(self, tensor, grad_input): 24 | dependencies = self._compute_dependencies(tensor.grad_fn) 25 | not_ready_dict = {} 26 | ready_queue = deque([NodeTask(tensor.grad_fn, grad_input)]) 27 | while ready_queue: 28 | node_task = ready_queue.popleft() 29 | grad_outputs = node_task.node(node_task.grad_input) 30 | if grad_outputs is None: 31 | continue 32 | for grad_output, edge in zip(grad_outputs, node_task.node.next_edges): 33 | next_node = edge.node 34 | dependencies[next_node] -= 1 35 | if next_node not in not_ready_dict: 36 | not_ready_dict[next_node] = NodeTask(next_node, grad_output) 37 | else: 38 | not_ready_dict[next_node].update_grad_input(grad_output) 39 | if dependencies[next_node] == 0: 40 | ready_queue.append(not_ready_dict[next_node]) 41 | 42 | def _compute_dependencies(self, root: Node): 43 | dependencies = defaultdict(int) 44 | dependencies[root] = 0 45 | queue = deque([root]) 46 | while queue: 47 | node = queue.pop() 48 | if hasattr(node, "next_edges"): 49 | for edge in node.next_edges: 50 | next_node = edge.node 51 | dependencies[next_node] += 1 52 | queue.append(next_node) 53 | return dependencies 54 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | cover/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | .pybuilder/ 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | # For a library or package, you might want to ignore these files since the code is 89 | # intended to run in multiple environments; otherwise, check them in: 90 | # .python-version 91 | 92 | # pipenv 93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 96 | # install all needed dependencies. 97 | #Pipfile.lock 98 | 99 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 100 | __pypackages__/ 101 | 102 | # Celery stuff 103 | celerybeat-schedule 104 | celerybeat.pid 105 | 106 | # SageMath parsed files 107 | *.sage.py 108 | 109 | # Environments 110 | .env 111 | .venv 112 | env/ 113 | venv/ 114 | ENV/ 115 | env.bak/ 116 | venv.bak/ 117 | 118 | # Spyder project settings 119 | .spyderproject 120 | .spyproject 121 | 122 | # Rope project settings 123 | .ropeproject 124 | 125 | # mkdocs documentation 126 | /site 127 | 128 | # mypy 129 | .mypy_cache/ 130 | .dmypy.json 131 | dmypy.json 132 | 133 | # Pyre type checker 134 | .pyre/ 135 | 136 | # pytype static type analyzer 137 | .pytype/ 138 | 139 | # Cython debug symbols 140 | cython_debug/ -------------------------------------------------------------------------------- /minitorch/nn/modules/module.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | from collections import OrderedDict 3 | 4 | from typing import Iterator, Union, Tuple 5 | 6 | from ..parameter import Parameter 7 | from minitorch import Tensor 8 | 9 | 10 | class Module(metaclass=ABCMeta): 11 | r"""Base class for all neural network modules. 12 | 13 | Your models should also subclass this class. 14 | 15 | Modules can also contain other Modules, allowing to nest them in 16 | a tree structure. 
17 | """ 18 | 19 | def __init__(self): 20 | self._parameters = OrderedDict() 21 | self._modules = OrderedDict() 22 | 23 | def __call__(self, *inputs): 24 | return self.forward(*inputs) 25 | 26 | @abstractmethod 27 | def forward(self, *inputs): 28 | """subclass must implement the method.""" 29 | pass 30 | 31 | def __getattr__(self, name: str) -> Union[Tensor, 'Module']: 32 | _parameters = self.__dict__['_parameters'] 33 | if name in _parameters: 34 | return _parameters[name] 35 | _modules = self.__dict__['_modules'] 36 | if name in _modules: 37 | return _modules[name] 38 | raise AttributeError("'{}' object has no attribute '{}'".format( 39 | type(self).__name__, name)) 40 | 41 | def __setattr__(self, name: str, value: Union[Tensor, 'Module']) -> None: 42 | if isinstance(value, Parameter): 43 | self._parameters[name] = value 44 | elif isinstance(value, Module): 45 | self._modules[name] = value 46 | else: 47 | object.__setattr__(self, name, value) 48 | 49 | def _named_members(self, get_members_fn, prefix='', recurse=True): 50 | memo = set() 51 | modules = self.named_modules(prefix=prefix) if recurse else [(prefix, self)] 52 | for module_prefix, module in modules: 53 | members = get_members_fn(module) 54 | for k, v in members: 55 | if v is None or v in memo: 56 | continue 57 | memo.add(v) 58 | name = module_prefix + ('.' if module_prefix else '') + k 59 | yield name, v 60 | 61 | def parameters(self, recurse: bool = True) -> Iterator[Parameter]: 62 | for name, param in self.named_parameters(recurse=recurse): 63 | yield param 64 | 65 | def named_parameters(self, prefix: str = '', recurse: bool = True) -> Iterator[Tuple[str, Parameter]]: 66 | gen = self._named_members( 67 | lambda module: module._parameters.items(), 68 | prefix=prefix, recurse=recurse) 69 | for elem in gen: 70 | yield elem 71 | 72 | def modules(self) -> Iterator['Module']: 73 | for name, module in self.named_modules(): 74 | yield module 75 | 76 | def named_modules(self, prefix: str = '') -> Iterator[Tuple[str, 'Module']]: 77 | yield prefix, self 78 | for name, module in self._modules.items(): 79 | submodule_prefix = prefix + ('.' if prefix else '') + name 80 | yield from module.named_modules(submodule_prefix) 81 | 82 | def zero_grad(self) -> None: 83 | for p in self.parameters(): 84 | if p.grad is not None: 85 | p.grad = None 86 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # minitorch 2 | 3 | Implement a minimal library from scratch to help understand the Dynamic Computaional Graph of PyTorch. 4 | 5 | # TODO 6 | + support CUDA 7 | 8 | # Requirements 9 | 10 | 1. Create virtual environment 11 | ```bash 12 | python3 -m venv minitorch-env 13 | ``` 14 | 15 | 2. Activate virtual environment 16 | ```bash 17 | source minitorch-env/bin/activate 18 | ``` 19 | 20 | 3. Install dependencies 21 | ```bash 22 | pip install -r requirements.txt 23 | ``` 24 | 25 | # Quick Start 26 | 27 | 1. Clone the codebase 28 | ```bash 29 | git clone git@github.com:zhouzaida/minitorch.git 30 | ``` 31 | 32 | 2. 
Install or develop 33 | ```bash 34 | python setup.py install 35 | # or 36 | python setup.py develop 37 | ``` 38 | 39 | # Examples 40 | 41 | + create Tensor 42 | 43 | ```python 44 | from minitorch import Tensor 45 | 46 | t1 = Tensor(2.0) 47 | t2 = Tensor(3.0) 48 | t3 = t1 + t2 49 | print(t3) # Tensor(5.0, requires_grad=False) 50 | ``` 51 | 52 | + autograd 53 | 54 | ```python 55 | from minitorch import Tensor 56 | 57 | t1 = Tensor(2.0, requires_grad=True) 58 | t2 = Tensor(3.0) 59 | t3 = t1 + t2 60 | t4 = t1 * t3 61 | t4.backward() 62 | print(f"t1 grad: {t1.grad}") # t1 grad: Tensor(7.0, requires_grad=False) 63 | print(f"t2 grad: {t2.grad}") # t2 grad: None 64 | ``` 65 | 66 | + gradient for broadcast 67 | 68 | ```python 69 | from minitorch import Tensor 70 | 71 | t1 = Tensor([1.0, 2.0], requires_grad=True) 72 | t2 = Tensor(2.0, requires_grad=True) 73 | t3 = t1 + t2 74 | t3.backward(Tensor([1.0, 1.0])) 75 | print(f"t1 grad: {t1.grad}") # t1 grad: Tensor([1., 1.], requires_grad=False) 76 | print(f"t2 grad: {t2.grad}") # t2 grad: Tensor(2.0, requires_grad=False) 77 | ``` 78 | 79 | + create neural network 80 | 81 | ```python 82 | import minitorch 83 | import minitorch.nn as nn 84 | 85 | input = minitorch.rand(2, 3) 86 | linear = nn.Linear(3, 5, bias=True) 87 | output = linear(input) 88 | print(f"output: {output}") 89 | 90 | class Model(nn.Module): 91 | 92 | def __init__(self): 93 | super().__init__() 94 | self.linear_1 = nn.Linear(3, 5, bias=True) 95 | self.linear_2 = nn.Linear(5, 6) 96 | 97 | def forward(self, input): 98 | output = self.linear_1(input) 99 | output = self.linear_2(output) 100 | return output 101 | 102 | input = minitorch.rand(2, 3) 103 | model = Model() 104 | output = model(input) 105 | print(f"output: {output}") 106 | 107 | for name, module in model.named_modules(prefix='model'): 108 | print(f"{name}: {module}") 109 | ``` 110 | 111 | # Tools 112 | + [mypy is a static type checker for Python](https://mypy.readthedocs.io/) 113 | + [Flake8: Your Tool For Style Guide Enforcement](https://flake8.pycqa.org/en/latest/) 114 | + [unittest](https://docs.python.org/3/library/unittest.html) 115 | 116 | # References 117 | + [PyTorch](https://github.com/pytorch/pytorch) 118 | + [autograd](https://github.com/joelgrus/autograd) 119 | + [tinygrad](https://github.com/geohot/tinygrad) 120 | -------------------------------------------------------------------------------- /tests/test_autograd/test_add.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | 5 | 6 | class TestAdd(TestCase): 7 | 8 | def test_simple_add(self): 9 | # scalar add 10 | t1 = Tensor(1.0) 11 | t2 = Tensor(2.0) 12 | t3 = t1 + t2 13 | self.assertEqual(t3.data.tolist(), 3.0) 14 | 15 | t1 = Tensor(2.0, requires_grad=True) 16 | t2 = Tensor(3.0) 17 | t3 = t1 + t2 18 | t3.backward() 19 | self.assertEqual(t1.grad.data.tolist(), 1.0) 20 | 21 | t1 = Tensor(2.0) 22 | t2 = Tensor(3.0, requires_grad=True) 23 | t3 = t1 + t2 24 | t3.backward() 25 | self.assertEqual(t2.grad.data.tolist(), 1.0) 26 | 27 | t1 = Tensor(2.0, requires_grad=True) 28 | t2 = Tensor(3.0, requires_grad=True) 29 | t3 = t1 + t2 30 | t3.backward() 31 | self.assertEqual(t1.grad.data.tolist(), 1.0) 32 | self.assertEqual(t2.grad.data.tolist(), 1.0) 33 | 34 | # vector add 35 | t1 = Tensor([1.0, 2.0]) 36 | t2 = Tensor([2.0, 3.0]) 37 | t3 = t1 + t2 38 | self.assertEqual(t3.data.tolist(), [3.0, 5.0]) 39 | 40 | t1 = Tensor([1.0, 2.0], requires_grad=True) 41 | t2 =
Tensor([2.0, 3.0]) 42 | t3 = t1 + t2 43 | t3.backward(Tensor([1.0, 1.0])) 44 | self.assertEqual(t1.grad.data.tolist(), [1.0, 1.0]) 45 | 46 | t1 = Tensor([1.0, 2.0]) 47 | t2 = Tensor([2.0, 3.0], requires_grad=True) 48 | t3 = t1 + t2 49 | t3.backward(Tensor([1.0, 1.0])) 50 | self.assertEqual(t2.grad.data.tolist(), [1.0, 1.0]) 51 | 52 | t1 = Tensor([1.0, 2.0], requires_grad=True) 53 | t2 = Tensor([2.0, 3.0], requires_grad=True) 54 | t3 = t1 + t2 55 | t3.backward(Tensor([1.0, 1.0])) 56 | self.assertEqual(t1.grad.data.tolist(), [1.0, 1.0]) 57 | self.assertEqual(t2.grad.data.tolist(), [1.0, 1.0]) 58 | 59 | def test_broadcast_add(self): 60 | # (2,) + () 61 | t1 = Tensor([1.0, 2.0], requires_grad=True) 62 | t2 = Tensor(2.0, requires_grad=True) 63 | t3 = t1 + t2 64 | t3.backward(Tensor([1.0, 1.0])) 65 | self.assertEqual(t1.grad.data.tolist(), [1.0, 1.0]) 66 | self.assertEqual(t2.grad.data.tolist(), 2.0) 67 | 68 | # (2,) + (1,) 69 | t1 = Tensor([1.0, 2.0], requires_grad=True) 70 | t2 = Tensor([2.0], requires_grad=True) 71 | t3 = t1 + t2 72 | t3.backward(Tensor([1.0, 1.0])) 73 | self.assertEqual(t1.grad.data.tolist(), [1.0, 1.0]) 74 | self.assertEqual(t2.grad.data.tolist(), [2.0]) 75 | 76 | # (2, 2) + () 77 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 78 | t2 = Tensor(2.0, requires_grad=True) 79 | t3 = t1 + t2 80 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 81 | self.assertEqual(t1.grad.data.tolist(), [[1.0, 1.0], [1.0, 1.0]]) 82 | self.assertEqual(t2.grad.data.tolist(), 4.0) 83 | 84 | # (2, 2) + (1,) 85 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 86 | t2 = Tensor([2.0], requires_grad=True) 87 | t3 = t1 + t2 88 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 89 | self.assertEqual(t1.grad.data.tolist(), [[1.0, 1.0], [1.0, 1.0]]) 90 | self.assertEqual(t2.grad.data.tolist(), [4.0]) 91 | 92 | # (2, 2) + (2, ) 93 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 94 | t2 = Tensor([2.0, 3.0], requires_grad=True) 95 | t3 = t1 + t2 96 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 97 | self.assertEqual(t1.grad.data.tolist(), [[1.0, 1.0], [1.0, 1.0]]) 98 | self.assertEqual(t2.grad.data.tolist(), [2.0, 2.0]) 99 | -------------------------------------------------------------------------------- /tests/test_autograd/test_mul.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | 5 | 6 | class TestMul(TestCase): 7 | 8 | def test_simple_mul(self): 9 | # scalar mul 10 | t1 = Tensor(1.0) 11 | t2 = Tensor(2.0) 12 | t3 = t1 * t2 13 | self.assertEqual(t3.data.tolist(), 2.0) 14 | 15 | t1 = Tensor(1.0, requires_grad=True) 16 | t2 = Tensor(2.0) 17 | t3 = t1 * t2 18 | t3.backward() 19 | self.assertEqual(t1.grad.data.tolist(), 2.0) 20 | 21 | t1 = Tensor(1.0) 22 | t2 = Tensor(2.0, requires_grad=True) 23 | t3 = t1 * t2 24 | t3.backward() 25 | self.assertEqual(t2.grad.data.tolist(), 1.0) 26 | 27 | t1 = Tensor(1.0, requires_grad=True) 28 | t2 = Tensor(2.0, requires_grad=True) 29 | t3 = t1 * t2 30 | t3.backward() 31 | self.assertEqual(t1.grad.data.tolist(), 2.0) 32 | self.assertEqual(t2.grad.data.tolist(), 1.0) 33 | 34 | # vector mul 35 | t1 = Tensor([1.0, 2.0]) 36 | t2 = Tensor([2.0, 3.0]) 37 | t3 = t1 * t2 38 | self.assertEqual(t3.data.tolist(), [2.0, 6.0]) 39 | 40 | t1 = Tensor([1.0, 2.0], requires_grad=True) 41 | t2 = Tensor([2.0, 3.0]) 42 | t3 = t1 * t2 43 | t3.backward(Tensor([1.0, 1.0])) 44 | self.assertEqual(t1.grad.data.tolist(), [2.0, 3.0]) 45 | 46 | t1 = Tensor([1.0, 2.0]) 
47 | t2 = Tensor([2.0, 3.0], requires_grad=True) 48 | t3 = t1 * t2 49 | t3.backward(Tensor([1.0, 1.0])) 50 | self.assertEqual(t2.grad.data.tolist(), [1.0, 2.0]) 51 | 52 | t1 = Tensor([1.0, 2.0], requires_grad=True) 53 | t2 = Tensor([2.0, 3.0], requires_grad=True) 54 | t3 = t1 * t2 55 | t3.backward(Tensor([1.0, 1.0])) 56 | self.assertEqual(t1.grad.data.tolist(), [2.0, 3.0]) 57 | self.assertEqual(t2.grad.data.tolist(), [1.0, 2.0]) 58 | 59 | def test_broadcast_mul(self): 60 | # (2,) * () 61 | t1 = Tensor([1.0, 2.0], requires_grad=True) 62 | t2 = Tensor(2.0, requires_grad=True) 63 | t3 = t1 * t2 64 | t3.backward(Tensor([1.0, 1.0])) 65 | self.assertEqual(t1.grad.data.tolist(), [2.0, 2.0]) 66 | self.assertEqual(t2.grad.data.tolist(), 3.0) 67 | 68 | # (2,) * (1,) 69 | t1 = Tensor([1.0, 2.0], requires_grad=True) 70 | t2 = Tensor([2.0], requires_grad=True) 71 | t3 = t1 * t2 72 | t3.backward(Tensor([1.0, 1.0])) 73 | self.assertEqual(t1.grad.data.tolist(), [2.0, 2.0]) 74 | self.assertEqual(t2.grad.data.tolist(), [3.0]) 75 | 76 | # (2, 2) * () 77 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 78 | t2 = Tensor(2.0, requires_grad=True) 79 | t3 = t1 * t2 80 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 81 | self.assertEqual(t1.grad.data.tolist(), [[2.0, 2.0], [2.0, 2.0]]) 82 | self.assertEqual(t2.grad.data.tolist(), 10.0) 83 | 84 | # (2, 2) * (1,) 85 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 86 | t2 = Tensor([2.0], requires_grad=True) 87 | t3 = t1 * t2 88 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 89 | self.assertEqual(t1.grad.data.tolist(), [[2.0, 2.0], [2.0, 2.0]]) 90 | self.assertEqual(t2.grad.data.tolist(), [10.0]) 91 | 92 | # (2, 2) * (2, ) 93 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 94 | t2 = Tensor([2.0, 3.0], requires_grad=True) 95 | t3 = t1 * t2 96 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 97 | self.assertEqual(t1.grad.data.tolist(), [[2.0, 3.0], [2.0, 3.0]]) 98 | self.assertEqual(t2.grad.data.tolist(), [4.0, 6.0]) 99 | -------------------------------------------------------------------------------- /tests/test_autograd/test_sub.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | 5 | 6 | class TestSub(TestCase): 7 | 8 | def test_simple_sub(self): 9 | # scalar sub 10 | t1 = Tensor(1.0) 11 | t2 = Tensor(2.0) 12 | t3 = t1 - t2 13 | self.assertEqual(t3.data.tolist(), -1.0) 14 | 15 | t1 = Tensor(2.0, requires_grad=True) 16 | t2 = Tensor(3.0) 17 | t3 = t1 - t2 18 | t3.backward() 19 | self.assertEqual(t1.grad.data.tolist(), 1.0) 20 | 21 | t1 = Tensor(2.0) 22 | t2 = Tensor(3.0, requires_grad=True) 23 | t3 = t1 - t2 24 | t3.backward() 25 | self.assertEqual(t2.grad.data.tolist(), -1.0) 26 | 27 | t1 = Tensor(2.0, requires_grad=True) 28 | t2 = Tensor(3.0, requires_grad=True) 29 | t3 = t1 - t2 30 | t3.backward() 31 | self.assertEqual(t1.grad.data.tolist(), 1.0) 32 | self.assertEqual(t2.grad.data.tolist(), -1.0) 33 | 34 | # vector sub 35 | t1 = Tensor([1.0, 2.0]) 36 | t2 = Tensor([2.0, 3.0]) 37 | t3 = t1 - t2 38 | self.assertEqual(t3.data.tolist(), [-1.0, -1.0]) 39 | 40 | t1 = Tensor([1.0, 2.0], requires_grad=True) 41 | t2 = Tensor([2.0, 3.0]) 42 | t3 = t1 - t2 43 | t3.backward(Tensor([1.0, 1.0])) 44 | self.assertEqual(t1.grad.data.tolist(), [1.0, 1.0]) 45 | 46 | t1 = Tensor([1.0, 2.0]) 47 | t2 = Tensor([2.0, 3.0], requires_grad=True) 48 | t3 = t1 - t2 49 | t3.backward(Tensor([1.0, 1.0])) 50 | self.assertEqual(t2.grad.data.tolist(), [-1.0, 
-1.0]) 51 | 52 | t1 = Tensor([1.0, 2.0], requires_grad=True) 53 | t2 = Tensor([2.0, 3.0], requires_grad=True) 54 | t3 = t1 - t2 55 | t3.backward(Tensor([1.0, 1.0])) 56 | self.assertEqual(t1.grad.data.tolist(), [1.0, 1.0]) 57 | self.assertEqual(t2.grad.data.tolist(), [-1.0, -1.0]) 58 | 59 | def test_broadcast_sub(self): 60 | # (2,) - () 61 | t1 = Tensor([1.0, 2.0], requires_grad=True) 62 | t2 = Tensor(2.0, requires_grad=True) 63 | t3 = t1 - t2 64 | t3.backward(Tensor([1.0, 1.0])) 65 | self.assertEqual(t1.grad.data.tolist(), [1.0, 1.0]) 66 | self.assertEqual(t2.grad.data.tolist(), -2.0) 67 | 68 | # (2,) - (1,) 69 | t1 = Tensor([1.0, 2.0], requires_grad=True) 70 | t2 = Tensor([2.0], requires_grad=True) 71 | t3 = t1 - t2 72 | t3.backward(Tensor([1.0, 1.0])) 73 | self.assertEqual(t1.grad.data.tolist(), [1.0, 1.0]) 74 | self.assertEqual(t2.grad.data.tolist(), [-2.0]) 75 | 76 | # (2, 2) - () 77 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 78 | t2 = Tensor(2.0, requires_grad=True) 79 | t3 = t1 - t2 80 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 81 | self.assertEqual(t1.grad.data.tolist(), [[1.0, 1.0], [1.0, 1.0]]) 82 | self.assertEqual(t2.grad.data.tolist(), -4.0) 83 | 84 | # (2, 2) - (1,) 85 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 86 | t2 = Tensor([2.0], requires_grad=True) 87 | t3 = t1 - t2 88 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 89 | self.assertEqual(t1.grad.data.tolist(), [[1.0, 1.0], [1.0, 1.0]]) 90 | self.assertEqual(t2.grad.data.tolist(), [-4.0]) 91 | 92 | # (2, 2) - (2, ) 93 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 94 | t2 = Tensor([2.0, 3.0], requires_grad=True) 95 | t3 = t1 - t2 96 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 97 | self.assertEqual(t1.grad.data.tolist(), [[1.0, 1.0], [1.0, 1.0]]) 98 | self.assertEqual(t2.grad.data.tolist(), [-2.0, -2.0]) 99 | -------------------------------------------------------------------------------- /tests/test_autograd/test_div.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | 5 | 6 | class TestDiv(TestCase): 7 | 8 | def test_simple_div(self): 9 | # scalar div 10 | t1 = Tensor(1.0) 11 | t2 = Tensor(2.0) 12 | t3 = t1 / t2 13 | self.assertEqual(t3.data.tolist(), 0.5) 14 | 15 | t1 = Tensor(1.0, requires_grad=True) 16 | t2 = Tensor(2.0) 17 | t3 = t1 / t2 18 | t3.backward() 19 | self.assertEqual(t1.grad.data.tolist(), 0.5) 20 | 21 | t1 = Tensor(1.0) 22 | t2 = Tensor(2.0, requires_grad=True) 23 | t3 = t1 / t2 24 | t3.backward() 25 | self.assertEqual(t2.grad.data.tolist(), -0.25) 26 | 27 | t1 = Tensor(1.0, requires_grad=True) 28 | t2 = Tensor(2.0, requires_grad=True) 29 | t3 = t1 / t2 30 | t3.backward() 31 | self.assertEqual(t1.grad.data.tolist(), 0.5) 32 | self.assertEqual(t2.grad.data.tolist(), -0.25) 33 | 34 | # vector div 35 | t1 = Tensor([1.0, 2.0]) 36 | t2 = Tensor([2.0, 4.0]) 37 | t3 = t1 / t2 38 | self.assertEqual(t3.data.tolist(), [0.5, 0.5]) 39 | 40 | t1 = Tensor([1.0, 2.0], requires_grad=True) 41 | t2 = Tensor([2.0, 4.0]) 42 | t3 = t1 / t2 43 | t3.backward(Tensor([1.0, 1.0])) 44 | self.assertEqual(t1.grad.data.tolist(), [0.5, 0.25]) 45 | 46 | t1 = Tensor([1.0, 2.0]) 47 | t2 = Tensor([2.0, 4.0], requires_grad=True) 48 | t3 = t1 / t2 49 | t3.backward(Tensor([1.0, 1.0])) 50 | self.assertEqual(t2.grad.data.tolist(), [-0.25, -1/8]) 51 | 52 | t1 = Tensor([1.0, 2.0], requires_grad=True) 53 | t2 = Tensor([2.0, 4.0], requires_grad=True) 54 | t3 = t1 / t2 55 | 
t3.backward(Tensor([1.0, 1.0])) 56 | self.assertEqual(t1.grad.data.tolist(), [0.5, 0.25]) 57 | self.assertEqual(t2.grad.data.tolist(), [-0.25, -1/8]) 58 | 59 | def test_broadcast_div(self): 60 | # (2,) / () 61 | t1 = Tensor([1.0, 2.0], requires_grad=True) 62 | t2 = Tensor(2.0, requires_grad=True) 63 | t3 = t1 / t2 64 | t3.backward(Tensor([1.0, 1.0])) 65 | self.assertEqual(t1.grad.data.tolist(), [0.5, 0.5]) 66 | self.assertEqual(t2.grad.data.tolist(), -0.75) 67 | 68 | # (2,) / (1,) 69 | t1 = Tensor([1.0, 2.0], requires_grad=True) 70 | t2 = Tensor([2.0], requires_grad=True) 71 | t3 = t1 / t2 72 | t3.backward(Tensor([1.0, 1.0])) 73 | self.assertEqual(t1.grad.data.tolist(), [0.5, 0.5]) 74 | self.assertEqual(t2.grad.data.tolist(), [-0.75]) 75 | 76 | # (2, 2) / () 77 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 78 | t2 = Tensor(2.0, requires_grad=True) 79 | t3 = t1 / t2 80 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 81 | self.assertEqual(t1.grad.data.tolist(), [[0.5, 0.5], [0.5, 0.5]]) 82 | self.assertEqual(t2.grad.data.tolist(), -2.5) 83 | 84 | # (2, 2) / (1,) 85 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 86 | t2 = Tensor([2.0], requires_grad=True) 87 | t3 = t1 / t2 88 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 89 | self.assertEqual(t1.grad.data.tolist(), [[0.5, 0.5], [0.5, 0.5]]) 90 | self.assertEqual(t2.grad.data.tolist(), [-2.5]) 91 | 92 | # (2, 2) / (2, ) 93 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 94 | t2 = Tensor([2.0, 4.0], requires_grad=True) 95 | t3 = t1 / t2 96 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 97 | self.assertEqual(t1.grad.data.tolist(), [[0.5, 0.25], [0.5, 0.25]]) 98 | self.assertEqual(t2.grad.data.tolist(), [-1.0, -0.375]) 99 | -------------------------------------------------------------------------------- /minitorch/tensor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import Union, Tuple 3 | 4 | from minitorch import autograd 5 | 6 | 7 | Arrayable = Union[float, list, np.ndarray] 8 | 9 | 10 | def ensure_ndarray(data: Arrayable) -> np.ndarray: 11 | if isinstance(data, np.ndarray): 12 | return data 13 | else: 14 | return np.array(data) 15 | 16 | 17 | def ensure_tensor(data): 18 | if isinstance(data, Tensor): 19 | return data 20 | else: 21 | return Tensor(data) 22 | 23 | 24 | class Tensor: 25 | def __init__(self, 26 | data: Arrayable, 27 | requires_grad: bool = False, 28 | grad_fn=None): 29 | self.data = ensure_ndarray(data) 30 | self.requires_grad = requires_grad 31 | self.grad = None 32 | self.grad_fn = grad_fn 33 | 34 | @property 35 | def shape(self): 36 | return self.data.shape 37 | 38 | def __repr__(self): 39 | return f"Tensor({self.data}, requires_grad={self.requires_grad})" 40 | 41 | def __add__(self, other) -> 'Tensor': 42 | return autograd.functional.add(self, ensure_tensor(other)) 43 | 44 | def __radd__(self, other) -> 'Tensor': 45 | return autograd.functional.add(ensure_tensor(other), self) 46 | 47 | def __iadd__(self, other) -> 'Tensor': 48 | self.data += ensure_tensor(other).data 49 | return self 50 | 51 | def __neg__(self) -> 'Tensor': 52 | return autograd.functional.neg(self) 53 | 54 | def __sub__(self, other) -> 'Tensor': 55 | return autograd.functional.sub(self, ensure_tensor(other)) 56 | 57 | def __rsub__(self, other) -> 'Tensor': 58 | return autograd.functional.sub(ensure_tensor(other), self) 59 | 60 | def __isub__(self, other) -> 'Tensor': 61 | self.data -= ensure_tensor(other).data 62 | return self 63 | 64 | def 
__mul__(self, other) -> 'Tensor': 65 | return autograd.functional.mul(self, ensure_tensor(other)) 66 | 67 | def __rmul__(self, other) -> 'Tensor': 68 | return autograd.functional.mul(ensure_tensor(other), self) 69 | 70 | def __truediv__(self, other) -> 'Tensor': 71 | return autograd.functional.div(self, ensure_tensor(other)) 72 | 73 | def __rtruediv__(self, other) -> 'Tensor': 74 | return autograd.functional.div(ensure_tensor(other), self) 75 | 76 | def __matmul__(self, other) -> 'Tensor': 77 | return autograd.functional.matmul(self, other) 78 | 79 | def __pow__(self, other: float) -> 'Tensor': 80 | return autograd.functional.pow(self, other) 81 | 82 | def sum(self, axis: Union[int, Tuple[int]] = None) -> 'Tensor': 83 | return autograd.functional.sum(self, axis) 84 | 85 | def mean(self, axis: Union[int, Tuple[int]] = None) -> 'Tensor': 86 | return autograd.functional.mean(self, axis) 87 | 88 | def t(self) -> 'Tensor': 89 | """transpose""" 90 | return autograd.functional.t(self) 91 | 92 | def exp(self) -> 'Tensor': 93 | return autograd.functional.exp(self) 94 | 95 | def relu(self) -> 'Tensor': 96 | return autograd.functional.relu(self) 97 | 98 | def backward(self, grad: 'Tensor' = None) -> None: 99 | assert self.requires_grad 100 | if grad is None and self.shape != (): 101 | raise RuntimeError("grad can be implicitly created only for scalar outputs") 102 | grad = grad if grad else Tensor(1.0) 103 | from minitorch.autograd.engine import Engine 104 | engine = Engine() 105 | engine.execute(self, grad) 106 | 107 | def zero_grad(self) -> None: 108 | self.grad = None 109 | 110 | 111 | def rand(*shape, requires_grad=False) -> Tensor: 112 | data = np.random.randn(*shape) 113 | return Tensor(data=data, requires_grad=requires_grad) 114 | -------------------------------------------------------------------------------- /minitorch/autograd/functional.py: -------------------------------------------------------------------------------- 1 | from typing import Union, Tuple 2 | 3 | import numpy as np 4 | 5 | from minitorch import Tensor 6 | from .node import collect_next_edges 7 | from .node import * 8 | 9 | 10 | ############## reduce operator ################## 11 | 12 | def sum(t: Tensor, axis: Union[int, Tuple[int]] = None) -> Tensor: 13 | data = t.data.sum(axis=axis) 14 | requires_grad = t.requires_grad 15 | if requires_grad: 16 | sum_bw = SumBackward() 17 | sum_bw.set_next_edges(collect_next_edges(t)) 18 | sum_bw.axis = axis 19 | sum_bw.shape = t.shape 20 | return Tensor(data=data, 21 | requires_grad=True, 22 | grad_fn=sum_bw) 23 | else: 24 | return Tensor(data=data) 25 | 26 | 27 | def mean(t: Tensor, axis: Union[int, Tuple[int]] = None) -> Tensor: 28 | data = t.data.mean(axis=axis) 29 | requires_grad = t.requires_grad 30 | if requires_grad: 31 | mean_bw = MeanBackward() 32 | mean_bw.set_next_edges(collect_next_edges(t)) 33 | mean_bw.axis = axis 34 | mean_bw.shape = t.shape 35 | return Tensor(data=data, 36 | requires_grad=True, 37 | grad_fn=mean_bw) 38 | else: 39 | return Tensor(data=data) 40 | 41 | ############## unary operator ################## 42 | 43 | def neg(t: Tensor) -> Tensor: 44 | data = -t.data 45 | requires_grad = t.requires_grad 46 | if requires_grad: 47 | neg_bw = NegBackward() 48 | neg_bw.set_next_edges(collect_next_edges(t)) 49 | return Tensor(data=data, 50 | requires_grad=True, 51 | grad_fn=neg_bw) 52 | else: 53 | return Tensor(data=data) 54 | 55 | 56 | def t(t: Tensor) -> Tensor: 57 | # transpose 58 | data = t.data.T 59 | requires_grad = t.requires_grad 60 | if requires_grad: 61 | 
t_bw = TBackward() 62 | t_bw.set_next_edges(collect_next_edges(t)) 63 | return Tensor(data=data, 64 | requires_grad=True, 65 | grad_fn=t_bw) 66 | else: 67 | return Tensor(data=data) 68 | 69 | 70 | def relu(t: Tensor) -> Tensor: 71 | data = np.maximum(t.data, 0) 72 | requires_grad = t.requires_grad 73 | if requires_grad: 74 | relu_bw = ReluBackward() 75 | relu_bw.set_next_edges(collect_next_edges(t)) 76 | relu_bw.input = Tensor(data=t.data) 77 | return Tensor(data=data, 78 | requires_grad=True, 79 | grad_fn=relu_bw) 80 | else: 81 | return Tensor(data=data) 82 | 83 | 84 | def exp(t: Tensor) -> Tensor: 85 | data = np.exp(t.data) 86 | requires_grad = t.requires_grad 87 | if requires_grad: 88 | exp_bw = ExpBackward() 89 | exp_bw.set_next_edges(collect_next_edges(t)) 90 | exp_bw.output = Tensor(data=data) 91 | return Tensor(data=data, 92 | requires_grad=True, 93 | grad_fn=exp_bw) 94 | else: 95 | return Tensor(data=data) 96 | 97 | ############## binary operator ################## 98 | def add(t1: Tensor, t2: Tensor) -> Tensor: 99 | data = t1.data + t2.data 100 | requires_grad = t1.requires_grad or t2.requires_grad 101 | if requires_grad: 102 | add_bw = AddBackward() 103 | add_bw.set_next_edges(collect_next_edges(t1, t2)) 104 | if t1.requires_grad: 105 | add_bw.t1_shape = t1.shape 106 | if t2.requires_grad: 107 | add_bw.t2_shape = t2.shape 108 | return Tensor(data=data, 109 | requires_grad=True, 110 | grad_fn=add_bw) 111 | else: 112 | return Tensor(data=data) 113 | 114 | 115 | def sub(t1: Tensor, t2: Tensor) -> Tensor: 116 | data = t1.data - t2.data 117 | requires_grad = t1.requires_grad or t2.requires_grad 118 | if requires_grad: 119 | sub_bw = SubBackward() 120 | sub_bw.set_next_edges(collect_next_edges(t1, t2)) 121 | if t1.requires_grad: 122 | sub_bw.t1_shape = t1.shape 123 | if t2.requires_grad: 124 | sub_bw.t2_shape = t2.shape 125 | return Tensor(data=data, 126 | requires_grad=True, 127 | grad_fn=sub_bw) 128 | else: 129 | return Tensor(data=data) 130 | 131 | 132 | def mul(t1: Tensor, t2: Tensor) -> Tensor: 133 | data = t1.data * t2.data 134 | requires_grad = t1.requires_grad or t2.requires_grad 135 | if requires_grad: 136 | mul_bw = MulBackward() 137 | mul_bw.set_next_edges(collect_next_edges(t1, t2)) 138 | if t1.requires_grad: 139 | mul_bw.t2 = Tensor(data=t2.data) 140 | mul_bw.t1_shape = t1.shape 141 | if t2.requires_grad: 142 | mul_bw.t1 = Tensor(data=t1.data) 143 | mul_bw.t2_shape = t2.shape 144 | return Tensor(data=data, 145 | requires_grad=True, 146 | grad_fn=mul_bw) 147 | else: 148 | return Tensor(data=data) 149 | 150 | 151 | def div(t1: Tensor, t2: Tensor) -> Tensor: 152 | data = t1.data / t2.data 153 | requires_grad = t1.requires_grad or t2.requires_grad 154 | if requires_grad: 155 | div_bw = DivBackward() 156 | div_bw.set_next_edges(collect_next_edges(t1, t2)) 157 | if t1.requires_grad: 158 | div_bw.t2 = Tensor(data=t2.data) 159 | div_bw.t1_shape = t1.shape 160 | if t2.requires_grad: 161 | div_bw.t1 = Tensor(data=t1.data) 162 | div_bw.t2 = Tensor(data=t2.data) if div_bw.t2 is None else div_bw.t2 163 | div_bw.t2_shape = t2.shape 164 | return Tensor(data=data, 165 | requires_grad=True, 166 | grad_fn=div_bw) 167 | else: 168 | return Tensor(data=data) 169 | 170 | 171 | def matmul(t1: Tensor, t2: Tensor) -> Tensor: 172 | data = t1.data @ t2.data 173 | requires_grad = t1.requires_grad or t2.requires_grad 174 | if requires_grad: 175 | matmul_bw = MatMulBackward() 176 | matmul_bw.set_next_edges(collect_next_edges(t1, t2)) 177 | if t1.requires_grad: 178 | matmul_bw.t2 = t2 179 | if 
t2.requires_grad: 180 | matmul_bw.t1 = t1 181 | return Tensor(data=data, 182 | requires_grad=True, 183 | grad_fn=matmul_bw) 184 | else: 185 | return Tensor(data=data) 186 | 187 | 188 | def pow(t1: Tensor, t2: float) -> Tensor: 189 | data = t1.data ** t2 190 | requires_grad = t1.requires_grad 191 | if requires_grad: 192 | pow_bw = PowBackward() 193 | pow_bw.set_next_edges(collect_next_edges(t1)) 194 | pow_bw.t1 = Tensor(data=t1.data) 195 | pow_bw.t2 = t2 196 | return Tensor(data=data, 197 | requires_grad=True, 198 | grad_fn=pow_bw) 199 | else: 200 | return Tensor(data=data) 201 | -------------------------------------------------------------------------------- /minitorch/autograd/node.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import numpy as np 4 | from typing import List 5 | 6 | from minitorch import Tensor 7 | from .edge import Edge 8 | 9 | 10 | def collect_next_edges(*tensors) -> List[Edge]: 11 | next_edges = [] 12 | for t in tensors: 13 | if not t.requires_grad: 14 | continue 15 | if t.grad_fn is None: 16 | t.grad_fn = AccumulateGrad(t) 17 | next_edges.append(Edge(t.grad_fn)) 18 | else: 19 | next_edges.append(Edge(t.grad_fn)) 20 | return next_edges 21 | 22 | 23 | def unbroadcast(grad_input: Tensor, input_shape: tuple) -> Tensor: 24 | """When broadcast is applied to an operation, unbroadcast should also 25 | be executed when backpropagating. 26 | 27 | References: 28 | 1. https://numpy.org/doc/stable/user/basics.broadcasting.html 29 | 2. http://coldattic.info/post/116/ 30 | 3. https://github.com/joelgrus/autograd/blob/part06/autograd/tensor.py#L150 31 | """ 32 | if grad_input.shape == input_shape: 33 | return grad_input 34 | data = grad_input.data 35 | ndims_added = len(grad_input.shape) - len(input_shape) 36 | for _ in range(ndims_added): 37 | data = data.sum(axis=0) 38 | for i, dim in enumerate(input_shape): 39 | if dim == 1: 40 | data = data.sum(axis=i, keepdims=True) 41 | 42 | return Tensor(data=data) 43 | 44 | 45 | class Node(metaclass=ABCMeta): 46 | 47 | def __call__(self, *grad_outputs): 48 | return self.apply(*grad_outputs) 49 | 50 | def set_next_edges(self, next_edges: List[Edge] = None): 51 | self.next_edges = next_edges 52 | 53 | @abstractmethod 54 | def apply(self, *grad_outputs): 55 | """You must implement the abstract method for custome Node""" 56 | pass 57 | 58 | 59 | class AccumulateGrad(Node): 60 | 61 | def __init__(self, leaf_tensor: Tensor): 62 | self.leaf_tensor = leaf_tensor 63 | 64 | def apply(self, grad_output: Tensor): 65 | if self.leaf_tensor.grad is None: 66 | self.leaf_tensor.grad = grad_output 67 | else: 68 | self.leaf_tensor.grad += grad_output 69 | return None 70 | 71 | 72 | ############## reduce operator ################## 73 | 74 | class SumBackward(Node): 75 | 76 | def __init__(self): 77 | self.axis = None 78 | self.shape: tuple = None 79 | 80 | def apply(self, grad_output: Tensor) -> tuple: 81 | if isinstance(self.axis, int): 82 | self.axis = [self.axis] 83 | if self.axis is None: 84 | shape = [1] * len(self.shape) 85 | else: 86 | shape = [1 if i in self.axis else self.shape[i] for i in range(len(self.shape))] 87 | data = grad_output.data.reshape(shape) + np.zeros(self.shape) 88 | return Tensor(data=data), 89 | 90 | 91 | class MeanBackward(Node): 92 | 93 | def __init__(self): 94 | self.axis = None 95 | self.shape: tuple = None 96 | 97 | def apply(self, grad_output: Tensor) -> tuple: 98 | if isinstance(self.axis, int): 99 | self.axis = [self.axis] 100 | if 
self.axis is None: 101 | shape = [1] * len(self.shape) 102 | else: 103 | shape = [1 if i in self.axis else self.shape[i] for i in range(len(self.shape))] 104 | scale = np.prod(grad_output.shape) / np.prod(self.shape) 105 | data = scale * grad_output.data.reshape(shape) + np.zeros(self.shape) 106 | return Tensor(data=data), 107 | 108 | 109 | ############## unary operator ################## 110 | 111 | class NegBackward(Node): 112 | 113 | def apply(self, grad_output: Tensor) -> list: 114 | return -grad_output, 115 | 116 | 117 | class TBackward(Node): 118 | """Transpose""" 119 | 120 | def apply(self, grad_output: Tensor) -> list: 121 | return Tensor(data=grad_output.data.T), 122 | 123 | 124 | class ReluBackward(Node): 125 | 126 | def __init__(self): 127 | self.input: Tensor = None 128 | 129 | def apply(self, grad_output: Tensor) -> list: 130 | return grad_output * Tensor(data=(self.input.data >= 0)), 131 | 132 | 133 | class ExpBackward(Node): 134 | 135 | def __init__(self): 136 | self.output: Tensor = None 137 | 138 | def apply(self, grad_output: Tensor) -> list: 139 | return grad_output * self.output, 140 | 141 | ############## binary operator ################## 142 | 143 | class AddBackward(Node): 144 | 145 | def __init__(self): 146 | self.t1_shape: tuple = None 147 | self.t2_shape: tuple = None 148 | 149 | def apply(self, grad_output: Tensor) -> list: 150 | grad_input = [] 151 | if self.t1_shape is not None: 152 | grad_input.append(unbroadcast(grad_output, self.t1_shape)) 153 | if self.t2_shape is not None: 154 | grad_input.append(unbroadcast(grad_output, self.t2_shape)) 155 | return grad_input 156 | 157 | 158 | class SubBackward(Node): 159 | 160 | def __init__(self): 161 | self.t1_shape: tuple = None 162 | self.t2_shape: tuple = None 163 | 164 | def apply(self, grad_output: Tensor) -> list: 165 | grad_input = [] 166 | if self.t1_shape is not None: 167 | grad_input.append(unbroadcast(grad_output, self.t1_shape)) 168 | if self.t2_shape is not None: 169 | grad_input.append(unbroadcast(-grad_output, self.t2_shape)) 170 | return grad_input 171 | 172 | 173 | class MulBackward(Node): 174 | 175 | def __init__(self): 176 | self.t1: Tensor = None 177 | self.t1_shape: tuple = None 178 | self.t2: Tensor = None 179 | self.t2_shape: tuple = None 180 | 181 | def apply(self, grad_output: Tensor) -> list: 182 | grad_input = [] 183 | if self.t2 is not None: 184 | grad_input.append(unbroadcast(self.t2 * grad_output, self.t1_shape)) 185 | if self.t1 is not None: 186 | grad_input.append(unbroadcast(self.t1 * grad_output, self.t2_shape)) 187 | return grad_input 188 | 189 | 190 | class DivBackward(Node): 191 | 192 | def __init__(self): 193 | self.t1: Tensor = None 194 | self.t1_shape: tuple = None 195 | self.t2: Tensor = None 196 | self.t2_shape: tuple = None 197 | 198 | def apply(self, grad_output: Tensor) -> list: 199 | grad_input = [] 200 | if self.t2 is not None and self.t1_shape is not None: 201 | grad_input.append(unbroadcast(1 / self.t2 * grad_output, self.t1_shape)) 202 | if self.t1 is not None: 203 | grad_input.append(unbroadcast(-self.t1 / (self.t2 ** 2) * grad_output, self.t2_shape)) 204 | return grad_input 205 | 206 | 207 | class MatMulBackward(Node): 208 | 209 | def __init__(self): 210 | self.t1: Tensor = None 211 | self.t2: Tensor = None 212 | 213 | def apply(self, grad_output: Tensor) -> list: 214 | grad_input = [] 215 | if self.t2 is not None: 216 | grad_input.append(grad_output @ Tensor(self.t2.data.T)) 217 | if self.t1 is not None: 218 | grad_input.append(Tensor(self.t1.data.T) @ grad_output) 
219 | return grad_input 220 | 221 | 222 | class PowBackward(Node): 223 | 224 | def __init__(self): 225 | self.t1: Tensor = None 226 | self.t2: float = None 227 | 228 | def apply(self, grad_output: Tensor) -> tuple: 229 | return grad_output * self.t2 * self.t1 ** (self.t2-1), 230 | --------------------------------------------------------------------------------
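Every operator in minitorch follows the same two-part pattern seen above: a forward function in `autograd/functional.py` builds the output `Tensor` and attaches a `grad_fn`, while a `Node` subclass in `autograd/node.py` implements `apply` to return gradients for its `next_edges`. As a sketch of how the graph could be extended, assuming a hypothetical `log` operator that is not part of the library (it mirrors the existing `exp`/`ExpBackward` pair):

```python
import numpy as np

from minitorch import Tensor
from minitorch.autograd.node import Node, collect_next_edges


class LogBackward(Node):

    def __init__(self):
        self.input: Tensor = None

    def apply(self, grad_output: Tensor) -> tuple:
        # d/dx log(x) = 1 / x, evaluated at the saved forward input
        return grad_output * Tensor(data=1.0 / self.input.data),


def log(t: Tensor) -> Tensor:
    data = np.log(t.data)
    if t.requires_grad:
        log_bw = LogBackward()
        log_bw.set_next_edges(collect_next_edges(t))
        log_bw.input = Tensor(data=t.data)
        return Tensor(data=data, requires_grad=True, grad_fn=log_bw)
    else:
        return Tensor(data=data)
```

A quick check of the sketch: with `t = Tensor(2.0, requires_grad=True)`, calling `log(t).backward()` should leave `t.grad` close to `Tensor(0.5)`, matching d log(x)/dx = 1/x at x = 2.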