├── minitorch ├── autograd │ ├── __init__.py │ ├── edge.py │ ├── engine.py │ ├── functional.py │ └── node.py ├── nn │ ├── modules │ │ ├── __init__.py │ │ ├── activation.py │ │ ├── linear.py │ │ ├── loss.py │ │ └── module.py │ ├── __init__.py │ └── parameter.py ├── optim │ ├── __init__.py │ ├── optimizer.py │ └── sgd.py ├── __init__.py └── tensor.py ├── tests ├── test_nn │ ├── __init__.py │ ├── test_linear.py │ ├── test_loss.py │ └── test_activation.py ├── test_autograd │ ├── __init__.py │ ├── test_neg.py │ ├── test_pow.py │ ├── test_relu.py │ ├── test_exp.py │ ├── test_sum.py │ ├── test_mean.py │ ├── test_matmul.py │ ├── test_add.py │ ├── test_mul.py │ ├── test_sub.py │ └── test_div.py └── test_suite.py ├── requirements.txt ├── setup.py ├── examples ├── example.py ├── neural_network.py └── train.py ├── .flake8 ├── .gitignore └── README.md /minitorch/autograd/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /minitorch/nn/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /minitorch/optim/__init__.py: -------------------------------------------------------------------------------- 1 | from .sgd import * 2 | -------------------------------------------------------------------------------- /tests/test_nn/__init__.py: -------------------------------------------------------------------------------- 1 | from .test_linear import TestLinear 2 | from .test_loss import TestLoss 3 | from .test_activation import TestActivation -------------------------------------------------------------------------------- /minitorch/__init__.py: -------------------------------------------------------------------------------- 1 | from .tensor import Tensor, rand 2 | from .autograd.functional import * 3 | -------------------------------------------------------------------------------- /minitorch/autograd/edge.py: -------------------------------------------------------------------------------- 1 | class Edge: 2 | 3 | def __init__(self, node): 4 | self.node = node 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | mypy==0.800 2 | mypy-extensions==0.4.3 3 | numpy==1.19.5 4 | typed-ast==1.4.2 5 | typing-extensions==3.7.4.3 6 | -------------------------------------------------------------------------------- /minitorch/nn/__init__.py: -------------------------------------------------------------------------------- 1 | from .modules.activation import * 2 | from .modules.linear import * 3 | from .modules.loss import * 4 | from .modules.module import * 5 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | 4 | setup( 5 | name="minitorch", 6 | version="1.0", 7 | author="zhouzaida", 8 | description="a minimal neural network library", 9 | packages=["minitorch"], 10 | ) 11 | -------------------------------------------------------------------------------- /minitorch/optim/optimizer.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class Optimizer(metaclass=ABCMeta): 5 | """Base class for all optimizers.""" 6 | 7 | @abstractmethod 8 | def step(self): 9 | pass 10 |
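The `Optimizer` base class above only fixes the `step()` contract; concrete optimizers decide how to apply `param.grad`. As a hedged illustration (the class below is a sketch and not part of minitorch; the name `SGDWithMomentum` and its `momentum`/`velocities` attributes are assumptions), a momentum variant following the same conventions as `SGD` in `minitorch/optim/sgd.py` could look like:

```python
from minitorch.optim.optimizer import Optimizer


class SGDWithMomentum(Optimizer):
    """Hypothetical momentum variant of SGD; illustrative only."""

    def __init__(self, params, lr, momentum=0.9):
        if lr < 0.0:
            raise ValueError("Invalid learning rate: {}".format(lr))
        self.params = list(params)
        self.lr = lr
        self.momentum = momentum
        # one velocity buffer per parameter; numpy broadcasting grows it on first use
        self.velocities = [0.0 for _ in self.params]

    def step(self):
        for i, param in enumerate(self.params):
            # v <- momentum * v + grad, then param <- param - lr * v
            self.velocities[i] = self.momentum * self.velocities[i] + param.grad.data
            param.data = param.data - self.lr * self.velocities[i]
```

It would be used exactly like `SGD`: construct it from `model.parameters()` and a learning rate, then call `step()` after `loss.backward()`.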
-------------------------------------------------------------------------------- /examples/example.py: -------------------------------------------------------------------------------- 1 | from minitorch import Tensor 2 | 3 | 4 | t1 = Tensor(4.0) 5 | t2 = Tensor(3.0, requires_grad=True) 6 | t3 = -t2 7 | t4 = t1 * t3 8 | t4.backward() 9 | print(f"t1 grad: {t1.grad}") # t1 grad: None 10 | print(f"t2 grad: {t2.grad}") # t2 grad: Tensor(-4.0, requires_grad=False) 11 | -------------------------------------------------------------------------------- /minitorch/nn/parameter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from minitorch import Tensor 4 | 5 | 6 | class Parameter(Tensor): 7 | """A kind of Tensor that is to be considered a module parameter.""" 8 | 9 | def __init__(self, *shape) -> None: 10 | data = np.random.randn(*shape) 11 | super().__init__(data=data, requires_grad=True) 12 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | select = B,C,E,F,P,T4,W,B9 3 | max-line-length = 120 4 | max-doc-length = 120 5 | # C408 ignored because we like the dict keyword argument syntax 6 | # E501 is not flexible enough, we're using B950 instead 7 | ignore = 8 | E203,E305,E402,E501,E721,E741,F403,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303 9 | exclude = .git,__pycache__,docs/source/conf.py,old,build,dist -------------------------------------------------------------------------------- /minitorch/nn/modules/activation.py: -------------------------------------------------------------------------------- 1 | from minitorch import Tensor 2 | import minitorch.autograd.functional as F 3 | from .module import Module 4 | 5 | 6 | class Sigmoid(Module): 7 | def forward(self, input: Tensor) -> Tensor: 8 | return 1 / (1 + F.exp(-input)) 9 | 10 | 11 | class ReLU(Module): 12 | def forward(self, input: Tensor) -> Tensor: 13 | return F.relu(input) 14 | -------------------------------------------------------------------------------- /tests/test_autograd/__init__.py: -------------------------------------------------------------------------------- 1 | from .test_add import TestAdd 2 | from .test_div import TestDiv 3 | from .test_exp import TestExp 4 | from .test_matmul import TestMatmul 5 | from .test_mean import TestMean 6 | from .test_mul import TestMul 7 | from .test_neg import TestNeg 8 | from .test_pow import TestPow 9 | from .test_relu import TestReLU 10 | from .test_sub import TestSub 11 | from .test_sum import TestSum 12 | -------------------------------------------------------------------------------- /tests/test_nn/test_linear.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | import minitorch.nn as nn 5 | 6 | 7 | class TestLinear(TestCase): 8 | 9 | def test_linear(self): 10 | input = Tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]) 11 | target = Tensor([[6, 7, 8, 9, 10], [1, 2, 3, 4, 5]]) 12 | loss = nn.MSELoss() 13 | output = loss(input, target) 14 | self.assertEqual(output.data.tolist(), 25.)
15 | -------------------------------------------------------------------------------- /tests/test_nn/test_loss.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | import minitorch.nn as nn 5 | 6 | 7 | class TestLoss(TestCase): 8 | 9 | def test_mse_loss(self): 10 | input = Tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]) 11 | target = Tensor([[6, 7, 8, 9, 10], [1, 2, 3, 4, 5]]) 12 | loss = nn.MSELoss() 13 | output = loss(input, target) 14 | self.assertEqual(output.data.tolist(), 25.) 15 | -------------------------------------------------------------------------------- /minitorch/optim/sgd.py: -------------------------------------------------------------------------------- 1 | from .optimizer import Optimizer 2 | 3 | 4 | class SGD(Optimizer): 5 | """Implements stochastic gradient descent""" 6 | 7 | def __init__(self, params, lr): 8 | if lr < 0.0: 9 | raise ValueError("Invalid learning rate: {}".format(lr)) 10 | self.params = list(params) 11 | self.lr = lr 12 | 13 | def step(self): 14 | for param in self.params: 15 | param.data = param.data - self.lr * param.grad.data 16 | -------------------------------------------------------------------------------- /tests/test_nn/test_activation.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | import minitorch.nn as nn 5 | 6 | 7 | class TestActivation(TestCase): 8 | 9 | def test_sigmoid(self): 10 | input = Tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]) 11 | target = Tensor([[6, 7, 8, 9, 10], [1, 2, 3, 4, 5]]) 12 | loss = nn.MSELoss() 13 | output = loss(input, target) 14 | self.assertEqual(output.data.tolist(), 25.) 15 | 16 | def test_relu(self): 17 | ... 
18 | -------------------------------------------------------------------------------- /tests/test_suite.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import test_autograd 4 | import test_nn 5 | 6 | 7 | if __name__ == '__main__': 8 | suite = unittest.TestSuite() 9 | suite.addTests(unittest.TestLoader().loadTestsFromModule(test_autograd)) 10 | suite.addTests(unittest.TestLoader().loadTestsFromModule(test_nn)) 11 | 12 | # with open('UnittestTextReport.txt', 'a') as f: 13 | # runner = unittest.TextTestRunner(stream=f, verbosity=2) 14 | runner = unittest.TextTestRunner(verbosity=2) 15 | runner.run(suite) 16 | -------------------------------------------------------------------------------- /tests/test_autograd/test_neg.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | 5 | 6 | class TestNeg(TestCase): 7 | 8 | def test_neg(self): 9 | # scalar neg 10 | t1 = Tensor(1.0) 11 | t2 = -t1 12 | self.assertEqual(t2.data.tolist(), -1.0) 13 | 14 | t1 = Tensor(2.0, requires_grad=True) 15 | t2 = -t1 16 | t2.backward() 17 | self.assertEqual(t1.grad.data.tolist(), -1.0) 18 | 19 | # vector neg 20 | t1 = Tensor([1.0, 2.0]) 21 | t2 = -t1 22 | self.assertEqual(t2.data.tolist(), [-1.0, -2.0]) 23 | 24 | t1 = Tensor([1.0, 2.0], requires_grad=True) 25 | t2 = -t1 26 | t2.backward(Tensor([1.0, 1.0])) 27 | self.assertEqual(t1.grad.data.tolist(), [-1.0, -1.0]) 28 | -------------------------------------------------------------------------------- /tests/test_autograd/test_pow.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | 5 | 6 | class TestPow(TestCase): 7 | 8 | def test_pow(self): 9 | # scalar pow 10 | t1 = Tensor(2.0) 11 | t2 = t1 ** 3 12 | self.assertEqual(t2.data.tolist(), 8.0) 13 | 14 | t1 = Tensor(2.0, requires_grad=True) 15 | t2 = t1 ** 3 16 | t2.backward() 17 | self.assertEqual(t1.grad.data.tolist(), 12.0) 18 | 19 | # vector pow 20 | t1 = Tensor([1.0, 2.0]) 21 | t2 = t1 ** 3 22 | self.assertEqual(t2.data.tolist(), [1.0, 8.0]) 23 | 24 | t1 = Tensor([1.0, 2.0], requires_grad=True) 25 | t2 = t1 ** 3 26 | t2.backward(Tensor([1.0, 1.0])) 27 | self.assertEqual(t1.grad.data.tolist(), [3.0, 12.0]) 28 | -------------------------------------------------------------------------------- /tests/test_autograd/test_relu.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | 5 | 6 | class TestReLU(TestCase): 7 | 8 | def test_relu(self): 9 | # scalar relu 10 | t1 = Tensor(2.0) 11 | t2 = t1.relu() 12 | self.assertEqual(t2.data.tolist(), 2.0) 13 | 14 | t1 = Tensor(2.0, requires_grad=True) 15 | t2 = t1.relu() 16 | t2.backward() 17 | self.assertEqual(t1.grad.data.tolist(), 1.0) 18 | 19 | # vector relu 20 | t1 = Tensor([-1.0, 2.0]) 21 | t2 = t1.relu() 22 | self.assertEqual(t2.data.tolist(), [0, 2.0]) 23 | 24 | t1 = Tensor([-1.0, 2.0], requires_grad=True) 25 | t2 = t1.relu() 26 | t2.backward(Tensor([1.0, 1.0])) 27 | self.assertEqual(t1.grad.data.tolist(), [0, 1.0]) 28 | -------------------------------------------------------------------------------- /minitorch/nn/modules/linear.py: -------------------------------------------------------------------------------- 1 | from minitorch import Tensor 2 | from .module import Module 3 | from ..parameter import 
Parameter 4 | 5 | 6 | class Linear(Module): 7 | """Applies a linear transformation to the incoming data: y = xA + b""" 8 | 9 | def __init__(self, in_features: int, out_features: int, bias: bool = True) -> None: 10 | super().__init__() 11 | self.in_features = in_features 12 | self.out_features = out_features 13 | self.weight = Parameter(out_features, in_features) 14 | if bias: 15 | self.bias = Parameter(out_features) 16 | else: 17 | self.bias = None 18 | 19 | def forward(self, input: Tensor) -> Tensor: 20 | output = input @ self.weight.t() 21 | if self.bias is not None: 22 | output = output + self.bias 23 | return output 24 | -------------------------------------------------------------------------------- /tests/test_autograd/test_exp.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | import numpy as np 4 | 5 | from minitorch import Tensor 6 | 7 | 8 | class TestExp(TestCase): 9 | 10 | def test_exp(self): 11 | # scalar exp 12 | t1 = Tensor(2.0) 13 | t2 = t1.exp() 14 | np.testing.assert_allclose(t2.data, np.exp(2)) 15 | 16 | t1 = Tensor(2.0, requires_grad=True) 17 | t2 = t1.exp() 18 | t2.backward() 19 | np.testing.assert_allclose(t1.grad.data, np.exp(2)) 20 | 21 | # vector exp 22 | t1 = Tensor([1.0, 2.0]) 23 | t2 = t1.exp() 24 | np.testing.assert_allclose(t2.data, np.exp([1, 2])) 25 | 26 | t1 = Tensor([1.0, 2.0], requires_grad=True) 27 | t2 = t1.exp() 28 | t2.backward(Tensor([1.0, 1.0])) 29 | np.testing.assert_allclose(t1.grad.data, np.exp([1, 2])) 30 | -------------------------------------------------------------------------------- /examples/neural_network.py: -------------------------------------------------------------------------------- 1 | import minitorch 2 | import minitorch.nn as nn 3 | 4 | 5 | input = minitorch.rand(2, 3) 6 | linear = nn.Linear(3, 5, bias=True) 7 | output = linear(input) 8 | print(f"output: {output}") 9 | 10 | 11 | class Model(nn.Module): 12 | 13 | def __init__(self): 14 | super().__init__() 15 | self.linear_1 = nn.Linear(3, 5, bias=True) 16 | self.linear_2 = nn.Linear(5, 6) 17 | 18 | def forward(self, input): 19 | output = self.linear_1(input) 20 | output = self.linear_2(output) 21 | return output 22 | 23 | input = minitorch.rand(2, 3) 24 | model = Model() 25 | output = model(input) 26 | print(f"output: {output}") 27 | 28 | for name, parameter in model.named_parameters(): 29 | print(f"{name}: {parameter}") 30 | 31 | for name, module in model.named_modules(prefix='model'): 32 | print(f"{name}: {module}") 33 | -------------------------------------------------------------------------------- /minitorch/nn/modules/loss.py: -------------------------------------------------------------------------------- 1 | from minitorch import Tensor 2 | from .module import Module 3 | 4 | 5 | class MSELoss(Module): 6 | """Creates a criterion that measures the mean squared error (squared L2 norm) between 7 | each element in the input x and target y. 
8 | """ 9 | 10 | def __init__(self, reduction: str = 'mean'): 11 | self.reduction = reduction 12 | 13 | def forward(self, input: Tensor, target: Tensor) -> Tensor: 14 | result = (input - target) ** 2 15 | if self.reduction is None: 16 | return result 17 | elif self.reduction == 'mean': 18 | return result.mean() 19 | elif self.reduction == 'sum': 20 | return result.sum() 21 | else: 22 | raise ValueError("reduction should be one of the 'none,mean,sum', " 23 | f"rather than {self.reduction}") 24 | 25 | 26 | class CrossEntropyLoss(Module): 27 | pass 28 | -------------------------------------------------------------------------------- /tests/test_autograd/test_sum.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | 5 | 6 | class TestSum(TestCase): 7 | 8 | def test_sum(self): 9 | t1 = Tensor([1., 2., 3.]) 10 | t2 = t1.sum() 11 | self.assertEqual(t2.data.tolist(), 6.) 12 | 13 | # (3,) -> () 14 | t1 = Tensor([1., 2., 3.], requires_grad=True) 15 | t2 = t1.sum() 16 | t2.backward() 17 | self.assertEqual(t1.grad.data.tolist(), [1., 1., 1.]) 18 | 19 | # (2, 3) -> (3, ) 20 | t1 = Tensor([[1., 2., 3.], [4., 5., 6.]], requires_grad=True) 21 | t2 = t1.sum(axis=0) 22 | t2.backward(Tensor([1., 1., 1.])) 23 | self.assertEqual(t1.grad.data.tolist(), [[1., 1., 1.], [1., 1., 1.]]) 24 | 25 | # (2, 3) -> (2, ) 26 | t1 = Tensor([[1., 2., 3.], [4., 5., 6.]], requires_grad=True) 27 | t2 = t1.sum(axis=1) 28 | t2.backward(Tensor([1., 1.])) 29 | self.assertEqual(t1.grad.data.tolist(), [[1., 1., 1.], [1., 1., 1.]]) 30 | 31 | # (2, 3) -> (,) 32 | t1 = Tensor([[1., 2., 3.], [4., 5., 6.]], requires_grad=True) 33 | t2 = t1.sum() 34 | t2.backward(Tensor(1.0)) 35 | self.assertEqual(t1.grad.data.tolist(), [[1., 1., 1.], [1., 1., 1.]]) 36 | -------------------------------------------------------------------------------- /tests/test_autograd/test_mean.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | 5 | 6 | class TestMean(TestCase): 7 | 8 | def test_mean(self): 9 | t1 = Tensor([1., 2., 3.]) 10 | t2 = t1.mean() 11 | self.assertEqual(t2.data.tolist(), 2.) 
12 | 13 | # (3,) -> () 14 | t1 = Tensor([1., 2., 3., 4.], requires_grad=True) 15 | t2 = t1.mean() 16 | t2.backward() 17 | self.assertEqual(t1.grad.data.tolist(), [1/4., 1/4, 1/4, 1/4]) 18 | 19 | # (2, 3) -> (3, ) 20 | t1 = Tensor([[1., 2., 3.], [4., 5., 6.]], requires_grad=True) 21 | t2 = t1.mean(axis=0) 22 | t2.backward(Tensor([1., 1., 1.])) 23 | self.assertEqual(t1.grad.data.tolist(), [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]]) 24 | 25 | # (2, 3) -> (2, ) 26 | t1 = Tensor([[1., 2., 3., 4.], [4., 5., 6.,7.]], requires_grad=True) 27 | t2 = t1.mean(axis=1) 28 | t2.backward(Tensor([1., 1.])) 29 | self.assertEqual(t1.grad.data.tolist(), [[1/4., 1/4, 1/4, 1/4], [1/4., 1/4, 1/4, 1/4]]) 30 | 31 | # (2, 3) -> (,) 32 | t1 = Tensor([[1., 2., 3.], [4., 5., 6.]], requires_grad=True) 33 | t2 = t1.mean() 34 | t2.backward(Tensor(1.0)) 35 | self.assertEqual(t1.grad.data.tolist(), [[1/6, 1/6, 1/6], [1/6, 1/6, 1/6]]) 36 | -------------------------------------------------------------------------------- /examples/train.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from minitorch import Tensor 4 | import minitorch.nn as nn 5 | import minitorch.optim as optim 6 | 7 | 8 | class Model(nn.Module): 9 | 10 | def __init__(self, in_features=3): 11 | super().__init__() 12 | self.linear1 = nn.Linear(in_features, 5, bias=True) 13 | self.relu1 = nn.ReLU() 14 | self.linear2 = nn.Linear(5, 1, bias=True) 15 | 16 | def forward(self, input): 17 | output = self.linear1(input) 18 | output = self.relu1(output) 19 | output = self.linear2(output) 20 | return output 21 | 22 | 23 | def train(model, x, y, epoch=30): # TODO 24 | optimizer = optim.SGD(model.parameters(), lr=0.1) 25 | mse_loss = nn.MSELoss() 26 | for i in range(1, epoch + 1): 27 | model.zero_grad() 28 | output = model(x) 29 | loss = mse_loss(output, y) 30 | print(f"train: epoch {i}, loss {loss}") 31 | loss.backward() 32 | optimizer.step() 33 | 34 | 35 | def test(model, x, y): 36 | output = model(x) 37 | mse_loss = nn.MSELoss() 38 | loss = mse_loss(output, y) 39 | print(f"test: loss {loss}") 40 | 41 | 42 | def main(): 43 | coef = Tensor(np.array([1, 3, 2])) 44 | x_train = Tensor(np.random.rand(100, 3)) 45 | y_train = x_train @ coef + 5 46 | x_test = Tensor(np.random.rand(20, 3)) 47 | y_test = x_test @ coef + 5 48 | model = Model() 49 | train(model, x_train, y_train) 50 | test(model, x_test, y_test) 51 | 52 | 53 | if __name__ == '__main__': 54 | main() 55 | -------------------------------------------------------------------------------- /tests/test_autograd/test_matmul.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | 5 | 6 | class TestMatmul(TestCase): 7 | 8 | def test_matmul(self): 9 | t1 = Tensor([1.0, 2.0]) 10 | t2 = Tensor([2.0, 3.0]) 11 | t3 = t1 @ t2 12 | self.assertEqual(t3.data.tolist(), 8.0) 13 | 14 | t1 = Tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]]) # 3 * 2 15 | t2 = Tensor([[2.0, 3.0], [3.0, 4.0]]) # 2 * 2 16 | t3 = t1 @ t2 # 3 * 2 17 | self.assertEqual(t3.data.tolist(), [[8.0, 11.], [13., 18.0], [18.0, 25.0]]) 18 | 19 | t1 = Tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]], requires_grad=True) # 3 * 2 20 | t2 = Tensor([[2.0, 3.0], [3.0, 4.0]]) # 2 * 2 21 | t3 = t1 @ t2 22 | t3.backward(Tensor([[1.0, 2.0], [3.0, 4.0], [3.0, 5.0]])) 23 | self.assertEqual(t1.grad.data.tolist(), [[8.0, 11.0], [18.0, 25.0], [21.0, 29.0]]) 24 | 25 | t1 = Tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]]) # 3 * 2 26 | t2 = 
Tensor([[2.0, 3.0], [3.0, 4.0]], requires_grad=True) # 2 * 2 27 | t3 = t1 @ t2 28 | t3.backward(Tensor([[1.0, 2.0], [3.0, 4.0], [3.0, 5.0]])) 29 | self.assertEqual(t2.grad.data.tolist(), [[16.0, 25.0], [23.0, 36.0]]) 30 | 31 | t1 = Tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]], requires_grad=True) # 3 * 2 32 | t2 = Tensor([[2.0, 3.0], [3.0, 4.0]], requires_grad=True) # 2 * 2 33 | t3 = t1 @ t2 34 | t3.backward(Tensor([[1.0, 2.0], [3.0, 4.0], [3.0, 5.0]])) 35 | self.assertEqual(t1.grad.data.tolist(), [[8.0, 11.0], [18.0, 25.0], [21.0, 29.0]]) 36 | self.assertEqual(t2.grad.data.tolist(), [[16.0, 25.0], [23.0, 36.0]]) 37 | -------------------------------------------------------------------------------- /minitorch/autograd/engine.py: -------------------------------------------------------------------------------- 1 | """Execute danamic computational graph 2 | 3 | Topological Sorting 4 | """ 5 | 6 | from collections import defaultdict, deque 7 | 8 | from minitorch import Tensor 9 | from .node import Node 10 | 11 | 12 | class NodeTask: 13 | def __init__(self, node: Node, grad_input: Tensor): 14 | self.node = node 15 | self.grad_input = grad_input 16 | 17 | def update_grad_input(self, grad_input: Tensor): 18 | self.grad_input += grad_input 19 | 20 | 21 | class Engine: 22 | 23 | def execute(self, tensor, grad_input): 24 | dependencies = self._compute_dependencies(tensor.grad_fn) 25 | not_ready_dict = {} 26 | ready_queue = deque([NodeTask(tensor.grad_fn, grad_input)]) 27 | while ready_queue: 28 | node_task = ready_queue.popleft() 29 | grad_outputs = node_task.node(node_task.grad_input) 30 | if grad_outputs is None: 31 | continue 32 | for grad_output, edge in zip(grad_outputs, node_task.node.next_edges): 33 | next_node = edge.node 34 | dependencies[next_node] -= 1 35 | if next_node not in not_ready_dict: 36 | not_ready_dict[next_node] = NodeTask(next_node, grad_output) 37 | else: 38 | not_ready_dict[next_node].update_grad_input(grad_output) 39 | if dependencies[next_node] == 0: 40 | ready_queue.append(not_ready_dict[next_node]) 41 | 42 | def _compute_dependencies(self, root: Node): 43 | dependencies = defaultdict(int) 44 | dependencies[root] = 0 45 | queue = deque([root]) 46 | while queue: 47 | node = queue.pop() 48 | if hasattr(node, "next_edges"): 49 | for edge in node.next_edges: 50 | next_node = edge.node 51 | dependencies[next_node] += 1 52 | queue.append(next_node) 53 | return dependencies 54 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | cover/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | .pybuilder/ 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | # For a library or package, you might want to ignore these files since the code is 89 | # intended to run in multiple environments; otherwise, check them in: 90 | # .python-version 91 | 92 | # pipenv 93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 96 | # install all needed dependencies. 97 | #Pipfile.lock 98 | 99 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 100 | __pypackages__/ 101 | 102 | # Celery stuff 103 | celerybeat-schedule 104 | celerybeat.pid 105 | 106 | # SageMath parsed files 107 | *.sage.py 108 | 109 | # Environments 110 | .env 111 | .venv 112 | env/ 113 | venv/ 114 | ENV/ 115 | env.bak/ 116 | venv.bak/ 117 | 118 | # Spyder project settings 119 | .spyderproject 120 | .spyproject 121 | 122 | # Rope project settings 123 | .ropeproject 124 | 125 | # mkdocs documentation 126 | /site 127 | 128 | # mypy 129 | .mypy_cache/ 130 | .dmypy.json 131 | dmypy.json 132 | 133 | # Pyre type checker 134 | .pyre/ 135 | 136 | # pytype static type analyzer 137 | .pytype/ 138 | 139 | # Cython debug symbols 140 | cython_debug/ -------------------------------------------------------------------------------- /minitorch/nn/modules/module.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | from collections import OrderedDict 3 | 4 | from typing import Iterator, Union, Tuple 5 | 6 | from ..parameter import Parameter 7 | from minitorch import Tensor 8 | 9 | 10 | class Module(metaclass=ABCMeta): 11 | r"""Base class for all neural network modules. 12 | 13 | Your models should also subclass this class. 14 | 15 | Modules can also contain other Modules, allowing to nest them in 16 | a tree structure. 
17 | """ 18 | 19 | def __init__(self): 20 | self._parameters = OrderedDict() 21 | self._modules = OrderedDict() 22 | 23 | def __call__(self, *inputs): 24 | return self.forward(*inputs) 25 | 26 | @abstractmethod 27 | def forward(self, *inputs): 28 | """subclass must implement the method.""" 29 | pass 30 | 31 | def __getattr__(self, name: str) -> Union[Tensor, 'Module']: 32 | _parameters = self.__dict__['_parameters'] 33 | if name in _parameters: 34 | return _parameters[name] 35 | _modules = self.__dict__['_modules'] 36 | if name in _modules: 37 | return _modules[name] 38 | raise AttributeError("'{}' object has no attribute '{}'".format( 39 | type(self).__name__, name)) 40 | 41 | def __setattr__(self, name: str, value: Union[Tensor, 'Module']) -> None: 42 | if isinstance(value, Parameter): 43 | self._parameters[name] = value 44 | elif isinstance(value, Module): 45 | self._modules[name] = value 46 | else: 47 | object.__setattr__(self, name, value) 48 | 49 | def _named_members(self, get_members_fn, prefix='', recurse=True): 50 | memo = set() 51 | modules = self.named_modules(prefix=prefix) if recurse else [(prefix, self)] 52 | for module_prefix, module in modules: 53 | members = get_members_fn(module) 54 | for k, v in members: 55 | if v is None or v in memo: 56 | continue 57 | memo.add(v) 58 | name = module_prefix + ('.' if module_prefix else '') + k 59 | yield name, v 60 | 61 | def parameters(self, recurse: bool = True) -> Iterator[Parameter]: 62 | for name, param in self.named_parameters(recurse=recurse): 63 | yield param 64 | 65 | def named_parameters(self, prefix: str = '', recurse: bool = True) -> Iterator[Tuple[str, Parameter]]: 66 | gen = self._named_members( 67 | lambda module: module._parameters.items(), 68 | prefix=prefix, recurse=recurse) 69 | for elem in gen: 70 | yield elem 71 | 72 | def modules(self) -> Iterator['Module']: 73 | for name, module in self.named_modules(): 74 | yield module 75 | 76 | def named_modules(self, prefix: str = '') -> Iterator[Tuple[str, 'Module']]: 77 | yield prefix, self 78 | for name, module in self._modules.items(): 79 | submodule_prefix = prefix + ('.' if prefix else '') + name 80 | yield from module.named_modules(submodule_prefix) 81 | 82 | def zero_grad(self) -> None: 83 | for p in self.parameters(): 84 | if p.grad is not None: 85 | p.grad = None 86 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # minitorch 2 | 3 | Implement a minimal library from scratch to help understand the Dynamic Computaional Graph of PyTorch. 4 | 5 | # TODO 6 | + support CUDA 7 | 8 | # Requirements 9 | 10 | 1. Create virtual environment 11 | ```bash 12 | python3 -m venv minitorch-env 13 | ``` 14 | 15 | 2. Activate virtual environment 16 | ```bash 17 | source minitorch-env/bin/activate 18 | ``` 19 | 20 | 3. Install dependencies 21 | ```bash 22 | pip install -r requirements.txt 23 | ``` 24 | 25 | # Quick Start 26 | 27 | 1. Clone the codebase 28 | ```bash 29 | git clone git@github.com:zhouzaida/minitorch.git 30 | ``` 31 | 32 | 2. 
Install or develop 33 | ```bash 34 | python setup.py install 35 | # or 36 | python setup.py develop 37 | ``` 38 | 39 | # Examples 40 | 41 | + create Tensor 42 | 43 | ```python 44 | from minitorch import Tensor 45 | 46 | t1 = Tensor(2.0) 47 | t2 = Tensor(3.0) 48 | t3 = t1 + t2 49 | print(t3) # Tensor(5.0, requires_grad=False) 50 | ``` 51 | 52 | + autograd 53 | 54 | ```python 55 | from minitorch import Tensor 56 | 57 | t1 = Tensor(2.0, requires_grad=True) 58 | t2 = Tensor(3.0) 59 | t3 = t1 + t2 60 | t4 = t1 * t3 61 | t4.backward() 62 | print(f"t1 grad: {t1.grad}") # t1 grad: Tensor(7.0, requires_grad=False) 63 | print(f"t2 grad: {t2.grad}") # t2 grad: None 64 | ``` 65 | 66 | + gradient for broadcast 67 | 68 | ```python 69 | from minitorch import Tensor 70 | 71 | t1 = Tensor([1.0, 2.0], requires_grad=True) 72 | t2 = Tensor(2.0, requires_grad=True) 73 | t3 = t1 + t2 74 | t3.backward(Tensor([1.0, 1.0])) 75 | print(f"t1 grad: {t1.grad}") # t1 grad: Tensor([1., 1.], requires_grad=False) 76 | print(f"t2 grad: {t2.grad}") # t2 grad: Tensor(2.0, requires_grad=False) 77 | ``` 78 | 79 | + create neural network 80 | 81 | ```python 82 | import minitorch 83 | import minitorch.nn as nn 84 | 85 | input = minitorch.rand(2, 3) 86 | linear = nn.Linear(3, 5, bias=True) 87 | output = linear(input) 88 | print(f"output: {output}") 89 | 90 | class Model(nn.Module): 91 | 92 | def __init__(self): 93 | super().__init__() 94 | self.linear_1 = nn.Linear(3, 5, bias=True) 95 | self.linear_2 = nn.Linear(5, 6) 96 | 97 | def forward(self, input): 98 | output = self.linear_1(input) 99 | output = self.linear_2(output) 100 | return output 101 | 102 | input = minitorch.rand(2, 3) 103 | model = Model() 104 | output = model(input) 105 | print(f"output: {output}") 106 | 107 | for name, module in model.named_modules(prefix='model'): 108 | print(f"{name}: {module}") 109 | ``` 110 | 111 | # Tools 112 | + [mypy is a static type checker for Python](https://mypy.readthedocs.io/) 113 | + [Flake8: Your Tool For Style Guide Enforcement](https://flake8.pycqa.org/en/latest/) 114 | + [unittest](https://docs.python.org/3/library/unittest.html) 115 | 116 | # References 117 | + [PyTorch](https://github.com/pytorch/pytorch) 118 | + [autograd](https://github.com/joelgrus/autograd) 119 | + [tinygrad](https://github.com/geohot/tinygrad) 120 | -------------------------------------------------------------------------------- /tests/test_autograd/test_add.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | 5 | 6 | class TestAdd(TestCase): 7 | 8 | def test_simple_add(self): 9 | # scalar add 10 | t1 = Tensor(1.0) 11 | t2 = Tensor(2.0) 12 | t3 = t1 + t2 13 | self.assertEqual(t3.data.tolist(), 3.0) 14 | 15 | t1 = Tensor(2.0, requires_grad=True) 16 | t2 = Tensor(3.0) 17 | t3 = t1 + t2 18 | t3.backward() 19 | self.assertEqual(t1.grad.data.tolist(), 1.0) 20 | 21 | t1 = Tensor(2.0) 22 | t2 = Tensor(3.0, requires_grad=True) 23 | t3 = t1 + t2 24 | t3.backward() 25 | self.assertEqual(t2.grad.data.tolist(), 1.0) 26 | 27 | t1 = Tensor(2.0, requires_grad=True) 28 | t2 = Tensor(3.0, requires_grad=True) 29 | t3 = t1 + t2 30 | t3.backward() 31 | self.assertEqual(t1.grad.data.tolist(), 1.0) 32 | self.assertEqual(t2.grad.data.tolist(), 1.0) 33 | 34 | # vector add 35 | t1 = Tensor([1.0, 2.0]) 36 | t2 = Tensor([2.0, 3.0]) 37 | t3 = t1 + t2 38 | self.assertEqual(t3.data.tolist(), [3.0, 5.0]) 39 | 40 | t1 = Tensor([1.0, 2.0], requires_grad=True) 41 | t2 =
Tensor([2.0, 3.0]) 42 | t3 = t1 + t2 43 | t3.backward(Tensor([1.0, 1.0])) 44 | self.assertEqual(t1.grad.data.tolist(), [1.0, 1.0]) 45 | 46 | t1 = Tensor([1.0, 2.0]) 47 | t2 = Tensor([2.0, 3.0], requires_grad=True) 48 | t3 = t1 + t2 49 | t3.backward(Tensor([1.0, 1.0])) 50 | self.assertEqual(t2.grad.data.tolist(), [1.0, 1.0]) 51 | 52 | t1 = Tensor([1.0, 2.0], requires_grad=True) 53 | t2 = Tensor([2.0, 3.0], requires_grad=True) 54 | t3 = t1 + t2 55 | t3.backward(Tensor([1.0, 1.0])) 56 | self.assertEqual(t1.grad.data.tolist(), [1.0, 1.0]) 57 | self.assertEqual(t2.grad.data.tolist(), [1.0, 1.0]) 58 | 59 | def test_broadcast_add(self): 60 | # (2,) + () 61 | t1 = Tensor([1.0, 2.0], requires_grad=True) 62 | t2 = Tensor(2.0, requires_grad=True) 63 | t3 = t1 + t2 64 | t3.backward(Tensor([1.0, 1.0])) 65 | self.assertEqual(t1.grad.data.tolist(), [1.0, 1.0]) 66 | self.assertEqual(t2.grad.data.tolist(), 2.0) 67 | 68 | # (2,) + (1,) 69 | t1 = Tensor([1.0, 2.0], requires_grad=True) 70 | t2 = Tensor([2.0], requires_grad=True) 71 | t3 = t1 + t2 72 | t3.backward(Tensor([1.0, 1.0])) 73 | self.assertEqual(t1.grad.data.tolist(), [1.0, 1.0]) 74 | self.assertEqual(t2.grad.data.tolist(), [2.0]) 75 | 76 | # (2, 2) + () 77 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 78 | t2 = Tensor(2.0, requires_grad=True) 79 | t3 = t1 + t2 80 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 81 | self.assertEqual(t1.grad.data.tolist(), [[1.0, 1.0], [1.0, 1.0]]) 82 | self.assertEqual(t2.grad.data.tolist(), 4.0) 83 | 84 | # (2, 2) + (1,) 85 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 86 | t2 = Tensor([2.0], requires_grad=True) 87 | t3 = t1 + t2 88 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 89 | self.assertEqual(t1.grad.data.tolist(), [[1.0, 1.0], [1.0, 1.0]]) 90 | self.assertEqual(t2.grad.data.tolist(), [4.0]) 91 | 92 | # (2, 2) + (2, ) 93 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 94 | t2 = Tensor([2.0, 3.0], requires_grad=True) 95 | t3 = t1 + t2 96 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 97 | self.assertEqual(t1.grad.data.tolist(), [[1.0, 1.0], [1.0, 1.0]]) 98 | self.assertEqual(t2.grad.data.tolist(), [2.0, 2.0]) 99 | -------------------------------------------------------------------------------- /tests/test_autograd/test_mul.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | 5 | 6 | class TestMul(TestCase): 7 | 8 | def test_simple_mul(self): 9 | # scalar mul 10 | t1 = Tensor(1.0) 11 | t2 = Tensor(2.0) 12 | t3 = t1 * t2 13 | self.assertEqual(t3.data.tolist(), 2.0) 14 | 15 | t1 = Tensor(1.0, requires_grad=True) 16 | t2 = Tensor(2.0) 17 | t3 = t1 * t2 18 | t3.backward() 19 | self.assertEqual(t1.grad.data.tolist(), 2.0) 20 | 21 | t1 = Tensor(1.0) 22 | t2 = Tensor(2.0, requires_grad=True) 23 | t3 = t1 * t2 24 | t3.backward() 25 | self.assertEqual(t2.grad.data.tolist(), 1.0) 26 | 27 | t1 = Tensor(1.0, requires_grad=True) 28 | t2 = Tensor(2.0, requires_grad=True) 29 | t3 = t1 * t2 30 | t3.backward() 31 | self.assertEqual(t1.grad.data.tolist(), 2.0) 32 | self.assertEqual(t2.grad.data.tolist(), 1.0) 33 | 34 | # vector mul 35 | t1 = Tensor([1.0, 2.0]) 36 | t2 = Tensor([2.0, 3.0]) 37 | t3 = t1 * t2 38 | self.assertEqual(t3.data.tolist(), [2.0, 6.0]) 39 | 40 | t1 = Tensor([1.0, 2.0], requires_grad=True) 41 | t2 = Tensor([2.0, 3.0]) 42 | t3 = t1 * t2 43 | t3.backward(Tensor([1.0, 1.0])) 44 | self.assertEqual(t1.grad.data.tolist(), [2.0, 3.0]) 45 | 46 | t1 = Tensor([1.0, 2.0]) 
47 | t2 = Tensor([2.0, 3.0], requires_grad=True) 48 | t3 = t1 * t2 49 | t3.backward(Tensor([1.0, 1.0])) 50 | self.assertEqual(t2.grad.data.tolist(), [1.0, 2.0]) 51 | 52 | t1 = Tensor([1.0, 2.0], requires_grad=True) 53 | t2 = Tensor([2.0, 3.0], requires_grad=True) 54 | t3 = t1 * t2 55 | t3.backward(Tensor([1.0, 1.0])) 56 | self.assertEqual(t1.grad.data.tolist(), [2.0, 3.0]) 57 | self.assertEqual(t2.grad.data.tolist(), [1.0, 2.0]) 58 | 59 | def test_broadcast_mul(self): 60 | # (2,) * () 61 | t1 = Tensor([1.0, 2.0], requires_grad=True) 62 | t2 = Tensor(2.0, requires_grad=True) 63 | t3 = t1 * t2 64 | t3.backward(Tensor([1.0, 1.0])) 65 | self.assertEqual(t1.grad.data.tolist(), [2.0, 2.0]) 66 | self.assertEqual(t2.grad.data.tolist(), 3.0) 67 | 68 | # (2,) * (1,) 69 | t1 = Tensor([1.0, 2.0], requires_grad=True) 70 | t2 = Tensor([2.0], requires_grad=True) 71 | t3 = t1 * t2 72 | t3.backward(Tensor([1.0, 1.0])) 73 | self.assertEqual(t1.grad.data.tolist(), [2.0, 2.0]) 74 | self.assertEqual(t2.grad.data.tolist(), [3.0]) 75 | 76 | # (2, 2) * () 77 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 78 | t2 = Tensor(2.0, requires_grad=True) 79 | t3 = t1 * t2 80 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 81 | self.assertEqual(t1.grad.data.tolist(), [[2.0, 2.0], [2.0, 2.0]]) 82 | self.assertEqual(t2.grad.data.tolist(), 10.0) 83 | 84 | # (2, 2) * (1,) 85 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 86 | t2 = Tensor([2.0], requires_grad=True) 87 | t3 = t1 * t2 88 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 89 | self.assertEqual(t1.grad.data.tolist(), [[2.0, 2.0], [2.0, 2.0]]) 90 | self.assertEqual(t2.grad.data.tolist(), [10.0]) 91 | 92 | # (2, 2) * (2, ) 93 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 94 | t2 = Tensor([2.0, 3.0], requires_grad=True) 95 | t3 = t1 * t2 96 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 97 | self.assertEqual(t1.grad.data.tolist(), [[2.0, 3.0], [2.0, 3.0]]) 98 | self.assertEqual(t2.grad.data.tolist(), [4.0, 6.0]) 99 | -------------------------------------------------------------------------------- /tests/test_autograd/test_sub.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | 5 | 6 | class TestSub(TestCase): 7 | 8 | def test_simple_sub(self): 9 | # scalar sub 10 | t1 = Tensor(1.0) 11 | t2 = Tensor(2.0) 12 | t3 = t1 - t2 13 | self.assertEqual(t3.data.tolist(), -1.0) 14 | 15 | t1 = Tensor(2.0, requires_grad=True) 16 | t2 = Tensor(3.0) 17 | t3 = t1 - t2 18 | t3.backward() 19 | self.assertEqual(t1.grad.data.tolist(), 1.0) 20 | 21 | t1 = Tensor(2.0) 22 | t2 = Tensor(3.0, requires_grad=True) 23 | t3 = t1 - t2 24 | t3.backward() 25 | self.assertEqual(t2.grad.data.tolist(), -1.0) 26 | 27 | t1 = Tensor(2.0, requires_grad=True) 28 | t2 = Tensor(3.0, requires_grad=True) 29 | t3 = t1 - t2 30 | t3.backward() 31 | self.assertEqual(t1.grad.data.tolist(), 1.0) 32 | self.assertEqual(t2.grad.data.tolist(), -1.0) 33 | 34 | # vector sub 35 | t1 = Tensor([1.0, 2.0]) 36 | t2 = Tensor([2.0, 3.0]) 37 | t3 = t1 - t2 38 | self.assertEqual(t3.data.tolist(), [-1.0, -1.0]) 39 | 40 | t1 = Tensor([1.0, 2.0], requires_grad=True) 41 | t2 = Tensor([2.0, 3.0]) 42 | t3 = t1 - t2 43 | t3.backward(Tensor([1.0, 1.0])) 44 | self.assertEqual(t1.grad.data.tolist(), [1.0, 1.0]) 45 | 46 | t1 = Tensor([1.0, 2.0]) 47 | t2 = Tensor([2.0, 3.0], requires_grad=True) 48 | t3 = t1 - t2 49 | t3.backward(Tensor([1.0, 1.0])) 50 | self.assertEqual(t2.grad.data.tolist(), [-1.0, 
-1.0]) 51 | 52 | t1 = Tensor([1.0, 2.0], requires_grad=True) 53 | t2 = Tensor([2.0, 3.0], requires_grad=True) 54 | t3 = t1 - t2 55 | t3.backward(Tensor([1.0, 1.0])) 56 | self.assertEqual(t1.grad.data.tolist(), [1.0, 1.0]) 57 | self.assertEqual(t2.grad.data.tolist(), [-1.0, -1.0]) 58 | 59 | def test_broadcast_sub(self): 60 | # (2,) - () 61 | t1 = Tensor([1.0, 2.0], requires_grad=True) 62 | t2 = Tensor(2.0, requires_grad=True) 63 | t3 = t1 - t2 64 | t3.backward(Tensor([1.0, 1.0])) 65 | self.assertEqual(t1.grad.data.tolist(), [1.0, 1.0]) 66 | self.assertEqual(t2.grad.data.tolist(), -2.0) 67 | 68 | # (2,) - (1,) 69 | t1 = Tensor([1.0, 2.0], requires_grad=True) 70 | t2 = Tensor([2.0], requires_grad=True) 71 | t3 = t1 - t2 72 | t3.backward(Tensor([1.0, 1.0])) 73 | self.assertEqual(t1.grad.data.tolist(), [1.0, 1.0]) 74 | self.assertEqual(t2.grad.data.tolist(), [-2.0]) 75 | 76 | # (2, 2) - () 77 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 78 | t2 = Tensor(2.0, requires_grad=True) 79 | t3 = t1 - t2 80 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 81 | self.assertEqual(t1.grad.data.tolist(), [[1.0, 1.0], [1.0, 1.0]]) 82 | self.assertEqual(t2.grad.data.tolist(), -4.0) 83 | 84 | # (2, 2) - (1,) 85 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 86 | t2 = Tensor([2.0], requires_grad=True) 87 | t3 = t1 - t2 88 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 89 | self.assertEqual(t1.grad.data.tolist(), [[1.0, 1.0], [1.0, 1.0]]) 90 | self.assertEqual(t2.grad.data.tolist(), [-4.0]) 91 | 92 | # (2, 2) - (2, ) 93 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 94 | t2 = Tensor([2.0, 3.0], requires_grad=True) 95 | t3 = t1 - t2 96 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 97 | self.assertEqual(t1.grad.data.tolist(), [[1.0, 1.0], [1.0, 1.0]]) 98 | self.assertEqual(t2.grad.data.tolist(), [-2.0, -2.0]) 99 | -------------------------------------------------------------------------------- /tests/test_autograd/test_div.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from minitorch import Tensor 4 | 5 | 6 | class TestDiv(TestCase): 7 | 8 | def test_simple_div(self): 9 | # scalar div 10 | t1 = Tensor(1.0) 11 | t2 = Tensor(2.0) 12 | t3 = t1 / t2 13 | self.assertEqual(t3.data.tolist(), 0.5) 14 | 15 | t1 = Tensor(1.0, requires_grad=True) 16 | t2 = Tensor(2.0) 17 | t3 = t1 / t2 18 | t3.backward() 19 | self.assertEqual(t1.grad.data.tolist(), 0.5) 20 | 21 | t1 = Tensor(1.0) 22 | t2 = Tensor(2.0, requires_grad=True) 23 | t3 = t1 / t2 24 | t3.backward() 25 | self.assertEqual(t2.grad.data.tolist(), -0.25) 26 | 27 | t1 = Tensor(1.0, requires_grad=True) 28 | t2 = Tensor(2.0, requires_grad=True) 29 | t3 = t1 / t2 30 | t3.backward() 31 | self.assertEqual(t1.grad.data.tolist(), 0.5) 32 | self.assertEqual(t2.grad.data.tolist(), -0.25) 33 | 34 | # vector div 35 | t1 = Tensor([1.0, 2.0]) 36 | t2 = Tensor([2.0, 4.0]) 37 | t3 = t1 / t2 38 | self.assertEqual(t3.data.tolist(), [0.5, 0.5]) 39 | 40 | t1 = Tensor([1.0, 2.0], requires_grad=True) 41 | t2 = Tensor([2.0, 4.0]) 42 | t3 = t1 / t2 43 | t3.backward(Tensor([1.0, 1.0])) 44 | self.assertEqual(t1.grad.data.tolist(), [0.5, 0.25]) 45 | 46 | t1 = Tensor([1.0, 2.0]) 47 | t2 = Tensor([2.0, 4.0], requires_grad=True) 48 | t3 = t1 / t2 49 | t3.backward(Tensor([1.0, 1.0])) 50 | self.assertEqual(t2.grad.data.tolist(), [-0.25, -1/8]) 51 | 52 | t1 = Tensor([1.0, 2.0], requires_grad=True) 53 | t2 = Tensor([2.0, 4.0], requires_grad=True) 54 | t3 = t1 / t2 55 | 
t3.backward(Tensor([1.0, 1.0])) 56 | self.assertEqual(t1.grad.data.tolist(), [0.5, 0.25]) 57 | self.assertEqual(t2.grad.data.tolist(), [-0.25, -1/8]) 58 | 59 | def test_broadcast_div(self): 60 | # (2,) / () 61 | t1 = Tensor([1.0, 2.0], requires_grad=True) 62 | t2 = Tensor(2.0, requires_grad=True) 63 | t3 = t1 / t2 64 | t3.backward(Tensor([1.0, 1.0])) 65 | self.assertEqual(t1.grad.data.tolist(), [0.5, 0.5]) 66 | self.assertEqual(t2.grad.data.tolist(), -0.75) 67 | 68 | # (2,) / (1,) 69 | t1 = Tensor([1.0, 2.0], requires_grad=True) 70 | t2 = Tensor([2.0], requires_grad=True) 71 | t3 = t1 / t2 72 | t3.backward(Tensor([1.0, 1.0])) 73 | self.assertEqual(t1.grad.data.tolist(), [0.5, 0.5]) 74 | self.assertEqual(t2.grad.data.tolist(), [-0.75]) 75 | 76 | # (2, 2) / () 77 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 78 | t2 = Tensor(2.0, requires_grad=True) 79 | t3 = t1 / t2 80 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 81 | self.assertEqual(t1.grad.data.tolist(), [[0.5, 0.5], [0.5, 0.5]]) 82 | self.assertEqual(t2.grad.data.tolist(), -2.5) 83 | 84 | # (2, 2) / (1,) 85 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 86 | t2 = Tensor([2.0], requires_grad=True) 87 | t3 = t1 / t2 88 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 89 | self.assertEqual(t1.grad.data.tolist(), [[0.5, 0.5], [0.5, 0.5]]) 90 | self.assertEqual(t2.grad.data.tolist(), [-2.5]) 91 | 92 | # (2, 2) / (2, ) 93 | t1 = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) 94 | t2 = Tensor([2.0, 4.0], requires_grad=True) 95 | t3 = t1 / t2 96 | t3.backward(Tensor([[1.0, 1.0], [1.0, 1.0]])) 97 | self.assertEqual(t1.grad.data.tolist(), [[0.5, 0.25], [0.5, 0.25]]) 98 | self.assertEqual(t2.grad.data.tolist(), [-1.0, -0.375]) 99 | -------------------------------------------------------------------------------- /minitorch/tensor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import Union, Tuple 3 | 4 | from minitorch import autograd 5 | 6 | 7 | Arrayable = Union[float, list, np.ndarray] 8 | 9 | 10 | def ensure_ndarray(data: Arrayable) -> np.ndarray: 11 | if isinstance(data, np.ndarray): 12 | return data 13 | else: 14 | return np.array(data) 15 | 16 | 17 | def ensure_tensor(data): 18 | if isinstance(data, Tensor): 19 | return data 20 | else: 21 | return Tensor(data) 22 | 23 | 24 | class Tensor: 25 | def __init__(self, 26 | data: Arrayable, 27 | requires_grad: bool = False, 28 | grad_fn=None): 29 | self.data = ensure_ndarray(data) 30 | self.requires_grad = requires_grad 31 | self.grad = None 32 | self.grad_fn = grad_fn 33 | 34 | @property 35 | def shape(self): 36 | return self.data.shape 37 | 38 | def __repr__(self): 39 | return f"Tensor({self.data}, requires_grad={self.requires_grad})" 40 | 41 | def __add__(self, other) -> 'Tensor': 42 | return autograd.functional.add(self, ensure_tensor(other)) 43 | 44 | def __radd__(self, other) -> 'Tensor': 45 | return autograd.functional.add(ensure_tensor(other), self) 46 | 47 | def __iadd__(self, other) -> 'Tensor': 48 | self.data += ensure_tensor(other).data 49 | return self 50 | 51 | def __neg__(self) -> 'Tensor': 52 | return autograd.functional.neg(self) 53 | 54 | def __sub__(self, other) -> 'Tensor': 55 | return autograd.functional.sub(self, ensure_tensor(other)) 56 | 57 | def __rsub__(self, other) -> 'Tensor': 58 | return autograd.functional.sub(ensure_tensor(other), self) 59 | 60 | def __isub__(self, other) -> 'Tensor': 61 | self.data -= ensure_tensor(other).data 62 | return self 63 | 64 | def 
__mul__(self, other) -> 'Tensor': 65 | return autograd.functional.mul(self, ensure_tensor(other)) 66 | 67 | def __rmul__(self, other) -> 'Tensor': 68 | return autograd.functional.mul(ensure_tensor(other), self) 69 | 70 | def __truediv__(self, other) -> 'Tensor': 71 | return autograd.functional.div(self, ensure_tensor(other)) 72 | 73 | def __rtruediv__(self, other) -> 'Tensor': 74 | return autograd.functional.div(ensure_tensor(other), self) 75 | 76 | def __matmul__(self, other) -> 'Tensor': 77 | return autograd.functional.matmul(self, other) 78 | 79 | def __pow__(self, other: float) -> 'Tensor': 80 | return autograd.functional.pow(self, other) 81 | 82 | def sum(self, axis: Union[int, Tuple[int]] = None) -> 'Tensor': 83 | return autograd.functional.sum(self, axis) 84 | 85 | def mean(self, axis: Union[int, Tuple[int]] = None) -> 'Tensor': 86 | return autograd.functional.mean(self, axis) 87 | 88 | def t(self) -> 'Tensor': 89 | """transpose""" 90 | return autograd.functional.t(self) 91 | 92 | def exp(self) -> 'Tensor': 93 | return autograd.functional.exp(self) 94 | 95 | def relu(self) -> 'Tensor': 96 | return autograd.functional.relu(self) 97 | 98 | def backward(self, grad: 'Tensor' = None) -> None: 99 | assert self.requires_grad 100 | if grad is None and self.shape != (): 101 | raise RuntimeError("grad can be implicitly created only for scalar outputs") 102 | grad = grad if grad else Tensor(1.0) 103 | from minitorch.autograd.engine import Engine 104 | engine = Engine() 105 | engine.execute(self, grad) 106 | 107 | def zero_grad(self) -> None: 108 | self.grad = None 109 | 110 | 111 | def rand(*shape, requires_grad=False) -> Tensor: 112 | data = np.random.randn(*shape) 113 | return Tensor(data=data, requires_grad=requires_grad) 114 | -------------------------------------------------------------------------------- /minitorch/autograd/functional.py: -------------------------------------------------------------------------------- 1 | from typing import Union, Tuple 2 | 3 | import numpy as np 4 | 5 | from minitorch import Tensor 6 | from .node import collect_next_edges 7 | from .node import * 8 | 9 | 10 | ############## reduce operator ################## 11 | 12 | def sum(t: Tensor, axis: Union[int, Tuple[int]] = None) -> Tensor: 13 | data = t.data.sum(axis=axis) 14 | requires_grad = t.requires_grad 15 | if requires_grad: 16 | sum_bw = SumBackward() 17 | sum_bw.set_next_edges(collect_next_edges(t)) 18 | sum_bw.axis = axis 19 | sum_bw.shape = t.shape 20 | return Tensor(data=data, 21 | requires_grad=True, 22 | grad_fn=sum_bw) 23 | else: 24 | return Tensor(data=data) 25 | 26 | 27 | def mean(t: Tensor, axis: Union[int, Tuple[int]] = None) -> Tensor: 28 | data = t.data.mean(axis=axis) 29 | requires_grad = t.requires_grad 30 | if requires_grad: 31 | mean_bw = MeanBackward() 32 | mean_bw.set_next_edges(collect_next_edges(t)) 33 | mean_bw.axis = axis 34 | mean_bw.shape = t.shape 35 | return Tensor(data=data, 36 | requires_grad=True, 37 | grad_fn=mean_bw) 38 | else: 39 | return Tensor(data=data) 40 | 41 | ############## unary operator ################## 42 | 43 | def neg(t: Tensor) -> Tensor: 44 | data = -t.data 45 | requires_grad = t.requires_grad 46 | if requires_grad: 47 | neg_bw = NegBackward() 48 | neg_bw.set_next_edges(collect_next_edges(t)) 49 | return Tensor(data=data, 50 | requires_grad=True, 51 | grad_fn=neg_bw) 52 | else: 53 | return Tensor(data=data) 54 | 55 | 56 | def t(t: Tensor) -> Tensor: 57 | # transpose 58 | data = t.data.T 59 | requires_grad = t.requires_grad 60 | if requires_grad: 61 | 
t_bw = TBackward() 62 | t_bw.set_next_edges(collect_next_edges(t)) 63 | return Tensor(data=data, 64 | requires_grad=True, 65 | grad_fn=t_bw) 66 | else: 67 | return Tensor(data=data) 68 | 69 | 70 | def relu(t: Tensor) -> Tensor: 71 | data = np.maximum(t.data, 0) 72 | requires_grad = t.requires_grad 73 | if requires_grad: 74 | relu_bw = ReluBackward() 75 | relu_bw.set_next_edges(collect_next_edges(t)) 76 | relu_bw.input = Tensor(data=t.data) 77 | return Tensor(data=data, 78 | requires_grad=True, 79 | grad_fn=relu_bw) 80 | else: 81 | return Tensor(data=data) 82 | 83 | 84 | def exp(t: Tensor) -> Tensor: 85 | data = np.exp(t.data) 86 | requires_grad = t.requires_grad 87 | if requires_grad: 88 | exp_bw = ExpBackward() 89 | exp_bw.set_next_edges(collect_next_edges(t)) 90 | exp_bw.output = Tensor(data=data) 91 | return Tensor(data=data, 92 | requires_grad=True, 93 | grad_fn=exp_bw) 94 | else: 95 | return Tensor(data=data) 96 | 97 | ############## binary operator ################## 98 | def add(t1: Tensor, t2: Tensor) -> Tensor: 99 | data = t1.data + t2.data 100 | requires_grad = t1.requires_grad or t2.requires_grad 101 | if requires_grad: 102 | add_bw = AddBackward() 103 | add_bw.set_next_edges(collect_next_edges(t1, t2)) 104 | if t1.requires_grad: 105 | add_bw.t1_shape = t1.shape 106 | if t2.requires_grad: 107 | add_bw.t2_shape = t2.shape 108 | return Tensor(data=data, 109 | requires_grad=True, 110 | grad_fn=add_bw) 111 | else: 112 | return Tensor(data=data) 113 | 114 | 115 | def sub(t1: Tensor, t2: Tensor) -> Tensor: 116 | data = t1.data - t2.data 117 | requires_grad = t1.requires_grad or t2.requires_grad 118 | if requires_grad: 119 | sub_bw = SubBackward() 120 | sub_bw.set_next_edges(collect_next_edges(t1, t2)) 121 | if t1.requires_grad: 122 | sub_bw.t1_shape = t1.shape 123 | if t2.requires_grad: 124 | sub_bw.t2_shape = t2.shape 125 | return Tensor(data=data, 126 | requires_grad=True, 127 | grad_fn=sub_bw) 128 | else: 129 | return Tensor(data=data) 130 | 131 | 132 | def mul(t1: Tensor, t2: Tensor) -> Tensor: 133 | data = t1.data * t2.data 134 | requires_grad = t1.requires_grad or t2.requires_grad 135 | if requires_grad: 136 | mul_bw = MulBackward() 137 | mul_bw.set_next_edges(collect_next_edges(t1, t2)) 138 | if t1.requires_grad: 139 | mul_bw.t2 = Tensor(data=t2.data) 140 | mul_bw.t1_shape = t1.shape 141 | if t2.requires_grad: 142 | mul_bw.t1 = Tensor(data=t1.data) 143 | mul_bw.t2_shape = t2.shape 144 | return Tensor(data=data, 145 | requires_grad=True, 146 | grad_fn=mul_bw) 147 | else: 148 | return Tensor(data=data) 149 | 150 | 151 | def div(t1: Tensor, t2: Tensor) -> Tensor: 152 | data = t1.data / t2.data 153 | requires_grad = t1.requires_grad or t2.requires_grad 154 | if requires_grad: 155 | div_bw = DivBackward() 156 | div_bw.set_next_edges(collect_next_edges(t1, t2)) 157 | if t1.requires_grad: 158 | div_bw.t2 = Tensor(data=t2.data) 159 | div_bw.t1_shape = t1.shape 160 | if t2.requires_grad: 161 | div_bw.t1 = Tensor(data=t1.data) 162 | div_bw.t2 = Tensor(data=t2.data) if div_bw.t2 is None else div_bw.t2 163 | div_bw.t2_shape = t2.shape 164 | return Tensor(data=data, 165 | requires_grad=True, 166 | grad_fn=div_bw) 167 | else: 168 | return Tensor(data=data) 169 | 170 | 171 | def matmul(t1: Tensor, t2: Tensor) -> Tensor: 172 | data = t1.data @ t2.data 173 | requires_grad = t1.requires_grad or t2.requires_grad 174 | if requires_grad: 175 | matmul_bw = MatMulBackward() 176 | matmul_bw.set_next_edges(collect_next_edges(t1, t2)) 177 | if t1.requires_grad: 178 | matmul_bw.t2 = t2 179 | if 
t2.requires_grad: 180 | matmul_bw.t1 = t1 181 | return Tensor(data=data, 182 | requires_grad=True, 183 | grad_fn=matmul_bw) 184 | else: 185 | return Tensor(data=data) 186 | 187 | 188 | def pow(t1: Tensor, t2: float) -> Tensor: 189 | data = t1.data ** t2 190 | requires_grad = t1.requires_grad 191 | if requires_grad: 192 | pow_bw = PowBackward() 193 | pow_bw.set_next_edges(collect_next_edges(t1)) 194 | pow_bw.t1 = Tensor(data=t1.data) 195 | pow_bw.t2 = t2 196 | return Tensor(data=data, 197 | requires_grad=True, 198 | grad_fn=pow_bw) 199 | else: 200 | return Tensor(data=data) 201 | -------------------------------------------------------------------------------- /minitorch/autograd/node.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import numpy as np 4 | from typing import List 5 | 6 | from minitorch import Tensor 7 | from .edge import Edge 8 | 9 | 10 | def collect_next_edges(*tensors) -> List[Edge]: 11 | next_edges = [] 12 | for t in tensors: 13 | if not t.requires_grad: 14 | continue 15 | if t.grad_fn is None: 16 | t.grad_fn = AccumulateGrad(t) 17 | next_edges.append(Edge(t.grad_fn)) 18 | else: 19 | next_edges.append(Edge(t.grad_fn)) 20 | return next_edges 21 | 22 | 23 | def unbroadcast(grad_input: Tensor, input_shape: tuple) -> Tensor: 24 | """When broadcast is applied to an operation, unbroadcast should also 25 | be executed when backpropagating. 26 | 27 | References: 28 | 1. https://numpy.org/doc/stable/user/basics.broadcasting.html 29 | 2. http://coldattic.info/post/116/ 30 | 3. https://github.com/joelgrus/autograd/blob/part06/autograd/tensor.py#L150 31 | """ 32 | if grad_input.shape == input_shape: 33 | return grad_input 34 | data = grad_input.data 35 | ndims_added = len(grad_input.shape) - len(input_shape) 36 | for _ in range(ndims_added): 37 | data = data.sum(axis=0) 38 | for i, dim in enumerate(input_shape): 39 | if dim == 1: 40 | data = data.sum(axis=i, keepdims=True) 41 | 42 | return Tensor(data=data) 43 | 44 | 45 | class Node(metaclass=ABCMeta): 46 | 47 | def __call__(self, *grad_outputs): 48 | return self.apply(*grad_outputs) 49 | 50 | def set_next_edges(self, next_edges: List[Edge] = None): 51 | self.next_edges = next_edges 52 | 53 | @abstractmethod 54 | def apply(self, *grad_outputs): 55 | """You must implement the abstract method for custome Node""" 56 | pass 57 | 58 | 59 | class AccumulateGrad(Node): 60 | 61 | def __init__(self, leaf_tensor: Tensor): 62 | self.leaf_tensor = leaf_tensor 63 | 64 | def apply(self, grad_output: Tensor): 65 | if self.leaf_tensor.grad is None: 66 | self.leaf_tensor.grad = grad_output 67 | else: 68 | self.leaf_tensor.grad += grad_output 69 | return None 70 | 71 | 72 | ############## reduce operator ################## 73 | 74 | class SumBackward(Node): 75 | 76 | def __init__(self): 77 | self.axis = None 78 | self.shape: tuple = None 79 | 80 | def apply(self, grad_output: Tensor) -> tuple: 81 | if isinstance(self.axis, int): 82 | self.axis = [self.axis] 83 | if self.axis is None: 84 | shape = [1] * len(self.shape) 85 | else: 86 | shape = [1 if i in self.axis else self.shape[i] for i in range(len(self.shape))] 87 | data = grad_output.data.reshape(shape) + np.zeros(self.shape) 88 | return Tensor(data=data), 89 | 90 | 91 | class MeanBackward(Node): 92 | 93 | def __init__(self): 94 | self.axis = None 95 | self.shape: tuple = None 96 | 97 | def apply(self, grad_output: Tensor) -> tuple: 98 | if isinstance(self.axis, int): 99 | self.axis = [self.axis] 100 | if 
self.axis is None: 101 | shape = [1] * len(self.shape) 102 | else: 103 | shape = [1 if i in self.axis else self.shape[i] for i in range(len(self.shape))] 104 | scale = np.prod(grad_output.shape) / np.prod(self.shape) 105 | data = scale * grad_output.data.reshape(shape) + np.zeros(self.shape) 106 | return Tensor(data=data), 107 | 108 | 109 | ############## unary operator ################## 110 | 111 | class NegBackward(Node): 112 | 113 | def apply(self, grad_output: Tensor) -> list: 114 | return -grad_output, 115 | 116 | 117 | class TBackward(Node): 118 | """Transpose""" 119 | 120 | def apply(self, grad_output: Tensor) -> list: 121 | return Tensor(data=grad_output.data.T), 122 | 123 | 124 | class ReluBackward(Node): 125 | 126 | def __init__(self): 127 | self.input: Tensor = None 128 | 129 | def apply(self, grad_output: Tensor) -> list: 130 | return grad_output * Tensor(data=(self.input.data >= 0)), 131 | 132 | 133 | class ExpBackward(Node): 134 | 135 | def __init__(self): 136 | self.output: Tensor = None 137 | 138 | def apply(self, grad_output: Tensor) -> list: 139 | return grad_output * self.output, 140 | 141 | ############## binary operator ################## 142 | 143 | class AddBackward(Node): 144 | 145 | def __init__(self): 146 | self.t1_shape: tuple = None 147 | self.t2_shape: tuple = None 148 | 149 | def apply(self, grad_output: Tensor) -> list: 150 | grad_input = [] 151 | if self.t1_shape is not None: 152 | grad_input.append(unbroadcast(grad_output, self.t1_shape)) 153 | if self.t2_shape is not None: 154 | grad_input.append(unbroadcast(grad_output, self.t2_shape)) 155 | return grad_input 156 | 157 | 158 | class SubBackward(Node): 159 | 160 | def __init__(self): 161 | self.t1_shape: tuple = None 162 | self.t2_shape: tuple = None 163 | 164 | def apply(self, grad_output: Tensor) -> list: 165 | grad_input = [] 166 | if self.t1_shape is not None: 167 | grad_input.append(unbroadcast(grad_output, self.t1_shape)) 168 | if self.t2_shape is not None: 169 | grad_input.append(unbroadcast(-grad_output, self.t2_shape)) 170 | return grad_input 171 | 172 | 173 | class MulBackward(Node): 174 | 175 | def __init__(self): 176 | self.t1: Tensor = None 177 | self.t1_shape: tuple = None 178 | self.t2: Tensor = None 179 | self.t2_shape: tuple = None 180 | 181 | def apply(self, grad_output: Tensor) -> list: 182 | grad_input = [] 183 | if self.t2 is not None: 184 | grad_input.append(unbroadcast(self.t2 * grad_output, self.t1_shape)) 185 | if self.t1 is not None: 186 | grad_input.append(unbroadcast(self.t1 * grad_output, self.t2_shape)) 187 | return grad_input 188 | 189 | 190 | class DivBackward(Node): 191 | 192 | def __init__(self): 193 | self.t1: Tensor = None 194 | self.t1_shape: tuple = None 195 | self.t2: Tensor = None 196 | self.t2_shape: tuple = None 197 | 198 | def apply(self, grad_output: Tensor) -> list: 199 | grad_input = [] 200 | if self.t2 is not None and self.t1_shape is not None: 201 | grad_input.append(unbroadcast(1 / self.t2 * grad_output, self.t1_shape)) 202 | if self.t1 is not None: 203 | grad_input.append(unbroadcast(-self.t1 / (self.t2 ** 2) * grad_output, self.t2_shape)) 204 | return grad_input 205 | 206 | 207 | class MatMulBackward(Node): 208 | 209 | def __init__(self): 210 | self.t1: Tensor = None 211 | self.t2: Tensor = None 212 | 213 | def apply(self, grad_output: Tensor) -> list: 214 | grad_input = [] 215 | if self.t2 is not None: 216 | grad_input.append(grad_output @ Tensor(self.t2.data.T)) 217 | if self.t1 is not None: 218 | grad_input.append(Tensor(self.t1.data.T) @ grad_output) 
219 | return grad_input 220 | 221 | 222 | class PowBackward(Node): 223 | 224 | def __init__(self): 225 | self.t1: Tensor = None 226 | self.t2: float = None 227 | 228 | def apply(self, grad_output: Tensor) -> tuple: 229 | return grad_output * self.t2 * self.t1 ** (self.t2-1), 230 | --------------------------------------------------------------------------------
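Every operator in minitorch follows the same two-part pattern seen above: a forward function in `autograd/functional.py` builds the output `Tensor` and attaches a `grad_fn`, while a `Node` subclass in `autograd/node.py` implements `apply` to return gradients for its `next_edges`. As a sketch of how the graph could be extended, assuming a hypothetical `log` operator that is not part of the library (it mirrors the existing `exp`/`ExpBackward` pair):

```python
import numpy as np

from minitorch import Tensor
from minitorch.autograd.node import Node, collect_next_edges


class LogBackward(Node):

    def __init__(self):
        self.input: Tensor = None

    def apply(self, grad_output: Tensor) -> tuple:
        # d/dx log(x) = 1 / x, evaluated at the saved forward input
        return grad_output * Tensor(data=1.0 / self.input.data),


def log(t: Tensor) -> Tensor:
    data = np.log(t.data)
    if t.requires_grad:
        log_bw = LogBackward()
        log_bw.set_next_edges(collect_next_edges(t))
        log_bw.input = Tensor(data=t.data)
        return Tensor(data=data, requires_grad=True, grad_fn=log_bw)
    else:
        return Tensor(data=data)
```

A quick check of the sketch: with `t = Tensor(2.0, requires_grad=True)`, calling `log(t).backward()` should leave `t.grad` close to `Tensor(0.5)`, matching d log(x)/dx = 1/x at x = 2.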