├── .gitignore ├── nf ├── __init__.py ├── backend │ ├── numcy.c │ ├── numcy.pyx │ └── setup.py ├── nn │ ├── __init__.py │ ├── functional.py │ ├── modules │ │ ├── __init__.py │ │ ├── batchnorm.py │ │ ├── conv.py │ │ ├── init.py │ │ ├── linear.py │ │ └── module.py │ └── ops │ │ ├── activation.py │ │ └── conv.py ├── op.py ├── operation │ ├── __init__.py │ ├── creation │ │ └── funcs.py │ ├── indexing │ │ ├── __init__.py │ │ ├── funcs.py │ │ └── ops.py │ ├── manipulation │ │ ├── __init__.py │ │ ├── axis │ │ │ ├── __init__.py │ │ │ ├── funcs.py │ │ │ └── ops.py │ │ └── shape │ │ │ ├── __init__.py │ │ │ ├── funcs.py │ │ │ └── ops.py │ └── math │ │ ├── arithmetic │ │ ├── __init__.py │ │ ├── funcs.py │ │ └── ops.py │ │ ├── exp_log │ │ ├── funcs.py │ │ └── ops.py │ │ ├── linalg │ │ ├── funcs.py │ │ └── ops.py │ │ ├── misc │ │ ├── funcs.py │ │ └── ops.py │ │ ├── nondifferentiable.py │ │ └── statistics │ │ ├── funcs.py │ │ └── ops.py ├── optimizer │ ├── optimizer.py │ └── sgd.py └── tensor.py ├── readme.md ├── setup.py ├── test ├── dog.jpg ├── mnist.pkl ├── mnist.py ├── resnet.py ├── resnet_nf.py ├── test_Conv_Linear.py ├── test_Conv_mnist.py ├── test_Layers.py ├── test_Linear_mnist.py ├── test_Tensor.py └── test_nnfuncs.py ├── torch_playground ├── mnist.pkl ├── mnist.py ├── nf mnist.ipynb ├── pytorch mnist.ipynb └── test.png ├── 未命名.ipynb ├── 第一课.ipynb ├── 第三课.ipynb ├── 第二课.ipynb └── 第四课.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # MacOs shit 3 | .DS_Store 4 | 5 | # idea shit 6 | .idea/ 7 | 8 | # torch model 9 | .pth 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | 16 | # C extensions 17 | *.so 18 | 19 | *.egg-info/ 20 | 21 | # Distribution / packaging 22 | .Python 23 | build/ 24 | develop-eggs/ 25 | dist/ 26 | downloads/ 27 | eggs/ 28 | .eggs/ 29 | lib/ 30 | lib64/ 31 | parts/ 32 | sdist/ 33 | var/ 34 | wheels/ 35 | share/python-wheels/ 36 | *.egg-info/ 37 | .installed.cfg 38 | *.egg 39 | MANIFEST 40 | 41 | # PyInstaller 42 | # Usually these files are written by a python script from a template 43 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 44 | *.manifest 45 | *.spec 46 | 47 | # Installer logs 48 | pip-log.txt 49 | pip-delete-this-directory.txt 50 | 51 | # Unit test / coverage reports 52 | htmlcov/ 53 | .tox/ 54 | .nox/ 55 | .coverage 56 | .coverage.* 57 | .cache 58 | nosetests.xml 59 | coverage.xml 60 | *.cover 61 | *.py,cover 62 | .hypothesis/ 63 | .pytest_cache/ 64 | cover/ 65 | 66 | # Translations 67 | *.mo 68 | *.pot 69 | 70 | # Django stuff: 71 | *.log 72 | local_settings.py 73 | db.sqlite3 74 | db.sqlite3-journal 75 | 76 | # Flask stuff: 77 | instance/ 78 | .webassets-cache 79 | 80 | # Scrapy stuff: 81 | .scrapy 82 | 83 | # Sphinx documentation 84 | docs/_build/ 85 | 86 | # PyBuilder 87 | .pybuilder/ 88 | target/ 89 | 90 | # Jupyter Notebook 91 | .ipynb_checkpoints 92 | 93 | # IPython 94 | profile_default/ 95 | ipython_config.py 96 | 97 | # pyenv 98 | # For a library or package, you might want to ignore these files since the code is 99 | # intended to run in multiple environments; otherwise, check them in: 100 | # .python-version 101 | 102 | # pipenv 103 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
104 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 105 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 106 | # install all needed dependencies. 107 | #Pipfile.lock 108 | 109 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 110 | __pypackages__/ 111 | 112 | # Celery stuff 113 | celerybeat-schedule 114 | celerybeat.pid 115 | 116 | # SageMath parsed files 117 | *.sage.py 118 | 119 | # Environments 120 | .env 121 | .venv 122 | env/ 123 | venv/ 124 | ENV/ 125 | env.bak/ 126 | venv.bak/ 127 | 128 | # Spyder project settings 129 | .spyderproject 130 | .spyproject 131 | 132 | # Rope project settings 133 | .ropeproject 134 | 135 | # mkdocs documentation 136 | /site 137 | 138 | # mypy 139 | .mypy_cache/ 140 | .dmypy.json 141 | dmypy.json 142 | 143 | # Pyre type checker 144 | .pyre/ 145 | 146 | # pytype static type analyzer 147 | .pytype/ 148 | 149 | # Cython debug symbols 150 | cython_debug/ 151 | -------------------------------------------------------------------------------- /nf/__init__.py: -------------------------------------------------------------------------------- 1 | from nf.tensor import * 2 | from nf.operation.creation.funcs import * 3 | from nf.operation.indexing.funcs import * 4 | from nf.operation.manipulation.axis.funcs import * 5 | from nf.operation.manipulation.shape.funcs import * 6 | from nf.operation.math.nondifferentiable import * 7 | from nf.operation.math.arithmetic.funcs import * 8 | from nf.operation.math.exp_log.funcs import * 9 | from nf.operation.math.misc.funcs import * 10 | from nf.operation.math.statistics.funcs import * 11 | from nf.operation.math.linalg.funcs import * 12 | 13 | 14 | 15 | for attr in ( 16 | sum, 17 | mean, 18 | std, 19 | var, 20 | max, 21 | min, 22 | argmax, 23 | argmin, 24 | swapaxes, 25 | permute, 26 | transpose, 27 | # moveaxis, 28 | flatten, 29 | reshape, 30 | squeeze, 31 | dot, 32 | matmul, 33 | einsum, 34 | ): 35 | setattr(Tensor, attr.__name__, attr) -------------------------------------------------------------------------------- /nf/backend/numcy.pyx: -------------------------------------------------------------------------------- 1 | from time import time 2 | import numpy as np 3 | cimport numpy as np 4 | cimport cython 5 | from cython.parallel import prange 6 | 7 | ctypedef np.float32_t FLOAT32 8 | ctypedef np.int32_t INT32 9 | ctypedef np.long_t LONG 10 | ctypedef np.float64_t FLOAT64 11 | ctypedef fused FLOAT: 12 | FLOAT32 13 | FLOAT64 14 | 15 | def test(): 16 | print("aaaaaaaaaa啊啊") 17 | 18 | @cython.boundscheck(False) # 数组确定不会越界? 19 | @cython.wraparound(False) # 确定不会使用负号作为数组index? 20 | def maxpool2d_fw(np.ndarray[FLOAT, ndim=6] xview): 21 | cdef long B = xview.shape[0] 22 | cdef long C = xview.shape[1] 23 | cdef long H = xview.shape[2] 24 | cdef long W = xview.shape[3] 25 | cdef long K = xview.shape[4] 26 | cdef long L = xview.shape[5] 27 | cdef long b,c,h,w,k,l 28 | cdef FLOAT r 29 | cdef np.ndarray[FLOAT, ndim=4] result = np.zeros((B,C,H,W), dtype=xview.dtype) 30 | cdef FLOAT[:,:,:,::1] result_view = result 31 | cdef FLOAT[:,:,:,:,:,:] xview_view = xview 32 | for b in prange(B, nogil=True, schedule='guided'): 33 | for c in range(C): 34 | for h in range(H): 35 | for w in range(W): 36 | r = xview[b,c,h,w,0,0] 37 | for k in range(K): 38 | for l in range(L): 39 | r = r if r > xview_view[b,c,h,w,k,l] else xview_view[b,c,h,w,k,l] 40 | result_view[b,c,h,w] = r 41 | return result 42 | 43 | @cython.boundscheck(False) # 数组确定不会越界? 
44 | @cython.wraparound(False) # 确定不会使用负号作为数组index? 45 | def maxpool2d_bp(np.ndarray[FLOAT, ndim=6] xview, np.ndarray[FLOAT, ndim=6] dst, np.ndarray[FLOAT, ndim=4] grad): 46 | cdef long B = xview.shape[0] 47 | cdef long C = xview.shape[1] 48 | cdef long H = xview.shape[2] 49 | cdef long W = xview.shape[3] 50 | cdef long K = xview.shape[4] 51 | cdef long L = xview.shape[5] 52 | cdef long b,c,h,w,k,l,k0,k1 53 | cdef FLOAT[:,:,:,:] grad_view = grad 54 | cdef FLOAT[:,:,:,:,:,:] dst_view = dst 55 | cdef FLOAT[:,:,:,:,:,:] xview_view = xview 56 | for b in prange(B, nogil=True, schedule='guided'): 57 | for c in range(C): 58 | for h in range(H): 59 | for w in range(W): 60 | k0 = 0 61 | k1 = 0 62 | for k in range(K): 63 | for l in range(L): 64 | if xview_view[b,c,h,w,k0,k1] < xview_view[b,c,h,w,k,l]: 65 | k0 = k 66 | k1 = l 67 | dst_view[b,c,h,w,k0,k1] += grad_view[b,c,h,w] 68 | 69 | # def relu(np.ndarray[FLOAT] x): 70 | # """ 71 | # :param x: relu函数改变x的值,逐位修改x 72 | # :return: 73 | # """ -------------------------------------------------------------------------------- /nf/backend/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | from Cython.Build import cythonize 3 | import numpy 4 | 5 | setup( 6 | name='numcy', 7 | ext_modules=cythonize("numcy.pyx"), 8 | extra_compile_args = ["-O3", "-ffast-math", "-march=native", "-fopenmp"], 9 | extra_link_args=['-fopenmp'], 10 | include_dirs=[numpy.get_include()] 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /nf/nn/__init__.py: -------------------------------------------------------------------------------- 1 | from .modules import * 2 | -------------------------------------------------------------------------------- /nf/nn/functional.py: -------------------------------------------------------------------------------- 1 | import nf 2 | from .ops.activation import * 3 | from .ops.conv import * 4 | # from .ops.batchnorm import * 5 | 6 | from nf import Tensor 7 | import numpy as np 8 | 9 | # batchnorm modules 10 | 11 | 12 | # conv modules 13 | 14 | def pad(a, expanded_padding, mode='zeros'): 15 | return Tensor._op(Pad, a, op_args=(expanded_padding, mode)) 16 | 17 | def conv2d(a, weight, bias=None, padding='valid', stride=(1,1), dilation=(1,1), groups=1): 18 | """ 19 | NCHW 20 | :param a: NCHW 21 | :param weight: OIHW 22 | :param bias: 23 | :param padding: 24 | :param stride: sH, sW 25 | :param dilation: dH, dW 26 | :param groups: 27 | :return: 28 | """ 29 | assert padding in ('valid', 'same', 'full') or isinstance(padding, (tuple,list)) 30 | if isinstance(stride, int): 31 | stride = (stride, stride) 32 | assert isinstance(stride, (tuple,list)) 33 | if isinstance(dilation, int): 34 | dilation = (dilation, dilation) 35 | assert isinstance(dilation, (tuple,list)) 36 | bs, xch, xh, xw = a.shape 37 | zch, _, k0, k1 = weight.shape 38 | if isinstance(padding, (tuple, list)): 39 | a = Tensor._op(Pad, a, op_args=(padding, 'zeros')) 40 | if padding is 'same': 41 | zshape = np.ceil([xh / stride[0], xw / stride[1]]).astype(int) 42 | if stride[0] < k0: 43 | ph = (k0-1) * dilation[0] + zshape[0] * stride[0] - xh 44 | else: 45 | ph = zshape[0] * stride[0] - xh 46 | if stride[1] < k1: 47 | pw = (k1-1) * dilation[1] + zshape[1] * stride[1] - xw 48 | else: 49 | pw = zshape[1] * stride[1] - xw 50 | # padding = (pw//2, (pw+1)//2, ph//2, (ph+1)//2) 51 | padding = (ph//2, (ph+1)//2, pw//2, (pw+1)//2) 52 | # print(padding, pw, ph, zshape, xh, 
xw, stride) 53 | a = Tensor._op(Pad, a, op_args=(padding, 'zeros')) 54 | out = Tensor._op(Conv2d, a, weight, op_args=(stride, dilation)) 55 | if bias is not None: 56 | out = out + bias 57 | return out 58 | 59 | def max_pool2d(a, pool_size=(2, 2), stride=(2, 2), padding='valid'): 60 | """ 61 | :param a: 62 | :param pool_size: 63 | :param stride: 64 | :param padding: 65 | :return: 66 | """ 67 | assert padding in ('valid', 'same', 'full') or isinstance(padding, (tuple,list)) 68 | if isinstance(stride, int): 69 | stride = (stride, stride) 70 | assert isinstance(stride, (tuple,list)) 71 | if isinstance(pool_size, int): 72 | pool_size = (pool_size, pool_size) 73 | assert isinstance(pool_size, (tuple,list)) 74 | bs, xch, xh, xw = a.shape 75 | if isinstance(padding, (tuple, list)): 76 | a = Tensor._op(Pad, a, op_args=(padding, 'zeros')) 77 | # if padding is 'same': 78 | # zshape = np.ceil([xh / stride[0], xw / stride[1]]).astype(int) 79 | # if stride[0] < pool_size[0]: 80 | # ph = (pool_size[0]-1) * dilation[0] + zshape[0] * stride[0] - xh 81 | # else: 82 | # ph = zshape[0] * stride[0] - xh 83 | # if stride[1] < pool_size[1]: 84 | # pw = (pool_size[1]-1) * dilation[1] + zshape[1] * stride[1] - xw 85 | # else: 86 | # pw = zshape[1] * stride[1] - xw 87 | # # padding = (pw//2, (pw+1)//2, ph//2, (ph+1)//2) 88 | # padding = (ph//2, (ph+1)//2, pw//2, (pw+1)//2) 89 | # print(padding, pw, ph, zshape, xh, xw, stride) 90 | # a = Tensor._op(Pad, a, op_args=(padding, 'zeros')) 91 | out = Tensor._op(MaxPool2d, a, op_args=(pool_size, stride, )) 92 | # return out 93 | return out 94 | 95 | 96 | 97 | # activation modules 98 | def relu(a, inplace=False): 99 | return Tensor._op(ReLU, a) 100 | 101 | def sigmoid(a): 102 | return Tensor._op(Sigmoid, a) 103 | 104 | def softmax(a, dim = None): 105 | if dim is None: 106 | dim = -1 107 | return Tensor._op(Softmax, a, op_args=(dim,)) 108 | 109 | -------------------------------------------------------------------------------- /nf/nn/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .module import * 2 | from .conv import * 3 | from .linear import * 4 | from .batchnorm import * 5 | -------------------------------------------------------------------------------- /nf/nn/modules/batchnorm.py: -------------------------------------------------------------------------------- 1 | import nf 2 | from .module import Module 3 | from . 
import init 4 | 5 | 6 | __all__ = ['BatchNorm2d'] 7 | 8 | 9 | 10 | class BatchNorm2d(Module): 11 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True): 12 | super(BatchNorm2d, self).__init__() 13 | self.num_features = num_features 14 | self.eps = eps 15 | self.momentum = momentum 16 | self.affine = affine 17 | if self.affine: 18 | self.weight = nf.ones((1,num_features,1,1), requires_grad=True) 19 | self.bias = nf.zeros((1,num_features,1,1), requires_grad=True) 20 | else: 21 | self.weight = None 22 | self.bias = None 23 | self.running_mean = nf.zeros((1,num_features,1,1)) 24 | self.running_var = nf.ones((1,num_features,1,1)) 25 | self.num_batches_tracked = nf.zeros(1) 26 | # self.reset_parameters() 27 | 28 | def reset_parameters(self): 29 | init.zeros_(self.running_mean) 30 | init.ones_(self.running_var) 31 | self.num_batches_tracked = nf.zeros(1) 32 | if self.affine: 33 | init.ones_(self.weight) 34 | init.zeros_(self.bias) 35 | 36 | def forward(self, a): 37 | if self.momentum is None: 38 | exponential_average_factor = 0.0 39 | else: 40 | exponential_average_factor = self.momentum 41 | if self.trainable: 42 | self.num_batches_tracked += 1 43 | if self.momentum is None: # use cumulative moving average 44 | exponential_average_factor = 1.0 / float(self.num_batches_tracked) 45 | else: # use exponential moving average 46 | exponential_average_factor = self.momentum 47 | running_mean = nf.mean(a, axis=(0,2,3), keepdims=True) 48 | running_var = nf.var(a, axis=(0,2,3), keepdims=True, ddof=1) 49 | out = (a - running_mean) 50 | if self.weight: 51 | out = out * self.weight 52 | out = out / (running_var + self.eps) ** 0.5 53 | if self.bias: 54 | out += self.bias 55 | self.running_var = (1 - exponential_average_factor) * self.running_var + exponential_average_factor * running_var 56 | self.running_mean = (1 - exponential_average_factor) * self.running_mean + exponential_average_factor * running_mean 57 | return out -------------------------------------------------------------------------------- /nf/nn/modules/conv.py: -------------------------------------------------------------------------------- 1 | import nf 2 | from nf.nn import functional as F 3 | from .module import Module 4 | from . 
import init 5 | 6 | __all__ = ['Conv2d','MaxPool2d'] 7 | 8 | class _ConvNd(Module): 9 | 10 | def __init__(self, in_channels, out_channels, kernel_size, stride, 11 | padding='valid', dilation=1, transposed=False, output_padding=None, 12 | bias=True, padding_mode='zeros'): 13 | super(_ConvNd, self).__init__() 14 | self.in_channels = in_channels 15 | self.out_channels = out_channels 16 | self.kernel_size = kernel_size 17 | self.stride = stride 18 | self.padding = padding 19 | self.dilation = dilation 20 | self.transposed = transposed 21 | self.output_padding = output_padding 22 | self.padding_mode = padding_mode 23 | if transposed: 24 | self.weight = nf.zeros((in_channels, out_channels, *kernel_size), requires_grad=True) 25 | else: 26 | self.weight = nf.zeros((out_channels, in_channels, *kernel_size), requires_grad=True) 27 | if bias: 28 | self.bias = nf.zeros((out_channels, ) + (1,) * len(kernel_size), requires_grad=True) # 2020/4/26 bias的正向和反向传播还未测试 29 | else: 30 | self.bias = None 31 | self.reset_parameters() 32 | 33 | def reset_parameters(self): 34 | # 5 ** 0.5 = 2.23606798 35 | init.kaiming_uniform_(self.weight, a=2.23606798) 36 | if self.bias is not None: 37 | fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight) 38 | bound = 1 / (fan_in ** 0.5) 39 | init.uniform_(self.bias, -bound, bound) 40 | 41 | class _MaxPoolNd(Module): 42 | def __init__(self, pool_size, stride, padding='valid'): 43 | super(_MaxPoolNd, self).__init__() 44 | self.pool_size = pool_size 45 | self.stride = stride 46 | self.padding = padding 47 | 48 | class Conv2d(_ConvNd): 49 | # NCHW 50 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, 51 | padding='valid', dilation=1, 52 | bias=True, padding_mode='zeros'): 53 | if isinstance(kernel_size, int): 54 | kernel_size = (kernel_size, kernel_size) 55 | if isinstance(stride, int): 56 | stride = (stride, stride) 57 | if isinstance(dilation, int): 58 | dilation = (dilation, dilation) 59 | assert len(kernel_size) == 2 60 | assert len(stride) == 2 61 | assert len(dilation) == 2 62 | assert padding in ('valid', 'same', 'full') or isinstance(padding, (tuple,list)) 63 | assert padding_mode in ('zeros') 64 | 65 | super(Conv2d, self).__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, 66 | False, None, bias, padding_mode) 67 | 68 | 69 | def forward(self, a): 70 | return F.conv2d(a, self.weight, self.bias, self.padding, 71 | stride=self.stride, dilation=self.dilation) 72 | 73 | class MaxPool2d(_MaxPoolNd): 74 | # NCHW 75 | def __init__(self, pool_size, stride, padding='valid'): 76 | if isinstance(pool_size, int): 77 | pool_size = (pool_size, pool_size) 78 | if isinstance(stride, int): 79 | stride = (stride, stride) 80 | assert len(pool_size) == 2 81 | assert len(stride) == 2 82 | assert padding in ('valid', 'same', 'full') 83 | 84 | super(MaxPool2d, self).__init__(pool_size, stride, padding) 85 | 86 | 87 | def forward(self, a): 88 | return F.max_pool2d(a, self.pool_size, self.stride, self.padding) 89 | # return Functional.linear(input, self.weight, self.bias) 90 | 91 | 92 | -------------------------------------------------------------------------------- /nf/nn/modules/init.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | 4 | def _calculate_fan_in_and_fan_out(tensor): 5 | dimensions = tensor.ndim 6 | if dimensions < 2: 7 | raise ValueError("Fan in and fan out can not be computed for tensor with fewer than 2 dimensions") 8 | 9 | num_input_fmaps = 
tensor.shape[1] 10 | num_output_fmaps = tensor.shape[0] 11 | receptive_field_size = 1 12 | if tensor.ndim > 2: 13 | receptive_field_size = tensor.data[0][0].size 14 | fan_in = num_input_fmaps * receptive_field_size 15 | fan_out = num_output_fmaps * receptive_field_size 16 | 17 | return fan_in, fan_out 18 | 19 | def _calculate_correct_fan(tensor, mode): 20 | mode = mode.lower() 21 | valid_modes = ['fan_in', 'fan_out'] 22 | if mode not in valid_modes: 23 | raise ValueError("Mode {} not supported, please use one of {}".format(mode, valid_modes)) 24 | 25 | fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor) 26 | return fan_in if mode == 'fan_in' else fan_out 27 | 28 | 29 | def calculate_gain(nonlinearity, param=None): 30 | linear_fns = ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d'] 31 | if nonlinearity in linear_fns or nonlinearity == 'sigmoid': 32 | return 1 33 | elif nonlinearity == 'tanh': 34 | return 5.0 / 3 35 | elif nonlinearity == 'relu': 36 | return math.sqrt(2.0) 37 | elif nonlinearity == 'leaky_relu': 38 | if param is None: 39 | negative_slope = 0.01 40 | elif not isinstance(param, bool) and isinstance(param, int) or isinstance(param, float): 41 | # True/False are instances of int, hence check above 42 | negative_slope = param 43 | else: 44 | raise ValueError("negative_slope {} not a valid number".format(param)) 45 | return math.sqrt(2.0 / (1 + negative_slope ** 2)) 46 | else: 47 | raise ValueError("Unsupported nonlinearity {}".format(nonlinearity)) 48 | 49 | 50 | 51 | def kaiming_uniform_(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu'): 52 | fan = _calculate_correct_fan(tensor, mode) 53 | gain = calculate_gain(nonlinearity, a) 54 | std = gain / math.sqrt(fan) 55 | bound = math.sqrt(3.0) * std # Calculate uniform bounds from standard deviation 56 | uniform_(tensor,-bound, bound) 57 | 58 | 59 | 60 | def uniform_(tensor, low, high): 61 | tensor.data[:] = np.random.uniform(low, high, tensor.shape) 62 | 63 | 64 | def ones_(tensor): 65 | tensor.data[:] = 1.0 66 | 67 | 68 | def zeros_(tensor): 69 | tensor.data[:] = 0.0 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /nf/nn/modules/linear.py: -------------------------------------------------------------------------------- 1 | import nf 2 | from .module import Module 3 | from . 
import init 4 | 5 | 6 | __all__ = ['Linear'] 7 | 8 | class Linear(Module): 9 | def __init__(self, in_features, out_features, bias=True): 10 | super(Linear, self).__init__() 11 | self.in_features = in_features 12 | self.out_features = out_features 13 | self.weight = nf.zeros([out_features, in_features], requires_grad=True) 14 | self.bias = nf.zeros([out_features], requires_grad=True) if bias else None 15 | self.reset_parameters() 16 | 17 | def reset_parameters(self): 18 | # 5 ** 0.5 = 2.23606798 19 | init.kaiming_uniform_(self.weight, a=2.23606798) 20 | 21 | if self.bias is not None: 22 | fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight) 23 | bound = 1 / (fan_in ** 0.5) 24 | init.uniform_(self.bias, -bound, bound) 25 | 26 | def forward(self, input): 27 | if input.ndim == 2 and self.bias is not None: 28 | ret = input @ self.weight.T + self.bias 29 | else: 30 | output = input @ self.weight.T 31 | if self.bias is not None: 32 | output += self.bias 33 | ret = output 34 | return ret 35 | -------------------------------------------------------------------------------- /nf/nn/modules/module.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | import itertools 4 | from nf import Tensor 5 | 6 | __all__ = ["Module"] 7 | 8 | 9 | def _addindent(s_, numSpaces): 10 | s = s_.split('\n') 11 | # don't do anything for single-line stuff 12 | if len(s) == 1: 13 | return s_ 14 | first = s.pop(0) 15 | s = [(numSpaces * ' ') + line for line in s] 16 | s = '\n'.join(s) 17 | s = first + '\n' + s 18 | return s 19 | 20 | class Module(object): 21 | """ 22 | Module模块大部分参照torch的Module模块,因为nf设定与torch模型兼容,所以此处代码可直接从torch抄来啦。 23 | 已完成一般参数从torch模型中导入。 24 | """ 25 | _version = 1.0 26 | def __init__(self): 27 | self.trainable = True 28 | self._buffers = OrderedDict() 29 | self._parameters = OrderedDict() 30 | self._modules = OrderedDict() 31 | 32 | def forward(self, *input): 33 | raise NotImplementedError 34 | 35 | def _get_name(self): 36 | return self.__class__.__name__ 37 | 38 | def named_modules(self, memo=None, prefix=''): 39 | if memo is None: 40 | memo = set() 41 | if self not in memo: 42 | memo.add(self) 43 | yield prefix, self 44 | for name, module in self._modules.items(): 45 | if module is None: 46 | continue 47 | submodule_prefix = prefix + ('.' 
if prefix else '') + name 48 | for m in module.named_modules(memo, submodule_prefix): 49 | yield m 50 | 51 | def register_parameter(self, name, param): 52 | if hasattr(self, name) and name not in self._parameters: 53 | raise KeyError("属性已存在 '{}'".format(name)) 54 | if not isinstance(param, Tensor): 55 | raise TypeError("无法将 '{}' 类型设为 '{}' " 56 | "(权值参数需要 nf.Tensor 类型)" 57 | .format(type(param), name)) 58 | elif not param.requires_grad: 59 | raise ValueError( 60 | "权值 {} 梯度应当开启".format(name)) 61 | self._parameters[name] = param 62 | 63 | def __getattr__(self, name): 64 | """ 65 | 结合 __setattr__ 中的 remove_from ,可以完成属性的隐藏和可控地访问。 66 | :param name: 67 | :return: 68 | """ 69 | _parameters = self.__dict__['_parameters'] 70 | if name in _parameters: 71 | return _parameters[name] 72 | _modules = self.__dict__['_modules'] 73 | if name in _modules: 74 | return _modules[name] 75 | _buffers = self.__dict__['_buffers'] 76 | if name in _buffers: 77 | return _buffers[name] 78 | raise AttributeError("'{}' 属性不存在".format(name)) 79 | 80 | def __setattr__(self, name, value): 81 | """ 82 | 从 __dict__ 中删除某个属性,是为了让外部可控地访问该属性,相当于java中可控的private 83 | NOTE: modules中所有可求梯度的tensor一律视为权值,所以对于那些非权值的Tensor属性,将其requires_grad设为False! 84 | :param name: 85 | :param value: 86 | :return: 87 | """ 88 | def remove_from(*dicts): 89 | for d in dicts: 90 | if name in d: 91 | del d[name] 92 | 93 | if isinstance(value, Tensor) and value.requires_grad: # modules中所有可求梯度的tensor一律视为权值 94 | remove_from(self.__dict__, self._modules, self._buffers) 95 | self.register_parameter(name, value) 96 | elif isinstance(value, Module): 97 | remove_from(self.__dict__, self._parameters, self._buffers) 98 | self._modules[name] = value 99 | elif isinstance(value, Tensor) and not value.requires_grad: 100 | remove_from(self.__dict__, self._parameters, self._modules) 101 | self._buffers[name] = value 102 | else: 103 | object.__setattr__(self, name, value) 104 | 105 | def _named_members(self, get_members_fn, prefix='', recurse=True): 106 | r"""Helper method for yielding various names + members of modules.""" 107 | memo = set() 108 | modules = self.named_modules(prefix=prefix) if recurse else [(prefix, self)] 109 | for module_prefix, module in modules: 110 | members = get_members_fn(module) 111 | for k, v in members: 112 | if v is None or v in memo: 113 | continue 114 | memo.add(v) 115 | name = module_prefix + ('.' 
if module_prefix else '') + k 116 | yield name, v 117 | 118 | def named_parameters(self, prefix='', recurse=True): 119 | gen = self._named_members( 120 | lambda module: module._parameters.items(), 121 | prefix=prefix, recurse=recurse) 122 | for elem in gen: 123 | yield elem 124 | 125 | def parameters(self, recurse=True): 126 | for name, param in self.named_parameters(recurse=recurse): 127 | yield param 128 | 129 | def _save_to_state_dict(self, destination, prefix, keep_vars): 130 | state_dict = itertools.chain(self._parameters.items(), self._buffers.items()) 131 | for name, param in state_dict: 132 | if param is not None: 133 | destination[prefix + name] = param if keep_vars else param 134 | 135 | 136 | def state_dict(self, destination=None, prefix='', keep_vars=False): 137 | if destination is None: 138 | destination = OrderedDict() 139 | destination._metadata = OrderedDict() 140 | destination._metadata[prefix[:-1]] = dict(version=self._version) 141 | self._save_to_state_dict(destination, prefix, keep_vars) 142 | for name, module in self._modules.items(): 143 | if module is not None: 144 | module.state_dict(destination, prefix + name + '.', keep_vars=keep_vars) 145 | return destination 146 | 147 | def _load_from_state_dict(self, state_dict, prefix, 148 | missing_keys, unexpected_keys, error_msgs): 149 | local_name_params = itertools.chain(self._parameters.items(), self._buffers.items()) 150 | local_state = {k: v for k, v in local_name_params if v is not None} 151 | 152 | for name, param in local_state.items(): 153 | key = prefix + name 154 | if key in state_dict: 155 | input_param = state_dict[key] 156 | 157 | # Backward compatibility: loading 1-dim tensor from 0.3.* to version 0.4+ 158 | if len(param.shape) == 0 and len(input_param.shape) == 1: 159 | input_param = input_param[0] 160 | 161 | if input_param.shape != param.shape: 162 | if input_param.size == param.size: 163 | input_param = input_param.reshape(param.shape) 164 | else: 165 | # local shape should match the one in checkpoint 166 | error_msgs.append('size mismatch for {}: copying a param with shape {} from checkpoint, ' 167 | 'the shape in current model is {}.' 168 | .format(key, input_param.shape, param.shape)) 169 | continue 170 | 171 | if isinstance(input_param, np.ndarray): 172 | # backwards compatibility for serialized parameters 173 | input_param = Tensor(input_param) 174 | try: 175 | # print("ppp",id(param), param.requires_grad) 176 | param.copy_(input_param) 177 | # print("ppp", id(param), param.requires_grad) 178 | except Exception: 179 | error_msgs.append('While copying the parameter named "{}", ' 180 | 'whose dimensions in the model are {} and ' 181 | 'whose dimensions in the checkpoint are {}.' 
182 | .format(key, param.shape, input_param.shape)) 183 | else: 184 | missing_keys.append(key) 185 | 186 | for key in state_dict.keys(): 187 | if key.startswith(prefix): 188 | input_name = key[len(prefix):] 189 | input_name = input_name.split('.', 1)[0] # get the name of param/buffer/child 190 | if input_name not in self._modules and input_name not in local_state: 191 | unexpected_keys.append(key) 192 | 193 | def load_state_dict(self, state_dict): 194 | missing_keys = [] 195 | unexpected_keys = [] 196 | error_msgs = [] 197 | 198 | def load(module, prefix=''): 199 | module._load_from_state_dict(state_dict, prefix, missing_keys, unexpected_keys, error_msgs) 200 | for name, child in module._modules.items(): 201 | if child is not None: 202 | load(child, prefix + name + '.') 203 | 204 | load(self) 205 | load = None # break load->load reference cycle 206 | 207 | if len(unexpected_keys) > 0: 208 | error_msgs.insert( 209 | 0, 'Unexpected key(s) in state_dict: {}. '.format( 210 | ', '.join('"{}"'.format(k) for k in unexpected_keys))) 211 | if len(missing_keys) > 0: 212 | error_msgs.insert( 213 | 0, 'Missing key(s) in state_dict: {}. '.format( 214 | ', '.join('"{}"'.format(k) for k in missing_keys))) 215 | 216 | if len(error_msgs) > 0: 217 | raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format( 218 | self.__class__.__name__, "\n\t".join(error_msgs))) 219 | 220 | def __call__(self, *inputs): 221 | return self.forward(*inputs) 222 | 223 | 224 | -------------------------------------------------------------------------------- /nf/nn/ops/activation.py: -------------------------------------------------------------------------------- 1 | from nf.op import Operation 2 | import numpy as np 3 | 4 | __all__ = ['ReLU','Sigmoid','Softmax'] 5 | 6 | 7 | 8 | 9 | 10 | class ReLU(Operation): 11 | def __call__(self, a): 12 | self.variables = (a,) 13 | self.mask = a.data>0.0 14 | out = a.data * self.mask 15 | return out 16 | 17 | def backward(self, grad, **kwargs): 18 | a = self.variables[0] 19 | self.variables[0].backward(grad * self.mask) 20 | 21 | class Sigmoid(Operation): 22 | testLevel = 2 23 | 24 | def __call__(self, a): 25 | self.variables = (a,) 26 | out = 1 / (1 + np.exp(-a.data)) 27 | self.out = out 28 | return out 29 | 30 | def backward(self, grad, **kwargs): 31 | a = self.variables[0] 32 | # f = 1 / (1 + np.exp(-a.data)) 33 | f = self.out * (1 - self.out) 34 | self.variables[0].backward(grad * f) 35 | 36 | class Softmax(Operation): 37 | def __call__(self, a, dim): 38 | self.variables = (a,) 39 | self.dim = dim 40 | # print(a.data) 41 | out = np.exp(a.data - np.max(a.data, axis=self.dim, keepdims=True)) 42 | out /= np.sum(out, axis=self.dim, keepdims=True) 43 | self.out = out 44 | return out 45 | 46 | 47 | def backward(self, grad, **kwargs): 48 | a = self.variables[0] 49 | grad *= self.out 50 | p = grad.sum(axis=self.dim, keepdims=True) 51 | grad -= self.out * p 52 | self.variables[0].backward(grad) 53 | 54 | 55 | -------------------------------------------------------------------------------- /nf/nn/ops/conv.py: -------------------------------------------------------------------------------- 1 | from nf.op import Operation 2 | import numpy as np 3 | from nf.backend import numcy 4 | 5 | __all__ = ['Conv2d','Pad','MaxPool2d'] 6 | 7 | 8 | def _pad(x, ep): 9 | # 保持跟numpy的pad接口一致 10 | xshape = np.array(x.shape) 11 | ep = np.array(ep) 12 | es = ep.sum(1) 13 | pad_array = np.zeros(xshape+es) 14 | ep = tuple(slice(si[0], -si[1]) if si[1] > 0 else slice(None, None) for si in ep) 15 | 
pad_array[ep] = x[:] 16 | return pad_array 17 | 18 | 19 | def im2bchwkl(x, ksize, stride=(1, 1), dilation=(1, 1), writeable=False): 20 | """ 21 | :param x: [bs, xch, x1, x2] 22 | :param ksize: [k1, k2] 23 | :param stride: [s1, s2] 24 | :param dilation: [d1, d2] 25 | :param writeable: 26 | :return: [bs, xch, z0, z1, k0, k1] 27 | """ 28 | bs, xch, x1, x2 = x.shape 29 | H = (x1-(dilation[0]*(ksize[0]-1)+1))//(stride[0])+1 30 | W = (x2-(dilation[1]*(ksize[1]-1)+1))//(stride[1])+1 31 | _s = np.array([xch * x1 * x2, x1 * x2, x2 * stride[0], stride[1], x2 * dilation[0], dilation[1]]) * x.itemsize 32 | return np.lib.stride_tricks.as_strided(x, 33 | (bs, xch, H, W, ksize[0], ksize[1]), 34 | _s, 35 | writeable=writeable,) 36 | 37 | 38 | def _conv2d_fw(x, w, stride, dilation): 39 | x = im2bchwkl(x, w.shape[-2:], stride, dilation) # [bs, xch, z0, z1, k0, k1] 40 | # out = np.tensordot(x, weight.data, ((1, 4, 5), (1, 2, 3))) # [bs, z0, z1, zch] 41 | # out = out.transpose(0, 3, 1, 2) 42 | # if self.path is None: 43 | # t1 = time() 44 | # self.path = np.einsum_path('bchwkl,zckl->bzhw',x, weight.data, optimize=True)[1] 45 | # print(time()-t1) 46 | # print(a.shape, x.shape, weight.shape) 47 | out = np.einsum('bchwkl,zckl->bzhw', x, w, optimize=True, order='C') 48 | return out 49 | 50 | def _conv2d_bpl(xt, w, s, d): 51 | """ 52 | xt is result tensor grad, w is weight tensor, zt is input gradient 53 | 在卷积层前向传播时,有: 54 | z = (x - d * (k - 1) - 1 + s) // s 55 | 在反向传播求上一层梯度时,我们先推导,在前向传播时,上式的x一般不能使得完成整除, 56 | 必然有后面y位被舍弃,未参加卷积,故上式可改成 : 57 | z = (x - d * (k - 1) - 1 + s - y) / s 58 | 反向传播时,我们期望得到大小为x的矩阵,但是由于y的存在,y的那部分的梯度应当为0, 59 | 所以我们在反卷积的时候,只能求出 (x-y) 部分的导数。 60 | xt = z 61 | 经过dilate和padding之后 62 | xt = (z - 1) * s + 1 + 2 * (k - 1) 63 | 我们先以特殊情况 d = 1 来计算: 64 | zt = xt - k + 1 65 | = z * s - s + 2 * k - 1 - k + 1 66 | = x - y - (k - 1) - 1 + s - s + k 67 | = x - y 68 | 至此,求得导数是 zt = x - y 大小的矩阵 69 | :param x: ( bs, zch, x0, x1) 70 | :param w: (zch, xch, k0, k1) 71 | :param s: (s1, s2) 72 | :return: (bs, xch, z0, z1) 73 | """ 74 | bs , zch, x0, x1 = xt.shape 75 | zch, xch, k0, k1 = w.shape 76 | 77 | # dilate a numpy array 78 | ph = 2*d[0]*(k0-1) + s[0] * (x0-1) + 1 79 | pw = 2*d[1]*(k1-1) + s[1] * (x1-1) + 1 80 | dil_array = np.zeros((bs, zch, ph, pw)) 81 | if k0 is 1: 82 | slice_h = slice(None, None, s[0]) 83 | else: 84 | slice_h = slice(d[0]*(k0-1),-d[0]*(k0-1),s[0]) 85 | if k1 is 1: 86 | slice_w = slice(None, None, s[1]) 87 | else: 88 | slice_w = slice(d[1]*(k1-1),-d[1]*(k1-1),s[1]) 89 | dil_array[:,:,slice_h,slice_w] = xt[:,:] 90 | x = im2bchwkl(dil_array, w.shape[-2:], (1,1), d) # [bs, xch, z0, z1, k0, k1] 91 | zt = np.einsum('bchwkl,czkl->bzhw', x, w[:,:,::-1,::-1], optimize=True, order='C') 92 | # w = w[:, :, ::-1, ::-1].transpose(1, 0, 2, 3) # (xch, zch, k0, k1) 93 | # zt = _conv2d_fw(dil_array, w, (1,1), d) 94 | return zt 95 | 96 | def _conv2d_bpw(x, z, s, d): 97 | """ 98 | x is input tensor, z is output gradient, w is weight tensor gradient 99 | 以 z 作为卷积核,对x做valid卷积操作,步长为1,空洞为s 100 | 有 z = (x - d * (k - 1) - 1 + s - y) / s 101 | kt = (z - 1) * s + 1 102 | = x - y - (k - 1) 103 | zt = x - kt + 1 104 | = x - x + y + k - 1 + 1 105 | = k + y 106 | 最终得到的 zt 需要将后面的 y 位舍弃 107 | :param x: (bs, xch, x0, x1) 108 | :param z: (bs, zch, z0, z1) 109 | :param s: (s1, s2) 110 | :return: (zsh, xch, k0, k1) 111 | """ 112 | x = im2bchwkl(x, z.shape[-2:], d, s) # [bs, xch, z0, z1, k0, k1] 113 | zt = np.einsum('bchwkl,bzkl->zchw', x, z, optimize=True, order='C') 114 | return zt 115 | 116 | from time import time 117 | class 
Conv2d(Operation): 118 | path = None 119 | def __call__(self, a, weight, stride=(1,1), dilation=(1,1)): 120 | """ 121 | NCHW 122 | :param a: (bs, xch, xh, xw) 123 | :param weight: (zch, xch, k1, k2) 124 | :return: z: (bs, zch, z1, z2) 125 | """ 126 | self.variables = (a, weight) 127 | self.stride = stride 128 | self.dilation = dilation 129 | out = _conv2d_fw(a.data, weight.data, stride, dilation) 130 | return out 131 | 132 | def backward(self, grad, **kwargs): 133 | a, b = self.variables 134 | grada = _conv2d_bpl(xt=grad, w=b.data, s=self.stride, d=self.dilation) 135 | 136 | if grada.shape != a.shape: 137 | _, _, zh, zw = grada.shape 138 | zt = np.zeros(a.shape) 139 | zt[:,:,:zh,:zw] = grada[:,:] 140 | grada = zt 141 | # print("bp", grad.shape, grada.shape, a.shape, b.shape) 142 | self.variables[0].backward(grada) 143 | # 获得本层的 delta_w 144 | delta_w = _conv2d_bpw(a.data, grad, self.stride, self.dilation) 145 | # print("d",delta_w.shape, b.shape) 146 | # print(delta_w) 147 | _w1, _w2, _w3, _w4 = b.shape 148 | delta_w = delta_w[:_w1, :_w2, :_w3, :_w4] 149 | self.variables[1].backward(delta_w) 150 | 151 | class Pad(Operation): 152 | def __call__(self, a, expanded_padding, mode='zeros'): 153 | """ 154 | :param a: 155 | :param expanded_padding: 156 | 注意,torch的padding格式为(a,b,c,d,e,f,...),其中(a,b)表示0维的前后padding数量,(c,d)表示1维的padding数量,以此类推 157 | 而numpy的padding格式为((a,b),(c,d),(e,f),...),其中(a,b)表示0维的前后padding数量,(c,d)表示1维的padding数量,以此类推 158 | torch的padding,如果缺少,比如输入(a,b),而实际上输入有多个维度,则表示其他维度padding都为0,只有-1维padding(a,b), 159 | 并且torch强制用户输入的len(expanded_padding) 是偶数个,此处我们与torch保持一致,要求输入为偶数个 160 | numpy的padding,如果缺少,比如输入((a,b),),而实际上有多个维度,则表示所有维度padding都为(a,b) 161 | 为了与torch对线,我们此处用numpy的padding实现torch的padding接口。 162 | :param mode: 163 | :return: 164 | """ 165 | assert len(expanded_padding) % 2 == 0 166 | self.variables = (a,) 167 | t = a.ndim * 2 - len(expanded_padding) 168 | self.expanded = (0,) * t 169 | self.expanded += expanded_padding 170 | 171 | self.expanded = np.array(self.expanded).reshape(-1,2) 172 | # self.expanded = np.array(self.expanded).reshape(-1,2)[::-1,:] 173 | # print(self.expanded, a) 174 | # out = np.pad(a.data, self.expanded) 175 | out = _pad(a.data, self.expanded) 176 | return out 177 | 178 | def backward(self, grad, **kwargs): 179 | a = self.variables[0] 180 | ashape = a.shape 181 | ep = tuple(slice(si[0], -si[1]) if si[1] > 0 else slice(None, None) for si in self.expanded) 182 | grad = grad[ep] 183 | # print("pad grad", grad.shape,ep) 184 | self.variables[0].backward(grad) 185 | 186 | class MaxPool2d(Operation): 187 | def __call__(self, a, pool_size=(2, 2), stride=(2, 2)): 188 | """ 189 | :param a: 190 | :param pool_size: 191 | :param strides: 192 | :param pool_mode: 'max' or 'avg' 193 | """ 194 | self.variables = (a, ) 195 | self.stride = stride 196 | self.pool_size = pool_size 197 | self.block_view = im2bchwkl(a.data, self.pool_size, self.stride, (1, 1), True) 198 | out = numcy.maxpool2d_fw(self.block_view) 199 | return out 200 | 201 | def backward(self, grad, **kwargs): 202 | a = self.variables[0] 203 | outgrad = np.zeros_like(a.data) 204 | dst = im2bchwkl(outgrad, self.pool_size, self.stride, (1, 1), True) 205 | numcy.maxpool2d_bp(self.block_view, dst, grad) 206 | self.variables[0].backward(outgrad) 207 | -------------------------------------------------------------------------------- /nf/op.py: -------------------------------------------------------------------------------- 1 | __all__ = ['Operation'] 2 | 3 | class Operation: 4 | """ 5 | 所有Op的基类 6 | 7 | testLevel 表明当前类的测试等级, 
8 | 0表示已经经过大量测试, 9 | 1表示已经经过中等数量测试, 10 | 2表示经过少量无极端示例测试, 11 | 3表示未经过测试 12 | 4表示不够信任 13 | """ 14 | testLevel = 0 15 | def __call__(self, *input_vars): 16 | self.variables = input_vars 17 | raise NotImplementedError 18 | 19 | def broadcastable(self, grad, ashape): 20 | """ 21 | 保证传递的梯度shape一致,用于兼容广播机制的反向传播 22 | :param grad: 23 | :param ashape: 24 | :return: 25 | """ 26 | if grad.shape == ashape: 27 | return grad 28 | grad_bak = grad.sum(axis=tuple(range(grad.ndim - len(ashape)))) 29 | # print("g", grad_bak.shape, ashape) 30 | keepdims = tuple(n for (n, i) in enumerate(grad_bak.shape) if i != ashape[n]) 31 | if keepdims: 32 | grad_bak = grad_bak.sum(axis=keepdims, keepdims=True) 33 | # print("g", grad_bak.shape, ashape) 34 | return grad_bak 35 | 36 | def backward(self, grad, **kwargs): 37 | raise NotImplementedError 38 | -------------------------------------------------------------------------------- /nf/operation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RanFeng/NumpyFlow/9502b7328ef81ed7905baa31a12f303da8ab2e66/nf/operation/__init__.py -------------------------------------------------------------------------------- /nf/operation/creation/funcs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from nf.tensor import Tensor 3 | 4 | 5 | __all__ = [ 6 | "empty", 7 | "empty_like", 8 | "ones", 9 | "ones_like", 10 | "zeros", 11 | "zeros_like", 12 | "rands" 13 | ] 14 | 15 | 16 | def empty(shape, dtype=np.float32, requires_grad=False): 17 | """ 18 | 将 np.empty(shape, dtype) 填入Tensor的数据中 19 | :return 20 | 返回一个给定shape和dtype的Tensor 21 | """ 22 | return Tensor(np.empty(shape, dtype), requires_grad=requires_grad) 23 | 24 | 25 | def empty_like(other, dtype=None, requires_grad=False): 26 | """ 27 | 将 np.empty_like(other, dtype) 填入Tensor的数据中 28 | :return 29 | 返回一个与目标形状和类型一致的Tensor 30 | """ 31 | if isinstance(other, Tensor): 32 | other = other.data 33 | 34 | return Tensor(np.empty_like(other, dtype), requires_grad=requires_grad) 35 | 36 | 37 | def ones(shape, dtype=np.float32, requires_grad=False): 38 | """ 39 | 将 np.ones(shape, dtype) 填入Tensor的数据中 40 | :return 41 | 返回一个给定shape和dtype的Tensor 42 | """ 43 | return Tensor(np.ones(shape, dtype), requires_grad=requires_grad) 44 | 45 | 46 | def ones_like(other, dtype=None, requires_grad=False): 47 | """ 48 | 将 np.ones_like(other, dtype) 填入Tensor的数据中 49 | :return 50 | 返回一个与目标形状和类型一致的Tensor 51 | """ 52 | if isinstance(other, Tensor): 53 | other = other.data 54 | 55 | return Tensor(np.ones_like(other, dtype), requires_grad=requires_grad) 56 | 57 | 58 | def zeros(shape, dtype=np.float32, requires_grad=False): 59 | """ 60 | 将 np.zeros(shape, dtype) 填入Tensor的数据中 61 | :return 62 | 返回一个给定shape和dtype的Tensor 63 | """ 64 | return Tensor(np.zeros(shape, dtype), requires_grad=requires_grad) 65 | 66 | 67 | def zeros_like(other, dtype=None, requires_grad=False): 68 | """ 69 | 将 np.zeros_like(other, dtype) 填入Tensor的数据中 70 | :return 71 | 返回一个与目标形状和类型一致的Tensor 72 | """ 73 | if isinstance(other, Tensor): 74 | other = other.data 75 | 76 | return Tensor(np.zeros_like(other, dtype), requires_grad=requires_grad) 77 | 78 | def rands(shape, requires_grad=False): 79 | """ 80 | 将 np.random.random(shape) 填入Tensor的数据中 81 | :return 82 | 返回一个给定shape的随机Tensor 83 | """ 84 | return Tensor(np.random.random(shape), requires_grad=requires_grad) 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 
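Illustrative usage sketch (not from the repository source): a minimal example of how the creation helpers above are expected to combine with the autograd Tensor. nf.rands, nf.ones and Tensor.data come from the files shown here; the @ matmul operator and the .sum() method are attached to Tensor in nf/nn/modules/linear.py and nf/__init__.py; seeding backward(...) with an ndarray mirrors the Tensor.backward(grad) convention used inside the Operation classes. Shapes and values are purely illustrative, and the exact name of the gradient buffer on the Tensor depends on tensor.py, which is not shown here.

    import numpy as np
    import nf

    w = nf.rands((3, 4), requires_grad=True)   # random weights that should receive gradients
    x = nf.ones((4, 2))                        # constant input, gradients not tracked
    y = (w @ x).sum()                          # forward pass records the matmul and sum operations
    y.backward(np.ones_like(y.data))           # seed dL/dy = 1 and propagate back towards w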
-------------------------------------------------------------------------------- /nf/operation/indexing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RanFeng/NumpyFlow/9502b7328ef81ed7905baa31a12f303da8ab2e66/nf/operation/indexing/__init__.py -------------------------------------------------------------------------------- /nf/operation/indexing/funcs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from nf.tensor import Tensor 3 | from .ops import * 4 | 5 | __all__ = ["where"] 6 | 7 | 8 | def where(condition, x=None, y=None, requires_grad=False): 9 | if x is None and y is None: 10 | if isinstance(condition, Tensor): 11 | condition = condition.data 12 | return np.where(condition) 13 | 14 | return Tensor._op(Where, x, y, op_kwargs=dict(condition=condition), requires_grad=requires_grad) 15 | -------------------------------------------------------------------------------- /nf/operation/indexing/ops.py: -------------------------------------------------------------------------------- 1 | from nf.op import Operation 2 | import numpy as np 3 | 4 | 5 | __all__ = ['GetItem','Where'] 6 | 7 | class GetItem(Operation): 8 | 9 | def __call__(self, a, index): 10 | """ 11 | 使得Tensor能像numpy数组一样被index访问并支持反向传播,例如a[3],a[3,2,1]等等 12 | """ 13 | self.variables = (a,) 14 | self.index = index 15 | return a.data[index] 16 | 17 | def backward(self, grad, **kwargs): 18 | a = self.variables[0] 19 | out = np.zeros_like(a.data) 20 | grad = grad.sum(axis=tuple(range(grad.ndim - out[self.index].ndim))) 21 | keepdims = tuple(n for (n, i) in enumerate(grad.shape) if i != out[self.index].shape[n]) 22 | if keepdims: 23 | grad = grad.sum(axis=keepdims, keepdims=True) 24 | np.add.at(out, self.index, grad) 25 | self.variables[0].backward(out) 26 | 27 | class Where(Operation): 28 | def __call__(self, a, b, *, condition): 29 | self.variables = (a, b) 30 | self.condition = np.asarray(condition, dtype=bool) 31 | return np.where(condition, a.data, b.data) 32 | 33 | def backward(self, grad, **kwargs): 34 | self.variables[0].backward(np.where(self.condition, grad, 0)) 35 | self.variables[1].backward(np.where(~self.condition, grad, 0)) 36 | 37 | -------------------------------------------------------------------------------- /nf/operation/manipulation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RanFeng/NumpyFlow/9502b7328ef81ed7905baa31a12f303da8ab2e66/nf/operation/manipulation/__init__.py -------------------------------------------------------------------------------- /nf/operation/manipulation/axis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RanFeng/NumpyFlow/9502b7328ef81ed7905baa31a12f303da8ab2e66/nf/operation/manipulation/axis/__init__.py -------------------------------------------------------------------------------- /nf/operation/manipulation/axis/funcs.py: -------------------------------------------------------------------------------- 1 | from nf.tensor import Tensor 2 | 3 | from .ops import * 4 | 5 | __all__ = ["permute","transpose", "swapaxes"] 6 | 7 | 8 | def permute(a, *axes, requires_grad=False): 9 | """ 10 | 重新排列Tensor的各个维度,等同于numpy中的np.transpose操作 11 | """ 12 | if not axes: 13 | axes = None 14 | return Tensor._op(Permute, a, op_args=(axes,), requires_grad=requires_grad) 15 | 16 | def transpose(a, *axes, 
requires_grad=False): 17 | """ 18 | 转置矩阵,目前适用二维Tensor 19 | :param a: 20 | :param axes: 21 | :param requires_grad: 22 | :return: 23 | """ 24 | if(a.ndim < 2): 25 | raise NotImplemented("此处应当自动扩维,但是还未实现") 26 | alist = list(range(a.ndim)) 27 | if not axes: # 若不指定axes,则默认置换最后两个维度 28 | axes = [a.ndim-1, a.ndim-2] 29 | alist[axes[0]], alist[axes[1]] = alist[axes[1]], alist[axes[0]], 30 | return Tensor._op(Permute, a, op_args=(alist,), requires_grad=requires_grad) 31 | 32 | 33 | def swapaxes(a, axis1, axis2, requires_grad=False): 34 | """ 35 | 交换Tensor的两个维度 36 | """ 37 | return Tensor._op(SwapAxes, a, op_args=(axis1, axis2), requires_grad=requires_grad) 38 | -------------------------------------------------------------------------------- /nf/operation/manipulation/axis/ops.py: -------------------------------------------------------------------------------- 1 | from nf.op import Operation 2 | import numpy as np 3 | 4 | __all__ = ['Permute', "SwapAxes"] 5 | 6 | class Permute(Operation): 7 | testLevel = 4 8 | def __call__(self, a, axes=None): 9 | 10 | self.variables = (a,) 11 | if axes is not None: 12 | self.axes = tuple(axis % a.ndim for axis in axes) 13 | else: 14 | self.axes = tuple(range(a.ndim)[::-1]) 15 | # print(self.axes,a.data.shape) 16 | 17 | return np.transpose(a.data, self.axes) 18 | 19 | def backward(self, grad, **kwargs): 20 | a = self.variables[0] 21 | # print(grad.shape, a.shape, self.axes) 22 | try: 23 | grad = grad.transpose(np.argsort(self.axes)) 24 | except ValueError: 25 | grad = self.broadcastable(grad, self.variables[0].shape[::-1]) 26 | grad = grad.transpose(np.argsort(self.axes)) 27 | # print(grad.shape, a.shape, self.axes) 28 | self.variables[0].backward(grad) 29 | 30 | class SwapAxes(Operation): 31 | def __call__(self, a, axis1, axis2): 32 | self.variables = (a,) 33 | self.axis1 = axis1 34 | self.axis2 = axis2 35 | return np.swapaxes(a.data, axis1, axis2) 36 | 37 | def backward(self, grad, **kwargs): 38 | self.variables[0].backward(grad.swapaxes(self.axis2, self.axis1)) 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /nf/operation/manipulation/shape/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RanFeng/NumpyFlow/9502b7328ef81ed7905baa31a12f303da8ab2e66/nf/operation/manipulation/shape/__init__.py -------------------------------------------------------------------------------- /nf/operation/manipulation/shape/funcs.py: -------------------------------------------------------------------------------- 1 | from nf.tensor import Tensor 2 | 3 | from .ops import * 4 | 5 | __all__ = ["flatten","reshape", "squeeze", "expand_dims"] 6 | 7 | 8 | def flatten(a, requires_grad=False): 9 | return Tensor._op(Flatten, a, requires_grad=requires_grad) 10 | 11 | def reshape(a, *newshape, requires_grad=False): 12 | if not newshape: 13 | raise TypeError("reshape() takes at least 1 argument (0 given)") 14 | return Tensor._op(Reshape, a, op_args=(newshape,), requires_grad=requires_grad) 15 | 16 | 17 | def squeeze(a, axis=None, requires_grad=False): 18 | return Tensor._op(Squeeze, a, op_args=(axis,), requires_grad=requires_grad) 19 | 20 | 21 | def expand_dims(a, axis, requires_grad=False): 22 | return Tensor._op(ExpandDims, a, op_args=(axis,), requires_grad=requires_grad) 23 | -------------------------------------------------------------------------------- /nf/operation/manipulation/shape/ops.py: 
-------------------------------------------------------------------------------- 1 | from nf.op import Operation 2 | import numpy as np 3 | 4 | 5 | __all__ = ['Flatten','Reshape','Squeeze','ExpandDims'] 6 | 7 | 8 | class Flatten(Operation): 9 | testLevel = 2 10 | def __call__(self, a): 11 | self.variables = (a,) 12 | return a.data.flatten(order="C") 13 | 14 | def backward(self, grad, **kwargs): 15 | a = self.variables[0] 16 | self.variables[0].backward(grad.reshape(*a.data.shape)) 17 | 18 | 19 | 20 | class Reshape(Operation): 21 | testLevel = 2 22 | def __call__(self, a, shape): 23 | self.variables = (a,) 24 | if shape is not None and hasattr(shape, "__iter__"): 25 | shape = shape[0] 26 | self.shape = shape 27 | return a.data.reshape(shape) 28 | 29 | def backward(self, grad, **kwargs): 30 | a = self.variables[0] 31 | try: 32 | grad = grad.reshape(*a.shape) 33 | except ValueError: 34 | grad = self.broadcastable(grad, self.shape) 35 | grad = grad.reshape(*a.shape) 36 | self.variables[0].backward(grad) 37 | 38 | 39 | class Squeeze(Operation): 40 | testLevel = 4 41 | def __call__(self, a, axis): 42 | self.variables = (a,) 43 | return np.squeeze(a.data, axis=axis) 44 | 45 | def backward(self, grad, **kwargs): 46 | a = self.variables[0] 47 | self.variables[0].backward(grad.reshape(*a.shape)) 48 | 49 | 50 | 51 | class ExpandDims(Operation): 52 | testLevel = 4 53 | def __call__(self, a, axis): 54 | self.variables = (a,) 55 | out = np.expand_dims(a.data, axis=axis) 56 | self.outshape = out.shape 57 | return out 58 | 59 | def backward(self, grad, **kwargs): 60 | a = self.variables[0] 61 | try: 62 | grad = grad.reshape(*a.shape) 63 | except ValueError: 64 | grad = self.broadcastable(grad, self.outshape) 65 | grad = grad.reshape(*a.shape) 66 | self.variables[0].backward(grad) 67 | # grad = self.broadcastable(grad, a.shape) 68 | # self.variables[0].backward(grad) 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /nf/operation/math/arithmetic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RanFeng/NumpyFlow/9502b7328ef81ed7905baa31a12f303da8ab2e66/nf/operation/math/arithmetic/__init__.py -------------------------------------------------------------------------------- /nf/operation/math/arithmetic/funcs.py: -------------------------------------------------------------------------------- 1 | from nf.tensor import Tensor 2 | 3 | from .ops import * 4 | 5 | __all__ = [ 6 | "add", 7 | "divide", 8 | "multiply", 9 | "negative", 10 | "positive", 11 | "power", 12 | "subtract", 13 | ] 14 | 15 | 16 | def add(a, b, requires_grad=False): 17 | return Tensor._op(Add, a, b, requires_grad=requires_grad) 18 | 19 | 20 | def subtract(a, b, requires_grad=False): 21 | return Tensor._op(Subtract, a, b, requires_grad=requires_grad) 22 | 23 | 24 | def divide(a, b, requires_grad=False): 25 | return Tensor._op(Divide, a, b, requires_grad=requires_grad) 26 | 27 | 28 | def power(a, b, requires_grad=False): 29 | return Tensor._op(Power, a, b, requires_grad=requires_grad) 30 | 31 | 32 | def multiply(a, b, requires_grad=False): 33 | return Tensor._op(Multiply, a, b, requires_grad=requires_grad) 34 | 35 | def positive(a, requires_grad=False): 36 | return Tensor._op(Positive, a, requires_grad=requires_grad) 37 | 38 | def negative(a, requires_grad=False): 39 | return Tensor._op(Negative, a, requires_grad=requires_grad) 40 |
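Illustrative usage sketch (not from the repository source): a small example of how the thin wrappers above are meant to be called. nf.Tensor, nf.add, nf.multiply and Tensor.data are taken from the surrounding files; seeding backward(...) with an ndarray follows the Tensor.backward(grad) convention used inside the Operation classes; the concrete values are illustrative only.

    import numpy as np
    import nf

    x = nf.Tensor(np.array([1.0, 2.0, 3.0]), requires_grad=True)
    y = nf.Tensor(np.array([4.0, 5.0, 6.0]), requires_grad=True)
    z = nf.multiply(nf.add(x, y), x)           # builds (x + y) * x out of the Add and Multiply ops defined below
    assert np.allclose(z.data, np.array([5.0, 14.0, 27.0]))
    z.backward(np.ones_like(z.data))           # dz/dx = (x + y) + x, dz/dy = x, accumulated into each input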
-------------------------------------------------------------------------------- /nf/operation/math/arithmetic/ops.py: -------------------------------------------------------------------------------- 1 | from nf.op import Operation 2 | import numpy as np 3 | 4 | __all__ = ['Assign', 5 | 'Add', 6 | 'Multiply', 7 | 'Subtract', 8 | 'Divide', 9 | 'Negative', 10 | 'Positive', 11 | 'Power'] 12 | 13 | 14 | 15 | class Assign(Operation): 16 | def __call__(self, a): 17 | self.variables = (a) 18 | return a 19 | 20 | def backward(self, grad, **kwargs): 21 | return None 22 | 23 | 24 | class Add(Operation): 25 | def __call__(self, a, b): 26 | self.variables = (a, b) 27 | out = a.data + b.data 28 | return out 29 | 30 | 31 | def backward(self, grad, **kwargs): 32 | self.variables[0].backward(grad) 33 | self.variables[1].backward(grad) 34 | 35 | 36 | class Multiply(Operation): 37 | def __call__(self, a, b): 38 | self.variables = (a, b) 39 | out = a.data * b.data 40 | return out 41 | 42 | def backward(self, grad, **kwargs): 43 | a, b = self.variables 44 | self.variables[0].backward(b.data * grad) 45 | self.variables[1].backward(a.data * grad) 46 | 47 | 48 | class Subtract(Operation): 49 | def __call__(self, a, b): 50 | self.variables = (a, b) 51 | out = a.data - b.data 52 | return out 53 | 54 | def backward(self, grad,**kwargs): 55 | self.variables[0].backward(grad) 56 | self.variables[1].backward(-grad) 57 | 58 | 59 | class Divide(Operation): 60 | def __call__(self, a, b): 61 | self.variables = (a, b) 62 | self.out = a.data / b.data 63 | return self.out 64 | 65 | def backward(self, grad, **kwargs): 66 | a, b = self.variables 67 | p = grad / b.data 68 | self.variables[0].backward(p) 69 | self.variables[1].backward(-p * self.out) 70 | 71 | 72 | class Negative(Operation): 73 | 74 | def __call__(self, a, where=True): 75 | self.variables = (a,) 76 | return -a.data 77 | 78 | def backward(self, grad, **kwargs): 79 | self.variables[0].backward(-grad) 80 | 81 | 82 | class Positive(Operation): 83 | 84 | def __call__(self, a): 85 | self.variables = (a,) 86 | return np.positive(a.data) 87 | 88 | def backward(self, grad,**kwargs): 89 | self.variables[0].backward(np.positive(grad)) 90 | 91 | 92 | 93 | class Power(Operation): 94 | def __call__(self, a, b): 95 | self.variables = (a, b) 96 | out = a.data ** b.data 97 | return out 98 | 99 | def backward(self, grad,**kwargs): 100 | a, b = self.variables 101 | x, y = a.data, b.data 102 | self.variables[0].backward(grad * y * (x ** np.where(y, (y - 1), 1))) 103 | self.variables[1].backward(grad * (x ** y) * np.log(np.where(x, x, 1))) 104 | 105 | -------------------------------------------------------------------------------- /nf/operation/math/exp_log/funcs.py: -------------------------------------------------------------------------------- 1 | from nf.tensor import Tensor 2 | 3 | from .ops import * 4 | 5 | __all__ = [ 6 | "exp", 7 | "log", 8 | "log2", 9 | "log10", 10 | ] 11 | 12 | 13 | def exp(a, requires_grad=False): 14 | return Tensor._op(Exp, a, requires_grad=requires_grad) 15 | 16 | def log(a, requires_grad=False): 17 | return Tensor._op(Log, a, requires_grad=requires_grad) 18 | 19 | 20 | def log2(a, requires_grad=False): 21 | return Tensor._op(Log2, a, requires_grad=requires_grad) 22 | 23 | 24 | def log10(a, requires_grad=False): 25 | return Tensor._op(Log10, a, requires_grad=requires_grad) 26 | -------------------------------------------------------------------------------- /nf/operation/math/exp_log/ops.py: 
-------------------------------------------------------------------------------- 1 | from nf.op import Operation 2 | import numpy as np 3 | 4 | 5 | __all__ = [ 6 | "Exp", 7 | "Log", 8 | "Log2", 9 | "Log10", 10 | ] 11 | 12 | 13 | class Exp(Operation): 14 | def __call__(self, a): 15 | self.variables = (a,) 16 | return np.exp(a.data) 17 | 18 | def backward(self, grad, **kwargs): 19 | a = self.variables[0] 20 | self.variables[0].backward(grad * np.exp(a.data)) 21 | 22 | class Log(Operation): 23 | def __call__(self, a): 24 | self.variables = (a,) 25 | return np.log(a.data) 26 | 27 | def backward(self, grad, **kwargs): 28 | a = self.variables[0] 29 | self.variables[0].backward(grad / a.data) 30 | 31 | class Log2(Operation): 32 | def __call__(self, a): 33 | self.variables = (a,) 34 | return np.log2(a.data) 35 | 36 | def backward(self, grad, **kwargs): 37 | a = self.variables[0] 38 | self.variables[0].backward(grad / (a.data * np.log(2))) 39 | 40 | 41 | class Log10(Operation): 42 | def __call__(self, a): 43 | self.variables = (a,) 44 | return np.log10(a.data) 45 | 46 | def backward(self, grad, **kwargs): 47 | a = self.variables[0] 48 | self.variables[0].backward(grad / (a.data * np.log(10))) 49 | 50 | -------------------------------------------------------------------------------- /nf/operation/math/linalg/funcs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.core.einsumfunc import _parse_einsum_input 3 | 4 | 5 | from nf.tensor import Tensor 6 | 7 | 8 | from .ops import * 9 | 10 | __all__ = ["dot","matmul", "einsum"] 11 | 12 | 13 | def matmul(a, b, requires_grad=False): 14 | return Tensor._op(MatMul, a, b, requires_grad=requires_grad) 15 | 16 | dot = matmul 17 | 18 | def einsum(*operands, optimize=False, requires_grad=False): 19 | # This block has not been verified; it was copied over directly 20 | operands = list(operands) 21 | if isinstance(operands[0], str): 22 | # operands form: "ijk, ijk", x, y 23 | variables = operands[1:] 24 | if any(isinstance(i, Tensor) for i in operands): 25 | operands[1:] = ( 26 | var.data if isinstance(var, Tensor) else var for var in operands[1:] 27 | ) 28 | else: 29 | # operands form: op0, sublist0, op1, sublist1, ..., [sublistout] 30 | end = -1 if len(operands) % 2 else None # -1 if sublistout is included 31 | variables = operands[:end:2] 32 | if any(isinstance(i, Tensor) for i in operands): 33 | operands[:end:2] = ( 34 | var.data if isinstance(var, Tensor) else var for var in operands[:end:2] 35 | ) 36 | 37 | in_lbls, out_lbls, _ = _parse_einsum_input(operands) 38 | return Tensor._op( 39 | EinSum, 40 | *variables, 41 | op_kwargs=dict(in_lbls=in_lbls, out_lbls=out_lbls, optimize=optimize), 42 | requires_grad=requires_grad 43 | ) 44 | 45 | -------------------------------------------------------------------------------- /nf/operation/math/linalg/ops.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | from copy import copy 3 | from functools import reduce 4 | from itertools import chain 5 | 6 | import numpy as np 7 | from numpy.lib.stride_tricks import as_strided 8 | 9 | from nf.op import Operation 10 | 11 | 12 | __all__ = ["MatMul", "EinSum"] 13 | 14 | 15 | class MatMul(Operation): 16 | def __call__(self, a, b): 17 | self.variables = (a, b) 18 | return np.matmul(a.data, b.data) 19 | 20 | def backward(self, grad, **kwargs): 21 | a, b = (i.data for i in self.variables) 22 | 23 | # handle 1D w/ 1D (dot product of vectors) 24 | if a.ndim == 1 and b.ndim == 1:
self.variables[0].backward(grad * b) 26 | self.variables[1].backward(grad * a) 27 | return 28 | # print(type(grad),grad.shape, a.shape, b.shape, b.swapaxes(-1,-2).shape) 29 | # print(grad.swapaxes(-1,-2).shape, np.matmul(grad, b.swapaxes(-1,-2)).shape) 30 | 31 | # print(a.shape, b.shape, grad.shape) 32 | grada = np.matmul(grad, b.swapaxes(-1,-2)) 33 | gradb = np.matmul(a.swapaxes(-1,-2), grad) 34 | # print(grada.shape, gradb.shape, a.shape, b.shape, grad.shape) 35 | 36 | grada = self.broadcastable(grada, a.shape) 37 | gradb = self.broadcastable(gradb, b.shape) 38 | # print(grada.shape, gradb.shape, a.shape, b.shape, grad.shape) 39 | # exit(20) 40 | self.variables[0].backward(grada) 41 | self.variables[1].backward(gradb) 42 | 43 | 44 | 45 | # EinSum # 46 | 47 | 48 | def _unique_from_end(in_str): 49 | """ Return a string with all redundant characters removed, 50 | removing left-most redundant entries 51 | 52 | i.e. "ijikik" -> "jik" 53 | 54 | Parameters 55 | ---------- 56 | in_str: str 57 | 58 | Returns 59 | ------- 60 | str 61 | 62 | Examples 63 | -------- 64 | >>> _unique_from_end("ijikik") 65 | "jik" 66 | """ 67 | 68 | return reduce(lambda acc, x: acc + x if x not in acc else acc, in_str[::-1], "")[::-1] 69 | 70 | 71 | def _merge_max_mappings(*mappings): 72 | """ Merge dictionaries based on largest values in key->value. 73 | 74 | Parameters 75 | ---------- 76 | *mappings : Dict[Any, Any] 77 | 78 | Returns 79 | ------- 80 | Dict[Any, Any] 81 | 82 | Examples 83 | -------- 84 | >>> _merge_max_mappings({"a":1, "b":4}, {"a":2}) 85 | {"a":2, "b":4} 86 | """ 87 | 88 | def _merge_max(d1, d2): 89 | d1.update((k, v) for k, v in d2.items() if d1.get(k, 0) < v) 90 | return d1 91 | 92 | return reduce(_merge_max, mappings, {}) 93 | 94 | 95 | def _get_indices(item, seq): 96 | """ Return the indices where `item` occurs in `seq` 97 | 98 | Returns 99 | ------- 100 | Generator[int]""" 101 | return (n for n, x in enumerate(seq) if x == item) 102 | 103 | 104 | class EinSum(Operation): 105 | scalar_only = True 106 | 107 | def __call__(self, *variables, in_lbls, out_lbls, optimize=False): 108 | """ 109 | einsum('{in_lbls}->{out_lbls}', *variables, optimize=optimize) 110 | 111 | Parameters 112 | ---------- 113 | variables : mygrad.Tensor 114 | in_lbls : str 115 | out_lbls : str 116 | optimize : bool 117 | 118 | Returns 119 | ------- 120 | numpy.ndarray 121 | """ 122 | self.in_lbls = in_lbls.split(",") 123 | self.out_lbls = out_lbls 124 | self.variables = variables 125 | self.optimize = optimize 126 | 127 | # cache counts the number of redundant tensor-label pairs 128 | # fed to einsum. 
Only one gradient will be computed for a 129 | # unique tensor-label pair 130 | self.cache = Counter(zip(variables, self.in_lbls)) 131 | return np.einsum( 132 | "->".join((in_lbls, out_lbls)), 133 | *(var.data for var in self.variables), 134 | optimize=optimize 135 | ) 136 | 137 | def backward(self, grad, **kwargs): 138 | [var.backward(self.backward_var(grad, index)) for (index,var) in enumerate(self.variables)] 139 | 140 | 141 | def backward_var(self, grad, index, **kwargs): 142 | """ 143 | example 144 | ------- 145 | fwd: "ijk, k -> ji", x, y 146 | bkwd (var: 0): "ji, k -> ijk", grad, y 147 | bkwd (var: 1): "ji, ijk -> k", grad, x 148 | """ 149 | 150 | # ijk, k 151 | in_lbls = copy(self.in_lbls) 152 | original_var_lbl = in_lbls.pop(index) 153 | var = self.variables[index] 154 | 155 | factor = self.cache[(var, original_var_lbl)] 156 | if factor == 0: 157 | # the gradient for the current tensor-label pair 158 | # has already been computed, scaled, and back-propped, 159 | # skip gradient calculation. 160 | # raise SkipGradient() 161 | print("梯度计算跳过") 162 | return None 163 | 164 | 165 | numpy_arrays = tuple(i.data for i in self.variables) 166 | self.cache[(var, original_var_lbl)] = 0 167 | 168 | var_lbl = _unique_from_end(original_var_lbl) 169 | repeat_lbls = len(var_lbl) != len(original_var_lbl) 170 | 171 | if repeat_lbls: 172 | # example fwd-prop: einsum("iji -> ij", x) 173 | # "iji" becomes "ji", later we will write along 174 | # the diagonal of an array to reinstate this axis that 175 | # we just removed 176 | mapping_gen = ( 177 | {k: v for k, v in zip(lbl, arr.shape)} 178 | for lbl, arr in zip(self.in_lbls, numpy_arrays) 179 | ) 180 | lbl_to_size = _merge_max_mappings(*mapping_gen) 181 | var_shape = tuple(lbl_to_size[lbl] for lbl in var_lbl) 182 | else: 183 | var_shape = self.variables[index].shape 184 | 185 | # ji 186 | grad_lbl = self.out_lbls 187 | 188 | # Catch indices over which un-contracted sum was performed 189 | # for the given variable: e.g for var-0 in "ijk, jk -> k" 190 | # i is summed over without contraction with another tensor 191 | # 192 | # Backpropping through this is illegal, as it requires the creation 193 | # of an axis; e.g. k, jk -> ijk 194 | # Broadcast the gradient along all such dimensions; e.g. k -> ik 195 | # then proceed as usual; e.g. 
ik, jk -> ijk 196 | unique_in_lbls = set(chain.from_iterable(in_lbls)) | set(grad_lbl) 197 | if len(set(var_lbl) - unique_in_lbls) > 0: 198 | exp_dims = [slice(None) for i in range(grad.ndim)] 199 | grad_shape = list(grad.shape) 200 | for n, lbl in enumerate(var_lbl): 201 | if lbl not in unique_in_lbls: 202 | grad_lbl = grad_lbl[:n] + lbl + grad_lbl[n:] 203 | exp_dims.insert(n, np.newaxis) 204 | grad_shape.insert(n, var_shape[n]) 205 | 206 | gradNew = np.broadcast_to( 207 | grad if not grad.ndim else grad[tuple(exp_dims)], grad_shape 208 | ) 209 | 210 | # "ji, k -> ijk" 211 | back_prop_lbls = ",".join([grad_lbl] + in_lbls) + "->" + var_lbl 212 | 213 | # (grad, y) 214 | operands = (gradNew,) + numpy_arrays[:index] + numpy_arrays[index + 1 :] 215 | 216 | if not repeat_lbls: 217 | # dfdx: einsum("ji, k -> ijk", grad, y) 218 | outshape = self.variables[index].shape 219 | dfdx = self.broadcastable( 220 | np.einsum(back_prop_lbls, *operands, optimize=self.optimize), outshape 221 | ) 222 | if var_shape != dfdx.shape: 223 | # if y was broadcast over x, the gradient needs to 224 | # be broadcast to x's shape: dfdx-shape (i,j,1) -> (i,j,k) 225 | dfdx = np.broadcast_to(dfdx, var_shape) 226 | if factor > 1: 227 | # This tensor-label pair appears several times as 228 | # input to einsum. Scale the gradient accordingly 229 | # such that the full contribution of the tensor-label 230 | # pair is accounted for. 231 | dfdx *= factor 232 | return dfdx 233 | 234 | # Accommodate trace by writing to strided view on array of zeros 235 | # For example: 236 | # 237 | # fwd: einsum('ijkji, k -> jk', x, y) 238 | # dfdx: einsum('jk, k -> kji', grad, y, out=view_of_x) 239 | # 240 | # writing to `view_of_x`, which is a view along the appropriate 241 | # diagonals of x, is equivalent to: 242 | # 243 | # dfdx: einsum('jk, k -> ijkji', grad, y) 244 | # 245 | # which is formally correct but not supported by einsum. 246 | dfdx = np.zeros(tuple(lbl_to_size[i] for i in original_var_lbl)) 247 | out_view_shape = tuple(lbl_to_size[i] for i in var_lbl) 248 | 249 | # compute strides required to traverse the appropriate diagonals of 250 | # the output tensor. 251 | strides = tuple( 252 | sum(dfdx.strides[ind] for ind in _get_indices(lbl, original_var_lbl)) 253 | for lbl in var_lbl 254 | ) 255 | out_view = as_strided(dfdx, shape=out_view_shape, strides=strides) 256 | np.einsum(back_prop_lbls, *operands, out=out_view, optimize=self.optimize) 257 | if factor > 1: 258 | # This tensor-label pair appears several times as 259 | # input to einsum. Scale the gradient accordingly 260 | # such that the full contribution of the tensor-label 261 | # pair is accounted for. 
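            # For instance, einsum("i,i->", x, x) feeds the pair (x, "i") to
            # einsum twice, so factor == 2 and the single computed gradient is
            # scaled up to recover d/dx sum(x * x) = 2 * x * grad.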
262 | dfdx *= factor 263 | return dfdx 264 | -------------------------------------------------------------------------------- /nf/operation/math/misc/funcs.py: -------------------------------------------------------------------------------- 1 | from nf.tensor import Tensor 2 | 3 | from .ops import * 4 | __all__ = ["abs", "absolute", "cbrt", "clip", "maximum", "minimum"] 5 | 6 | 7 | def abs(a, requires_grad=False): 8 | return Tensor._op(Abs, a, requires_grad=requires_grad) 9 | 10 | 11 | absolute = abs 12 | 13 | 14 | def cbrt(a, requires_grad=False): 15 | return Tensor._op(Cbrt, a, requires_grad=requires_grad) 16 | 17 | 18 | def maximum(a, b, requires_grad=False): 19 | return Tensor._op(Maximum, a, b, requires_grad=requires_grad) 20 | 21 | 22 | def minimum(a, b, requires_grad=False): 23 | return Tensor._op(Minimum, a, b, requires_grad=requires_grad) 24 | 25 | 26 | def clip(a, a_min, a_max, requires_grad=False): 27 | if a_min is None and a_max is None: 28 | raise ValueError("`a_min` 与 `a_max` 不能都为空") 29 | 30 | if a_min is not None: 31 | a = maximum(a_min, a, requires_grad=requires_grad) 32 | 33 | if a_max is not None: 34 | a = minimum(a_max, a, requires_grad=requires_grad) 35 | 36 | return a 37 | 38 | -------------------------------------------------------------------------------- /nf/operation/math/misc/ops.py: -------------------------------------------------------------------------------- 1 | from nf.op import Operation 2 | import numpy as np 3 | __all__ = ["Abs", "Cbrt", "Maximum", "Minimum"] 4 | 5 | 6 | class Abs(Operation): 7 | def __call__(self, a): 8 | self.variables = (a,) 9 | return np.abs(a.data) 10 | 11 | def backward(self, grad, **kwargs): 12 | a = self.variables[0] 13 | grad = grad * np.piecewise( 14 | a.data, [a.data < 0, a.data == 0, a.data > 0], [-1, np.nan, 1] 15 | ) 16 | self.variables[0].backward(grad) 17 | 18 | class Cbrt(Operation): 19 | def __call__(self, a): 20 | self.variables = (a,) 21 | return np.cbrt(a.data) 22 | 23 | def backward(self, grad, **kwargs): 24 | a = self.variables[0] 25 | self.variables[0].backward(grad / (3 * np.cbrt(a.data ** 2))) 26 | 27 | class Maximum(Operation): 28 | def __call__(self, a, b): 29 | self.variables = (a, b) 30 | self.greater_than_mask = a.data > b.data 31 | self.equal_mask = a.data == b.data 32 | return np.where(self.greater_than_mask, a.data, b.data) 33 | 34 | def backward(self, grad, **kwargs): 35 | self.variables[0].backward(grad * self.greater_than_mask) 36 | mask = np.logical_not(self.greater_than_mask) 37 | if mask.ndim: 38 | np.logical_not(mask, out=mask, where=self.equal_mask) 39 | elif self.equal_mask: 40 | mask = np.logical_not(mask) 41 | self.variables[1].backward(grad * mask) 42 | 43 | 44 | class Minimum(Operation): 45 | def __call__(self, a, b): 46 | self.variables = (a, b) 47 | self.less_than_mask = a.data < b.data 48 | self.equal_mask = a.data == b.data 49 | return np.where(self.less_than_mask, a.data, b.data) 50 | 51 | def backward(self, grad, **kwargs): 52 | self.variables[0].backward(grad * self.less_than_mask) 53 | mask = np.logical_not(self.less_than_mask) 54 | if mask.ndim: 55 | np.logical_not(mask, out=mask, where=self.equal_mask) 56 | elif self.equal_mask: 57 | mask = np.logical_not(mask) 58 | self.variables[1].backward(grad * mask) -------------------------------------------------------------------------------- /nf/operation/math/nondifferentiable.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from nf.tensor import Tensor 4 | 5 | __all__ = 
["argmin", "argmax"] 6 | 7 | 8 | def argmax(a, axis=None, out=None): 9 | a = a.data if isinstance(a, Tensor) else a 10 | return np.argmax(a, axis, out) 11 | 12 | 13 | def argmin(a, axis=None, out=None): 14 | a = a.data if isinstance(a, Tensor) else a 15 | return np.argmin(a, axis, out) 16 | -------------------------------------------------------------------------------- /nf/operation/math/statistics/funcs.py: -------------------------------------------------------------------------------- 1 | from nf.tensor import Tensor 2 | 3 | from .ops import * 4 | __all__ = [ 5 | "sum", 6 | "mean", 7 | "var", 8 | "std", 9 | "amax", 10 | "amin", 11 | "max", 12 | "min", 13 | ] 14 | 15 | 16 | def sum(x, axis=None, keepdims=False, requires_grad=False): 17 | return Tensor._op(Sum, x, op_args=(axis, keepdims), requires_grad=requires_grad) 18 | 19 | 20 | def mean(x, axis=None, keepdims=False, requires_grad=False): 21 | return Tensor._op(Mean, x, op_args=(axis, keepdims), requires_grad=requires_grad) 22 | 23 | 24 | def var(x, axis=None, ddof=0, keepdims=False, requires_grad=False): 25 | 26 | return Tensor._op( 27 | Variance, 28 | x, 29 | op_kwargs=dict(axis=axis, keepdims=keepdims, ddof=ddof), 30 | requires_grad=requires_grad, 31 | ) 32 | 33 | 34 | def std(x, axis=None, ddof=0, keepdims=False, requires_grad=False): 35 | return Tensor._op( 36 | StdDev, 37 | x, 38 | op_kwargs=dict(axis=axis, keepdims=keepdims, ddof=ddof), 39 | requires_grad=requires_grad, 40 | ) 41 | 42 | 43 | def max(x, axis=None, keepdims=False, requires_grad=False): 44 | return Tensor._op( 45 | MaxMin, 46 | x, 47 | op_kwargs=dict(axis=axis, keepdims=keepdims, maxmin="max"), 48 | requires_grad=requires_grad, 49 | ) 50 | 51 | 52 | def min(x, axis=None, keepdims=False, requires_grad=False): 53 | return Tensor._op( 54 | MaxMin, 55 | x, 56 | op_kwargs=dict(axis=axis, keepdims=keepdims, maxmin="min"), 57 | requires_grad=requires_grad, 58 | ) 59 | 60 | 61 | # aliases 62 | amin = min 63 | amax = max 64 | 65 | -------------------------------------------------------------------------------- /nf/operation/math/statistics/ops.py: -------------------------------------------------------------------------------- 1 | from nf.op import Operation 2 | import numpy as np 3 | from collections.abc import Sequence 4 | from typing import Any 5 | __all__ = ["MaxMin", "Sum", "Mean", "Variance", "StdDev"] 6 | 7 | 8 | class MaxMin(Operation): 9 | def __call__(self, a, axis=None, keepdims=False, maxmin=None): 10 | 11 | assert maxmin in ("max", "min"), "Invalid keyword argument" 12 | op = np.argmax if maxmin == "max" else np.argmin 13 | 14 | # let numpy handle error checking 15 | np.amax(np.empty([1] * a.ndim), axis=axis, keepdims=keepdims) 16 | 17 | self.variables = (a,) 18 | 19 | if a.ndim == 0: 20 | return a.data 21 | 22 | if hasattr(axis, "__iter__"): 23 | assert isinstance(axis, tuple) 24 | axis = tuple(ax % a.ndim for ax in axis) 25 | axis = None if len(axis) == a.ndim else tuple(sorted(axis)) 26 | elif axis is not None: 27 | axis = (axis % a.ndim,) 28 | 29 | self.axis = axis 30 | self.keepdims = keepdims 31 | 32 | # max(a) -> use argmax 33 | if self.axis is None: 34 | self.indices = np.unravel_index(op(a.data), a.shape) 35 | dat = a.data[self.indices] 36 | 37 | # max(x, axis=i) -> use argmax with specified axis 38 | elif len(self.axis) == 1: # 39 | op_index = op(a.data, axis=self.axis[0]) 40 | self.indices = list(np.indices(op_index.shape)) 41 | self.indices.insert(self.axis[0], op_index) 42 | self.indices = tuple(self.indices) 43 | dat = a.data[self.indices] 44 | 
45 | # max(x, axis=(i,j,...) ) -> Reshape data to use argmax along trailing axis 46 | else: 47 | self.static_ax = tuple( 48 | sorted(set(range(a.ndim)) - set(self.axis)) 49 | ) # non-reduced axes (m, n, ..) 50 | self.to_trans = self.static_ax + self.axis # (m, n, ..., i, j, ...) 51 | self.from_trans = tuple(np.argsort(self.to_trans)) 52 | outshape = tuple(a.shape[i] for i in self.static_ax) 53 | 54 | z = a.data.transpose(*self.to_trans).reshape( 55 | *outshape, -1 56 | ) # (m, n, ..., i*j*[...]) 57 | 58 | k = op(z, axis=-1) 59 | self.indices = tuple(i for i in np.indices(k.shape)) 60 | self.indices += (k,) 61 | self.tmp_grad_shape = z.shape 62 | z = z[self.indices] 63 | 64 | dat = z.reshape(outshape) # (m, n, ...) 65 | 66 | if not self.keepdims: 67 | return dat 68 | 69 | elif self.axis is None: 70 | keep_index = (np.newaxis,) * a.ndim 71 | else: 72 | keep_index = [slice(None)] * a.ndim 73 | for i in self.axis: 74 | keep_index[i] = np.newaxis 75 | keep_index = tuple(keep_index) 76 | 77 | return np.asarray(dat)[keep_index] 78 | 79 | 80 | def backward(self, grad, **kwargs): 81 | a = self.variables[0] 82 | if a.ndim == 0: 83 | self.variables[0].backward(grad) 84 | return 85 | 86 | # normalize shape of grad to be same as when keepdims=False 87 | if self.keepdims: 88 | if self.axis is not None: 89 | reduce = [slice(None)] * a.ndim 90 | for i in self.axis: 91 | reduce[i] = 0 92 | reduce = tuple(reduce) 93 | else: 94 | reduce = (0,) * a.ndim 95 | grad = grad[reduce] 96 | 97 | # use argmax indices to broadcast grad to correct elements 98 | if self.axis is None or len(self.axis) == 1: 99 | out = np.zeros_like(a.data, dtype=float) 100 | out[self.indices] = grad 101 | else: 102 | out = np.zeros(self.tmp_grad_shape, dtype=float) 103 | out[self.indices] = grad 104 | shape = tuple(a.shape[i] for i in self.to_trans) 105 | out = out.reshape(shape).transpose(*self.from_trans) 106 | self.variables[0].backward(out) 107 | 108 | 109 | class Sum(Operation): 110 | testLevel = 2 111 | def __call__(self, a, axis=None, keepdims=False): 112 | self.variables = (a,) 113 | 114 | if axis is not None and not hasattr(axis, "__iter__"): 115 | axis = (axis,) 116 | if axis is None: 117 | axis = tuple(i for i in range(len(a.shape))) 118 | self.axis = axis 119 | 120 | self.keepdims = keepdims 121 | out = a.data.sum(axis=axis, keepdims=keepdims) 122 | out = np.array(out) 123 | self.outshape = out.shape 124 | return out 125 | 126 | def backward(self, grad, **kwargs): 127 | a = self.variables[0] 128 | # print(grad.shape, a.shape, self.axis, self.outshape) 129 | grad = self.broadcastable(grad, self.outshape) 130 | # grad = self.broadcastable(grad, a.shape) 131 | # print(grad.shape, a.shape, self.axis, self.outshape) 132 | if not self.keepdims: 133 | index = [slice(None) for i in range(a.ndim)] 134 | for i in self.axis: 135 | index[i] = np.newaxis 136 | grad = grad[tuple(index)] 137 | grad = np.broadcast_to(grad, a.data.shape).astype(float) 138 | # print("sum",grad.shape, a.shape, self.axis, self.outshape) 139 | self.variables[0].backward(grad) 140 | 141 | class Mean(Operation): 142 | testLevel = 2 143 | def __call__(self, a, axis=None, keepdims=False): 144 | self.variables = (a,) 145 | 146 | if axis is not None and not hasattr(axis, "__iter__"): 147 | axis = (axis,) 148 | if axis is None: 149 | axis = tuple(i for i in range(len(a.shape))) 150 | self.axis = axis 151 | self.size = np.prod([a.shape[i] for i in self.axis]) 152 | self.keepdims = keepdims 153 | out = a.data.mean(axis=axis, keepdims=keepdims) 154 | out = np.array(out) 155 
| self.outshape = out.shape 156 | return out 157 | 158 | def backward(self, grad, **kwargs): 159 | a = self.variables[0] 160 | # print(grad.shape, a.shape, self.axis, self.outshape) 161 | # print(grad.sum() / self.size) 162 | grad = self.broadcastable(grad, self.outshape) 163 | # grad = self.broadcastable(grad, a.shape) 164 | # print(grad.shape, a.shape, self.axis, self.outshape) 165 | if not self.keepdims: 166 | index = [slice(None) for i in range(a.ndim)] 167 | for i in self.axis: 168 | index[i] = np.newaxis 169 | grad = grad[tuple(index)] 170 | grad = np.broadcast_to(grad, a.data.shape).astype(float) 171 | # print("mean",grad.shape, a.shape, self.axis, self.outshape, grad[0]) 172 | self.variables[0].backward(grad / self.size) 173 | 174 | 175 | class Variance(Operation): 176 | testLevel = 2 177 | def __call__(self, a, axis=None, keepdims=False, ddof=0): 178 | self.variables = (a,) 179 | if axis is not None and not hasattr(axis, "__iter__"): 180 | axis = (axis,) 181 | if axis is None: 182 | axis = tuple(i for i in range(len(a.shape))) 183 | self.axis = axis 184 | self.size = np.prod([a.shape[i] for i in self.axis]) 185 | self.keepdims = keepdims 186 | self.ddof = ddof 187 | out = np.var(a.data, axis=axis, keepdims=keepdims, ddof=ddof) 188 | out = np.array(out) 189 | self.outshape = out.shape 190 | return out 191 | 192 | 193 | def backward(self, grad, **kwargs): 194 | a = self.variables[0] 195 | N = self.size - self.ddof 196 | grad = self.broadcastable(grad, self.outshape) 197 | if not self.keepdims: 198 | index = [slice(None)] * a.ndim 199 | for i in self.axis: 200 | index[i] = np.newaxis 201 | grad = grad[tuple(index)] 202 | back = (2.0 / N) * (a.data - a.data.mean(axis=self.axis, keepdims=True)) 203 | self.variables[0].backward(back * grad) 204 | 205 | 206 | class StdDev(Operation): 207 | def _grad_preprocess(self, grad: Any) -> np.ndarray: 208 | a = self.variables[0] 209 | return np.asarray(grad) / (2 * np.sqrt(a.data.var(**self.kwargs))) 210 | 211 | def __call__(self, a, axis=None, keepdims=False, ddof=0): 212 | self.variables = (a,) 213 | 214 | if axis is not None and not hasattr(axis, "__iter__"): 215 | axis = (axis,) 216 | 217 | self.kwargs = dict(axis=axis, keepdims=keepdims, ddof=ddof) 218 | return getattr(a.data, 'std')(**self.kwargs) 219 | 220 | 221 | def backward(self, grad, **kwargs): 222 | a = self.variables[0] 223 | if isinstance(self.kwargs["axis"], Sequence) and len(self.kwargs["axis"]) == 0: 224 | self.variables[0].backward(np.zeros(a.shape, dtype=float)) 225 | return 226 | 227 | N = ( 228 | a.size 229 | if self.kwargs["axis"] is None 230 | else np.prod([a.shape[i] for i in self.kwargs["axis"]]) 231 | ) 232 | N -= self.kwargs["ddof"] 233 | 234 | grad = self._grad_preprocess(grad) 235 | if grad.ndim == 0: 236 | grad = np.full(a.shape, grad, dtype=float) 237 | else: 238 | if not self.kwargs["keepdims"]: 239 | index = [slice(None)] * a.ndim 240 | for i in self.kwargs["axis"]: 241 | index[i] = np.newaxis 242 | grad = grad[tuple(index)] 243 | back = (2.0 / N) * ( 244 | a.data - a.data.mean(axis=self.kwargs["axis"], keepdims=True) 245 | ) 246 | self.variables[0].backward(back * grad) 247 | -------------------------------------------------------------------------------- /nf/optimizer/optimizer.py: -------------------------------------------------------------------------------- 1 | import nf 2 | 3 | class Optimizer(object): 4 | r""" 5 | 所有优化器的基类 6 | """ 7 | 8 | def __init__(self, params): 9 | if isinstance(params, nf.Tensor): 10 | raise TypeError("待优化参数必须是nf.Tensor类,不应当是:" + 
type(params)) 11 | 12 | self.param_groups = [] 13 | params = list(params) 14 | if len(params) == 0: 15 | raise ValueError("待优化参数为空") 16 | self.param_groups += params 17 | self.grad_last = [None, ] * len(self.param_groups) 18 | 19 | def __getstate__(self): 20 | return { 21 | 'grad_last': self.grad_last, 22 | 'param_groups': self.param_groups, 23 | } 24 | 25 | def __setstate__(self, state): 26 | self.__dict__.update(state) 27 | 28 | def zero_grad(self): 29 | r""" 30 | 将所有参数的梯度清零,准备下一次反向传播 31 | :return: 32 | """ 33 | for para in self.param_groups: 34 | # print("gg",para.requires_grad, para.grad) 35 | if para.grad is not None: 36 | para.grad.fill(0.0) 37 | 38 | def step(self): 39 | raise NotImplementedError 40 | -------------------------------------------------------------------------------- /nf/optimizer/sgd.py: -------------------------------------------------------------------------------- 1 | from .optimizer import Optimizer 2 | 3 | 4 | class SGD(Optimizer): 5 | r""" 6 | 随机梯度下降优化器,支持momentum和nesterov。 7 | """ 8 | 9 | def __init__(self, params, lr=1.e-5, momentum=0, dampening=0, 10 | weight_decay=0, nesterov=False): 11 | if lr < 0.0: 12 | raise ValueError("请输入正确的learning rate: {}".format(lr)) 13 | if momentum < 0.0: 14 | raise ValueError("请输入正确的momentum: {}".format(momentum)) 15 | if weight_decay < 0.0: 16 | raise ValueError("请输入正确的weight decay: {}".format(weight_decay)) 17 | if nesterov and (momentum <= 0 or dampening != 0): 18 | raise ValueError("Nesterov 动量必须要提供动量值") 19 | 20 | self.lr = lr 21 | self.momentum = momentum 22 | self.dampening = dampening 23 | self.weight_decay = weight_decay 24 | self.nesterov = nesterov 25 | super(SGD, self).__init__(params) 26 | 27 | def __getstate__(self): 28 | return { 29 | 'lr': self.lr, 30 | 'momentum': self.momentum, 31 | 'dampening': self.dampening, 32 | 'weight_decay': self.weight_decay, 33 | 'nesterov': self.nesterov, 34 | 'grad_last': self.grad_last, 35 | 'param_groups': self.param_groups, 36 | } 37 | 38 | def step(self): 39 | for pid, para in enumerate(self.param_groups): 40 | d_p = para.grad 41 | if d_p is None: 42 | continue 43 | if self.weight_decay != 0: 44 | d_p[:] += self.weight_decay * para.data 45 | if self.momentum != 0: 46 | if self.grad_last[pid] is None: 47 | buf = self.grad_last[pid] = d_p.copy() 48 | else: 49 | buf = self.grad_last[pid] 50 | buf *= self.momentum 51 | buf += (1 - self.dampening) * d_p 52 | if self.nesterov: 53 | d_p += self.momentum * buf 54 | else: 55 | d_p = buf 56 | # print(p.data, p.grad) 57 | para.data[:] += -self.lr * d_p 58 | # print(p.data, p.grad) 59 | 60 | -------------------------------------------------------------------------------- /nf/tensor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import Optional, Set, Type, Union 3 | 4 | from .op import Operation 5 | from .operation.math.arithmetic.ops import * 6 | from .operation.math.linalg.ops import * 7 | from .operation.manipulation.shape.ops import * 8 | from .operation.manipulation.axis.ops import * 9 | from .operation.indexing.ops import * 10 | 11 | np.set_printoptions(suppress=True,linewidth=300) 12 | 13 | __all__ = ["Tensor"] 14 | 15 | class Tensor: 16 | def __init__(self, data=None, *, requires_grad=False, creator=None): 17 | assert isinstance(requires_grad, bool) 18 | assert isinstance(creator, (Operation, None.__class__)) 19 | self.data = None 20 | if isinstance(data, (int, float, bool)): 21 | data = [data] 22 | if isinstance(data, (list, tuple)): 23 | data = 
np.array(data) 24 | if isinstance(data, np.ndarray): 25 | self.data = data.copy() 26 | elif isinstance(data, Tensor): 27 | raise ValueError("输入的是 Tensor") 28 | else: 29 | raise ValueError("输入类型未知", type(data), data) 30 | self.requires_grad = requires_grad 31 | 32 | if creator is None: 33 | creator = Assign() 34 | creator(self) # 看似没有用,但是可以用来在计算图的可视化,可视化Assign节点 35 | self.creator = creator 36 | self.grad = None 37 | 38 | @classmethod 39 | def _op(cls, 40 | Op: Type[Operation], 41 | *input_vars, 42 | op_args=None, 43 | op_kwargs=None, 44 | requires_grad=False 45 | ): 46 | if op_args is None: 47 | op_args = tuple() 48 | 49 | if op_kwargs is None: 50 | op_kwargs = dict() 51 | 52 | tensor_vars = tuple( 53 | cls(var, requires_grad=False) if not isinstance(var, cls) else var 54 | for var in input_vars 55 | ) 56 | requires_grad = requires_grad or any(var.requires_grad for var in tensor_vars) 57 | f = Op() 58 | op_out = f(*tensor_vars, *op_args, **op_kwargs) 59 | return cls(op_out, requires_grad=requires_grad, creator=f) 60 | 61 | 62 | def __add__(self, other): 63 | return self._op(Add, self, other) 64 | 65 | def __radd__(self, other): 66 | return self._op(Add, other, self) 67 | 68 | def __mul__(self, other): 69 | return self._op(Multiply, self, other) 70 | 71 | def __rmul__(self, other): 72 | return self._op(Multiply, other, self) 73 | 74 | def __sub__(self, other): 75 | return self._op(Subtract, self, other) 76 | 77 | def __rsub__(self, other): 78 | return self._op(Subtract, other, self) 79 | 80 | def __truediv__(self, other): 81 | return self._op(Divide, self, other) 82 | 83 | def __rtruediv__(self, other): 84 | return self._op(Divide, other, self) 85 | 86 | def __pow__(self, other): 87 | return self._op(Power, self, other) 88 | 89 | def __rpow__(self, other): 90 | return self._op(Power, other, self) 91 | 92 | def __neg__(self): 93 | return self._op(Negative, self) 94 | 95 | def __pos__(self): 96 | return self._op(Positive, self) 97 | 98 | 99 | def __matmul__(self, other): 100 | return self._op(MatMul, self, other) 101 | 102 | def __rmatmul__(self, other): 103 | return self._op(MatMul, other, self) 104 | 105 | def backward(self, grad=None): 106 | if not self.requires_grad: 107 | return 108 | if grad is None: 109 | grad = np.ones_like(self.data, dtype=np.float64) 110 | if isinstance(grad, Tensor): 111 | grad = grad.data 112 | if isinstance(self.creator, Assign) or True: # 计算原子 113 | if self.grad is None: 114 | self.grad = np.zeros_like(self.data, dtype=np.float64) 115 | 116 | try: # try except 成本比if低?那就不亏 117 | self.grad += grad 118 | except ValueError: # self.grad.shape 长度或大小小于 grad.shape,用于适应广播机制 119 | grad_bak = grad.sum(axis=tuple(range(grad.ndim - self.grad.ndim))) 120 | keepdims = tuple(n for (n, i) in enumerate(grad_bak.shape) if i != self.grad.shape[n]) 121 | if keepdims: 122 | grad_bak = grad_bak.sum(axis=keepdims, keepdims=True) 123 | self.grad += grad_bak 124 | self.creator.backward(grad) 125 | 126 | def numpy(self): 127 | return self.data.copy() 128 | 129 | def copy(self): 130 | return self.__copy__() 131 | 132 | def __copy__(self): 133 | """ 134 | 复制当前Tensor的grad、data、requires_grad,设定creator=None 135 | 如果当前的Tensor没有梯度,则梯度为None 136 | Returns 137 | ------- 138 | Tensor 139 | """ 140 | copy = Tensor(np.copy(self.data),requires_grad=self.requires_grad, creator=None) 141 | try: 142 | copy.grad[:] = self.grad[:] # 尽量复制梯度 143 | except: 144 | pass 145 | return copy 146 | 147 | def copy_(self, other): 148 | assert isinstance(other, self.__class__) 149 | self.data[:] = other.data 150 | try: 
151 | self.grad[:] = other.grad[:] # 尽量复制梯度 152 | except: 153 | pass 154 | 155 | 156 | 157 | def __str__(self): 158 | return self.__repr__() 159 | 160 | def __repr__(self): 161 | return "Tensor with shape: {}\n{}".format(self.shape, self.data) 162 | 163 | def __len__(self): 164 | return len(self.data) 165 | 166 | def __contains__(self, item): 167 | return self.data.__contains__(item) 168 | 169 | def __getitem__(self, item): 170 | return self._op(GetItem, self, op_args=(item,)) 171 | 172 | def __setitem__(self, key, value): 173 | raise NotImplemented("还没实现,好像很多的样子,下次一定") 174 | 175 | def item(self): 176 | """ 177 | 用来将Tensor转化成一般的python类型,返回值不支持求导 178 | Returns 179 | ------- 180 | z : 一般的python类型,如float等等 181 | 182 | Examples 183 | -------- 184 | >>> import nf 185 | >>> x = Tensor([22.2]) 186 | >>> x.item() 187 | 22.2 188 | >>> type(x.item()) 189 | float 190 | """ 191 | if self.size > 1: 192 | raise ValueError("不能转化size大于1的Tensor") 193 | return self.data.item() 194 | 195 | def __float__(self): 196 | if self.size > 1: 197 | raise TypeError("不能转化size大于1的Tensor") 198 | return float(self.data) 199 | 200 | def __int__(self): 201 | if self.size > 1: 202 | raise TypeError("不能转化size大于1的Tensor") 203 | return int(self.data) 204 | 205 | @property 206 | def size(self): 207 | """ 208 | 返回一个int值,表示当前Tensor的data的size 209 | Returns 210 | ------- 211 | int 212 | 213 | Examples 214 | -------- 215 | >>> import nf 216 | >>> x = nf.zeros((3, 5, 2)) # creates a tensor with 3x5x2 (= 30) elements 217 | >>> x.size 218 | 30 219 | """ 220 | return self.data.size 221 | 222 | @property 223 | def ndim(self): 224 | """ 225 | 返回当前Tensor的维度 226 | 227 | Returns 228 | ------- 229 | int 230 | 231 | Examples 232 | -------- 233 | >>> import nf 234 | >>> x = nf.ones_like((2,3,4,1,4)) 235 | >>> x.ndim 236 | 5 237 | """ 238 | return self.data.ndim 239 | 240 | @property 241 | def dtype(self): 242 | """ 243 | 返回当前Tensor的数组类型,也就是numpy中的类型 244 | 245 | Returns 246 | ------- 247 | numpy dtype object 248 | 249 | """ 250 | return self.data.dtype 251 | 252 | @property 253 | def shape(self): 254 | """ 255 | 返回当前Tensor的shape 256 | 257 | Returns 258 | ------- 259 | Tuple[int, ...] 260 | 261 | Examples 262 | -------- 263 | >>> import nf 264 | >>> x = nf.Tensor([1, 2, 3, 4]) # axis-0 has size 4 265 | >>> x.shape 266 | (4,) 267 | >>> y = nf.Tensor([[1, 2, 3], # axis-0 has size 2, axis-1 has size 3 268 | ... [4, 5, 6]]) 269 | >>> y.shape 270 | (2, 3) 271 | 272 | """ 273 | return self.data.shape 274 | 275 | @property 276 | def T(self): 277 | """ 278 | 返回当前Tensor的转置,在此与numpy一致,是一个属性, 279 | 如果当前Tensor.ndim > 2,返回的就是整个数组的转置。 280 | 281 | Returns 282 | ------- 283 | Tensor 284 | 285 | Examples 286 | -------- 287 | >>> import nf 288 | >>> y = nf.Tensor([[1, 2, 3], 289 | ... [4, 5, 6]]) 290 | >>> y.T() 291 | Tensor([[1, 4], 292 | [2, 5], 293 | [3, 6]]) 294 | """ 295 | return self._op(Permute, self) 296 | 297 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # NumpyFlow 2 | 3 | 详细文档请参照:https://blog.csdn.net/kid_14_12/article/details/105852626 4 | 5 | 6 | 7 | 待添加..... 
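
A minimal autograd sketch (based on the `Tensor` API in `nf/tensor.py`; illustrative only, not verified end-to-end):

```python
import nf

x = nf.Tensor([1.0, 2.0, 3.0], requires_grad=True)
y = (x * x).sum()   # forward pass builds the computation graph
y.backward()        # backward pass accumulates gradients into x.grad

print(x.grad)       # expected: [2. 4. 6.], i.e. d(sum(x*x))/dx = 2x
```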
8 | 9 | 10 | 11 | # Tensor 12 | 13 | 封装numpy数组,是NumpyFlow的数据载体,相当于torch中的Tensor。 14 | 15 | 16 | 17 | # 已完成 18 | 19 | ## 基本类 20 | - [x] Operation:用于支持基本的运算及对应的梯度计算,是支持自动微分的基本算子 21 | - [x] Tensor: 22 | - [x] Optimizer 23 | - [x] Module 24 | 25 | ## Operation 26 | - [x] Assign、Add、Multiply、Subtract 27 | - [x] Divide、Negative、Positive、Power 28 | - [x] Exp、Log、Log2、Log10 29 | - [x] MatMul、EinSum 30 | 31 | 32 | ## Optimizer 33 | - [x] sgd 34 | - [ ] adam 35 | 36 | 37 | ## 初始化方法 38 | - [x] Kaiming初始化 39 | - [ ] Xavier初始化 40 | - [ ] 随机初始化 41 | - [x] 填充0初始化 42 | - [x] 填充1初始化 43 | 44 | ## Module 45 | 46 | ### 核心网络层 47 | 48 | - [x] Linear 49 | - [x] Relu、Sigmid、Softmax 50 | - [ ] Conv1D 51 | - [x] Conv2D 52 | - [ ] Conv3D 53 | - [ ] MaxPool1D 54 | - [x] MaxPool2D 55 | - [ ] MaxPool3D 56 | - [ ] BatchNorm1D 57 | - [x] BatchNorm2D 58 | - [ ] BatchNorm3D 59 | - [ ] Reshape 60 | - [ ] Permute 61 | - [ ] Flatten 62 | - [ ] RepeatVector 63 | - [ ] Lambda 64 | - [ ] ActivityRegularization 65 | - [ ] Masking 66 | - [ ] SpatialDropout1D 67 | - [ ] SpatialDropout2D 68 | - [ ] SpatialDropout3D 69 | 70 | 71 | ### 局部连接层和循环层 72 | 73 | - [ ] LocallyConnected1D 74 | - [ ] LocallyConnected2D 75 | - [ ] RNN 76 | - [ ] GRU 77 | - [ ] LSTM 78 | - [ ] ConvLSTM2D 79 | - [ ] SimpleRNNCell 80 | - [ ] GRUCell 81 | - [ ] LSTMCell 82 | - [ ] CuDNNGRU 83 | - [ ] CuDNNLSTM 84 | - [ ] Embedding 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='nf', 5 | version='0.0.2', 6 | keywords='DL', 7 | description='a toy deep learning library write by pure numpy named NumpyFlow', 8 | license='', 9 | url='https://github.com/RanFeng/NumpyFlow', 10 | author='Xun Ai', 11 | author_email='kidformyself@gmail.com', 12 | packages=find_packages(), 13 | include_package_data=True, 14 | platforms='any', 15 | install_requires=["numpy"], 16 | ) -------------------------------------------------------------------------------- /test/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RanFeng/NumpyFlow/9502b7328ef81ed7905baa31a12f303da8ab2e66/test/dog.jpg -------------------------------------------------------------------------------- /test/mnist.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RanFeng/NumpyFlow/9502b7328ef81ed7905baa31a12f303da8ab2e66/test/mnist.pkl -------------------------------------------------------------------------------- /test/mnist.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from urllib import request 3 | import gzip 4 | import pickle 5 | 6 | filename = [ 7 | ["training_images","train-images-idx3-ubyte.gz"], 8 | ["test_images","t10k-images-idx3-ubyte.gz"], 9 | ["training_labels","train-labels-idx1-ubyte.gz"], 10 | ["test_labels","t10k-labels-idx1-ubyte.gz"] 11 | ] 12 | 13 | def download_mnist(): 14 | base_url = "http://yann.lecun.com/exdb/mnist/" 15 | for name in filename: 16 | print("Downloading "+name[1]+"...") 17 | request.urlretrieve(base_url+name[1], name[1]) 18 | print("Download complete.") 19 | 20 | def save_mnist(): 21 | mnist = {} 22 | for name in filename[:2]: 23 | with gzip.open(name[1], 'rb') as 
f: 24 | mnist[name[0]] = np.frombuffer(f.read(), np.uint8, offset=16).reshape(-1,28*28) 25 | for name in filename[-2:]: 26 | with gzip.open(name[1], 'rb') as f: 27 | mnist[name[0]] = np.frombuffer(f.read(), np.uint8, offset=8) 28 | with open("mnist.pkl", 'wb') as f: 29 | pickle.dump(mnist,f) 30 | print("Save complete.") 31 | 32 | def init(): 33 | download_mnist() 34 | save_mnist() 35 | 36 | def load(): 37 | with open("mnist.pkl",'rb') as f: 38 | mnist = pickle.load(f) 39 | return mnist["training_images"], mnist["training_labels"], mnist["test_images"], mnist["test_labels"] 40 | 41 | if __name__ == '__main__': 42 | init() -------------------------------------------------------------------------------- /test/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | # from .utils import load_state_dict_from_url 3 | 4 | 5 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 6 | 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 7 | 'wide_resnet50_2', 'wide_resnet101_2'] 8 | 9 | 10 | model_urls = { 11 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 12 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 13 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 14 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 15 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 16 | 'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth', 17 | 'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth', 18 | 'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth', 19 | 'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth', 20 | } 21 | 22 | 23 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 24 | """3x3 convolution with padding""" 25 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 26 | padding=dilation, groups=groups, bias=False, dilation=dilation) 27 | 28 | 29 | def conv1x1(in_planes, out_planes, stride=1): 30 | """1x1 convolution""" 31 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 32 | 33 | 34 | class BasicBlock(nn.Module): 35 | expansion = 1 36 | 37 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 38 | base_width=64, dilation=1, norm_layer=None): 39 | super(BasicBlock, self).__init__() 40 | if norm_layer is None: 41 | norm_layer = nn.BatchNorm2d 42 | if groups != 1 or base_width != 64: 43 | raise ValueError('BasicBlock only supports groups=1 and base_width=64') 44 | if dilation > 1: 45 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock") 46 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1 47 | self.conv1 = conv3x3(inplanes, planes, stride) 48 | self.bn1 = norm_layer(planes) 49 | self.relu = nn.ReLU(inplace=True) 50 | self.conv2 = conv3x3(planes, planes) 51 | self.bn2 = norm_layer(planes) 52 | self.downsample = downsample 53 | self.stride = stride 54 | 55 | def forward(self, x): 56 | identity = x 57 | 58 | out = self.conv1(x) 59 | out = self.bn1(out) 60 | out = self.relu(out) 61 | 62 | out = self.conv2(out) 63 | out = self.bn2(out) 64 | 65 | if self.downsample is not None: 66 | identity = self.downsample(x) 67 | 68 | out += identity 69 | out = self.relu(out) 70 | 71 | return out 72 | 73 | 74 | class Bottleneck(nn.Module): 
75 | # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) 76 | # while original implementation places the stride at the first 1x1 convolution(self.conv1) 77 | # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. 78 | # This variant is also known as ResNet V1.5 and improves accuracy according to 79 | # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. 80 | 81 | expansion = 4 82 | 83 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 84 | base_width=64, dilation=1, norm_layer=None): 85 | super(Bottleneck, self).__init__() 86 | if norm_layer is None: 87 | norm_layer = nn.BatchNorm2d 88 | width = int(planes * (base_width / 64.)) * groups 89 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1 90 | self.conv1 = conv1x1(inplanes, width) 91 | self.bn1 = norm_layer(width) 92 | self.conv2 = conv3x3(width, width, stride, groups, dilation) 93 | self.bn2 = norm_layer(width) 94 | self.conv3 = conv1x1(width, planes * self.expansion) 95 | self.bn3 = norm_layer(planes * self.expansion) 96 | self.relu = nn.ReLU(inplace=True) 97 | self.downsample = downsample 98 | self.stride = stride 99 | 100 | def forward(self, x): 101 | identity = x 102 | 103 | out = self.conv1(x) 104 | out = self.bn1(out) 105 | out = self.relu(out) 106 | 107 | out = self.conv2(out) 108 | out = self.bn2(out) 109 | out = self.relu(out) 110 | 111 | out = self.conv3(out) 112 | out = self.bn3(out) 113 | 114 | if self.downsample is not None: 115 | identity = self.downsample(x) 116 | 117 | out += identity 118 | out = self.relu(out) 119 | 120 | return out 121 | 122 | 123 | class ResNet(nn.Module): 124 | 125 | def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, 126 | groups=1, width_per_group=64, replace_stride_with_dilation=None, 127 | norm_layer=None): 128 | super(ResNet, self).__init__() 129 | if norm_layer is None: 130 | norm_layer = nn.BatchNorm2d 131 | self._norm_layer = norm_layer 132 | 133 | self.inplanes = 64 134 | self.dilation = 1 135 | if replace_stride_with_dilation is None: 136 | # each element in the tuple indicates if we should replace 137 | # the 2x2 stride with a dilated convolution instead 138 | replace_stride_with_dilation = [False, False, False] 139 | if len(replace_stride_with_dilation) != 3: 140 | raise ValueError("replace_stride_with_dilation should be None " 141 | "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) 142 | self.groups = groups 143 | self.base_width = width_per_group 144 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, 145 | bias=False) 146 | self.bn1 = norm_layer(self.inplanes) 147 | self.relu = nn.ReLU(inplace=True) 148 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 149 | self.layer1 = self._make_layer(block, 64, layers[0]) 150 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, 151 | dilate=replace_stride_with_dilation[0]) 152 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, 153 | dilate=replace_stride_with_dilation[1]) 154 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, 155 | dilate=replace_stride_with_dilation[2]) 156 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 157 | self.fc = nn.Linear(512 * block.expansion, num_classes) 158 | self.flatten = nn.Flatten() 159 | 160 | for m in self.modules(): 161 | if isinstance(m, nn.Conv2d): 162 | nn.init.kaiming_normal_(m.weight, mode='fan_out', 
nonlinearity='relu') 163 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 164 | nn.init.constant_(m.weight, 1) 165 | nn.init.constant_(m.bias, 0) 166 | 167 | # Zero-initialize the last BN in each residual branch, 168 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 169 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 170 | if zero_init_residual: 171 | for m in self.modules(): 172 | if isinstance(m, Bottleneck): 173 | nn.init.constant_(m.bn3.weight, 0) 174 | elif isinstance(m, BasicBlock): 175 | nn.init.constant_(m.bn2.weight, 0) 176 | 177 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False): 178 | norm_layer = self._norm_layer 179 | downsample = None 180 | previous_dilation = self.dilation 181 | if dilate: 182 | self.dilation *= stride 183 | stride = 1 184 | if stride != 1 or self.inplanes != planes * block.expansion: 185 | downsample = nn.Sequential( 186 | conv1x1(self.inplanes, planes * block.expansion, stride), 187 | norm_layer(planes * block.expansion), 188 | ) 189 | 190 | layers = [] 191 | layers.append(block(self.inplanes, planes, stride, downsample, self.groups, 192 | self.base_width, previous_dilation, norm_layer)) 193 | self.inplanes = planes * block.expansion 194 | for _ in range(1, blocks): 195 | layers.append(block(self.inplanes, planes, groups=self.groups, 196 | base_width=self.base_width, dilation=self.dilation, 197 | norm_layer=norm_layer)) 198 | 199 | return nn.Sequential(*layers) 200 | 201 | def _forward_impl(self, x): 202 | # See note [TorchScript super()] 203 | x = self.conv1(x) 204 | x = self.bn1(x) 205 | x = self.relu(x) 206 | x = self.maxpool(x) 207 | 208 | x = self.layer1(x) 209 | x = self.layer2(x) 210 | x = self.layer3(x) 211 | x = self.layer4(x) 212 | 213 | x = self.avgpool(x) 214 | # x = torch.flatten(x, 1) 215 | x = self.flatten(x) 216 | x = self.fc(x) 217 | 218 | return x 219 | 220 | def forward(self, x): 221 | return self._forward_impl(x) 222 | 223 | 224 | def _resnet(arch, block, layers, pretrained, progress, **kwargs): 225 | model = ResNet(block, layers, **kwargs) 226 | # if pretrained: 227 | # state_dict = load_state_dict_from_url(model_urls[arch], 228 | # progress=progress) 229 | # model.load_state_dict(state_dict) 230 | return model 231 | 232 | 233 | def resnet18(pretrained=False, progress=True, **kwargs): 234 | r"""ResNet-18 model from 235 | `"Deep Residual Learning for Image Recognition" `_ 236 | 237 | Args: 238 | pretrained (bool): If True, returns a model pre-trained on ImageNet 239 | progress (bool): If True, displays a progress bar of the download to stderr 240 | """ 241 | return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress, 242 | **kwargs) 243 | 244 | 245 | def resnet34(pretrained=False, progress=True, **kwargs): 246 | r"""ResNet-34 model from 247 | `"Deep Residual Learning for Image Recognition" `_ 248 | 249 | Args: 250 | pretrained (bool): If True, returns a model pre-trained on ImageNet 251 | progress (bool): If True, displays a progress bar of the download to stderr 252 | """ 253 | return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress, 254 | **kwargs) 255 | 256 | 257 | def resnet50(pretrained=False, progress=True, **kwargs): 258 | r"""ResNet-50 model from 259 | `"Deep Residual Learning for Image Recognition" `_ 260 | 261 | Args: 262 | pretrained (bool): If True, returns a model pre-trained on ImageNet 263 | progress (bool): If True, displays a progress bar of the download to stderr 264 | """ 
265 | return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress, 266 | **kwargs) 267 | 268 | 269 | def resnet101(pretrained=False, progress=True, **kwargs): 270 | r"""ResNet-101 model from 271 | `"Deep Residual Learning for Image Recognition" `_ 272 | 273 | Args: 274 | pretrained (bool): If True, returns a model pre-trained on ImageNet 275 | progress (bool): If True, displays a progress bar of the download to stderr 276 | """ 277 | return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress, 278 | **kwargs) 279 | 280 | 281 | def resnet152(pretrained=False, progress=True, **kwargs): 282 | r"""ResNet-152 model from 283 | `"Deep Residual Learning for Image Recognition" `_ 284 | 285 | Args: 286 | pretrained (bool): If True, returns a model pre-trained on ImageNet 287 | progress (bool): If True, displays a progress bar of the download to stderr 288 | """ 289 | return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress, 290 | **kwargs) 291 | 292 | 293 | def resnext50_32x4d(pretrained=False, progress=True, **kwargs): 294 | r"""ResNeXt-50 32x4d model from 295 | `"Aggregated Residual Transformation for Deep Neural Networks" `_ 296 | 297 | Args: 298 | pretrained (bool): If True, returns a model pre-trained on ImageNet 299 | progress (bool): If True, displays a progress bar of the download to stderr 300 | """ 301 | kwargs['groups'] = 32 302 | kwargs['width_per_group'] = 4 303 | return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3], 304 | pretrained, progress, **kwargs) 305 | 306 | 307 | def resnext101_32x8d(pretrained=False, progress=True, **kwargs): 308 | r"""ResNeXt-101 32x8d model from 309 | `"Aggregated Residual Transformation for Deep Neural Networks" `_ 310 | 311 | Args: 312 | pretrained (bool): If True, returns a model pre-trained on ImageNet 313 | progress (bool): If True, displays a progress bar of the download to stderr 314 | """ 315 | kwargs['groups'] = 32 316 | kwargs['width_per_group'] = 8 317 | return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3], 318 | pretrained, progress, **kwargs) 319 | 320 | 321 | def wide_resnet50_2(pretrained=False, progress=True, **kwargs): 322 | r"""Wide ResNet-50-2 model from 323 | `"Wide Residual Networks" `_ 324 | 325 | The model is the same as ResNet except for the bottleneck number of channels 326 | which is twice larger in every block. The number of channels in outer 1x1 327 | convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 328 | channels, and in Wide ResNet-50-2 has 2048-1024-2048. 329 | 330 | Args: 331 | pretrained (bool): If True, returns a model pre-trained on ImageNet 332 | progress (bool): If True, displays a progress bar of the download to stderr 333 | """ 334 | kwargs['width_per_group'] = 64 * 2 335 | return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3], 336 | pretrained, progress, **kwargs) 337 | 338 | 339 | def wide_resnet101_2(pretrained=False, progress=True, **kwargs): 340 | r"""Wide ResNet-101-2 model from 341 | `"Wide Residual Networks" `_ 342 | 343 | The model is the same as ResNet except for the bottleneck number of channels 344 | which is twice larger in every block. The number of channels in outer 1x1 345 | convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 346 | channels, and in Wide ResNet-50-2 has 2048-1024-2048. 
347 | 348 | Args: 349 | pretrained (bool): If True, returns a model pre-trained on ImageNet 350 | progress (bool): If True, displays a progress bar of the download to stderr 351 | """ 352 | kwargs['width_per_group'] = 64 * 2 353 | return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3], 354 | pretrained, progress, **kwargs) 355 | 356 | 357 | if __name__ == "__main__": 358 | import torch 359 | from PIL import Image 360 | from torchvision import transforms 361 | import numpy as np 362 | 363 | transform = transforms.Compose([ 364 | transforms.Resize(224), 365 | transforms.CenterCrop(224), 366 | transforms.ToTensor(), 367 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 368 | ]) 369 | 370 | model = resnet50() 371 | model.eval() 372 | state_dict = torch.load('../test/resnet50-19c8e357.pth') 373 | model.load_state_dict(state_dict) 374 | keys = list() 375 | for key in sorted(list(state_dict.keys())): 376 | if 'tracked' not in key: 377 | keys.append(key) 378 | for key in keys: 379 | print(key, state_dict[key].shape) 380 | 381 | 382 | img = Image.open('../test/dog.jpg') 383 | img_tensor = transform(img).unsqueeze(0) 384 | conf = model(img_tensor) 385 | conf_np = conf.detach().numpy() 386 | print(np.argmax(conf_np)) 387 | 388 | 389 | 390 | 391 | -------------------------------------------------------------------------------- /test/resnet_nf.py: -------------------------------------------------------------------------------- 1 | import nf.nn as nn 2 | import nf.nn.functional as F 3 | 4 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 5 | 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 6 | 'wide_resnet50_2', 'wide_resnet101_2'] 7 | 8 | 9 | model_urls = { 10 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 11 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 12 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 13 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 14 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 15 | 'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth', 16 | 'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth', 17 | 'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth', 18 | 'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth', 19 | } 20 | 21 | 22 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 23 | """3x3 convolution with padding""" 24 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 25 | padding=dilation, groups=groups, bias=False, dilation=dilation) 26 | 27 | 28 | def conv1x1(in_planes, out_planes, stride=1): 29 | """1x1 convolution""" 30 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 31 | 32 | 33 | class BasicBlock(nn.Module): 34 | expansion = 1 35 | 36 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 37 | base_width=64, dilation=1, norm_layer=None): 38 | super(BasicBlock, self).__init__() 39 | if norm_layer is None: 40 | norm_layer = nn.BatchNorm2d 41 | if groups != 1 or base_width != 64: 42 | raise ValueError('BasicBlock only supports groups=1 and base_width=64') 43 | if dilation > 1: 44 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock") 45 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1 46 | 
self.conv1 = conv3x3(inplanes, planes, stride) 47 | self.bn1 = norm_layer(planes) 48 | self.relu = nn.ReLU(inplace=True) 49 | self.conv2 = conv3x3(planes, planes) 50 | self.bn2 = norm_layer(planes) 51 | self.downsample = downsample 52 | self.stride = stride 53 | 54 | def forward(self, x): 55 | identity = x 56 | 57 | out = self.conv1(x) 58 | out = self.bn1(out) 59 | out = self.relu(out) 60 | 61 | out = self.conv2(out) 62 | out = self.bn2(out) 63 | 64 | if self.downsample is not None: 65 | identity = self.downsample(x) 66 | 67 | out += identity 68 | out = self.relu(out) 69 | 70 | return out 71 | 72 | 73 | class Bottleneck(nn.Module): 74 | # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) 75 | # while original implementation places the stride at the first 1x1 convolution(self.conv1) 76 | # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. 77 | # This variant is also known as ResNet V1.5 and improves accuracy according to 78 | # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. 79 | 80 | expansion = 4 81 | 82 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 83 | base_width=64, dilation=1, norm_layer=None): 84 | super(Bottleneck, self).__init__() 85 | if norm_layer is None: 86 | norm_layer = nn.BatchNorm2d 87 | width = int(planes * (base_width / 64.)) * groups 88 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1 89 | self.conv1 = conv1x1(inplanes, width) 90 | self.bn1 = norm_layer(width) 91 | self.conv2 = conv3x3(width, width, stride, groups, dilation) 92 | self.bn2 = norm_layer(width) 93 | self.conv3 = conv1x1(width, planes * self.expansion) 94 | self.bn3 = norm_layer(planes * self.expansion) 95 | self.relu = nn.ReLU(inplace=True) 96 | self.downsample = downsample 97 | self.stride = stride 98 | 99 | def forward(self, x): 100 | identity = x 101 | 102 | out = self.conv1(x) 103 | out = self.bn1(out) 104 | out = self.relu(out) 105 | 106 | out = self.conv2(out) 107 | out = self.bn2(out) 108 | out = self.relu(out) 109 | 110 | out = self.conv3(out) 111 | out = self.bn3(out) 112 | 113 | if self.downsample is not None: 114 | identity = self.downsample(x) 115 | 116 | out += identity 117 | out = self.relu(out) 118 | 119 | return out 120 | 121 | 122 | class ResNet(nn.Module): 123 | 124 | def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, 125 | groups=1, width_per_group=64, replace_stride_with_dilation=None, 126 | norm_layer=None): 127 | super(ResNet, self).__init__() 128 | if norm_layer is None: 129 | norm_layer = nn.BatchNorm2d 130 | self._norm_layer = norm_layer 131 | 132 | self.inplanes = 64 133 | self.dilation = 1 134 | if replace_stride_with_dilation is None: 135 | # each element in the tuple indicates if we should replace 136 | # the 2x2 stride with a dilated convolution instead 137 | replace_stride_with_dilation = [False, False, False] 138 | if len(replace_stride_with_dilation) != 3: 139 | raise ValueError("replace_stride_with_dilation should be None " 140 | "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) 141 | self.groups = groups 142 | self.base_width = width_per_group 143 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=(3,3,3,3), 144 | bias=False) 145 | self.bn1 = norm_layer(self.inplanes) 146 | self.relu = nn.ReLU(inplace=True) 147 | self.maxpool = nn.MaxPool2d(pool_size=3, stride=2, padding=(1,1,1,1)) 148 | self.layer1 = 
self._make_layer(block, 64, layers[0]) 149 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, 150 | dilate=replace_stride_with_dilation[0]) 151 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, 152 | dilate=replace_stride_with_dilation[1]) 153 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, 154 | dilate=replace_stride_with_dilation[2]) 155 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 156 | self.fc = nn.Linear(512 * block.expansion, num_classes) 157 | self.flatten = nn.Flatten() 158 | 159 | # for m in self.modules(): 160 | # if isinstance(m, nn.Conv2d): 161 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 162 | # elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 163 | # nn.init.constant_(m.weight, 1) 164 | # nn.init.constant_(m.bias, 0) 165 | 166 | # Zero-initialize the last BN in each residual branch, 167 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 168 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 169 | # if zero_init_residual: 170 | # for m in self.modules(): 171 | # if isinstance(m, Bottleneck): 172 | # nn.init.constant_(m.bn3.weight, 0) 173 | # elif isinstance(m, BasicBlock): 174 | # nn.init.constant_(m.bn2.weight, 0) 175 | 176 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False): 177 | norm_layer = self._norm_layer 178 | downsample = None 179 | previous_dilation = self.dilation 180 | if dilate: 181 | self.dilation *= stride 182 | stride = 1 183 | if stride != 1 or self.inplanes != planes * block.expansion: 184 | downsample = nn.Sequential( 185 | conv1x1(self.inplanes, planes * block.expansion, stride), 186 | norm_layer(planes * block.expansion), 187 | ) 188 | 189 | layers = [] 190 | layers.append(block(self.inplanes, planes, stride, downsample, self.groups, 191 | self.base_width, previous_dilation, norm_layer)) 192 | self.inplanes = planes * block.expansion 193 | for _ in range(1, blocks): 194 | layers.append(block(self.inplanes, planes, groups=self.groups, 195 | base_width=self.base_width, dilation=self.dilation, 196 | norm_layer=norm_layer)) 197 | 198 | return nn.Sequential(*layers) 199 | 200 | def _forward_impl(self, x): 201 | # See note [TorchScript super()] 202 | x = self.conv1(x) 203 | x = self.bn1(x) 204 | x = self.relu(x) 205 | x = self.maxpool(x) 206 | 207 | x = self.layer1(x) 208 | x = self.layer2(x) 209 | x = self.layer3(x) 210 | x = self.layer4(x) 211 | 212 | x = self.avgpool(x) 213 | # x = torch.flatten(x, 1) 214 | x = self.flatten(x) 215 | x = self.fc(x) 216 | 217 | return x 218 | 219 | def forward(self, x): 220 | return self._forward_impl(x) 221 | 222 | 223 | def _resnet(arch, block, layers, pretrained, progress, **kwargs): 224 | model = ResNet(block, layers, **kwargs) 225 | # if pretrained: 226 | # state_dict = load_state_dict_from_url(model_urls[arch], 227 | # progress=progress) 228 | # model.load_state_dict(state_dict) 229 | return model 230 | 231 | 232 | def resnet18(pretrained=False, progress=True, **kwargs): 233 | r"""ResNet-18 model from 234 | `"Deep Residual Learning for Image Recognition" `_ 235 | 236 | Args: 237 | pretrained (bool): If True, returns a model pre-trained on ImageNet 238 | progress (bool): If True, displays a progress bar of the download to stderr 239 | """ 240 | return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress, 241 | **kwargs) 242 | 243 | 244 | def resnet34(pretrained=False, progress=True, **kwargs): 245 | r"""ResNet-34 model from 
246 | `"Deep Residual Learning for Image Recognition" `_ 247 | 248 | Args: 249 | pretrained (bool): If True, returns a model pre-trained on ImageNet 250 | progress (bool): If True, displays a progress bar of the download to stderr 251 | """ 252 | return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress, 253 | **kwargs) 254 | 255 | 256 | def resnet50(pretrained=False, progress=True, **kwargs): 257 | r"""ResNet-50 model from 258 | `"Deep Residual Learning for Image Recognition" `_ 259 | 260 | Args: 261 | pretrained (bool): If True, returns a model pre-trained on ImageNet 262 | progress (bool): If True, displays a progress bar of the download to stderr 263 | """ 264 | return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress, 265 | **kwargs) 266 | 267 | 268 | def resnet101(pretrained=False, progress=True, **kwargs): 269 | r"""ResNet-101 model from 270 | `"Deep Residual Learning for Image Recognition" `_ 271 | 272 | Args: 273 | pretrained (bool): If True, returns a model pre-trained on ImageNet 274 | progress (bool): If True, displays a progress bar of the download to stderr 275 | """ 276 | return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress, 277 | **kwargs) 278 | 279 | 280 | def resnet152(pretrained=False, progress=True, **kwargs): 281 | r"""ResNet-152 model from 282 | `"Deep Residual Learning for Image Recognition" `_ 283 | 284 | Args: 285 | pretrained (bool): If True, returns a model pre-trained on ImageNet 286 | progress (bool): If True, displays a progress bar of the download to stderr 287 | """ 288 | return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress, 289 | **kwargs) 290 | 291 | 292 | def resnext50_32x4d(pretrained=False, progress=True, **kwargs): 293 | r"""ResNeXt-50 32x4d model from 294 | `"Aggregated Residual Transformation for Deep Neural Networks" `_ 295 | 296 | Args: 297 | pretrained (bool): If True, returns a model pre-trained on ImageNet 298 | progress (bool): If True, displays a progress bar of the download to stderr 299 | """ 300 | kwargs['groups'] = 32 301 | kwargs['width_per_group'] = 4 302 | return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3], 303 | pretrained, progress, **kwargs) 304 | 305 | 306 | def resnext101_32x8d(pretrained=False, progress=True, **kwargs): 307 | r"""ResNeXt-101 32x8d model from 308 | `"Aggregated Residual Transformation for Deep Neural Networks" `_ 309 | 310 | Args: 311 | pretrained (bool): If True, returns a model pre-trained on ImageNet 312 | progress (bool): If True, displays a progress bar of the download to stderr 313 | """ 314 | kwargs['groups'] = 32 315 | kwargs['width_per_group'] = 8 316 | return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3], 317 | pretrained, progress, **kwargs) 318 | 319 | 320 | def wide_resnet50_2(pretrained=False, progress=True, **kwargs): 321 | r"""Wide ResNet-50-2 model from 322 | `"Wide Residual Networks" `_ 323 | 324 | The model is the same as ResNet except for the bottleneck number of channels 325 | which is twice larger in every block. The number of channels in outer 1x1 326 | convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 327 | channels, and in Wide ResNet-50-2 has 2048-1024-2048. 
328 | 329 | Args: 330 | pretrained (bool): If True, returns a model pre-trained on ImageNet 331 | progress (bool): If True, displays a progress bar of the download to stderr 332 | """ 333 | kwargs['width_per_group'] = 64 * 2 334 | return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3], 335 | pretrained, progress, **kwargs) 336 | 337 | 338 | def wide_resnet101_2(pretrained=False, progress=True, **kwargs): 339 | r"""Wide ResNet-101-2 model from 340 | `"Wide Residual Networks" `_ 341 | 342 | The model is the same as ResNet except for the bottleneck number of channels 343 | which is twice larger in every block. The number of channels in outer 1x1 344 | convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 345 | channels, and in Wide ResNet-50-2 has 2048-1024-2048. 346 | 347 | Args: 348 | pretrained (bool): If True, returns a model pre-trained on ImageNet 349 | progress (bool): If True, displays a progress bar of the download to stderr 350 | """ 351 | kwargs['width_per_group'] = 64 * 2 352 | return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3], 353 | pretrained, progress, **kwargs) 354 | 355 | 356 | if __name__ == "__main__": 357 | import torch 358 | from PIL import Image 359 | from torchvision import transforms 360 | import numpy as np 361 | 362 | transform = transforms.Compose([ 363 | transforms.Resize(224), 364 | transforms.CenterCrop(224), 365 | transforms.ToTensor(), 366 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 367 | ]) 368 | 369 | model = resnet50() 370 | model.eval() 371 | state_dict = torch.load('../tests/resnet50-19c8e357.pth') 372 | model.load_state_dict(state_dict) 373 | 374 | img = Image.open('../tests/dog.jpg') 375 | img_tensor = transform(img).unsqueeze(0) 376 | img_np = np.asarray(img_tensor) 377 | 378 | conf_np = model(img_np) 379 | print(np.argmax(conf_np)) 380 | 381 | 382 | 383 | 384 | -------------------------------------------------------------------------------- /test/test_Conv_Linear.py: -------------------------------------------------------------------------------- 1 | import nf 2 | from nf import Tensor, Parameter 3 | import nf.Module as nm 4 | from nf.Module import Functional as nmF 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as nnF 9 | from torch.autograd import * 10 | from nf.Optimizer.sgd import SGD 11 | from torch import optim 12 | 13 | # def categorical_crossentropy(output, target, from_logits=False): 14 | # output /= output.sum(axis=-1, keepdims=True) 15 | # # output = np.clip(output, 1e-7, 1 - 1e-7) 16 | # a = target * -np.log(output) 17 | # return np.sum(, axis=-1, keepdims=False) 18 | 19 | def netBuild(bd): 20 | f1 = bd.Linear(2,3) 21 | # y = backend.ReLU() 22 | f2 = bd.Linear(3,3) 23 | f3 = bd.Softmax(1) 24 | f4 = bd.Sigmoid() 25 | f5 = bd.Sigmoid() 26 | # f1 = f3 27 | # f2 = f4 28 | 29 | # return lambda z: f3(f4(f2(f1(z)))) 30 | return lambda z: f1(z) 31 | 32 | 33 | class ThModel(nn.Module): 34 | def __init__(self): 35 | super(ThModel, self).__init__() 36 | 37 | self.fc1 = nn.Linear(3, 2) 38 | # self.fc2 = nn.Linear(2, 4) 39 | # self.fc3 = nn.Linear(4, 5) 40 | 41 | def forward(self, x): 42 | if isinstance(x, np.ndarray): 43 | x = torch.from_numpy(x) 44 | # x = self.pool(nnF.relu(self.conv1(x))) 45 | # x = self.pool(nnF.relu(self.conv2(x))) 46 | # x = x.view(-1, 16 * 5 * 5) 47 | x = nnF.relu(self.fc1(x)) 48 | # x = nnF.relu(self.fc2(x)) 49 | # x = nnF.softmax(self.fc3(x), 1) 50 | return x 51 | 52 | class NfModel(nm.Module): 53 | def 
__init__(self): 54 | super(NfModel, self).__init__() 55 | 56 | self.fc1 = nm.Linear(3, 2) 57 | # self.fc2 = nm.Linear(2, 4) 58 | # self.fc3 = nm.Linear(4, 5) 59 | 60 | def forward(self, x): 61 | if isinstance(x, np.ndarray): 62 | x = Tensor(x) 63 | 64 | x = nmF.relu(self.fc1(x)) 65 | # x = nmF.relu(self.fc2(x)) 66 | # x = nmF.softmax(self.fc3(x), 1) 67 | return x 68 | 69 | 70 | def setup_seed(seed): 71 | torch.manual_seed(seed) 72 | # torch.cuda.manual_seed_all(seed) 73 | np.random.seed(seed) 74 | # random.seed(seed) 75 | # torch.backends.cudnn.deterministic = True 76 | 77 | def th_layer(z): 78 | z = Variable(torch.from_numpy(z), requires_grad=True) 79 | f1 = nn.Linear(2,3) 80 | 81 | net = netBuild(nn) 82 | for name, parameters in f1.named_parameters(): 83 | print(name, parameters) 84 | print(f1.state_dict()) 85 | # print(f1.__name__()) 86 | r = f1(z) 87 | # print(r.sum(axis=-1, keepdims=True)) 88 | # r = r / r.sum(axis=-1, keepdims=True) 89 | # a = torch.ones_like(r) * -torch.log(r) 90 | # r = a.sum(axis=-1, keepdims=False) 91 | r.backward(torch.ones_like(r)) 92 | return [r.detach().numpy(), z.grad.numpy()] 93 | 94 | def nf_layer(z): 95 | z = Tensor(z, requires_grad=True) 96 | net = netBuild(nm) 97 | r = net(z) 98 | f1 = nm.Linear(2, 3) 99 | r = f1(z) 100 | # print(r.sum(axis=-1, keepdims=True)) 101 | # r = r / nf.sum(r, axis=-1, keepdims=True) 102 | # a = nf.ones_like(r) * -nf.log(r) 103 | # r = a.sum(axis=-1, keepdims=False) 104 | r.backward() 105 | return [r.data, z.grad] 106 | 107 | 108 | 109 | if __name__ =='__main__': 110 | # setup_seed(20) 111 | # 112 | z = np.random.random([5,3]).astype(np.float32) * 20 113 | 114 | thnet = ThModel() 115 | thp = thnet.state_dict() 116 | for k in thp.keys(): 117 | thp[k] = Tensor(thp[k].numpy()) 118 | 119 | nfnet = NfModel() 120 | nfnet.load_state_dict(thp) 121 | thopt = optim.SGD(thnet.parameters(), lr=1e-3, momentum=0.4,nesterov=True) 122 | nfopt = SGD(nfnet.parameters(), lr=1e-3, momentum=0.4,nesterov=True) 123 | 124 | circle = 800 125 | 126 | for i in range(circle): 127 | thr = thnet(torch.from_numpy(z)) 128 | loss = (3.-thr) 129 | thopt.zero_grad() 130 | loss.backward(torch.ones_like(loss)) 131 | thopt.step() 132 | 133 | for i in range(circle): 134 | nfr = nfnet(z) 135 | loss = (3.-nfr) 136 | nfopt.zero_grad() 137 | loss.backward() 138 | nfopt.step() 139 | 140 | thr = thnet(z).detach().numpy() 141 | nfr = nfnet(z).numpy() 142 | print(thr) 143 | print(nfr) 144 | 145 | 146 | 147 | -------------------------------------------------------------------------------- /test/test_Conv_mnist.py: -------------------------------------------------------------------------------- 1 | import nf 2 | from nf import Tensor 3 | import nf.nn.modules as nn 4 | import nf.nn.functional as F 5 | import numpy as np 6 | from nf.optimizer.sgd import SGD 7 | import mnist 8 | from time import time 9 | 10 | 11 | 12 | def categorical_crossentropy(output, target, from_logits=False): 13 | output /= output.sum(axis=-1, keepdims=True) 14 | # output = np.clip(output, 1e-7, 1 - 1e-7) 15 | a = target * -nf.log(output) 16 | return nf.sum(a, axis=-1, keepdims=False) 17 | 18 | class NfModel(nn.Module): 19 | def __init__(self): 20 | super(NfModel, self).__init__() 21 | 22 | self.c1 = nn.Conv2d(1, 32, 3, stride=1, padding='same') 23 | self.c2 = nn.Conv2d(32, 64, 3, stride=1, padding='same') 24 | self.c3 = nn.Conv2d(64, 4, 3, stride=1, padding='same') 25 | self.b1 = nn.BatchNorm2d(3) 26 | self.b2 = nn.BatchNorm2d(16) 27 | self.b3 = nn.BatchNorm2d(4) 28 | # self.fc1 = nn.Linear(1*28*28//4, 
CLASS_SIZE) 29 | self.fc1 = nn.Linear(4 * 3 * 3, 64) 30 | # self.fc2 = nn.Linear(64, 64) 31 | self.fc2 = nn.Linear(64, CLASS_SIZE) 32 | 33 | 34 | def forward(self, x): 35 | if isinstance(x, np.ndarray): 36 | x = Tensor(x) 37 | # x = F.relu(self.c1(x)) 38 | # print("x1.shape", x.shape) 39 | x = self.c1(x) 40 | x = F.max_pool2d(x) 41 | x = F.relu(self.c2(x)) 42 | x = F.max_pool2d(x) 43 | x = F.relu(self.c3(x)) 44 | x = F.max_pool2d(x) 45 | # print("XXX", x.shape) 46 | x = x.reshape([-1, 4*3*3]) 47 | # print("XXX", x.shape) 48 | x = F.relu(self.fc1(x)) 49 | x = F.softmax(self.fc2(x), -1) 50 | # x = F.softmax(self.fc1(x), -1) 51 | # x = self.fc1(x) 52 | return x 53 | 54 | 55 | 56 | 57 | if __name__ =='__main__': 58 | BATCH_SIZE = 120 59 | IMAGE_SIZE = 28 60 | CLASS_SIZE = 10 61 | x_train, y_train, x_test, y_test = mnist.load() 62 | x_train = x_train.astype(float).reshape([-1,1,28,28]) 63 | x_test = x_test.astype(float).reshape([-1,1,28,28]) 64 | x_train = x_train / x_train.max() 65 | x_test = x_test / x_test.max() 66 | y_train = np.eye(10)[y_train] 67 | y_test = np.eye(10)[y_test] 68 | 69 | print(x_train.shape, y_train.shape) 70 | print(x_test.shape, y_test.shape) 71 | print(x_train.max(), x_test.max()) 72 | 73 | nfnet = NfModel() 74 | nfopt = SGD(nfnet.parameters(), lr=1e-4, momentum=0.9,nesterov=True) 75 | 76 | # batch = Tensor(x_test[0:BATCH_SIZE]) 77 | # gt = Tensor(y_test[0:BATCH_SIZE]) 78 | # nfr = nfnet(batch).numpy() 79 | # nfr = np.argmax(nfr, axis=-1) 80 | # gt = np.argmax(gt.numpy(), axis=-1) 81 | # print(nfr) 82 | # print(gt) 83 | # acc = (nfr == gt).sum() 84 | # print("初始化正确率",acc / nfr.shape[0]) 85 | epch = 3 86 | circle = 500 87 | for j in range(epch): 88 | # print() 89 | t1 = time() 90 | for i in range(circle): 91 | t2 = time() 92 | batch = Tensor(x_train[i*BATCH_SIZE:(i+1)*BATCH_SIZE], requires_grad=True) 93 | gt = Tensor(y_train[i*BATCH_SIZE:(i+1)*BATCH_SIZE]) 94 | nfr = nfnet(batch) 95 | loss = categorical_crossentropy(nfr, gt) 96 | nfopt.zero_grad() 97 | loss.backward() 98 | nfopt.step() 99 | print(i,time()-t2) 100 | # if((j+1) % 5 == 0): 101 | batch = Tensor(x_test) 102 | gt = Tensor(y_test) 103 | nfr = nfnet(batch).numpy() 104 | nfr = np.argmax(nfr, axis=-1) 105 | gt = np.argmax(gt.numpy(), axis=-1) 106 | # print(nfr) 107 | # print(gt.shape) 108 | acc = (nfr == gt).sum() 109 | print("epoch:",j," ", time()-t1," ", acc / nfr.shape[0],' ', loss.mean().numpy()) 110 | # print(thr) 111 | # print(nfr) 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /test/test_Layers.py: -------------------------------------------------------------------------------- 1 | import nf 2 | from nf import Tensor, Parameter 3 | import nf.nn.modules as nm 4 | from nf.nn import functional as nmF 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as nnF 9 | from torch.autograd import * 10 | from nf.optimizer.sgd import SGD 11 | from torch import optim 12 | 13 | # def categorical_crossentropy(output, target, from_logits=False): 14 | # output /= output.sum(axis=-1, keepdims=True) 15 | # # output = np.clip(output, 1e-7, 1 - 1e-7) 16 | # a = target * -np.log(output) 17 | # return np.sum(, axis=-1, keepdims=False) 18 | 19 | def test_Linear(): 20 | class ThModel(nn.Module): 21 | def __init__(self): 22 | super(ThModel, self).__init__() 23 | 24 | self.fc1 = nn.Linear(3, 2) 25 | self.fc2 = nn.Linear(2, 4) 26 | self.fc3 = nn.Linear(4, 5) 27 | 28 | def forward(self, x): 29 | if isinstance(x, np.ndarray): 30 | x = 
torch.from_numpy(x) 31 | # x = self.pool(nnF.relu(self.conv1(x))) 32 | # x = self.pool(nnF.relu(self.conv2(x))) 33 | # x = x.view(-1, 16 * 5 * 5) 34 | x = nnF.relu(self.fc1(x)) 35 | # x = nnF.relu(self.fc2(x)) 36 | # x = nnF.softmax(self.fc3(x), 1) 37 | return x 38 | 39 | class NfModel(nm.Module): 40 | def __init__(self): 41 | super(NfModel, self).__init__() 42 | 43 | self.fc1 = nm.Linear(3, 2) 44 | self.fc2 = nm.Linear(2, 4) 45 | self.fc3 = nm.Linear(4, 5) 46 | 47 | def forward(self, x): 48 | if isinstance(x, np.ndarray): 49 | x = Tensor(x) 50 | 51 | x = nmF.relu(self.fc1(x)) 52 | # x = nmF.relu(self.fc2(x)) 53 | # x = nmF.softmax(self.fc3(x), 1) 54 | return x 55 | 56 | z = np.random.random([5,3]).astype(np.float32) * 20 57 | 58 | thnet = ThModel() 59 | thp = thnet.state_dict() 60 | for k in thp.keys(): 61 | thp[k] = Tensor(thp[k].numpy()) 62 | 63 | nfnet = NfModel() 64 | nfnet.load_state_dict(thp) 65 | thopt = optim.SGD(thnet.parameters(), lr=1e-3, momentum=0.4,nesterov=True) 66 | nfopt = SGD(nfnet.parameters(), lr=1e-3, momentum=0.4,nesterov=True) 67 | 68 | circle = 800 69 | 70 | for i in range(circle): 71 | thr = thnet(torch.from_numpy(z)) 72 | loss = (3.-thr) 73 | thopt.zero_grad() 74 | loss.backward(torch.ones_like(loss)) 75 | thopt.step() 76 | 77 | for i in range(circle): 78 | nfr = nfnet(z) 79 | loss = (3.-nfr) 80 | nfopt.zero_grad() 81 | loss.backward() 82 | nfopt.step() 83 | 84 | thr = thnet(z).detach().numpy() 85 | nfr = nfnet(z).numpy() 86 | print(thr) 87 | print(nfr) 88 | 89 | def test_Conv(): 90 | class ThModel(nn.Module): 91 | def __init__(self): 92 | super(ThModel, self).__init__() 93 | 94 | self.c1 = nn.Conv2d(1, 3, 3, stride=2) 95 | self.c2 = nn.Conv2d(3, 9, (3, 5), stride=(2, 1), padding=(4, 2)) 96 | self.c3 = nn.Conv2d(9, 1, (3, 5), stride=(2, 1), padding=(4, 2)) 97 | self.b1 = nn.BatchNorm2d(3) 98 | self.b2 = nn.BatchNorm2d(9) 99 | self.b3 = nn.BatchNorm2d(1) 100 | 101 | def forward(self, x): 102 | if isinstance(x, np.ndarray): 103 | x = torch.from_numpy(x) 104 | 105 | x = nnF.relu(self.b1(self.c1(x))) 106 | x = nnF.relu(self.b2(self.c2(x))) 107 | x = nnF.relu(self.b3(self.c3(x))) 108 | return x 109 | 110 | class NfModel(nm.Module): 111 | def __init__(self): 112 | super(NfModel, self).__init__() 113 | 114 | self.c1 = nm.Conv2d(1, 3, 3, stride=2) 115 | self.c2 = nm.Conv2d(3, 9, (3, 5), stride=(2, 1), padding=(4,4,2,2)) 116 | self.c3 = nm.Conv2d(9, 1, (3, 5), stride=(2, 1), padding=(4,4,2,2)) 117 | self.b1 = nm.BatchNorm2d(3) 118 | self.b2 = nm.BatchNorm2d(9) 119 | self.b3 = nm.BatchNorm2d(1) 120 | 121 | def forward(self, x): 122 | if isinstance(x, np.ndarray): 123 | x = Tensor(x) 124 | 125 | x = nmF.relu(self.b1(self.c1(x))) 126 | x = nmF.relu(self.b2(self.c2(x))) 127 | x = nmF.relu(self.b3(self.c3(x))) 128 | return x 129 | 130 | z = np.random.random([4,1,7,7]).astype(np.float32) 131 | 132 | thnet = ThModel() 133 | thp = thnet.state_dict() 134 | for k in thp.keys(): 135 | thp[k] = Tensor(thp[k].numpy()) 136 | 137 | nfnet = NfModel() 138 | nfnet.load_state_dict(thp) 139 | thopt = optim.SGD(thnet.parameters(), lr=1e-3, momentum=0.4,nesterov=True) 140 | nfopt = SGD(nfnet.parameters(), lr=1e-3, momentum=0.4,nesterov=True) 141 | 142 | # l = list(nfnet.parameters()) 143 | # print(l) 144 | 145 | circle = 10 146 | 147 | for i in range(circle): 148 | thr = thnet(z) 149 | loss = (3.-thr) 150 | thopt.zero_grad() 151 | loss.backward(torch.ones_like(loss)) 152 | # print(thnet.c1.weight.grad) 153 | thopt.step() 154 | 155 | for i in range(circle): 156 | nfr = nfnet(z) 157 | loss = 
(3.-nfr) 158 | nfopt.zero_grad() 159 | loss.backward() 160 | # print(nfnet.c1.weight.grad) 161 | nfopt.step() 162 | 163 | thr = thnet(z).detach().numpy() 164 | nfr = nfnet(z).numpy() 165 | print(thr) 166 | print(nfr) 167 | try: 168 | a = np.allclose(thr, nfr) 169 | print(a) 170 | # if not a: 171 | # print(thr) 172 | # print(nfr) 173 | except: 174 | print("不合适") 175 | pass 176 | 177 | 178 | 179 | def setup_seed(seed): 180 | torch.manual_seed(seed) 181 | # torch.cuda.manual_seed_all(seed) 182 | np.random.seed(seed) 183 | # random.seed(seed) 184 | # torch.backends.cudnn.deterministic = True 185 | 186 | 187 | 188 | if __name__ =='__main__': 189 | # setup_seed(20) 190 | test_Conv() 191 | # test_Linear() 192 | # print(thr/nfr) 193 | # for t,n in zip(thr, nfr): 194 | # print(t) 195 | # print(n) 196 | # print(np.allclose(t, n, rtol=1.e-5,atol=1.e-8)) 197 | 198 | # print(thr[1]) 199 | # print(nfr[1]) 200 | 201 | 202 | 203 | # model = TheModelClass() 204 | # print("Model's state_dict:") 205 | # # print(model.state_dict()) 206 | # a = model.state_dict() 207 | # model.load_state_dict(a) 208 | # print(a) 209 | 210 | # Print optimizer's state_dict 211 | # print("Optimizer's state_dict:") 212 | # for var_name in optimizer.state_dict(): 213 | # print(var_name, "\t", optimizer.state_dict()[var_name]) 214 | 215 | 216 | 217 | -------------------------------------------------------------------------------- /test/test_Linear_mnist.py: -------------------------------------------------------------------------------- 1 | import nf 2 | from nf import Tensor 3 | import nf.nn.modules as nn 4 | import nf.nn.functional as F 5 | import numpy as np 6 | from nf.optimizer.sgd import SGD 7 | import mnist 8 | from time import time 9 | 10 | 11 | 12 | def categorical_crossentropy(output, target, from_logits=False): 13 | output /= output.sum(axis=-1, keepdims=True) 14 | # output = np.clip(output, 1e-7, 1 - 1e-7) 15 | a = target * -nf.log(output) 16 | return nf.sum(a, axis=-1, keepdims=False) 17 | 18 | class NfModel(nn.Module): 19 | def __init__(self): 20 | super(NfModel, self).__init__() 21 | self.fc1 = nn.Linear(IMAGE_SIZE * IMAGE_SIZE, 64) 22 | self.fc2 = nn.Linear(64, 64) 23 | self.fc3 = nn.Linear(64, CLASS_SIZE) 24 | 25 | def forward(self, x): 26 | if isinstance(x, np.ndarray): 27 | x = Tensor(x) 28 | 29 | x = F.relu(self.fc1(x)) 30 | x = F.relu(self.fc2(x)) 31 | x = F.softmax(self.fc3(x), -1) 32 | return x 33 | 34 | 35 | 36 | 37 | if __name__ =='__main__': 38 | BATCH_SIZE = 60 39 | IMAGE_SIZE = 28 40 | CLASS_SIZE = 10 41 | x_train, y_train, x_test, y_test = mnist.load() 42 | x_train = x_train.astype(float) 43 | x_test = x_test.astype(float) 44 | x_train = x_train / x_train.max() 45 | x_test = x_test / x_test.max() 46 | y_train = np.eye(10)[y_train] 47 | y_test = np.eye(10)[y_test] 48 | _, input_size = x_train.shape 49 | _, output_size = y_train.shape 50 | print(x_train.shape, y_train.shape) 51 | print(x_test.shape, y_test.shape) 52 | print(x_train.max(), x_test.max()) 53 | 54 | nfnet = NfModel() 55 | nfopt = SGD(nfnet.parameters(), lr=1e-4, momentum=0.9,nesterov=True) 56 | 57 | batch = Tensor(x_test) 58 | gt = Tensor(y_test) 59 | nfr = nfnet(batch).numpy() 60 | nfr = np.argmax(nfr, axis=-1) 61 | gt = np.argmax(gt.numpy(), axis=-1) 62 | # print(nfr) 63 | # print(gt) 64 | acc = (nfr == gt).sum() 65 | print(acc / nfr.shape[0]) 66 | epch = 30 67 | circle = 999 68 | for j in range(epch): 69 | # print() 70 | t1 = time() 71 | for i in range(circle): 72 | batch = Tensor(x_train[i*BATCH_SIZE:(i+1)*BATCH_SIZE]) 73 | gt = 
Tensor(y_train[i*BATCH_SIZE:(i+1)*BATCH_SIZE]) 74 | nfr = nfnet(batch) 75 | loss = categorical_crossentropy(nfr, gt) 76 | nfopt.zero_grad() 77 | loss.backward() 78 | nfopt.step() 79 | # print(time()-t1) 80 | # if((j+1) % 5 == 0): 81 | batch = Tensor(x_test) 82 | gt = Tensor(y_test) 83 | nfr = nfnet(batch).numpy() 84 | nfr = np.argmax(nfr, axis=-1) 85 | gt = np.argmax(gt.numpy(), axis=-1) 86 | # print(nfr) 87 | # print(gt) 88 | acc = (nfr == gt).sum() 89 | print("epoch: ",j, time()-t1,"\t", acc / nfr.shape[0],'\t', loss.mean().numpy()) 90 | # print(thr) 91 | # print(nfr) 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /test/test_Tensor.py: -------------------------------------------------------------------------------- 1 | from nf import Tensor 2 | import numpy as np 3 | import torch 4 | from torch.autograd import * 5 | from time import time 6 | 7 | 8 | def func(x,y,z): 9 | # f0 = (x[1,0].T * y[0,1].T).T * z * x 10 | # f1 = f0 * (x + y + z) * y * y * y * (y+z) #! 有错[9,23,29] 11 | # f2 = y[0,3] + x[0,2] 12 | # f3 = y * y - z 13 | # f4 = z - x 14 | # f5 = -x.flatten() + y.flatten() - (x*z).flatten() * 2.0 15 | # f6 = f1[1,3] + f1[0,3] * f2 - z[0,1] ** 2.2 16 | # f7 = f3 + f4 + f6 17 | # f8 = f7 - f3 + f4 * 3.6 18 | # f9 = f8.flatten() / f5 + f7.flatten() 19 | # f10 = -f9 * f5 20 | # f11 = ((x*z) @ x.transpose(3, 4) @ y.permute(0,4,2,3,1)).transpose(0,4) 21 | # f12 = f11.transpose(3,4).flatten() * 5.0 ** x.transpose(1,4).flatten() / y.flatten() * (x/z).flatten() + 2.0 22 | # f13 = f10.reshape(f11.shape) * f11 / f12.reshape(f11.shape) 23 | # f14 = (x.transpose(3,4) @ y).permute(0,2,4,3,1) @ f13.permute(4,2,0,1,3) 24 | # f15 = f14.sum() * f14.mean((0,2)) 25 | f15 = x @ y @ z 26 | return f15 27 | 28 | def th_grad_Test(x,y,z): 29 | x = Variable(torch.from_numpy(x), requires_grad=True) 30 | y = Variable(torch.from_numpy(y), requires_grad=True) 31 | z = Variable(torch.from_numpy(z), requires_grad=True) 32 | def hook(grad, v=None): 33 | print(v, int(grad)) 34 | t1 = time() 35 | f9 = func(x, y, z) 36 | t2 = time() - t1 37 | t1 = time() 38 | f9.backward(torch.ones_like(f9), retain_graph=True) 39 | print("th", t2,time() - t1) 40 | 41 | return [x.grad.numpy(), y.grad.numpy(), z.grad.numpy()] 42 | 43 | 44 | def nf_grad_Test(x,y,z): 45 | x = Tensor(x, requires_grad=True) 46 | y = Tensor(y, requires_grad=True) 47 | z = Tensor(z, requires_grad=True) 48 | t1 = time() 49 | f9 = func(x,y,z) 50 | t2 = time() - t1 51 | t1 = time() 52 | f9.backward() 53 | print("nf", t2,time() - t1) 54 | return [x.grad, y.grad, z.grad] 55 | 56 | 57 | def test1(): 58 | np.random.seed(28) 59 | x = np.random.random([2,4,6,3,4]) 60 | y = np.random.random([2,4,6,4,7]) 61 | z = np.random.random([2,4,1,7,1]) 62 | 63 | grad_th = th_grad_Test(x,y,z) 64 | grad_nf = nf_grad_Test(x,y,z) 65 | 66 | for (thi, nfi) in zip(grad_th, grad_nf): 67 | a = np.allclose(nfi, thi) 68 | print(a) 69 | 70 | 71 | if __name__ =='__main__': 72 | test1() 73 | # np.random.seed(28) 74 | # x = np.random.random([3,4]) 75 | # y = np.random.random([3,4]) 76 | 77 | # x,y,z = 5,2,3 78 | # 79 | # x = Tensor(x, requires_grad=True) 80 | # y = Tensor(y, requires_grad=True) 81 | # z = Tensor(z, requires_grad=True) 82 | # 83 | # f1 = y+z 84 | # f2 = y*f1 85 | # f3 = y*f2 86 | # f4 = y*f3 87 | # f5 = z*f4 88 | # f6 = x*f5 89 | # 90 | # print(f1,f2,f3,f4,f5,f6) 91 | # 92 | # f6.backward() 93 | # 94 | # print(x.grad,y.grad,z.grad) 95 | # print(f1.grad,f2.grad,f3.grad) 96 | # print(f4.grad,f5.grad,f6.grad) 97 | # 
print(id(x.grad), id(y.grad), id(z.grad)) 98 | # z.backward() 99 | 100 | 101 | -------------------------------------------------------------------------------- /test/test_nnfuncs.py: -------------------------------------------------------------------------------- 1 | import nf 2 | import nf.nn.functional as F 3 | import numpy as np 4 | import torch 5 | from torch.nn.functional import conv2d, max_pool2d 6 | from torch.autograd import Variable 7 | from nf.nn.modules.batchnorm import BatchNorm2d as nfBN2d 8 | from torch.nn.modules.batchnorm import BatchNorm2d 9 | from time import time 10 | def test_conv2d(k, d, s=1): 11 | x = np.random.random([1,1,28,28]).astype(np.float32) 12 | k = np.random.random([1,1, 3, 3]).astype(np.float32) 13 | # p = (1,1,1,1) 14 | p = (0,0,0,0) 15 | dilation = 1 16 | stride = 1 17 | # x = np.ones([1, 1, 64, 48]) 18 | # k = np.ones([1, 1, 3, 3]) 19 | def nf_conv2d(x, k): 20 | x = nf.Tensor(x,requires_grad=True) 21 | k = nf.Tensor(k,requires_grad=True) 22 | t1 = time() 23 | out = F.conv2d(x,k, stride=stride, dilation=dilation, padding=p) 24 | out = F.max_pool2d(out, stride=2, pool_size=2) 25 | # out = F.conv2d(out,k, stride=stride, dilation=dilation, padding=p) 26 | # print("nfr:",time()-t1) 27 | out.backward() 28 | # print(k.grad) 29 | return [out.numpy(), x.grad, k.grad] 30 | 31 | def th_conv2d(x, k): 32 | k = Variable(torch.from_numpy(k),requires_grad=True) 33 | x = Variable(torch.from_numpy(x),requires_grad=True) 34 | t1 = time() 35 | out = conv2d(x, k, None, stride=stride, padding=(p[0],p[2]), dilation=dilation) 36 | out = max_pool2d(out, kernel_size=2, stride=2) 37 | # out = conv2d(out, k, None, stride=stride, padding=(p[0],p[2]), dilation=dilation) 38 | # print("tfr:",time() - t1) 39 | out.backward(torch.ones_like(out)) 40 | return [out, x.grad, k.grad] 41 | 42 | thr = th_conv2d(x, k) 43 | nfr = nf_conv2d(x, k) 44 | # print(nfr[0].shape) 45 | # nfr = [0,0,0] 46 | 47 | 48 | for ni, ki in zip(nfr, thr): 49 | # ni = ni.numpy() 50 | # ki = ki.detach().numpy() 51 | try: 52 | ni = ni 53 | ki = ki.detach().numpy() 54 | except: 55 | print("出错") 56 | continue 57 | # print(ni.shape) 58 | # print(ki.shape) 59 | try: 60 | print(np.allclose(ni, ki)) 61 | if not np.allclose(ni, ki): 62 | print(ni) 63 | print(ki) 64 | except: 65 | print("不合适") 66 | pass 67 | 68 | def test_pool2d(k, d, s=1): 69 | # x = [[ 70 | # [[0,1,0,0], 71 | # [0,0,0,0], 72 | # [1,0,0,1], 73 | # [0,1,1,0],], 74 | # [[0,0,0,0],[0,0,0,0], 75 | # [0,0,0,0],[0,0,0,0],], 76 | # [[0,0,0,0],[0,0,0,0], 77 | # [0,0,0,0],[0,0,0,0],], 78 | # [[0,0,0,0],[0,0,0,0], 79 | # [0,0,0,0],[0,0,0,0],] 80 | # ]] 81 | # x = np.array(x) * 1.0 82 | # print(x.shape) 83 | x = np.random.random([40, 40, 64, 64]) 84 | k = np.random.random([1, 1, 3, 3]) 85 | w = np.random.random([4*3*3, 64]) 86 | pool_size = 2 87 | p = (0,0,0,0) 88 | stride = 2 89 | # x = np.ones([1, 1, 64, 48]) 90 | # k = np.ones([1, 1, 3, 3]) 91 | def nf_conv2d(x, w, k): 92 | x = nf.Tensor(x,requires_grad=True) 93 | w = nf.Tensor(w,requires_grad=True) 94 | 95 | t1 = time() 96 | # x = F.conv2d(x, k, None, 'same', 1) 97 | out = F.max_pool2d(x, stride=stride, pool_size=pool_size) 98 | # out = out.reshape([-1,4*3*3]) 99 | # out = out @ w 100 | # out = F.conv2d(out,k, stride=stride, dilation=dilation, padding=p) 101 | out.backward() 102 | print("nfr:",time()-t1) 103 | 104 | # print(k.grad) 105 | return [out.numpy(), x.grad] 106 | 107 | def th_conv2d(x, w, k): 108 | x = Variable(torch.from_numpy(x),requires_grad=True) 109 | w = 
Variable(torch.from_numpy(w),requires_grad=True) 110 | t1 = time() 111 | out = max_pool2d(x, kernel_size=pool_size, stride=stride) 112 | # out = out.reshape([-1,4*3*3]) 113 | # out = out @ w 114 | 115 | # out = conv2d(out, k, None, stride=stride, padding=(p[0],p[2]), dilation=dilation) 116 | out.backward(torch.ones_like(out)) 117 | print("tfr:",time() - t1) 118 | 119 | return [out, x.grad] 120 | 121 | thr = th_conv2d(x, w, k) 122 | nfr = nf_conv2d(x, w, k) 123 | # print(nfr[0].shape) 124 | # nfr = [0,0,0] 125 | 126 | 127 | for ni, ki in zip(nfr, thr): 128 | # ni = ni.numpy() 129 | # ki = ki.detach().numpy() 130 | try: 131 | ni = ni 132 | ki = ki.detach().numpy() 133 | except: 134 | print("出错") 135 | continue 136 | print(ni.shape) 137 | print(ki.shape) 138 | try: 139 | a = np.allclose(ni, ki) 140 | print(a) 141 | if not a: 142 | print(ni) 143 | print(ki) 144 | except: 145 | print("不合适") 146 | pass 147 | 148 | def test_bn2d(k, d, s=1): 149 | x = np.random.random([2, 2, 2, 2]).astype(np.float32) 150 | feas = x.shape[1] 151 | # x = np.ones([1, 1, 64, 48]) 152 | # k = np.ones([1, 1, 3, 3]) 153 | def nf_conv2d(x, k): 154 | x = nf.Tensor(x, requires_grad=True) 155 | t1 = time() 156 | f1 = nfBN2d(feas) 157 | out = f1(x) 158 | out.backward() 159 | print("nfr:",time()-t1) 160 | # print(out) 161 | # print(k.grad) 162 | return [out.numpy(), x.grad] 163 | 164 | def th_conv2d(x, k): 165 | x = Variable(torch.from_numpy(x),requires_grad=True) 166 | t1 = time() 167 | f1 = BatchNorm2d(feas) 168 | f1.train() 169 | out = f1(x) 170 | out.backward(torch.ones_like(out)) 171 | print("tfr:",time() - t1) 172 | 173 | return [out, x.grad] 174 | 175 | thr = th_conv2d(x, k) 176 | nfr = nf_conv2d(x, k) 177 | # print(nfr[0].shape) 178 | # nfr = [0,0,0] 179 | 180 | 181 | for ni, ki in zip(nfr, thr): 182 | # ni = ni.numpy() 183 | # ki = ki.detach().numpy() 184 | try: 185 | ni = ni 186 | ki = ki.detach().numpy() 187 | except: 188 | print("出错") 189 | continue 190 | # print(ni.shape) 191 | # print(ki.shape) 192 | try: 193 | a = np.allclose(ni, ki) 194 | print(a) 195 | if not a: 196 | print(ni) 197 | print(ki) 198 | except: 199 | print("不合适") 200 | pass 201 | 202 | 203 | def test_mean(k, d, s=1): 204 | x = np.random.random([3, 2, 4, 4]).astype(np.float64) 205 | feas = x.shape[1] 206 | axis = (0,2,3) 207 | keepdims = False 208 | eps = 1.e-5 209 | print(np.var(x, axis=axis, keepdims=keepdims, ddof=1)) 210 | # x = np.ones([1, 1, 64, 48]) 211 | # k = np.ones([1, 1, 3, 3]) 212 | def nf_conv2d(x, k): 213 | x = nf.Tensor(x, requires_grad=True) 214 | t1 = time() 215 | running_mean = nf.mean(x, axis=axis, keepdims=True) 216 | running_var = nf.var(x, ddof=1, axis=axis, keepdims=True) 217 | out = (x - running_mean) 218 | out = out / (running_var + eps) ** 0.5 219 | # out = x / running_var 220 | # out = nf.var(x, ddof=1, axis=axis, keepdims=keepdims) 221 | out.backward() 222 | print("nfr:",time()-t1) 223 | # print(out) 224 | # print(k.grad) 225 | return [out.numpy(), x.grad] 226 | 227 | def th_conv2d(x, k): 228 | x = Variable(torch.from_numpy(x),requires_grad=True) 229 | t1 = time() 230 | running_mean = torch.mean(x, axis=axis, keepdims=True) 231 | running_var = torch.var(x, axis=axis, keepdims=True) 232 | out = (x - running_mean) 233 | out = out / (running_var+eps) ** 0.5 234 | # out = x / running_var 235 | # out = torch.var(x, dim=axis, keepdim=keepdims) 236 | out.backward(torch.ones_like(out)) 237 | print("tfr:",time() - t1) 238 | 239 | return [out, x.grad] 240 | 241 | thr = th_conv2d(x, k) 242 | nfr = nf_conv2d(x, k) 243 | # 
print(nfr[0].shape) 244 | # nfr = [0,0,0] 245 | 246 | 247 | for ni, ki in zip(nfr, thr): 248 | # ni = ni.numpy() 249 | # ki = ki.detach().numpy() 250 | try: 251 | ni = ni 252 | ki = ki.detach().numpy() 253 | except: 254 | print("出错") 255 | continue 256 | # print(ni.shape) 257 | # print(ki.shape) 258 | try: 259 | a = np.allclose(ni, ki) 260 | print(a) 261 | if not a: 262 | print(ni) 263 | print(ki) 264 | except: 265 | print("不合适") 266 | pass 267 | 268 | 269 | 270 | if __name__ == '__main__': 271 | # np.random.seed(20) 272 | # for s in range(1,7): 273 | # for d in range(1,10): 274 | # for k in range(1,14): 275 | # test_conv2d((k,k), d, s) 276 | # test_conv2d((3,3), 2, 3) 277 | # for s in range(1,7): 278 | # for k in range(2,14): 279 | # test_pool2d(k,1,k) 280 | # test_mean(5,1,5) 281 | # test_conv2d(5,1,5) 282 | test_pool2d(5,1,5) 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | -------------------------------------------------------------------------------- /torch_playground/mnist.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RanFeng/NumpyFlow/9502b7328ef81ed7905baa31a12f303da8ab2e66/torch_playground/mnist.pkl -------------------------------------------------------------------------------- /torch_playground/mnist.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from urllib import request 3 | import gzip 4 | import pickle 5 | 6 | filename = [ 7 | ["training_images","train-images-idx3-ubyte.gz"], 8 | ["test_images","t10k-images-idx3-ubyte.gz"], 9 | ["training_labels","train-labels-idx1-ubyte.gz"], 10 | ["test_labels","t10k-labels-idx1-ubyte.gz"] 11 | ] 12 | 13 | def download_mnist(): 14 | base_url = "http://yann.lecun.com/exdb/mnist/" 15 | for name in filename: 16 | print("Downloading "+name[1]+"...") 17 | request.urlretrieve(base_url+name[1], name[1]) 18 | print("Download complete.") 19 | 20 | def save_mnist(): 21 | mnist = {} 22 | for name in filename[:2]: 23 | with gzip.open(name[1], 'rb') as f: 24 | mnist[name[0]] = np.frombuffer(f.read(), np.uint8, offset=16).reshape(-1,28*28) 25 | for name in filename[-2:]: 26 | with gzip.open(name[1], 'rb') as f: 27 | mnist[name[0]] = np.frombuffer(f.read(), np.uint8, offset=8) 28 | with open("mnist.pkl", 'wb') as f: 29 | pickle.dump(mnist,f) 30 | print("Save complete.") 31 | 32 | def init(): 33 | download_mnist() 34 | save_mnist() 35 | 36 | def load(): 37 | with open("mnist.pkl",'rb') as f: 38 | mnist = pickle.load(f) 39 | return mnist["training_images"], mnist["training_labels"], mnist["test_images"], mnist["test_labels"] 40 | 41 | if __name__ == '__main__': 42 | init() -------------------------------------------------------------------------------- /torch_playground/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RanFeng/NumpyFlow/9502b7328ef81ed7905baa31a12f303da8ab2e66/torch_playground/test.png -------------------------------------------------------------------------------- /未命名.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 6, 6 | "id": "e726bcb8", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "ename": "NameError", 11 | "evalue": "name 'Operation' is not defined", 12 | "output_type": "error", 13 | "traceback": [ 14 | 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 15 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 16 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtyping\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSet\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mType\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0;32mclass\u001b[0m \u001b[0mMul\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mOperation\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvar\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 17 | "\u001b[0;31mNameError\u001b[0m: name 'Operation' is not defined" 18 | ] 19 | } 20 | ], 21 | "source": [ 22 | "import numpy as np\n", 23 | "from typing import Optional, Set, Type, Union\n", 24 | "\n", 25 | "class Mul(Operation) :\n", 26 | " def __call__(self, x, y):\n", 27 | " self.var = (x, y)\n", 28 | " out = x.data * y.data\n", 29 | " return out\n", 30 | " def backward(self, grad):\n", 31 | " x, y = self.var\n", 32 | " x.backward(grad * y.data)\n", 33 | " y.backward(grad * x.data)\n", 34 | "\n", 35 | "class Add(Operation) :\n", 36 | " def __call__(self, x, y):\n", 37 | " self.var = (x, y)\n", 38 | " out = x.data + y.data\n", 39 | " return out\n", 40 | " def backward(self, grad):\n", 41 | " x, y = self.var\n", 42 | " x.backward(grad)\n", 43 | " y.backward(grad)\n", 44 | "\n", 45 | "class Tensor :\n", 46 | " def __init__(self, data=None, *, requires_grad=False, creator=None):\n", 47 | " assert isinstance(requires_grad, bool)\n", 48 | " assert isinstance(creator, (Operation, None.__class__))\n", 49 | " self.data = None\n", 50 | " if isinstance(data, (int, float, bool)):\n", 51 | " data = [data]\n", 52 | " if isinstance(data, (list, tuple)):\n", 53 | " data = np.array(data)\n", 54 | " if isinstance(data, np.ndarray):\n", 55 | " self.data = data.copy()\n", 56 | " elif isinstance(data, Tensor):\n", 57 | " raise ValueError(\"输入的是 Tensor\")\n", 58 | " else:\n", 59 | " raise ValueError(\"输入类型未知\", type(data), data)\n", 60 | " if creator is None:\n", 61 | " creator = Assign()\n", 62 | " creator(self)\n", 63 | " self.creator = creator\n", 64 | " self.requires_grad = requires_grad\n", 65 | " self.grad = None\n", 66 | " \n", 67 | " \n", 68 | " def op(self, Op:Type[Add], input_vars):\n", 69 | " if Op == None:\n", 70 | " return \n", 71 | " tensor_vars = tuple(\n", 72 | " Tensor(var) if not isinstance(var, Tensor) else var for var in input_vars\n", 73 | " )\n", 74 | " f = Op()\n", 75 | " op_out = f(*tensor_vars)\n", 76 | " return Tensor(op_out, creator=f)\n", 77 | " \n", 78 | " \n", 79 | " def __mul__(self, other): # 乘法\n", 80 | " return self.op(Mul, (self, other))\n", 81 | " def __rmul__(self, other): 
# 乘法\n", 82 | " return self.op(Mul, (other, self))\n", 83 | " \n", 84 | " def __add__(self, other): # 加法\n", 85 | " return self.op(Add, (self, other))\n", 86 | " def __radd__(self, other): # 加法\n", 87 | " return self.op(Add, (other, self))\n", 88 | " \n", 89 | " def __str__(self):\n", 90 | " return self.__repr__()\n", 91 | "\n", 92 | " def __repr__(self):\n", 93 | " return \"Tensor with shape: {}\\n{}\".format(self.shape, self.data)\n", 94 | " \n", 95 | " @property\n", 96 | " def shape(self):\n", 97 | " return self.data.shape\n", 98 | " \n", 99 | " def broadcastable(self, grad, ashape):\n", 100 | " \"\"\"\n", 101 | " 保证传递的梯度shape一致,用于兼容广播机制的反向传播\n", 102 | " :param grad:\n", 103 | " :param ashape:\n", 104 | " :return:\n", 105 | " \"\"\"\n", 106 | " if grad.shape == ashape:\n", 107 | " return grad\n", 108 | " grad_bak = grad.sum(axis=tuple(range(grad.ndim - len(ashape))))\n", 109 | " keepdims = tuple(n for (n, i) in enumerate(grad_bak.shape) if i != ashape[n])\n", 110 | " if keepdims:\n", 111 | " grad_bak = grad_bak.sum(axis=keepdims, keepdims=True)\n", 112 | " return grad_bak\n", 113 | " \n", 114 | " def backward(self, grad=None):\n", 115 | " if not self.requires_grad:\n", 116 | " return\n", 117 | " if grad is None:\n", 118 | " grad = np.ones_like(self.data, dtype=np.float64)\n", 119 | " try: \n", 120 | " self.grad += grad\n", 121 | " except ValueError: # self.grad.shape 长度不等于 grad.shape,用于适应广播机制\n", 122 | " grad = self.broadcastable(grad, self.grad.shape)\n", 123 | " self.grad += grad\n", 124 | " self.grad += grad\n", 125 | " if self.creator:\n", 126 | " self.creator.backward(grad)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 5, 132 | "id": "db03840e", 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "name": "stdout", 137 | "output_type": "stream", 138 | "text": [ 139 | "1 2 3\n" 140 | ] 141 | } 142 | ], 143 | "source": [ 144 | "def fun(a, b ,*,c):\n", 145 | " print(a, b, c)\n", 146 | "\n", 147 | "fun(1, 2, c = 3)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 2, 153 | "id": "6f3c4b46", 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "name": "stdout", 158 | "output_type": "stream", 159 | "text": [ 160 | "Tensor with shape: (1, 2)\n", 161 | "[[2 4]]\n" 162 | ] 163 | } 164 | ], 165 | "source": [ 166 | "a = Tensor([[2, 4]])\n", 167 | "d = Tensor(1)\n", 168 | "e = a * d\n", 169 | "print(e)\n", 170 | "# e.backward(1)\n", 171 | "# print(a.grad, d.grad)\n", 172 | "# print(Tensor(1))" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 14, 178 | "id": "ae0a5784", 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "from nf import Tensor\n", 183 | "import numpy as np\n", 184 | "import torch\n", 185 | "from torch.autograd import *\n", 186 | "from time import time\n", 187 | "\n", 188 | "\n", 189 | "def func(x,y,z):\n", 190 | " f0 = (x[1,0].T * y[0,1].T).T * z * x\n", 191 | " f1 = f0 * (x + y + z) * y * y * y * (y+z) #! 
有错[9,23,29]\n", 192 | " f2 = y[0,3] + x[0,2]\n", 193 | " f3 = y * y - z\n", 194 | " f4 = z - x\n", 195 | " f5 = -x.flatten() + y.flatten() - (x*z).flatten() * 2.0\n", 196 | " f6 = f1[1,3] + f1[0,3] * f2 - z[0,1] ** 2.2\n", 197 | " f7 = f3 + f4 + f6\n", 198 | " f8 = f7 - f3 + f4 * 3.6\n", 199 | " f9 = f8.flatten() / f5 + f7.flatten()\n", 200 | " f10 = -f9 * f5\n", 201 | " f11 = ((x*z) @ x.transpose(3, 4) @ y.permute(0,4,2,3,1)).transpose(0,4)\n", 202 | " f12 = f11.transpose(3,4).flatten() * 5.0 ** x.transpose(1,4).flatten() / y.flatten() * (x/z).flatten() + 2.0\n", 203 | " f13 = f10.reshape(f11.shape) * f11 / f12.reshape(f11.shape)\n", 204 | " f14 = (x.transpose(3,4) @ y).permute(0,2,4,3,1) @ f13.permute(4,2,0,1,3)\n", 205 | " f15 = f14.sum() * f14.mean((0,2))\n", 206 | " return f15\n", 207 | "\n", 208 | "def th_grad_Test(x,y,z):\n", 209 | " x = Variable(torch.from_numpy(x), requires_grad=True)\n", 210 | " y = Variable(torch.from_numpy(y), requires_grad=True)\n", 211 | " z = Variable(torch.from_numpy(z), requires_grad=True)\n", 212 | " t1 = time()\n", 213 | " f9 = func(x, y, z)\n", 214 | " t2 = time() - t1\n", 215 | " t1 = time()\n", 216 | " f9.backward(torch.ones_like(f9), retain_graph=True)\n", 217 | " print(\"th\", t2, time() - t1)\n", 218 | "\n", 219 | " return [x.grad.numpy(), y.grad.numpy(), z.grad.numpy()]\n", 220 | "\n", 221 | "\n", 222 | "def nf_grad_Test(x,y,z):\n", 223 | " x = Tensor(x, requires_grad=True)\n", 224 | " y = Tensor(y, requires_grad=True)\n", 225 | " z = Tensor(z, requires_grad=True)\n", 226 | " t1 = time()\n", 227 | " f9 = func(x,y,z)\n", 228 | " t2 = time() - t1\n", 229 | " t1 = time()\n", 230 | " f9.backward()\n", 231 | " print(\"nf\", t2, time() - t1)\n", 232 | " return [x.grad, y.grad, z.grad]\n", 233 | "\n", 234 | "def test1():\n", 235 | " np.random.seed(28)\n", 236 | " x = np.random.random([2,4,6,3,4])\n", 237 | " y = np.random.random([2,4,6,3,4])\n", 238 | " z = np.random.random([2,4,1,1,4])\n", 239 | "\n", 240 | " grad_th = th_grad_Test(x,y,z)\n", 241 | " grad_nf = nf_grad_Test(x,y,z)\n", 242 | "\n", 243 | " for (thi, nfi) in zip(grad_th, grad_nf):\n", 244 | " a = np.allclose(nfi, thi)\n", 245 | " print(a)" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 27, 251 | "id": "cf0af6e5", 252 | "metadata": {}, 253 | "outputs": [ 254 | { 255 | "name": "stdout", 256 | "output_type": "stream", 257 | "text": [ 258 | "th 0.0018968582153320312 0.0019490718841552734\n", 259 | "nf 0.0017888545989990234 0.011336088180541992\n", 260 | "True\n", 261 | "True\n", 262 | "True\n" 263 | ] 264 | } 265 | ], 266 | "source": [ 267 | "test1()" 268 | ] 269 | } 270 | ], 271 | "metadata": { 272 | "kernelspec": { 273 | "display_name": "Python [conda env:py37] *", 274 | "language": "python", 275 | "name": "conda-env-py37-py" 276 | }, 277 | "language_info": { 278 | "codemirror_mode": { 279 | "name": "ipython", 280 | "version": 3 281 | }, 282 | "file_extension": ".py", 283 | "mimetype": "text/x-python", 284 | "name": "python", 285 | "nbconvert_exporter": "python", 286 | "pygments_lexer": "ipython3", 287 | "version": "3.7.10" 288 | } 289 | }, 290 | "nbformat": 4, 291 | "nbformat_minor": 5 292 | } 293 | -------------------------------------------------------------------------------- /第一课.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "101ecc0b", 6 | "metadata": {}, 7 | "source": [ 8 | "# 第一课\n", 9 | "> 项目代码:https://github.com/RanFeng/NumpyFlow" 10 | ] 11 | }, 
12 | { 13 | "cell_type": "markdown", 14 | "id": "89968732", 15 | "metadata": {}, 16 | "source": [ 17 | "## 提升开发效率\n", 18 | "\n", 19 | "> 工欲善其事,必先利其器\n", 20 | "\n", 21 | "- 使用jupyter\n", 22 | "\n", 23 | "``` bash\n", 24 | "jupyter-notebook\n", 25 | "``` \n", 26 | "\n", 27 | "- 在jupyter中使用自己的conda环境\n", 28 | "\n", 29 | "``` bash\n", 30 | "conda activate py37\n", 31 | "conda install nb_conda_kernels\n", 32 | "```\n", 33 | "\n", 34 | "- shell小技巧\n", 35 | "\n", 36 | "```\n", 37 | "alias ca=\"conda activate\"\n", 38 | "```\n", 39 | "\n", 40 | "\n" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "id": "fda873ed", 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "sadasdas\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "print(\"sadasdas\")" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 82, 64 | "id": "2f8079b1", 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "import numpy as np\n", 69 | "import nf\n", 70 | "import torch as th" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 85, 76 | "id": "4b3ed829", 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/plain": [ 82 | "Tensor with shape: (4, 1)\n", 83 | "[[10]\n", 84 | " [10]\n", 85 | " [ 0]\n", 86 | " [12]]" 87 | ] 88 | }, 89 | "execution_count": 85, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "nf.abs([[10, -10, 0, 12]]).T" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 29, 101 | "id": "08bed019", 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "data": { 106 | "text/plain": [ 107 | "torch.Size([5, 1, 1])" 108 | ] 109 | }, 110 | "execution_count": 29, 111 | "metadata": {}, 112 | "output_type": "execute_result" 113 | } 114 | ], 115 | "source": [ 116 | "th.abs(th.Tensor([[[-1,2,0,1,2]]])).T.shape" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 89, 122 | "id": "c1a171b5", 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "name": "stdout", 127 | "output_type": "stream", 128 | "text": [ 129 | "[array([3.]), array([6.]), None]\n" 130 | ] 131 | } 132 | ], 133 | "source": [ 134 | "def func(x,y,z):\n", 135 | " f6 = x + y + 1 * x * y \n", 136 | " return f6\n", 137 | "def nf_grad_Test(x,y,z):\n", 138 | " x = nf.Tensor(x, requires_grad=True) # grad\n", 139 | " y = nf.Tensor(y, requires_grad=True)\n", 140 | " z = nf.Tensor(z, requires_grad=True)\n", 141 | " f9 = func(x,y,z)\n", 142 | " f9.backward() # 计算梯度\n", 143 | "# print(\"nf\", t2,time() - t1)\n", 144 | " return [x.grad, y.grad, z.grad]\n", 145 | "x = np.array([5])\n", 146 | "y = np.array([2])\n", 147 | "z = np.array([3])\n", 148 | "grad_nf = nf_grad_Test(x,y,z)\n", 149 | "print(grad_nf)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "id": "016938fa", 155 | "metadata": {}, 156 | "source": [ 157 | "f(x,y) = x + y\n", 158 | "f/x=1 f/y=1" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 79, 164 | "id": "bdd6d0ee", 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "\n", 169 | "class Add:\n", 170 | " def __call__(self, a, b):\n", 171 | " self.var = (a, b)\n", 172 | " return a + b\n", 173 | " \n", 174 | " def backward(self, grad):\n", 175 | " self.var[0].backward(grad)\n", 176 | " self.var[1].backward(grad)\n", 177 | "\n", 178 | " \n", 179 | "class Mul:\n", 180 | " def __call__(self, a, b):\n", 181 | " self.var = (a, b)\n", 182 | " return a * b\n", 183 
| " \n", 184 | " def backward(self, grad):\n", 185 | " self.var[0].backward(grad*b.val)\n", 186 | " self.var[1].backward(grad*a.val)\n", 187 | "\n", 188 | "add_op = Add()\n", 189 | "mul_op = Mul()\n", 190 | " \n", 191 | "class TestTensor :\n", 192 | " def __init__(self, val, creator=None):\n", 193 | " self.val = val # shape [5,5]\n", 194 | " self.creator = creator\n", 195 | " self.grad = 0\n", 196 | " \n", 197 | " def __add__(self, other): # 魔术方法\n", 198 | " return TestTensor(self.val + other.val, add_op)\n", 199 | " def __radd__(self, other):\n", 200 | " return TestTensor(other.val + self.val, add_op)\n", 201 | " \n", 202 | " def __mul__(self, other): # 乘法\n", 203 | " return TestTensor(self.val * other.val, mul_op)\n", 204 | " def __rmul__(self, other):\n", 205 | " return TestTensor(other.val * self.val, mul_op)\n", 206 | " \n", 207 | " def backward(self, grad):\n", 208 | " self.grad += grad\n", 209 | " if self.creator == None :\n", 210 | " return\n", 211 | " return self.creator.backward(grad)\n", 212 | " " 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 81, 218 | "id": "6baaa04f", 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "name": "stdout", 223 | "output_type": "stream", 224 | "text": [ 225 | "12\n", 226 | "2 6 1\n" 227 | ] 228 | } 229 | ], 230 | "source": [ 231 | "a = TestTensor(6)\n", 232 | "b = TestTensor(2)\n", 233 | "\n", 234 | "e = mul_op(a, b)\n", 235 | "print(e.val)\n", 236 | "\n", 237 | "e.backward(1)\n", 238 | "print(a.grad, b.grad, e.grad)" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "id": "e43078a1", 244 | "metadata": {}, 245 | "source": [ 246 | "## 作业\n", 247 | "1. 自己实现一个Tensor、算子完成梯度计算" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "id": "5920995f", 253 | "metadata": {}, 254 | "source": [ 255 | "## 参考\n", 256 | "- https://segmentfault.com/a/1190000023346483\n", 257 | "- https://www.heywhale.com/mw/project/59f29f67c5f3f5119527a2cc" 258 | ] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "id": "2e0459b2", 263 | "metadata": {}, 264 | "source": [ 265 | "---" 266 | ] 267 | } 268 | ], 269 | "metadata": { 270 | "kernelspec": { 271 | "display_name": "Python [conda env:py37] *", 272 | "language": "python", 273 | "name": "conda-env-py37-py" 274 | }, 275 | "language_info": { 276 | "codemirror_mode": { 277 | "name": "ipython", 278 | "version": 3 279 | }, 280 | "file_extension": ".py", 281 | "mimetype": "text/x-python", 282 | "name": "python", 283 | "nbconvert_exporter": "python", 284 | "pygments_lexer": "ipython3", 285 | "version": "3.7.10" 286 | } 287 | }, 288 | "nbformat": 4, 289 | "nbformat_minor": 5 290 | } 291 | -------------------------------------------------------------------------------- /第三课.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "eef29d66", 6 | "metadata": {}, 7 | "source": [ 8 | "# 第三课" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "44cd9339", 14 | "metadata": {}, 15 | "source": [ 16 | "## 作业\n", 17 | "1. 了解python中zip的用法\n", 18 | "2. softmax的了解,与sigmoid的关系\n", 19 | "3. 学习mnist.py的所有语句,并添加注释\n", 20 | "4. 跑通pytorch mnist\n", 21 | "5. 
了解slice、了解 `[300:550, :, :]`作用\n", 22 | "\n", 23 | "```\n", 24 | "pip install torch\n", 25 | "pip install matplotlib\n", 26 | "pip install opencv-python\n", 27 | "```" 28 | ] 29 | } 30 | ], 31 | "metadata": { 32 | "kernelspec": { 33 | "display_name": "Python [conda env:py37] *", 34 | "language": "python", 35 | "name": "conda-env-py37-py" 36 | }, 37 | "language_info": { 38 | "codemirror_mode": { 39 | "name": "ipython", 40 | "version": 3 41 | }, 42 | "file_extension": ".py", 43 | "mimetype": "text/x-python", 44 | "name": "python", 45 | "nbconvert_exporter": "python", 46 | "pygments_lexer": "ipython3", 47 | "version": "3.7.10" 48 | } 49 | }, 50 | "nbformat": 4, 51 | "nbformat_minor": 5 52 | } 53 | -------------------------------------------------------------------------------- /第二课.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "101ecc0b", 6 | "metadata": {}, 7 | "source": [ 8 | "# 第二课\n", 9 | "> 项目代码:https://github.com/RanFeng/NumpyFlow" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "id": "89968732", 15 | "metadata": {}, 16 | "source": [ 17 | "## 提升开发效率\n", 18 | "\n", 19 | "> 工欲善其事,必先利其器\n", 20 | "\n", 21 | "- 使用jupyter\n", 22 | "\n", 23 | "``` bash\n", 24 | "jupyter-notebook\n", 25 | "``` \n", 26 | "\n", 27 | "- 在jupyter中使用自己的conda环境\n", 28 | "\n", 29 | "``` bash\n", 30 | "conda activate py37\n", 31 | "conda install nb_conda_kernels\n", 32 | "```\n", 33 | "\n", 34 | "- shell小技巧\n", 35 | "\n", 36 | "```\n", 37 | "alias ca=\"conda activate\"\n", 38 | "```\n", 39 | "\n", 40 | "\n" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "id": "fda873ed", 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "sadasdas\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "print(\"sadasdas\")" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 34, 64 | "id": "2f8079b1", 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "import numpy as np\n", 69 | "import nf\n", 70 | "import torch as th" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 50, 76 | "id": "4b3ed829", 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "a = nf.Tensor([[1, 2, 3]], requires_grad=True)\n", 81 | "b = nf.Tensor([[5], [5]], requires_grad=True)\n", 82 | "c = a * b\n", 83 | "\n", 84 | "def broadcastable(grad, ashape):\n", 85 | " \"\"\"\n", 86 | " 保证传递的梯度shape一致,用于兼容广播机制的反向传播\n", 87 | " :param grad:\n", 88 | " :param ashape:\n", 89 | " :return:\n", 90 | " \"\"\"\n", 91 | " if grad.shape == ashape:\n", 92 | " return grad\n", 93 | " grad_bak = grad.sum(axis=tuple(range(grad.ndim - len(ashape))))\n", 94 | " # print(\"g\", grad_bak.shape, ashape)\n", 95 | " keepdims = tuple(n for (n, i) in enumerate(grad_bak.shape) if i != ashape[n])\n", 96 | " if keepdims:\n", 97 | " grad_bak = grad_bak.sum(axis=keepdims, keepdims=True)\n", 98 | " # print(\"g\", grad_bak.shape, ashape)\n", 99 | " return grad_bak" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 85, 105 | "id": "0f8061be", 106 | "metadata": { 107 | "scrolled": false 108 | }, 109 | "outputs": [ 110 | { 111 | "name": "stdout", 112 | "output_type": "stream", 113 | "text": [ 114 | "tensor([[101., 202., 303.],\n", 115 | " [104., 205., 306.]])\n", 116 | "None\n", 117 | "None\n" 118 | ] 119 | } 120 | ], 121 | "source": [ 122 | "# [[1,2,3], [1,2,3]]\n", 123 | "# [[5,5,5], [5,5,5]]\n", 124 | "A = 
th.Tensor(np.array([[1,2,3],[4,5,6]]))\n", 125 | "B = th.Tensor(np.array([100, 200, 300]))\n", 126 | "c = A + B\n", 127 | "# result = A + [100, 200, 300]\n", 128 | "\n", 129 | "# grad = np.array([[[1,1,1], [1,1,1]]])\n", 130 | "# d = nf.Tensor(grad)\n", 131 | "# print(d.shape)\n", 132 | "# print(d.T.shape)\n", 133 | "# print(grad.shape)\n", 134 | "# print(grad.T.shape)\n", 135 | "# c.backward(1)\n", 136 | "\n", 137 | "# grad_bak = broadcastable(grad, (2, 1))\n", 138 | "print(c)\n", 139 | "print(A.grad)\n", 140 | "print(B.grad)\n", 141 | "\n", 142 | "# print(a.shape, a.grad)\n", 143 | "# print(b.shape, b.grad)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 29, 149 | "id": "08bed019", 150 | "metadata": {}, 151 | "outputs": [ 152 | { 153 | "data": { 154 | "text/plain": [ 155 | "torch.Size([5, 1, 1])" 156 | ] 157 | }, 158 | "execution_count": 29, 159 | "metadata": {}, 160 | "output_type": "execute_result" 161 | } 162 | ], 163 | "source": [ 164 | "th.abs(th.Tensor([[[-1,2,0,1,2]]])).T.shape" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 89, 170 | "id": "c1a171b5", 171 | "metadata": {}, 172 | "outputs": [ 173 | { 174 | "name": "stdout", 175 | "output_type": "stream", 176 | "text": [ 177 | "[array([3.]), array([6.]), None]\n" 178 | ] 179 | } 180 | ], 181 | "source": [ 182 | "def func(x,y,z):\n", 183 | " f6 = x + y + 1 * x * y \n", 184 | " return f6\n", 185 | "def nf_grad_Test(x,y,z):\n", 186 | " x = nf.Tensor(x, requires_grad=True) # grad\n", 187 | " y = nf.Tensor(y, requires_grad=True)\n", 188 | " z = nf.Tensor(z, requires_grad=True)\n", 189 | " f9 = func(x,y,z)\n", 190 | " f9.backward() # 计算梯度\n", 191 | "# print(\"nf\", t2,time() - t1)\n", 192 | " return [x.grad, y.grad, z.grad]\n", 193 | "x = np.array([5])\n", 194 | "y = np.array([2])\n", 195 | "z = np.array([3])\n", 196 | "grad_nf = nf_grad_Test(x,y,z)\n", 197 | "print(grad_nf)" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "id": "016938fa", 203 | "metadata": {}, 204 | "source": [ 205 | "f(x,y) = x + y\n", 206 | "f/x=1 f/y=1" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 56, 212 | "id": "bdd6d0ee", 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "\n", 217 | "class Add:\n", 218 | " def __call__(self, a, b):\n", 219 | " self.var = (a, b)\n", 220 | " return a + b\n", 221 | " \n", 222 | " def backward(self, grad):\n", 223 | " self.var[0].backward(grad)\n", 224 | " self.var[1].backward(grad)\n", 225 | "\n", 226 | "class Assign :\n", 227 | " def __call__(self, a):\n", 228 | " self.var = (a)\n", 229 | " return a\n", 230 | "\n", 231 | " def backward(self, grad):\n", 232 | " return None\n", 233 | " \n", 234 | "class Mul:\n", 235 | " def __call__(self, a, b):\n", 236 | " self.var = (a, b)\n", 237 | " return a.data * b.data\n", 238 | " \n", 239 | " def backward(self, grad):\n", 240 | " self.var[0].backward(grad*b.data)\n", 241 | " self.var[1].backward(grad*a.data)\n", 242 | "\n", 243 | "\n", 244 | "\n", 245 | "\n", 246 | "\n", 247 | "class TestTensor :\n", 248 | " def __init__(self, data=None, creator=None):\n", 249 | " self.data = None\n", 250 | " if isinstance(data, (int, float, bool)):\n", 251 | " data = [data]\n", 252 | " if isinstance(data, (list, tuple)):\n", 253 | " data = np.array(data)\n", 254 | " if isinstance(data, (np.ndarray)):\n", 255 | " self.data = data.copy()\n", 256 | " elif isinstance(data, TestTensor):\n", 257 | " raise ValueError(\"输入的是 TestTensor\")\n", 258 | " else:\n", 259 | " raise 
ValueError(\"unknown input type\", type(data), data)\n", 260 | "        if creator is None:\n", 261 | "            creator = Assign()\n", 262 | "            creator(self)\n", 263 | "        self.creator = creator\n", 264 | "        self.grad = self.data * 0\n", 265 | "    \n", 266 | "    def __mul__(self, other): # multiplication\n", 267 | "        mul_op = Mul()\n", 268 | "        return TestTensor(mul_op(self, other), mul_op)\n", 269 | "    def __rmul__(self, other): # reflected multiplication\n", 270 | "        mul_op = Mul()\n", 271 | "        return TestTensor(mul_op(other, self), mul_op)\n", 272 | "    def backward(self, grad):\n", 273 | "        self.grad += grad\n", 274 | "        if self.creator:\n", 275 | "            self.creator.backward(grad)" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 64, 281 | "id": "6baaa04f", 282 | "metadata": {}, 283 | "outputs": [ 284 | { 285 | "name": "stdout", 286 | "output_type": "stream", 287 | "text": [ 288 | "[[12 12]\n", 289 | " [ 3  8]]\n", 290 | "<__main__.TestTensor object at 0x7f8278b2c350>\n" 291 | ] 292 | } 293 | ], 294 | "source": [ 295 | "a = TestTensor(np.array([[6, 6], [1, 2]]))\n", 296 | "b = TestTensor([[2, 2], [3, 4]])\n", 297 | "# c = TestTensor()\n", 298 | "e = a * b\n", 299 | "print(e.data)\n", 300 | "\n", 301 | "# e.backward(1)\n", 302 | "# print(a.grad)\n", 303 | "# print(b.grad)\n", 304 | "print(e)" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 33, 310 | "id": "672ebc12", 311 | "metadata": {}, 312 | "outputs": [ 313 | { 314 | "name": "stdout", 315 | "output_type": "stream", 316 | "text": [ 317 | "[[1 2 3]\n", 318 | " [4 5 6]] (2, 3)\n", 319 | "[[1 2]\n", 320 | " [3 4]\n", 321 | " [5 6]]\n" 322 | ] 323 | } 324 | ], 325 | "source": [ 326 | "a = np.array([[1, 2, 3], [4,5,6]])\n", 327 | "b = np.array([4])\n", 328 | "\n", 329 | "# print(a, a.shape) # broadcasting\n", 330 | "print(a.reshape([3,2]))\n", 331 | "\n", 332 | "# Reshape(a, [3,2])" 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "id": "e43078a1", 338 | "metadata": {}, 339 | "source": [ 340 | "## Homework\n", 341 | "1. Understand NumPy's broadcasting rules\n", 342 | "2. 
Understand how broadcasting is handled in the backward pass (see the NumPy sketch at the end of this document)" 343 | ] 344 | }, 345 | { 346 | "cell_type": "markdown", 347 | "id": "5920995f", 348 | "metadata": {}, 349 | "source": [ 350 | "## References\n", 351 | "- https://segmentfault.com/a/1190000023346483\n", 352 | "- https://www.heywhale.com/mw/project/59f29f67c5f3f5119527a2cc" 353 | ] 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "id": "2e0459b2", 358 | "metadata": {}, 359 | "source": [ 360 | "---" 361 | ] 362 | } 363 | ], 364 | "metadata": { 365 | "kernelspec": { 366 | "display_name": "Python [conda env:py37] *", 367 | "language": "python", 368 | "name": "conda-env-py37-py" 369 | }, 370 | "language_info": { 371 | "codemirror_mode": { 372 | "name": "ipython", 373 | "version": 3 374 | }, 375 | "file_extension": ".py", 376 | "mimetype": "text/x-python", 377 | "name": "python", 378 | "nbconvert_exporter": "python", 379 | "pygments_lexer": "ipython3", 380 | "version": "3.7.10" 381 | } 382 | }, 383 | "nbformat": 4, 384 | "nbformat_minor": 5 385 | } 386 | -------------------------------------------------------------------------------- /第四课.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "1ee796bf", 6 | "metadata": {}, 7 | "source": [ 8 | "# Lesson 4 (第四课)" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 5, 14 | "id": "a7f49948", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "import nf\n", 20 | "from nf import Tensor" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 29, 26 | "id": "4ad176b8", 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "class Linear:\n", 31 | "    def __init__(self, in_num, out_num):\n", 32 | "        self.in_num = in_num\n", 33 | "        self.out_num = out_num\n", 34 | "        self.w = Tensor(np.random.random([in_num, out_num]), requires_grad=True)\n", 35 | "        print(self.w.shape)\n", 36 | "    \n", 37 | "    def __call__(self, feat):\n", 38 | "        if isinstance(feat, np.ndarray):\n", 39 | "            feat = Tensor(feat)\n", 40 | "        y = feat @ self.w\n", 41 | "        return y\n", 42 | "    " 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 34, 48 | "id": "792218af", 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "name": "stdout", 53 | "output_type": "stream", 54 | "text": [ 55 | "(10, 2)\n", 56 | "(2, 4)\n", 57 | "(100, 2)\n", 58 | "(10, 2)\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "in_num = 10\n", 64 | "out_num1 = 2\n", 65 | "out_num2 = 4\n", 66 | "feat = np.random.random([100, 10])\n", 67 | "\n", 68 | "layer1 = Linear(in_num, out_num1)\n", 69 | "layer2 = Linear(out_num1, out_num2)\n", 70 | "y = layer1(feat)\n", 71 | "# y = layer2(y)\n", 72 | "\n", 73 | "print(y.shape)\n", 74 | "y.backward()\n", 75 | "print(layer1.w.grad.shape)" 76 | ] 77 | } 78 | ], 79 | "metadata": { 80 | "kernelspec": { 81 | "display_name": "Python [conda env:py37] *", 82 | "language": "python", 83 | "name": "conda-env-py37-py" 84 | }, 85 | "language_info": { 86 | "codemirror_mode": { 87 | "name": "ipython", 88 | "version": 3 89 | }, 90 | "file_extension": ".py", 91 | "mimetype": "text/x-python", 92 | "name": "python", 93 | "nbconvert_exporter": "python", 94 | "pygments_lexer": "ipython3", 95 | "version": "3.7.10" 96 | } 97 | }, 98 | "nbformat": 4, 99 | "nbformat_minor": 5 100 | } 101 | --------------------------------------------------------------------------------
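
The Lesson 2 cells above sketch a tiny autograd system (`Assign`, `Mul`, `TestTensor`) but leave the backward call commented out. Below is a compact, self-contained rendering of the same idea — a sketch, not the NumpyFlow implementation — with a few simplifications introduced here (float-only data, a default ones seed in `backward`) so the product rule can be checked end to end.

```python
import numpy as np

class Assign:
    """Creator of leaf tensors; contributes nothing to the backward pass."""
    def __call__(self, a):
        self.var = (a,)
        return a
    def backward(self, grad):
        return None

class Mul:
    """Elementwise product; remembers its operands for the backward pass."""
    def __call__(self, a, b):
        self.var = (a, b)
        return a.data * b.data
    def backward(self, grad):
        self.var[0].backward(grad * self.var[1].data)  # d(a*b)/da = b
        self.var[1].backward(grad * self.var[0].data)  # d(a*b)/db = a

class TestTensor:
    def __init__(self, data, creator=None):
        self.data = np.asarray(data, dtype=float)
        if creator is None:          # leaf tensor: give it a no-op creator
            creator = Assign()
            creator(self)
        self.creator = creator
        self.grad = np.zeros_like(self.data)
    def __mul__(self, other):
        mul_op = Mul()
        return TestTensor(mul_op(self, other), mul_op)
    def backward(self, grad=None):
        grad = np.ones_like(self.data) if grad is None else grad
        self.grad = self.grad + grad   # accumulate, then push upstream
        self.creator.backward(grad)

a = TestTensor([[6, 6], [1, 2]])
b = TestTensor([[2, 2], [3, 4]])
e = a * b
e.backward()        # seed with ones
print(e.data)       # [[12. 12.] [ 3.  8.]]
print(a.grad)       # equals b.data -> [[2. 2.] [3. 4.]]
print(b.grad)       # equals a.data -> [[6. 6.] [1. 2.]]
```

Each operand's gradient comes out equal to the other operand's data, which is exactly the product rule that the corrected `Mul.backward` encodes.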
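Homework item 2 of Lesson 2 asks how broadcasting interacts with the backward pass. The sketch below mirrors the notebook's `broadcastable` helper in plain NumPy: the upstream gradient is summed over the axes that broadcasting added or stretched, so each operand receives a gradient of its own shape. The name `reduce_broadcast` is used only for this illustration.

```python
import numpy as np

def reduce_broadcast(grad, ashape):
    """Sum grad back down to ashape, undoing NumPy broadcasting."""
    if grad.shape == ashape:
        return grad
    # drop the leading axes that broadcasting prepended
    grad = grad.sum(axis=tuple(range(grad.ndim - len(ashape))))
    # collapse axes that were stretched from size 1
    axes = tuple(n for n, size in enumerate(ashape)
                 if size == 1 and grad.shape[n] != 1)
    if axes:
        grad = grad.sum(axis=axes, keepdims=True)
    return grad

# forward: a (1, 3) times b (2, 1) broadcasts to c of shape (2, 3)
a = np.array([[1., 2., 3.]])
b = np.array([[5.], [5.]])
c = a * b

# backward with dL/dc = ones
grad_c = np.ones_like(c)
grad_a = reduce_broadcast(grad_c * b, a.shape)   # [[10. 10. 10.]]
grad_b = reduce_broadcast(grad_c * a, b.shape)   # [[6.] [6.]]
print(grad_a.shape, grad_b.shape)                # (1, 3) (2, 1)
```

This is why `a` (shape `(1, 3)`) receives a `(1, 3)` gradient even though `c` has shape `(2, 3)`: every broadcast copy contributes to the output, and those contributions are summed back onto the original element.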
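Lesson 4 stops after backpropagating through a single `Linear` layer (the second layer is commented out). The sketch below chains both layers; it assumes the repo's `nf` package is importable and that `Tensor.backward()` with no argument seeds a gradient of ones, as the notebook's own `y.backward()` call suggests. Treat it as an illustration of the intended usage rather than a guaranteed API; the `Linear` class is condensed from the Lesson 4 cell.

```python
import numpy as np
from nf import Tensor

class Linear:
    """Single weight matrix, no bias, as in the Lesson 4 notebook."""
    def __init__(self, in_num, out_num):
        self.w = Tensor(np.random.random([in_num, out_num]), requires_grad=True)

    def __call__(self, feat):
        if isinstance(feat, np.ndarray):
            feat = Tensor(feat)
        return feat @ self.w

feat = np.random.random([100, 10])
layer1 = Linear(10, 2)
layer2 = Linear(2, 4)

y = layer2(layer1(feat))     # chain: (100, 10) -> (100, 2) -> (100, 4)
y.backward()                 # seeds the output gradient with ones, as in the notebook
print(layer1.w.grad.shape)   # expected (10, 2)
print(layer2.w.grad.shape)   # expected (2, 4)
```

If the chained call goes through, each weight's gradient should come back with the same shape as the weight itself: `(10, 2)` for `layer1.w` and `(2, 4)` for `layer2.w`, matching the shapes printed by the constructors.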