├── requirements.txt ├── logo.png ├── .gitignore ├── setup.py ├── LICENSE ├── planer ├── net.py ├── __init__.py ├── layer.py ├── io.py └── util.py └── readme.md /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Image-Py/planer/HEAD/logo.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyc 3 | .DS_Store 4 | *egg* 5 | build/* 6 | *.npy 7 | *.onnx 8 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | descr = """A powerful light-weight inference framework for CNN. 4 | The aim of planer is to provide efficient and adaptable inference environment for CNN model. 5 | Also in order to enlarge the application scope, 6 | we support ONNX format, which enables the converting of trained model within various DL frameworks. 7 | """ 8 | 9 | if __name__ == '__main__': 10 | setup(name='planer', 11 | version='0.34', 12 | url='https://github.com/Image-Py/planer', 13 | description='Powerful Light Artificial NEuRon', 14 | long_description=descr, 15 | author='Y.Dong, YXDragon', 16 | author_email='yxdragon@imagepy.org', 17 | license='BSD 3-clause', 18 | packages=['planer'], 19 | package_data={}, 20 | install_requires=[ 21 | 'numpy' 22 | ], 23 | ) 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, Yan xiaolong 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /planer/net.py: -------------------------------------------------------------------------------- 1 | from .layer import wrap, layer_map as key 2 | import numpy, time 3 | from .util import np, clear_buf 4 | 5 | class Net: 6 | def __init__(self): 7 | self.weights, self.body, self.flow = [], [], [] 8 | self.life, self.timer = {}, {} 9 | 10 | def load_json(self, inputs, inits, body, flow, debug=False): 11 | self.body, self.flow, self.life = [], [], {} 12 | for i in body: 13 | para = i[2] 14 | if debug: print(i) 15 | self.body.append((i[0], wrap(key[i[1]], i[1])(**para))) 16 | for i in range(len(flow)): 17 | keys = flow[i][0] 18 | if isinstance(keys, str): keys = [keys] 19 | for j in keys: self.life[j] = i 20 | for i in inits: 21 | self.weights.append(np.zeros(i[1], dtype=i[2])) 22 | 23 | self.input, self.inits = inputs, [i[0] for i in inits] 24 | self.layer, self.flow = body, flow 25 | 26 | def half(self): 27 | for i in range(len(self.weights)): 28 | if self.weights[i].dtype == np.float32: 29 | self.weights[i] = self.weights[i].astype('float16') 30 | 31 | def info(self, obj): 32 | if isinstance(obj, list): 33 | return [self.info(i) for i in obj] 34 | if hasattr(obj, 'shape'): return obj.shape 35 | return obj 36 | 37 | def forward(self, *x, debug=False): 38 | dic = dict(self.body) 39 | rst = {'None': None} 40 | for k, v in zip(self.inits, self.weights): rst[k] = v 41 | 42 | for k, v in zip(self.input, x): rst[k] = v 43 | for i in range(len(self.flow)): 44 | x, ls, y = self.flow[i] 45 | if not isinstance(ls, list): ls = [ls] 46 | for l in ls: 47 | out = x if l == ls[0] else y 48 | if not isinstance(out, str): 49 | p = [rst.get(i) for i in out] 50 | else: p = [rst[out]] 51 | xs = x if isinstance(x, list) else [x] 52 | for k in set(xs): # release wasted obj 53 | if k in rst and self.life[k]<=i: del rst[k] 54 | obj = dic[l] 55 | start = time.time() 56 | if debug: 57 | print(l, obj.name, ':', obj.para()) 58 | outp = out #[(i, 'Weights')[i in self.inits] for i in out] 59 | print('\t--> ', outp, ':', self.info(p)) 60 | if isinstance(y, str): rst[y] = obj(*p) 61 | else: 62 | for k, v in zip(y, obj(*p)): rst[k] = v 63 | if debug: 64 | for k in (y, [y])[isinstance(y, str)]: 65 | print('\t<-- ', k, ':', self.info(rst[k])) 66 | # np.cuda.runtime.deviceSynchronize() 67 | cost = time.time()-start 68 | if not obj.name in self.timer: 69 | self.timer[obj.name] = 0 70 | self.timer[obj.name] += cost 71 | clear_buf() 72 | return rst[y] 73 | 74 | def timeit(self, status='start'): 75 | if status == 'start': self.timer = {} 76 | if status == 'end': 77 | for i in self.timer: print(i, self.timer[i]) 78 | 79 | def run(self, output=None, input={}): 80 | rst = self(input) # compatible with onnxruntime 81 | return rst if isinstance(rst, tuple) else (rst,) 82 | 83 | def load_weights(self, data): 84 | s, data = 0, data.view(dtype=np.uint8) 85 | for i in range(len(self.weights)): 86 | buf = self.weights[i].ravel().view(dtype=np.uint8) 87 | buf[:] = data[s:s+buf.size] 88 | s += buf.size 89 | 90 | def show(self): 91 | from .plot import plot_net 92 | plot_net(self.input, self.inits, self.layer, self.flow) 93 | 94 | def __call__(self, *x, **key): 95 | if type(x[0]) is dict: x = [x[0][i] for i in self.input] 96 | tp = [isinstance(i, numpy.ndarray) for i in x] 97 | need = sum(tp)>0 and not numpy is np 98 | if need: x = [np.asarray(i) if b else i for i,b in zip(x, tp)] 99 | rst = self.forward(*x, **key) 100 | if need: rst = tuple([i.get() for i in 
rst]) 101 | return rst[0] if len(rst)==1 else rst 102 | 103 | 104 | if __name__ == '__main__': 105 | pass 106 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | ## Planer: Powerful Light Artificial NEuRon 2 | 3 | ![](https://user-images.githubusercontent.com/36396315/79580952-57b7d200-80fc-11ea-9841-2b253f4e6293.png) 4 | 5 | A powerful, light-weight inference framework for CNNs. The aim of planer is to provide an efficient and adaptable inference environment for CNN models. To enlarge the application scope, we also support the ONNX format, which enables converting trained models from various DL frameworks. 6 | 7 | ## Features 8 | 9 | Planer is a light-weight CNN framework implemented against a pure NumPy-like interface. It can run with NumPy alone, or switch to other backends (CuPy accelerated with CUDA, ClPy accelerated with OpenCL). 10 | 11 | * Implemented in a pure NumPy-like interface 12 | * Extremely streamlined IR based on JSON 13 | * Powerful model visualization tools 14 | * ONNX-supported model converting 15 | * Plenty of inspiring demos 16 | 17 | ## Various Building Options 18 | All the elements (layers, operations, activation functions) are abstracted into a ```layer```, and a JSON-formatted ```flow``` is used to build the computation graph. We support 3 ways of building a network: 19 | * PyTorch-like 20 | ```python 21 | from planer import * 22 | # ========== write a net manually ========== 23 | class CustomNet(Net): 24 | def __init__(self): 25 | self.conv = Conv2d(3, 64, 3, 1) 26 | self.relu = ReLU() 27 | self.pool = Maxpool(2) 28 | self.upsample = UpSample(2) 29 | self.concatenate = Concatenate() 30 | self.sigmoid = Sigmoid() 31 | 32 | def forward(self, x): 33 | x = self.conv(x) 34 | x = self.relu(x) 35 | y = self.pool(x) 36 | y = self.upsample(y) 37 | z = self.concatenate([x, y]) 38 | return self.sigmoid(z) 39 | ``` 40 | * JSON-like (based on our IR) 41 | ```python 42 | # ========== load net from json ========== 43 | layer = [('conv', 'conv', (3, 64, 3, 1)), 44 | ('relu', 'relu', None), 45 | ('pool', 'maxpool', (2,)), 46 | ('up', 'upsample', (2,)), 47 | ('concat', 'concat', None), 48 | ('sigmoid', 'sigmoid', None)] 49 | 50 | flow = [('x', ['conv', 'relu'], 'x'), 51 | ('x', ['pool', 'up'], 'y'), 52 | (['x','y'], ['concat', 'sigmoid'], 'z')] 53 | 54 | net = Net() 55 | net.load_json(layer, flow) 56 | ``` 57 | 58 | ## Converting from ONNX (PyTorch 1.1.0) 59 | 60 | It is easy to convert a net from PyTorch after training (through ONNX). Here is a demo with resnet18. 61 | 62 | ```python 63 | from torchvision.models import resnet18 64 | import torch 65 | from planer import torch2planer 66 | 67 | net = resnet18(pretrained=True) 68 | x = torch.randn(1, 3, 224, 224, device='cpu') 69 | torch2planer(net, 'resnet18', x) 70 | 71 | # then you will get a resnet18.json and resnet18.npy in the current folder. 72 | 73 | from planer import read_net 74 | import planer 75 | import numpy as np 76 | 77 | # get the planer array lib 78 | pal = planer.core(np) 79 | x = pal.random.randn(1, 3, 224, 224).astype('float32') 80 | net = read_net('resnet18') 81 | net(x) # use the net to predict your data 82 | ``` 83 | 84 | ## Change backend 85 | 86 | Planer is built on a NumPy-like interface, so it is easy to change the backend to CuPy or ClPy. 87 | 88 | ```python 89 | import planer, cupy 90 | planer.core(cupy) # use cupy as backend 91 | 92 | import planer, clpy 93 | planer.core(clpy) # use clpy as backend 94 | ``` 95 |
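After switching, arrays created through the returned module live on the chosen backend, and ```planer.asnumpy``` copies a result back to the host. A minimal sketch, assuming CuPy is installed and the `resnet18` model from the demo above has already been converted:

```python
import planer, cupy

pal = planer.core(cupy)            # switch the array backend to CuPy
net = planer.read_net('resnet18')  # load the converted model
x = pal.random.randn(1, 3, 224, 224).astype('float32')  # a CuPy array
y = net(x)                         # result stays on the backend when the input is not numpy
y_host = planer.asnumpy(y)         # copy the result back to a NumPy array
print(y_host.shape)
```

Feeding plain NumPy arrays also works: the net converts them to the current backend internally and converts the results back to NumPy before returning.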
96 | In our tests on Windows, planer with CuPy is 80-100 times faster than NumPy and has roughly equal performance with torch (but on Linux torch is faster). 97 | 98 | ## Network visualization 99 | 100 | We provide a powerful visualization tool for CNN models. Just calling ```net.show()``` will work. 101 | 102 | ![](https://user-images.githubusercontent.com/24822467/78111180-dc350000-742e-11ea-9152-30dad06ee433.png) 103 | 104 | 105 | ## Demos 106 | We have released some demos, which can be found inside the ```demo/``` folder. 107 | 108 | ![](https://user-images.githubusercontent.com/36396315/79580979-60100d00-80fc-11ea-8565-cd55f5db2395.png) 109 | 110 | ## Milestone 111 | Yolo-v3 is supported now! 112 | 113 | ![](https://user-images.githubusercontent.com/36396315/79184976-40e95500-7e48-11ea-9679-a70074c658cf.png) 114 | 115 | ## Planer-pro 116 | 117 | Planer is our open-source framework; we also have a professional edition (several times faster than torch). 118 | -------------------------------------------------------------------------------- /planer/__init__.py: -------------------------------------------------------------------------------- 1 | from .layer import * 2 | from .net import Net 3 | from .io import * 4 | from .util import * 5 | 6 | # compatible with onnxruntime 7 | InferenceSession = read_net 8 | 9 | # planer array library 10 | backend = None 11 | import numpy as np 12 | try: import cupy as cp 13 | except: cp = None 14 | try: import numexpr as ep 15 | except: ep = None 16 | try: import cupy.cudnn as dnn 17 | except: dnn = None 18 | 19 | print('numpy:[%s] numexpr:[%s] cupy:[%s] cudnn:[%s] '%tuple( 20 | [('installed', '--')[i is None] for i in (np, ep, cp, dnn)])) 21 | 22 | def core(obj, silent=False): 23 | global np; np = obj 24 | from .
import util, layer, net, io 25 | util.np = layer.np = net.np = io.np = obj 26 | #try: import numexpr as ep 27 | #except: ep = None 28 | 29 | layer.ep = ep if obj.__name__ == 'numpy' else None 30 | layer.dnn = util.dnn = dnn if obj.__name__ == 'cupy' else None 31 | 32 | if obj.__name__=='numpy' and ep is None: 33 | print('numexpr is not installed, optional but recommended.') 34 | if obj.__name__=='cupy' and dnn is None: 35 | print('cudnn is not installed, optional but recommended.') 36 | np.asnumpy = obj.asnumpy if 'asnumpy' in dir(obj) else obj.asarray 37 | if not silent: print('\nuser switch engine:', obj.__name__) 38 | return np 39 | 40 | core(np, True) 41 | 42 | def asnumpy(arr, **key): return np.asnumpy(arr, **key) 43 | 44 | def asarray(arr, **key): return np.asarray(arr, **key) 45 | 46 | # ========== planer zoo ========== 47 | import inspect, importlib, pathlib 48 | import urllib.request 49 | 50 | root = str(pathlib.Path.home())+'/.planer_zoo' 51 | if not os.path.exists(root): os.mkdir(root) 52 | 53 | def progress(i, n, bar=[None]): 54 | from tqdm import tqdm 55 | if bar[0] is None: 56 | bar[0] = tqdm() 57 | bar[0].total = n 58 | bar[0].update(i-bar[0].n) 59 | if n==i: bar[0] = None 60 | 61 | def download(url, path, info=print, progress=progress): 62 | info('download from %s'%url) 63 | f, rst = urllib.request.urlretrieve(url, path, 64 | lambda a,b,c: progress(int(100.0 * a * b/c), 100)) 65 | 66 | def source(mroot, lst): 67 | for i in lst: 68 | if len(i)==3: i.insert(2, False) 69 | i[2] = os.path.exists(mroot + '/' + i[0]) 70 | return lst 71 | 72 | def list_source(root, lst): 73 | print('%-20s%-10s%-10s\n'%('file name','required', 'installed')+'-'*40) 74 | for i in source(root, lst):print('%-20s%-10s%-10s'%(tuple(i[:3]))) 75 | 76 | def planer_catlog(): 77 | req = urllib.request.Request('http://planer.imagepy.org/catlog.txt', 78 | headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)\ 79 | AppleWebKit/537.36 (KHTML, like Gecko)\ 80 | Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62'}) 81 | return json.loads(urllib.request.urlopen(req).read()) 82 | 83 | def downloads(root, lst, names='required', force=False, info=print, progress=progress): 84 | source(root, lst) 85 | if names=='all': lst = [i for i in lst] 86 | elif names=='required': lst = [i for i in lst if i[1]] 87 | else: 88 | if isinstance(names, str): names = [names] 89 | lst = [i for i in lst if i[0] in names] 90 | if not force: lst = [i for i in lst if not i[2]] 91 | # name = model.__name__.replace('planer_zoo.', '') 92 | if not os.path.exists(root): os.makedirs(root) 93 | if len(lst)==0: return 94 | catlog = planer_catlog() 95 | for name, a, b, url in lst: 96 | if url[:4] != 'http': url = catlog[url] 97 | download(url, root+'/'+name, info, progress) 98 | 99 | # parse source from a markdown file 100 | def get_source(path): 101 | with open(path) as f: cont = f.read().split('\n') 102 | status, files = False, [] 103 | for i in range(len(cont)): 104 | if '|File|' in cont[i].replace(' ',''): break 105 | for i in range(i, len(cont)): 106 | if not '|' in cont[i]: break 107 | if not '](' in cont[i]: continue 108 | nameurl = cont[i].split('|')[1] 109 | req = cont[i].split('|')[2].strip()!='' 110 | name, url = nameurl.split('](') 111 | name = name.split('[')[1] 112 | url = url.split(')')[0] 113 | files.append([name, req, url]) 114 | return files 115 | 116 | def Model(model, auto=True): 117 | if hasattr(model, 'list_source'): return model 118 | name = model.__package__.replace('planer_zoo.', '') 119 | md = 
model.__file__.replace('__init__.py', 'readme.py')[:-2]+'md' 120 | mroot = root +'/' + '/'.join(name.split('.')) 121 | if hasattr(model, 'source'): 122 | lst = [list(i) for i in model.source] 123 | else: lst = get_source(md) 124 | for i in lst: 125 | i[-1] = model.__package__.replace('.', '/')+'/'+i[0] if i[-1]=='' else i[-1] 126 | model.source = lambda m=mroot: source(m, lst) 127 | model.root, oroot = mroot, model.root 128 | ms = [getattr(model, i) for i in dir(model)] 129 | for m in set([inspect.getmodule(i) for i in ms]): 130 | if hasattr(m, 'root') and m.root == oroot: m.root = mroot 131 | model.list_source = lambda root=mroot, lst=model.source(): list_source(root, lst) 132 | 133 | model.download = lambda name='required', force=False, info=print, \ 134 | progress=progress, m=mroot: downloads( 135 | m, model.source(), name, force, info, progress) 136 | if auto: [model.download(), model.load()] 137 | return model 138 | 139 | def load(name, auto=True): 140 | model = importlib.import_module(name) 141 | return Model(model, auto) 142 | -------------------------------------------------------------------------------- /planer/layer.py: -------------------------------------------------------------------------------- 1 | from .util import maxpool, upsample, avgpool, np, lstm 2 | from .util import conv_for, conv_stride, conv_dnn 3 | ep, dnn = None, None # numexpr is help for numpy backend 4 | import numpy 5 | 6 | def wrap(f, layername='layer'): 7 | class Layer: 8 | name = layername 9 | def __init__(self, **key): self.key = key 10 | def para(self): return self.key 11 | def forward(self, *x): return f(*x, **self.key) 12 | def __call__(self, *x): return self.forward(*x) 13 | return Layer 14 | 15 | def Dense(x, K, B, shp=None): 16 | y = np.matmul(x, K.T) 17 | y += B.reshape((1, -1)) 18 | return y 19 | 20 | def MatMul(x, y): return np.matmul(x, y) 21 | 22 | def Conv2d(x, K, B=None, group=1, strides=(1,1), dilations=(1,1), pads=(0,0,0,0)): 23 | if np is numpy: out = conv_for(x, K, group, pads, strides, dilations) 24 | elif not dnn is None: return conv_dnn(x, K, B, group, pads, strides, dilations) 25 | else: out = conv_stride(x, K, group, pads, strides, dilations) 26 | return out if B is None else np.add(out, B.reshape(1, -1, 1, 1), out=out) 27 | 28 | def ConvTranspose2d(x, K, B=None, strides=[2,2], dilations=[1,1], pads=[0,0,0,0], output_padding=[0,0], group=1): 29 | (n, c, h, w), (s1, s2), (d1, d2), (H, W) = x.shape, strides, dilations, K.shape[2:] 30 | low_h, high_h = ((H-1)*d1- pads[0]), ((H-1)*d1-pads[2]+output_padding[0]) 31 | low_w, high_w = ((W-1)*d2- pads[1]), ((W-1)*d2-pads[3]+output_padding[1]) 32 | buf = np.zeros((n, c, (h-1)*s1+low_h+high_h+1, (w-1)*s2+low_w+high_w+1), dtype=x.dtype) 33 | buf[:,:,low_h:buf.shape[2]-high_h:s1,low_w:buf.shape[3]-high_w:s2] = x 34 | return Conv2d(buf, K.transpose(1,0,2,3)[:,:,::-1,::-1], B, strides=[1,1], dilations=dilations, group=group) 35 | 36 | def LSTM(X, W, R, B=0, sequence_lens=0, initial_h=0, initial_c=0, hidden_size=None, direction='forward'): 37 | dirs = {'forward':[1], 'reverse':[-1], 'bidirectional':[1,-1]} 38 | (L, N, input_dim), hidden_size = X.shape, R.shape[-1] 39 | Y = np.zeros((L, len(dirs[direction]), N, hidden_size), dtype=X.dtype) 40 | for i, d in enumerate(dirs[direction]): 41 | _, H, C = lstm(X, Y[:,i], W[i], R[i], B[i], initial_h[i], initial_c[i], d) 42 | return Y, H, C 43 | 44 | def ReLU(x): 45 | if ep: return ep.evaluate('x * (x > 0)') 46 | return np.multiply(x, x>0, out=x) 47 | 48 | def LeakyReLU(x, alpha=0.2): 49 | a, b = 
np.array(alpha, x.dtype), np.array(1-alpha, x.dtype) 50 | if ep: return ep.evaluate('x*((x>0)*b+a)') 51 | y = (x>0) * b; y += a; y *= x; return y 52 | 53 | def Sqrt(x): return np.sqrt(x) 54 | 55 | def Identity(x): return x 56 | 57 | def Tile(x, repeat): return np.tile(x, repeat.tolist()) 58 | 59 | def Flatten(x): return x.reshape((x.shape[0], -1)) 60 | 61 | def Sigmoid(x): 62 | if ep: return ep.evaluate('1/(1+exp(-x))') 63 | x = -x; np.exp(x, out=x); x += 1 64 | return np.divide(1, x, out=x) 65 | 66 | def HardSigmoid(x, alpha=0.2, beta=0.5): 67 | x = x * alpha; x += beta 68 | x = np.minimum(x, 1, out=x) 69 | return np.maximum(x, 0, out=x) 70 | 71 | def Maxpool(x, w=(2,2), pads=(0,0,0,0), strides=(2,2)): 72 | return maxpool(x, w, pads, strides) 73 | 74 | def AveragePool(x, w=(2,2), pads=(0,0,0,0), strides=(2,2)): 75 | return avgpool(x, w, pads, strides) 76 | 77 | def GlobalAveragePool(x): 78 | return x.mean(axis=(-2, -1), keepdims=True) 79 | 80 | def UpSample(x, k, mode='nearest'): 81 | if k.size == 0: k = size[-2:] // np.array(x.shape[-2:]) 82 | return upsample(x, k[-2:].astype(int).tolist(), mode) 83 | 84 | def Resize(x, roi, k, size=None, mode='nearest', 85 | coordinate_transformation_mode='half_pixel', nearest_mode='round_prefer_floor'): 86 | if k.size == 0: k = size[-2:] / np.array(x.shape[-2:]) 87 | return upsample(x, k[-2:].tolist(), mode, 88 | coordinate_transformation_mode, nearest_mode) 89 | 90 | def Concatenate(*xs, axis=0): 91 | return np.concatenate(xs, axis=axis) 92 | 93 | def Add(x1, x2): 94 | if ep: return ep.evaluate('x1 + x2') 95 | return x1 + x2 96 | 97 | def Sub(x1, x2): 98 | if ep: return ep.evaluate('x1 - x2') 99 | return x1 - x2 100 | 101 | def Mul(x1, x2): 102 | if ep: return ep.evaluate('x1 * x2') 103 | return x1 * x2 104 | 105 | def Div(x1, x2): 106 | if ep: return ep.evaluate('x1 / x2') 107 | return x1 / x2 108 | 109 | def Pow(x, p): 110 | if ep: return ep.evaluate('x ** p') 111 | return np.power(x, p) 112 | 113 | def ReduceSum(x, axes=-1, keepdims=True): 114 | return x.sum(axis=tuple(axes), keepdims=keepdims) 115 | 116 | def ReduceMean(x, axes=-1, keepdims=True): 117 | return x.mean(axis=tuple(axes), keepdims=keepdims) 118 | 119 | def ReduceMax(x, axes=-1, keepdims=True): 120 | return x.max(axis=tuple(axes), keepdims=keepdims) 121 | 122 | def ReduceMin(x, axes=-1, keepdims=True): 123 | return x.min(axis=tuple(axes), keepdims=keepdims) 124 | 125 | def BatchNorm(x, K, B): 126 | if ep: return ep.evaluate('x * K + B') 127 | x = x * K; x += B; return x 128 | 129 | def Unsqueeze(x, axes=None): 130 | axis = np.array(axes).tolist() 131 | return np.expand_dims(x, tuple(axis)) 132 | 133 | def Squeeze(x, axes=[0]): 134 | return np.squeeze(x, axis=axes[0]) 135 | 136 | def Const(value=0, dtype='float32'): 137 | if isinstance(value, list): 138 | return np.array(value, dtype=dtype) 139 | return value 140 | 141 | def Softmax(x, axis=-1): 142 | y = x - np.max(x, axis=axis, keepdims=True) 143 | ey = ep.evaluate('exp(y)') if ep else np.exp(y) 144 | eX = np.sum(ey, axis=axis, keepdims=True) 145 | if ep: return ep.evaluate('exp(y-log(eX))') 146 | y -= np.log(eX, out=eX); return np.exp(y, out=y) 147 | 148 | def LogSoftmax(x, axis=-1): 149 | y = x - np.max(x, axis=axis, keepdims=True) 150 | ey = ep.evaluate('exp(y)') if ep else np.exp(y) 151 | eX = np.sum(ey, axis=axis, keepdims=True) 152 | if ep: return ep.evaluate('y-log(eX)') 153 | y -= np.log(eX, out=eX); return y 154 | 155 | def Shape(x): return np.array(x.shape) 156 | 157 | def Gather(x, idx, axis=0): return np.take(x, idx, 
axis=axis) 158 | 159 | def Reshape(x, shp): 160 | shp = shp.tolist() 161 | for i in range(len(shp)): 162 | shp[i] = shp[i] or x.shape[i] 163 | return x.reshape(shp) 164 | 165 | def Transpose(x, axis): return x.transpose(axis) 166 | 167 | def ConstantofShape(x, value=0, dtype='float32'): 168 | return np.full(x.ravel().tolist(), value, dtype=dtype) 169 | 170 | def Split(x, split=None, axis=0): 171 | seg = np.cumsum(np.array(split)).tolist() 172 | return np.split(x[:seg[-1]], seg[:-1], axis) 173 | 174 | def Tanh(x): 175 | if ep: return ep.evaluate('tanh(x)') 176 | return np.tanh(x) 177 | 178 | def Exp(x): 179 | if ep: return ep.evaluate('exp(x)') 180 | return np.exp(x) 181 | 182 | def Log(x): 183 | if ep: return ep.evaluate('log(x)') 184 | return np.log(x) 185 | 186 | def Reciprocal(x): return 1/x 187 | 188 | def Slice(x, start, end, axis=None, step=None): 189 | if step is None: step = np.ones(len(start), dtype=np.uint32) 190 | if axis is None: axis = np.arange(len(start)) 191 | seas = [start, end, axis, step] 192 | start, end, axis, step = [i.tolist() for i in seas] 193 | slis = [slice(None,None,None)] * x.ndim 194 | for s, e, a, st in zip(start, end, axis, step): 195 | slis[a] = slice(s, e, st) 196 | return x[tuple(slis)] 197 | 198 | def Expand(x, shp): 199 | ones = np.ones(shp.tolist(), dtype=x.dtype) 200 | return ones * x 201 | 202 | def Cast(x, dtype='flaot32'): return x.astype(dtype) 203 | 204 | def Range(start, end, delta): 205 | return np.arange(int(start), int(end), int(delta)) 206 | 207 | def Equal(x1, x2): return np.equal(x1, x2) 208 | 209 | def Where(msk, x1, x2): return np.where(msk, x1, x2) 210 | 211 | def Scatternd(data, indices, updates): 212 | data = data.copy() 213 | for i in range(len(indices[0])): 214 | data[tuple(indices[0,i])] = updates[0,i] 215 | return data 216 | 217 | def InstanceNormalization(x, s, bias, epsilon=1e-5): 218 | axis = tuple(range(2, x.ndim)) 219 | mean = np.mean(x, axis=axis, keepdims=True) 220 | var = x - mean; var **= 2; 221 | var = np.mean(var, axis=axis, keepdims=True) 222 | shapes = (-1,) + (1,) * (x.ndim - 2) 223 | s.shape = bias.shape = shapes 224 | var = (var + epsilon) ** 0.5 225 | x *= s/var; x += bias - s*mean/var 226 | return x 227 | 228 | def Greater(a, b): return np.greater(a, b) 229 | 230 | def NonZero(x): return np.array(np.nonzero(x)) 231 | 232 | def GreaterOrEqual(a, b): return a >= b 233 | 234 | def TopK(x, k, axis=-1, largest=1, sorted=1): 235 | idk = np.arange(k) * -largest - (largest>0) 236 | idx = np.argsort(x, axis=axis) 237 | idx = np.take(idx, idk, axis=axis) 238 | vs = np.take_along_axis(x, idx, axis=axis) 239 | return vs, idx 240 | 241 | def Pad(x, pads, constant_value=0, mode='constant'): 242 | pads = pads.reshape(2,-1).T.tolist() 243 | para = {'mode': mode}; 244 | if mode=='constant': para['constant_values'] = constant_value 245 | return np.pad(x, pads, **para) 246 | 247 | def Clip(x, min=0, max=1): 248 | minv, maxv = np.float16(min), np.float16(max) 249 | if ep: return ep.evaluate('(((x-minv)*(x>minv))-(maxv-minv))*(x-2)*256') 257 | else: x -= 2; x *= x<0; x += 4; x *= x>0; x *= 256; 258 | return np.array(erflut, x.dtype)[x.astype('int16')] 259 | 260 | def Return(*x): return x 261 | 262 | layer_map = {'dense': Dense, 'conv': Conv2d, 'relu': ReLU, 263 | 'leakyrelu': LeakyReLU, 'batchnorm': BatchNorm, 264 | 'flatten': Flatten, 'sigmoid': Sigmoid, 'softmax': Softmax, 265 | 'hardsigmoid': HardSigmoid, 'squeeze': Squeeze, 266 | 'maxpool': Maxpool, 'averagepool': AveragePool, 'const': Const, 267 | 'upsample': UpSample, 'concat': 
Concatenate, 'add': Add, 268 | 'resize': Resize, 'pad': Pad, 'convtranspose':ConvTranspose2d, 269 | 'sub': Sub, 'reducemean': ReduceMean, 'exp': Exp, 'log': Log, 270 | 'mul': Mul, 'gap': GlobalAveragePool, 'pow':Pow, 'matmul': MatMul, 271 | 'identity' : Identity, 'tile': Tile, 'lstm':LSTM, 272 | 'reducemax':ReduceMax, 'reducemin': ReduceMin, 273 | 'reducesum':ReduceSum, 'div':Div, 'unsqueeze':Unsqueeze, 274 | 'shape': Shape, 'gather':Gather, 'reshape':Reshape, 275 | 'split':Split, 'tanh':Tanh, 'constantofshape':ConstantofShape, 276 | 'slice':Slice, 'expand':Expand, 'cast':Cast, 'range':Range, 277 | 'equal':Equal, 'where':Where, 'scatternd':Scatternd, 278 | 'instancenormalization':InstanceNormalization, 'clip':Clip, 279 | 'greater':Greater, 'nonzero':NonZero, 'greaterorequal':GreaterOrEqual, 280 | 'topk':TopK, 'sqrt': Sqrt, 'erf': Erf, 'reciprocal': Reciprocal, 281 | 'transpose':Transpose, 'logsoftmax':LogSoftmax, 'return':Return} 282 | 283 | if __name__ == "__main__": pass 284 | -------------------------------------------------------------------------------- /planer/io.py: -------------------------------------------------------------------------------- 1 | import json, re, os 2 | import numpy, numpy as np 3 | from .net import Net 4 | from time import time 5 | import json, zipfile 6 | from io import BytesIO 7 | 8 | def read_net(path, debug=False): 9 | net = Net() 10 | path = path.replace('.onnx', '') 11 | if os.path.exists(path+'.pla'): 12 | with zipfile.ZipFile(path+'.pla') as f: 13 | path = os.path.split(path)[1] 14 | body = json.loads(f.read(path+'.json')) 15 | lay, flw = body['layers'], body['flow'] 16 | inputs, inits = body['input'], body['inits'] 17 | buf = BytesIO(f.read(path+'.npy')) 18 | weights = np.load(buf) 19 | elif os.path.exists(path+'.json'): 20 | with open(path+'.json') as f: 21 | body = json.load(f) 22 | lay, flw = body['layers'], body['flow'] 23 | inputs, inits = body['input'], body['inits'] 24 | weights = np.load(path+'.npy') 25 | elif os.path.exists(path+'.onnx'): 26 | body, weights = read_onnx(path+'.onnx') 27 | if body == 'lost': return weights 28 | lay, flw = body['layers'], body['flow'] 29 | inputs, inits = body['input'], body['inits'] 30 | else: 31 | return print('model %s not found!'%path) 32 | net.load_json(inputs, inits, lay, flw, debug) 33 | net.load_weights(weights) 34 | return net 35 | 36 | types = [None, 'float32', 'uint8', 'int8', 'uint16', 'int16', 'int32', 'int64', 37 | 'str', 'bool', 'float16', 'float64', 'uint32', 'uint64', 'complex64', 'complex128'] 38 | 39 | def node(attrs, name, k=None, para=None): 40 | node = None 41 | for i in attrs: 42 | if i.name==name: node = i 43 | if k is None or node is None: 44 | return node 45 | rst = getattr(node, k) 46 | if k=='ints': rst = list(rst) 47 | if k=='s': rst = rst.decode() 48 | if not para is None: 49 | para[name] = rst 50 | return rst 51 | 52 | 53 | def read_onnx(path): 54 | import onnx, onnx.numpy_helper 55 | graph = onnx.load(path).graph 56 | input_para = [i.name for i in graph.input] 57 | layers, inits, weights, flows, values = [], [], [], [], {} 58 | for i in graph.initializer: 59 | v = onnx.numpy_helper.to_array(i) 60 | values[i.name] = len(weights), v.shape 61 | inits.append([i.name, v.shape, str(v.dtype)]) 62 | if v.ndim==0: v = np.array([v]) 63 | weights.append(v) 64 | for i in graph.node: 65 | inpara = [j for j in i.input] 66 | outpara = [j for j in i.output] 67 | 68 | 69 | 70 | if len(inpara)==1: inpara = inpara[0] 71 | if len(outpara)==1: outpara = outpara[0] 72 | 73 | flows.append([inpara, 
[i.name], outpara]) 74 | # weights.extend([values[i] for i in initpara]) 75 | # print(i.op_type, '===') 76 | if i.op_type == 'BatchNormalization': 77 | cur = flows[-1] 78 | k, b, m, v = [weights[values[cur[0][j]][0]] for j in (1,2,3,4)] 79 | v_inv = 1/numpy.sqrt(v + 1e-5) 80 | kmv_inv_b = -k*m*v_inv + b 81 | kv_inv = k*v_inv 82 | kmv_inv_b.shape = kv_inv.shape = (1,-1,1,1) 83 | 84 | kname, bname = cur[0][1] + '_invK', cur[0][1] + '_invB' 85 | values[kname] = len(weights), kv_inv.shape 86 | values[bname] = len(weights)+1, kmv_inv_b.shape 87 | inits.append([kname, kv_inv.shape, str(kv_inv.dtype)]) 88 | inits.append([bname, kmv_inv_b.shape, str(kmv_inv_b.dtype)]) 89 | weights.extend([kv_inv, kmv_inv_b]) 90 | cur[0] = [cur[0][0], kname, bname] 91 | layers.append([i.name, 'batchnorm', {}]) 92 | elif i.op_type == 'Conv': 93 | # attr, w = i.attribute, values[i.input[1]][1] 94 | attr = i.attribute 95 | g = node(attr, 'group', 'i') or 1 96 | d = node(attr, 'dilations', 'ints') 97 | p = node(attr, 'pads', 'ints') 98 | s = node(attr, 'strides', 'ints') 99 | layers.append([i.name, 'conv', { 100 | 'group':g, 'strides':s, 'dilations':d, 'pads':p}]) 101 | elif i.op_type == 'ConvTranspose': 102 | attr = i.attribute 103 | para = {} 104 | g = node(attr, 'group', 'i', para) 105 | d = node(attr, 'dilations', 'ints', para) 106 | p = node(attr, 'pads', 'ints', para) 107 | s = node(attr, 'strides', 'ints', para) 108 | op = node(attr, 'output_padding', 'ints', para) 109 | layers.append([i.name, 'convtranspose', para]) 110 | elif i.op_type == 'Gemm': 111 | layers.append([i.name, 'dense', {'shp':list(values[i.input[1]][1][::-1])}]) 112 | elif i.op_type == 'MaxPool': 113 | w = node(i.attribute, 'kernel_shape', 'ints') 114 | m = node(i.attribute, 'pads', 'ints') 115 | s = node(i.attribute, 'strides', 'ints') 116 | layers.append([i.name, 'maxpool', {'w':w, 'pads':m, 'strides':s}]) 117 | elif i.op_type == 'GlobalAveragePool': 118 | layers.append([i.name, 'gap', {}]) 119 | elif i.op_type == 'Upsample': 120 | mode = node(i.attribute, 'mode', 's') 121 | layers.append([i.name, 'upsample', {'mode':mode}]) 122 | elif i.op_type == 'Resize': 123 | mode = node(i.attribute, 'mode', 's') 124 | nearest_mode = node(i.attribute, 'nearest_mode', 's') 125 | trans_mode = node(i.attribute, 'coordinate_transformation_mode', 's') 126 | layers.append([i.name, 'resize', {'mode':mode, 'nearest_mode':nearest_mode, 127 | 'coordinate_transformation_mode': trans_mode}]) 128 | elif i.op_type == 'Flatten': 129 | layers.append([i.name, 'flatten', {}]) 130 | elif i.op_type == 'Unsqueeze': 131 | axis = node(i.attribute, 'axes', 'ints') 132 | layers.append([i.name, 'unsqueeze', {} if axis is None else {'axes':axis}]) 133 | elif i.op_type == 'Squeeze': 134 | axis = node(i.attribute, 'axes', 'ints') 135 | layers.append([i.name, 'squeeze', {} if axis is None else {'axes':axis}]) 136 | elif i.op_type == 'Relu': 137 | layers.append([i.name, 'relu', {}]) 138 | elif i.op_type == 'LeakyRelu': 139 | alpha = i.attribute[0].f 140 | layers.append([i.name, 'leakyrelu', {'alpha':alpha}]) 141 | elif i.op_type == 'HardSigmoid': 142 | para = {} 143 | node(i.attribute, 'alpha', 'f', para) 144 | node(i.attribute, 'beta', 'f', para) 145 | layers.append([i.name, 'hardsigmoid', para]) 146 | elif i.op_type == 'Add': 147 | layers.append([i.name, 'add', {}]) 148 | elif i.op_type == 'Sub': 149 | layers.append([i.name, 'sub', {}]) 150 | elif i.op_type == 'Div': 151 | layers.append([i.name, 'div', {}]) 152 | elif i.op_type == 'Tile': 153 | layers.append([i.name, 'tile', {}]) 154 
| elif i.op_type == 'MatMul': 155 | layers.append([i.name, 'matmul', {}]) 156 | elif i.op_type == 'Constant': 157 | _, _, name = flows.pop(-1) 158 | dim = i.attribute[0].t.dims 159 | tp = types[i.attribute[0].t.data_type] 160 | 161 | v = onnx.numpy_helper.to_array(i.attribute[0].t) 162 | values[name] = len(weights), v.shape 163 | inits.append([name, v.shape, str(v.dtype)]) 164 | if v.ndim==0: v = np.array([v]) 165 | weights.append(v) 166 | #layers.append([i.name, 'const', {'value':v, 'dtype':tp}]) 167 | elif i.op_type == 'Identity': 168 | layers.append([i.name, 'identity', {}]) 169 | elif i.op_type == 'Pow': 170 | layers.append([i.name, 'pow', {}]) 171 | elif i.op_type == 'ReduceSum': 172 | para = {} 173 | node(i.attribute, 'axes', 'ints', para) 174 | node(i.attribute, 'keepdims', 'i', para) 175 | layers.append([i.name, 'reducesum', para]) 176 | elif i.op_type == 'ReduceMean': 177 | para = {} 178 | node(i.attribute, 'axes', 'ints', para) 179 | node(i.attribute, 'keepdims', 'i', para) 180 | layers.append([i.name, 'reducemean', para]) 181 | elif i.op_type == 'ReduceMax': 182 | para = {} 183 | node(i.attribute, 'axes', 'ints', para) 184 | node(i.attribute, 'keepdims', 'i', para) 185 | layers.append([i.name, 'reducemax', para]) 186 | elif i.op_type == 'ReduceMin': 187 | para = {} 188 | node(i.attribute, 'axes', 'ints', para) 189 | node(i.attribute, 'keepdims', 'i', para) 190 | layers.append([i.name, 'reducemin', para]) 191 | elif i.op_type == 'Concat': 192 | layers.append([i.name, 'concat', {'axis':i.attribute[0].i}]) 193 | elif i.op_type == 'Pad': 194 | para = {} 195 | node(i.attribute, 'mode', 's', para) 196 | node(i.attribute, 'constant_value', 'f', para) 197 | layers.append([i.name, 'pad', para]) 198 | elif i.op_type == 'Sigmoid': 199 | layers.append([i.name, 'sigmoid', {}]) 200 | elif i.op_type == 'AveragePool': 201 | w = node(i.attribute, 'kernel_shape', 'ints') 202 | m = node(i.attribute, 'pads', 'ints') 203 | s = node(i.attribute, 'strides', 'ints') 204 | layers.append([i.name, 'averagepool', {'w':w, 'pads':m, 'strides':s}]) 205 | elif i.op_type == 'LSTM': 206 | para = {'hidden_size': i.attribute[0].i} 207 | node(i.attribute, 'direction', 's', para) 208 | layers.append([i.name, 'lstm', para]) 209 | elif i.op_type == 'Shape': 210 | layers.append([i.name, 'shape', {}]) 211 | elif i.op_type == 'Gather': 212 | layers.append([i.name, 'gather', {'axis':node(i.attribute, 'axis', 'i') or 0}]) 213 | elif i.op_type == 'Mul': 214 | layers.append([i.name, 'mul', {}]) 215 | elif i.op_type == 'Reshape': 216 | layers.append([i.name, 'reshape', {}]) 217 | elif i.op_type == 'Transpose': 218 | layers.append([i.name, 'transpose', {'axis':node(i.attribute, 'perm', 'ints')}]) 219 | elif i.op_type == 'LogSoftmax': 220 | layers.append([i.name, 'logsoftmax', {'axis':i.attribute[0].i}]) 221 | elif i.op_type == 'Softmax': 222 | layers.append([i.name, 'softmax', {'axis':i.attribute[0].i}]) 223 | elif i.op_type == 'ConstantOfShape': 224 | v = onnx.numpy_helper.to_array(i.attribute[0].t) 225 | tp, v = str(v.dtype), v.tolist() 226 | v = v[0] if len(v)==1 else 0 227 | layers.append([i.name, 'constantofshape', {'value':v, 'dtype':tp}]) 228 | elif i.op_type == 'Greater': 229 | layers.append([i.name, 'greater', {}]) 230 | elif i.op_type == 'NonZero': 231 | layers.append([i.name, 'nonzero', {}]) 232 | elif i.op_type == 'GreaterOrEqual': 233 | layers.append([i.name, 'greaterorequal', {}]) 234 | elif i.op_type == 'TopK': 235 | para = {} 236 | node(i.attribute, 'axis', 'i', para) 237 | node(i.attribute, 'largest', 'i', 
para) 238 | node(i.attribute, 'sorted', 'i', para) 239 | layers.append([i.name, 'topk', para]) 240 | elif i.op_type == 'Split': 241 | split = node(i.attribute, 'split', 'ints') 242 | para = {'axis': node(i.attribute, 'axis', 'i')} 243 | if not split is None: para['split'] = split 244 | layers.append([i.name, 'split', para]) 245 | elif i.op_type == 'Tanh': 246 | layers.append([i.name, 'tanh', {}]) 247 | elif i.op_type == 'Exp': 248 | layers.append([i.name, 'exp', {}]) 249 | elif i.op_type == 'Log': 250 | layers.append([i.name, 'log', {}]) 251 | elif i.op_type == 'Slice': 252 | layers.append([i.name, 'slice', {}]) 253 | elif i.op_type == 'Expand': 254 | layers.append([i.name, 'expand', {}]) 255 | elif i.op_type == 'Equal': 256 | layers.append([i.name, 'equal', {}]) 257 | elif i.op_type == 'Cast': 258 | layers.append([i.name, 'cast', {'dtype':types[i.attribute[0].i]}]) 259 | elif i.op_type == 'Range': 260 | layers.append([i.name, 'range', {}]) 261 | elif i.op_type == 'Where': 262 | layers.append([i.name, 'where', {}]) 263 | elif i.op_type == 'ScatterND': 264 | layers.append([i.name, 'scatternd', {}]) 265 | elif i.op_type == 'InstanceNormalization': 266 | layers.append([i.name, 'instancenormalization', {'epsilon':i.attribute[0].f}]) 267 | elif i.op_type == 'Sqrt': 268 | layers.append([i.name, 'sqrt', {}]) 269 | elif i.op_type == 'Erf': 270 | layers.append([i.name, 'erf', {}]) 271 | elif i.op_type=='Reciprocal': 272 | layers.append([i.name, 'erf', {}]) 273 | elif i.op_type == 'Clip': 274 | minv = node(i.attribute, 'min', 'f') 275 | maxv = node(i.attribute, 'max', 'f') 276 | para = {} 277 | if minv: para['min']=minv 278 | if maxv: para['max']=maxv 279 | layers.append([i.name, 'clip', para]) 280 | else: 281 | print('lost layer:', i.op_type) 282 | return 'lost', i 283 | 284 | layers.append(['return', 'return', {}]) 285 | flows.append([[i.name for i in graph.output], ['return'], 'plrst']) 286 | weights = np.hstack([i.view(dtype=np.uint8).ravel() for i in weights]) 287 | return {'input':input_para, 'inits':inits, 'layers':layers, 'flow':flows}, weights 288 | 289 | def onnx2pla(path, zip=True): 290 | graph, weights = read_onnx(path) 291 | np.save(path.replace('onnx', 'npy'), weights) 292 | with open(path.replace('onnx', 'json'), 'w') as f: 293 | json.dump(graph, f) 294 | if zip: 295 | with zipfile.ZipFile(path.replace('onnx', 'pla'), 'w') as f: 296 | f.write(path[:-4]+'json', os.path.split(path)[1][:-4]+'json') 297 | f.write(path[:-4]+'npy', os.path.split(path)[1][:-4]+'npy') 298 | os.remove(path.replace('onnx','json')) 299 | os.remove(path.replace('onnx','npy')) 300 | 301 | if __name__ == '__main__': 302 | a, b = read_onnx('../demo/yolov3-planer-2/yolov3') 303 | -------------------------------------------------------------------------------- /planer/util.py: -------------------------------------------------------------------------------- 1 | import numpy, numpy as np 2 | from time import time 3 | 4 | def pad(img, shp, mode='constant', constant_values=0): 5 | if shp[2][0]==shp[2][1]==shp[3][0]==shp[3][1]==0: return img 6 | if mode != 'constant': return np.pad(img, shp, mode) 7 | (n, c, h, w), (mn, mc, mh, mw) = img.shape, shp 8 | newimg = np.zeros((n, c, h+mh[0]*2, w+mw[0]*2), dtype=img.dtype) 9 | newimg[:,:,mh[0]:h+mh[0],mw[0]:w+mw[0]] = img 10 | return newimg 11 | 12 | from concurrent.futures import ThreadPoolExecutor 13 | 14 | conv_buf = [] 15 | def clear_buf(): global conv_buf; conv_buf = [] 16 | 17 | def conv_for(img, core, group=1, pads=(1, 1, 1, 1), strides=(1, 1), dilation=(1, 1), 
mode='constant'): 18 | threadPool = ThreadPoolExecutor(max_workers=9) # for 3x3 core 19 | (strh, strw), (dh, dw) = strides, dilation 20 | (n, c, h, w), (ni, ci, hi, wi) = core.shape, img.shape 21 | cimg_w = c * h * w * group 22 | cimg_h, i = (hi//strh)*(wi//strw), 0 23 | shp = (0, 0), (0, 0), (pads[0], pads[2]), (pads[1], pads[3]) 24 | img = pad(img, shp, mode, constant_values=0) 25 | nh = (hi + sum(shp[2]) - (h-1)*dh-1 + strh)//strh 26 | nw = (wi + sum(shp[3]) - (w-1)*dw-1 + strw)//strw 27 | nsh, nsw = nh * strh, nw * strw 28 | # ================ img 2 col ================ 29 | global conv_buf 30 | img = img.transpose((1,0,2,3)) # nchw -> cnhw 31 | size = ci * w * h * ni * nh * nw 32 | if len(conv_buf) < size: conv_buf = np.zeros(size, dtype=img.dtype) 33 | col_img = conv_buf[:size].reshape(ci, w*h, ni, nh, nw) #(h*w, c, N, H, W) 34 | def set_value(img, i, v): img[:,i] = v 35 | for r in range(0, h*dh, dh): 36 | for c in range(0, w*dw, dw): 37 | im, i = img[:,:,0+r:nsh+r:strh, 0+c:nsw+c:strw], i+1 38 | threadPool.submit(set_value, col_img, i-1, im) 39 | threadPool.shutdown(wait=True) 40 | # ============================================ 41 | col_core = core.reshape((group, core.shape[0]//group, -1)[group==1:]) 42 | col_img = col_img.reshape((group, cimg_w//group, -1)[group==1:]) 43 | rst = np.matmul(col_core, col_img) 44 | return rst.reshape((n, ni, nh, nw)).transpose(1, 0, 2, 3) 45 | 46 | def conv_stride(img, core, group=1, pads=(1, 1, 1, 1), strides=(1, 1), dilation=(1, 1), mode='constant'): 47 | (strh, strw), (dh, dw) = strides, dilation 48 | (n, c, h, w), (ni, ci, hi, wi) = core.shape, img.shape 49 | cimg_w = c * h * w * group 50 | cimg_h, i = (hi//strh)*(wi//strw), 0 51 | shp = (0, 0), (0, 0), (pads[0], pads[2]), (pads[1], pads[3]) 52 | img = pad(img, shp, mode, constant_values=0) 53 | nh = (hi + sum(shp[2]) - (h-1)*dh-1 + strh)//strh 54 | nw = (wi + sum(shp[3]) - (w-1)*dw-1 + strw)//strw 55 | nsh, nsw = nh * strh, nw * strw 56 | # ================ img 2 col ================ 57 | ss, shape = img.strides, (ci, w, h, ni, nh, nw) 58 | strides = (ss[-3], ss[-2]*dh, ss[-1]*dw, ss[-4], ss[-2]*strh, ss[-1]*strw) 59 | col_img = np.lib.stride_tricks.as_strided(img, shape=shape, strides=strides) 60 | # ============================================ 61 | col_core = core.reshape((group, core.shape[0]//group, -1)[group==1:]) 62 | col_img = col_img.reshape((group, cimg_w//group, -1)[group==1:]) 63 | rst = np.matmul(col_core, col_img) 64 | return rst.reshape((n, ni, nh, nw)).transpose(1, 0, 2, 3) 65 | 66 | def conv_dnn(img, core, bias=None, group=1, pads=(1, 1, 1, 1), strides=(1, 1), dilation=(1, 1), mode='constant'): 67 | (strh, strw), (dh, dw) = strides, dilation 68 | (n, c, h, w), (ni, ci, hi, wi) = core.shape, img.shape 69 | shp = (0, 0), (0, 0), (pads[0], pads[2]), (pads[1], pads[3]) 70 | if pads[0]==pads[2] and pads[1]==pads[3]: pads = (pads[0], pads[2]) 71 | else: img, pads = pad(img, shp, mode, constant_values=0), (0,0) 72 | nh = (hi + sum(shp[2]) - (h-1)*dh-1 + strh)//strh 73 | nw = (wi + sum(shp[3]) - (w-1)*dw-1 + strw)//strw 74 | y = np.zeros((ni, n, nh, nw), dtype=img.dtype) 75 | dnn.convolution_forward(img, core, bias, y, pads, 76 | (strides[0], strides[1]), (dilation[0], dilation[1]), 1, auto_tune=True, tensor_core='always') 77 | return y 78 | 79 | def pool(img, f, core=(2, 2), pads=(0,0,0,0), stride=(2, 2), const=0): 80 | (n, c, h, w), (ch, cw), (strh, strw) = img.shape, core, stride 81 | shp = ((0, 0), (0, 0), (pads[0], pads[2]), (pads[1], pads[3])) 82 | img = pad(img, shp, 'constant', 
constant_values=0) 83 | (imn, ic, ih, iw), imgs = img.shape, [] 84 | dh = (h + sum(shp[2]) - core[0] + strh)//strh 85 | dw = (w + sum(shp[3]) - core[1] + strw)//strw 86 | nsh, nsw = dh * strh, dw * strw 87 | buf = np.zeros(img.shape[:2]+(dh, dw), img.dtype) 88 | if const != 0: buf[:] = const 89 | for r in range(0, ch, 1): 90 | for c in range(0, cw, 1): 91 | f(img[:,:,r:nsh+r:strh,c:nsw+c:strw], buf, out=buf) 92 | return buf 93 | 94 | def maxpool(i, c=(2, 2), mar=(0,0,0,0), s=(2, 2)): 95 | return pool(i, np.maximum, c, mar, s, -1e4) 96 | 97 | def avgpool(i, c=(2, 2), mar=(0,0,0,0), s=(2, 2)): 98 | rst = pool(i, np.add, c, mar, s, 0) 99 | rst /= c[0] * c[1] 100 | return rst 101 | 102 | def lstm(X, Y, w, r, b, ht, ct, dir=1): 103 | def tanh(x): return np.tanh(x, out=x) 104 | 105 | def sigmoid(x): 106 | x *= -1; np.exp(x, out=x); x += 1 107 | return np.divide(1, x, out=x) 108 | 109 | for t in list(range(X.shape[0]))[::dir]: 110 | gates = np.matmul(X[t], w.T) 111 | gates += np.matmul(ht, r.T) 112 | gates += b[:b.shape[0]//2] 113 | gates += b[b.shape[0]//2:] 114 | i, o, f, c = np.split(gates[None,:], 4, -1) 115 | sigmoid(i), sigmoid(f), tanh(c) 116 | f *= ct; i *= c; f += i; C = f 117 | o = sigmoid(o); o *= np.tanh(C) 118 | Y[t], ht, ct = o[0], o[0], C 119 | return Y, ht, ct 120 | 121 | def make_upmat(k): 122 | ys = np.linspace(0.5/k[0], 1-0.5/k[0], k[0]*1, dtype=np.float16) 123 | xs = np.linspace(0.5/k[1], 1-0.5/k[1], k[1]*1, dtype=np.float16) 124 | rs, cs = ys[:,None], xs[None,:] 125 | if k[0]==1: return np.vstack([1-xs, xs]) 126 | if k[1]==1: return np.vstack([1-ys, ys]) 127 | klt = ((1-cs)*(1-rs)).reshape((1,-1)) 128 | krt = (cs * (1-rs)).reshape((1,-1)) 129 | klb = ((1-cs) * rs).reshape((1,-1)) 130 | krb = (cs * rs).reshape((1,-1)) 131 | return np.vstack([klt, krt, klb, krb]) 132 | 133 | def upsample_blinear(img, k): 134 | n, c, h, w = img.shape 135 | if k[0] == k[1] == 1: return img 136 | if k[0]>1: 137 | img = (img[:,:,:1,:], img, img[:,:,-1:,:]) 138 | img = np.concatenate(img, axis=2) 139 | if k[1]>1: 140 | img = (img[:,:,:,:1], img, img[:,:,:,-1:]) 141 | img = np.concatenate(img, axis=3) 142 | imgs = [img[:,:,:-1,:-1], img[:,:,:-1,1:], 143 | img[:,:,1:,:-1], img[:,:,1:,1:]] 144 | if k[0]==1: imgs = [img[:,:,:,:-1], img[:,:,:,1:]] 145 | if k[1]==1: imgs = [img[:,:,:-1,:], img[:,:,1:,:]] 146 | imgs = [i[:,:,:,:,None] for i in imgs] 147 | rst = np.concatenate(imgs, axis=-1) 148 | rst = np.matmul(rst.reshape((-1,len(imgs))), make_upmat(k)) 149 | hh, ww = h + (k[0]>1), w + (k[1]>1) 150 | rst = rst.reshape((-1, ww, k[0], k[1])) 151 | rst = rst.transpose((0,2,1,3)) 152 | rst = rst.reshape((n, c ,hh*k[0], ww*k[1])) 153 | return rst[:,:,k[0]//2:h*k[0]+k[0]//2,k[1]//2:w*k[1]+k[1]//2] 154 | 155 | def offset(k, trans_mode, round_mode): 156 | idx = np.arange(-64, 64) 157 | if trans_mode == 'half_pixel': 158 | idx = (idx + 0.5)/k - 0.5 159 | if trans_mode == 'asymmetric': 160 | idx = idx / k 161 | if round_mode == 'round_prefer_floor': 162 | idx = np.round(idx-1e-3) 163 | if round_mode == 'round_prefer_ceil': 164 | idx = np.round(idx+1e-3) 165 | if round_mode == 'ceil': 166 | idx = np.ceil(idx) 167 | if round_mode == 'floor': 168 | idx = np.floor(idx) 169 | idx = idx.astype(np.int16) 170 | return np.argmax(idx==0)-64 171 | 172 | def pix_offset(img, dr, dc): 173 | n, c, h, w = img.shape 174 | if dr == dc == 0: return img 175 | if dr>=0: sr1s, sr1e, sr2s, sr2e, sr, rr = dr, h, 0, h-dr, (0, dr), 0 176 | else: sr1s, sr1e, sr2s, sr2e, sr, rr = 0, h+dr, -dr, h, (h+dr, h), h-1 177 | if dc>=0: sc1s, sc1e, 
sc2s, sc2e, sc, cc = dc, w, 0, w-dc, (0, dc), 0 178 | else: sc1s, sc1e, sc2s, sc2e, sc, cc = 0, w+dc, -dc, w, (w+dc, w), w-1 179 | img[:, :, sr1s:sr1e, sc1s:sc1e] = img[:, :, sr2s:sr2e, sc2s:sc2e] 180 | img[:, :, slice(*sr), :] = img[:, :, rr:rr+1, :] 181 | img[:, :, :, slice(*sc)] = img[:, :, :, cc:cc+1] 182 | return img 183 | 184 | def upsample_nearest(img, k, trans_mode='half-pixel', round_mode='round_prefer_ceil'): 185 | n, c, h, w = img.shape 186 | rst = np.zeros((n, c, h*k[0], w*k[1]), dtype=img.dtype) 187 | for r in range(k[0]): 188 | for c in range(k[1]): 189 | rst[:,:,r::k[0],c::k[1]]=img 190 | 191 | offr, offc = [offset(i, trans_mode, round_mode) for i in k] 192 | return pix_offset(rst, offr, offc) 193 | 194 | def upsample_size(img, size): 195 | nc, (h, w) = img.shape[:-2], img.shape[-2:] 196 | kh, kw = size[0]/h, size[1]/w 197 | slicer = -0.5+0.5/kh, h-0.5-0.5/kh, size[0] 198 | rs = np.linspace(*slicer, dtype=img.dtype) 199 | slicec = -0.5+0.5/kw, w-0.5-0.5/kw, size[1] 200 | cs = np.linspace(*slicec, dtype=img.dtype) 201 | rs = np.clip(rs, 0, h-1, out=rs) 202 | cs = np.clip(cs, 0, w-1, out=cs) 203 | ra = np.floor(np.clip(rs, 0, h-1.001)) 204 | ca = np.floor(np.clip(cs, 0, w-1.001)) 205 | ra, ca = ra.astype(int), ca.astype(int) 206 | rs -= ra; cs -= ca; rb = ra+1; cb = ca+1; 207 | rs.shape, img.shape = (-1,1), (-1, h, w) 208 | buf = img[:,:,ca]*(1-cs) + img[:,:,cb]*cs 209 | result = buf[:,ra,:]*(1-rs) + buf[:,rb,:]*rs 210 | return result.reshape(nc + size) 211 | 212 | def upsample(img, k, mode, trans_mode='half-pixcel', round_mode='round_prefer_ceil'): 213 | kint = [int(k[0]), int(k[1])] 214 | size = int(round(k[0]*img.shape[2])), int(round(k[1]*img.shape[3])) 215 | 216 | if mode=='nearest': return upsample_nearest(img, kint, trans_mode, round_mode) 217 | if mode=='linear': 218 | if k[0]==int(k[0]) and k[1]==int(k[1]): return upsample_blinear(img, kint) 219 | else: return upsample_size(img, size) 220 | 221 | # ===== below is some image process function ===== 222 | import math, itertools 223 | 224 | def conv_auto(img, core, mode='reflect', keeptp=True): 225 | shp, dim, (h, w) = img.shape, img.ndim, core.shape 226 | img = np.pad(img, ((h//2,h//2),(w//2,w//2),(0,0))[:dim], mode=mode) 227 | rst, buf = np.zeros((2,) + shp, dtype=np.float32) 228 | for r,c in np.mgrid[:h,:w].reshape(2,-1).T: 229 | buf[:] = img[r:r+shp[0],c:c+shp[1]] 230 | buf *= core[r,c]; rst += buf 231 | return rst.astype(img.dtype) if keeptp else rst 232 | 233 | def conv_rc(img, core_r, core_c, mode='reflect'): 234 | return conv_auto(conv_auto(img, core_r, keeptp=False), core_c) 235 | 236 | def uniform_filter(img, size=3, mode='reflect'): 237 | core = np.ones(size, dtype=np.float32)/size 238 | return conv_rc(img, core[None,:], core[:,None], mode) 239 | 240 | def gaussian_filter(img, sig=2, mode='reflect'): 241 | x = np.arange(-int(sig*2.5+0.5), int(sig*2.5+0.5)+1) 242 | core = np.exp(-x**2/2/sig**2)/sig/(2*np.pi)**0.5 243 | return conv_rc(img, core[None,:], core[:,None], mode) 244 | 245 | def make_slice(l, w, mar): 246 | r = np.linspace(0, l-w, math.ceil((l-mar)/(w-mar))) 247 | return [slice(i, i+w) for i in r.astype(int).tolist()] 248 | 249 | def grid_slice(H, W, h, w, mar): 250 | a, b = make_slice(H, h, mar), make_slice(W, w, mar) 251 | return list(itertools.product(a, b)) 252 | 253 | def resize(img, size, backend=None): 254 | nn = np if isinstance(img, np.ndarray) else numpy 255 | d, (h, w) = img.ndim, img.shape[:2] 256 | kh, kw = size[0]/h, size[1]/w 257 | slicer = -0.5+0.5/kh, h-0.5-0.5/kh, size[0] 258 | rs = 
nn.linspace(*slicer, dtype=nn.float32) 259 | slicec = -0.5+0.5/kw, w-0.5-0.5/kw, size[1] 260 | cs = nn.linspace(*slicec, dtype=nn.float32) 261 | rs = nn.clip(rs, 0, h-1, out=rs) 262 | cs = nn.clip(cs, 0, w-1, out=cs) 263 | ra = nn.floor(nn.clip(rs, 0, h-1.001)) 264 | ca = nn.floor(nn.clip(cs, 0, w-1.001)) 265 | ra, ca = ra.astype(int), ca.astype(int) 266 | rs -= ra; cs -= ca; rb = ra+1; cb = ca+1; 267 | rs.shape, cs.shape = (-1,1,1)[:d], (1,-1,1)[:d] 268 | buf = img[:,ca]*(1-cs) + img[:,cb]*cs 269 | return buf[ra,:]*(1-rs) + buf[rb,:]*rs 270 | 271 | def mapcoord(img, rs, cs, keeptp=True, backend=None): 272 | nn = np if isinstance(img, np.ndarray) else numpy 273 | d, (h, w) = img.ndim, img.shape[:2] 274 | nn.clip(rs, 0, h-1, out=rs) 275 | nn.clip(cs, 0, w-1, out=cs) 276 | ra = nn.floor(nn.clip(rs, 0, h-1.5)) 277 | ca = nn.floor(nn.clip(cs, 0, w-1.5)) 278 | ra, ca = ra.astype(int), ca.astype(int) 279 | rs -= ra; cs -= ca; rb = ra+1; cb = ca+1; 280 | if d==3: rs, cs = rs[:,:,None], cs[:,:,None] 281 | buf = img[ra,ca]*((1-cs) * (1-rs)) 282 | buf += img[rb,cb] * (cs * rs) 283 | buf += img[ra,cb] * ((1-rs) * cs) 284 | buf += img[rb,ca] * ((1-cs) * rs) 285 | return buf.astype(img.dtype) if keeptp else buf 286 | 287 | # sample:float or tuple, float means factor tuple means size 288 | # glob:force adjust image to glob's integral multiple. 289 | # window:after sample, if larger than window, then tiled by window 290 | # margin:overlay between window, float means factor and int means width 291 | def tile(sample=1, glob=1, window=1024, margin=0.1, astype='float32', progress=print): 292 | def wrapf(f): 293 | def wrap(*p, **key): 294 | (h, w), ori_img = p[0].shape[:2], p[0] 295 | samecore = isinstance(ori_img, np.ndarray) 296 | # img = np.asarray(ori_img, dtype=astype) 297 | img = ori_img.astype('float32') 298 | tps = {'sample', 'window', 'glob', 'margin', 'progress'} 299 | ftp = fp, tp = {}, {} 300 | for i in key: ftp[i in tps][i] = key[i] 301 | ssz = tp.get('sample', sample) 302 | wsz = wsh = wsw = tp.get('window', window) 303 | gsz = tp.get('glob', glob) 304 | mar = tp.get('margin', margin) 305 | info = tp.get('progress', progress) 306 | if isinstance(ssz, tuple): ssz = list(ssz) 307 | else: ssz = [int(h*ssz), int(w*ssz)] 308 | # smaller than window, then scale to glob 309 | from math import ceil 310 | if wsh>ssz[0]: wsh = ssz[0] = ceil(ssz[0]/gsz)*gsz 311 | if wsw>ssz[1]: wsw = ssz[1] = ceil(ssz[1]/gsz)*gsz 312 | if ssz!=[h, w]:img = resize(img, ssz) 313 | if isinstance(mar, float): mar = int(wsz*mar) 314 | rcs = grid_slice(*ssz, wsh, wsw, mar) 315 | if len(rcs)>1: info(1, len(rcs)) 316 | rst = f(img[rcs[0]], *p[1:], **fp) 317 | k = rst.shape[0]/(rcs[0][0].stop - rcs[0][0].start) 318 | if len(rcs)==1 and ssz!=[h, w]: 319 | rst = resize(rst, (int(h*k), int(w*k))) 320 | if len(rcs)==1: return rst 321 | def sk(ss, k): 322 | sr = slice(int(ss[0].start*k), int(ss[0].stop*k)) 323 | sc = slice(int(ss[1].start*k), int(ss[1].stop*k)) 324 | return sr, sc 325 | outshp = int(img.shape[0]*k), int(img.shape[1]*k) 326 | outshp = outshp + rst.shape[2:] 327 | weights = numpy.zeros(rst.shape[:2], dtype='uint16') 328 | if rst.ndim==3: weights = weights[:,:,None] 329 | weights += int(mar * k) + 1 330 | for i in range(int(mar*k), 0, -1): 331 | weights[i-1,:] = weights[-i,:] = i 332 | weights[:,i-1] = weights[:,-i] = i 333 | buf = numpy.zeros(outshp, dtype=np.float32) 334 | count = numpy.zeros(outshp[:2], dtype='uint16') 335 | if rst.ndim==3: count = count[:,:,None] 336 | buf[sk(rcs[0], k)] = rst * weights 337 | 
count[sk(rcs[0], k)] += weights 338 | for i in range(1, len(rcs)): 339 | info(i+1, len(rcs)) 340 | rst = f(img[rcs[i]], *p[1:], **fp) 341 | buf[sk(rcs[i], k)] += rst * weights 342 | count[sk(rcs[i], k)] += weights 343 | numpy.divide(buf, count, out=buf, casting='unsafe') 344 | if ssz!=[h, w]: 345 | buf = resize(buf, (int(h*k), int(w*k))) 346 | return buf.astype(rst.dtype) 347 | return wrap 348 | return wrapf 349 | 350 | if __name__ == '__main__': 351 | ''' 352 | img = np.zeros((1, 64, 512, 512), dtype=np.float32) 353 | core = np.zeros((32, 64, 3, 3), dtype=np.float32) 354 | conv_for(img, core) 355 | ''' 356 | import numpy as np 357 | a = np.arange(4).reshape(1,1,2,2) 358 | print(a) 359 | core = np.ones(3).reshape(1,1,1,3) 360 | print(conv_for(a, core, pads=(0,1,0,1))) # pads=(top, left, bottom, right) 361 | --------------------------------------------------------------------------------