├── requirements.txt ├── logo.png ├── .gitignore ├── setup.py ├── LICENSE ├── planer ├── net.py ├── __init__.py ├── layer.py ├── io.py └── util.py └── readme.md /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Image-Py/planer/HEAD/logo.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyc 3 | .DS_Store 4 | *egg* 5 | build/* 6 | *.npy 7 | *.onnx 8 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | descr = """A powerful light-weight inference framework for CNN. 4 | The aim of planer is to provide efficient and adaptable inference environment for CNN model. 5 | Also in order to enlarge the application scope, 6 | we support ONNX format, which enables the converting of trained model within various DL frameworks. 7 | """ 8 | 9 | if __name__ == '__main__': 10 | setup(name='planer', 11 | version='0.34', 12 | url='https://github.com/Image-Py/planer', 13 | description='Powerful Light Artificial NEuRon', 14 | long_description=descr, 15 | author='Y.Dong, YXDragon', 16 | author_email='yxdragon@imagepy.org', 17 | license='BSD 3-clause', 18 | packages=['planer'], 19 | package_data={}, 20 | install_requires=[ 21 | 'numpy' 22 | ], 23 | ) 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, Yan xiaolong 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /planer/net.py: -------------------------------------------------------------------------------- 1 | from .layer import wrap, layer_map as key 2 | import numpy, time 3 | from .util import np, clear_buf 4 | 5 | class Net: 6 | def __init__(self): 7 | self.weights, self.body, self.flow = [], [], [] 8 | self.life, self.timer = {}, {} 9 | 10 | def load_json(self, inputs, inits, body, flow, debug=False): 11 | self.body, self.flow, self.life = [], [], {} 12 | for i in body: 13 | para = i[2] 14 | if debug: print(i) 15 | self.body.append((i[0], wrap(key[i[1]], i[1])(**para))) 16 | for i in range(len(flow)): 17 | keys = flow[i][0] 18 | if isinstance(keys, str): keys = [keys] 19 | for j in keys: self.life[j] = i 20 | for i in inits: 21 | self.weights.append(np.zeros(i[1], dtype=i[2])) 22 | 23 | self.input, self.inits = inputs, [i[0] for i in inits] 24 | self.layer, self.flow = body, flow 25 | 26 | def half(self): 27 | for i in range(len(self.weights)): 28 | if self.weights[i].dtype == np.float32: 29 | self.weights[i] = self.weights[i].astype('float16') 30 | 31 | def info(self, obj): 32 | if isinstance(obj, list): 33 | return [self.info(i) for i in obj] 34 | if hasattr(obj, 'shape'): return obj.shape 35 | return obj 36 | 37 | def forward(self, *x, debug=False): 38 | dic = dict(self.body) 39 | rst = {'None': None} 40 | for k, v in zip(self.inits, self.weights): rst[k] = v 41 | 42 | for k, v in zip(self.input, x): rst[k] = v 43 | for i in range(len(self.flow)): 44 | x, ls, y = self.flow[i] 45 | if not isinstance(ls, list): ls = [ls] 46 | for l in ls: 47 | out = x if l == ls[0] else y 48 | if not isinstance(out, str): 49 | p = [rst.get(i) for i in out] 50 | else: p = [rst[out]] 51 | xs = x if isinstance(x, list) else [x] 52 | for k in set(xs): # release wasted obj 53 | if k in rst and self.life[k]<=i: del rst[k] 54 | obj = dic[l] 55 | start = time.time() 56 | if debug: 57 | print(l, obj.name, ':', obj.para()) 58 | outp = out #[(i, 'Weights')[i in self.inits] for i in out] 59 | print('\t--> ', outp, ':', self.info(p)) 60 | if isinstance(y, str): rst[y] = obj(*p) 61 | else: 62 | for k, v in zip(y, obj(*p)): rst[k] = v 63 | if debug: 64 | for k in (y, [y])[isinstance(y, str)]: 65 | print('\t<-- ', k, ':', self.info(rst[k])) 66 | # np.cuda.runtime.deviceSynchronize() 67 | cost = time.time()-start 68 | if not obj.name in self.timer: 69 | self.timer[obj.name] = 0 70 | self.timer[obj.name] += cost 71 | clear_buf() 72 | return rst[y] 73 | 74 | def timeit(self, status='start'): 75 | if status == 'start': self.timer = {} 76 | if status == 'end': 77 | for i in self.timer: print(i, self.timer[i]) 78 | 79 | def run(self, output=None, input={}): 80 | rst = self(input) # compatible with onnxruntime 81 | return rst if isinstance(rst, tuple) else (rst,) 82 | 83 | def load_weights(self, data): 84 | s, data = 0, data.view(dtype=np.uint8) 85 | for i in range(len(self.weights)): 86 | buf = self.weights[i].ravel().view(dtype=np.uint8) 87 | buf[:] = data[s:s+buf.size] 88 | s += buf.size 89 | 90 | def show(self): 91 | from .plot import plot_net 92 | plot_net(self.input, self.inits, self.layer, self.flow) 93 | 94 | def __call__(self, *x, **key): 95 | if type(x[0]) is dict: x = [x[0][i] for i in self.input] 96 | tp = [isinstance(i, numpy.ndarray) for i in x] 97 | need = sum(tp)>0 and not numpy is np 98 | if need: x = [np.asarray(i) if b else i for i,b in zip(x, tp)] 99 | rst = self.forward(*x, **key) 100 | if need: rst = tuple([i.get() for i in 
rst]) 101 | return rst[0] if len(rst)==1 else rst 102 | 103 | 104 | if __name__ == '__main__': 105 | pass 106 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | ## Planer: Powerful Light Artificial NEuRon 2 | 3 | ![](https://user-images.githubusercontent.com/36396315/79580952-57b7d200-80fc-11ea-9841-2b253f4e6293.png) 4 | 5 | A powerful, light-weight inference framework for CNNs. The aim of planer is to provide an efficient and adaptable inference environment for CNN models. To enlarge the application scope, we also support the ONNX format, which enables converting trained models from various DL frameworks. 6 | 7 | ## Features 8 | 9 | Planer is a light-weight CNN framework implemented against a pure NumPy-like interface. It can run with NumPy alone, or switch to other backends (CuPy accelerated with CUDA, ClPy accelerated with OpenCL). 10 | 11 | * Implemented in a pure NumPy-like interface 12 | * Extremely streamlined IR based on JSON 13 | * Powerful model visualization tools 14 | * ONNX-supported model converting 15 | * Plenty of inspiring demos 16 | 17 | ## Various Building Options 18 | All the elements (layers, operations, activation functions) are abstracted into a ```layer```, and a JSON-formatted ```flow``` is used to build the computation graph. We support 3 ways of building a network: 19 | * PyTorch-like 20 | ```python 21 | from planer import * 22 | # ========== write a net manually ========== 23 | class CustomNet(Net): 24 | def __init__(self): 25 | self.conv = Conv2d(3, 64, 3, 1) 26 | self.relu = ReLU() 27 | self.pool = Maxpool(2) 28 | self.upsample = UpSample(2) 29 | self.concatenate = Concatenate() 30 | self.sigmoid = Sigmoid() 31 | 32 | def forward(self, x): 33 | x = self.conv(x) 34 | x = self.relu(x) 35 | y = self.pool(x) 36 | y = self.upsample(y) 37 | z = self.concatenate([x, y]) 38 | return self.sigmoid(z) 39 | ``` 40 | * JSON-like (based on our IR) 41 | ```python 42 | # ========== load net from json ========== 43 | layer = [('conv', 'conv', (3, 64, 3, 1)), 44 | ('relu', 'relu', None), 45 | ('pool', 'maxpool', (2,)), 46 | ('up', 'upsample', (2,)), 47 | ('concat', 'concat', None), 48 | ('sigmoid', 'sigmoid', None)] 49 | 50 | flow = [('x', ['conv', 'relu'], 'x'), 51 | ('x', ['pool', 'up'], 'y'), 52 | (['x','y'], ['concat', 'sigmoid'], 'z')] 53 | 54 | net = Net() 55 | net.load_json(layer, flow) 56 | ``` 57 | 58 | ## Converting from ONNX (PyTorch 1.1.0) 59 | 60 | It is easy to convert a net from PyTorch after training (through ONNX). Here is a demo with resnet18. 61 | 62 | ```python 63 | from torchvision.models import resnet18 64 | import torch 65 | from planer import torch2planer 66 | 67 | net = resnet18(pretrained=True) 68 | x = torch.randn(1, 3, 224, 224, device='cpu') 69 | torch2planer(net, 'resnet18', x) 70 | 71 | # then you will get a resnet18.json and resnet18.npy in the current folder. 72 | 73 | from planer import read_net 74 | import planer 75 | import numpy as np 76 | 77 | # get the planer array lib 78 | pal = planer.core(np) 79 | x = pal.random.randn(1, 3, 224, 224).astype('float32') 80 | net = read_net('resnet18') 81 | net(x) # use the net to predict your data 82 | ``` 83 | 84 | ## Change backend 85 | 86 | Planer is built on a NumPy-like interface, so it is easy to change the backend to CuPy or ClPy. 87 | 88 | ```python 89 | import planer, cupy 90 | planer.core(cupy) # use cupy as backend 91 | 92 | import planer, clpy 93 | planer.core(clpy) # use clpy as backend 94 | ``` 95 |
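After switching, arrays created through the returned module live on the chosen backend, and ```planer.asnumpy``` copies a result back to the host. A minimal sketch, assuming CuPy is installed and the `resnet18` model from the demo above has already been converted:

```python
import planer, cupy

pal = planer.core(cupy)            # switch the array backend to CuPy
net = planer.read_net('resnet18')  # load the converted model
x = pal.random.randn(1, 3, 224, 224).astype('float32')  # a CuPy array
y = net(x)                         # result stays on the backend when the input is not numpy
y_host = planer.asnumpy(y)         # copy the result back to a NumPy array
print(y_host.shape)
```

Feeding plain NumPy arrays also works: the net converts them to the current backend internally and converts the results back to NumPy before returning.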
96 | In our tests on Windows, planer with CuPy is 80-100 times faster than NumPy and has roughly equal performance with torch (but on Linux torch is faster). 97 | 98 | ## Network visualization 99 | 100 | We provide a powerful visualization tool for CNN models. Just calling ```net.show()``` will work. 101 | 102 | ![](https://user-images.githubusercontent.com/24822467/78111180-dc350000-742e-11ea-9152-30dad06ee433.png) 103 | 104 | 105 | ## Demos 106 | We have released some demos, which can be found inside the ```demo/``` folder. 107 | 108 | ![](https://user-images.githubusercontent.com/36396315/79580979-60100d00-80fc-11ea-8565-cd55f5db2395.png) 109 | 110 | ## Milestone 111 | Yolo-v3 is supported now! 112 | 113 | ![](https://user-images.githubusercontent.com/36396315/79184976-40e95500-7e48-11ea-9679-a70074c658cf.png) 114 | 115 | ## Planer-pro 116 | 117 | Planer is our open-source framework; we also have a professional edition (several times faster than torch). 118 | -------------------------------------------------------------------------------- /planer/__init__.py: -------------------------------------------------------------------------------- 1 | from .layer import * 2 | from .net import Net 3 | from .io import * 4 | from .util import * 5 | 6 | # compatible with onnxruntime 7 | InferenceSession = read_net 8 | 9 | # planer array library 10 | backend = None 11 | import numpy as np 12 | try: import cupy as cp 13 | except: cp = None 14 | try: import numexpr as ep 15 | except: ep = None 16 | try: import cupy.cudnn as dnn 17 | except: dnn = None 18 | 19 | print('numpy:[%s] numexpr:[%s] cupy:[%s] cudnn:[%s] '%tuple( 20 | [('installed', '--')[i is None] for i in (np, ep, cp, dnn)])) 21 | 22 | def core(obj, silent=False): 23 | global np; np = obj 24 | from .
import util, layer, net, io 25 | util.np = layer.np = net.np = io.np = obj 26 | #try: import numexpr as ep 27 | #except: ep = None 28 | 29 | layer.ep = ep if obj.__name__ == 'numpy' else None 30 | layer.dnn = util.dnn = dnn if obj.__name__ == 'cupy' else None 31 | 32 | if obj.__name__=='numpy' and ep is None: 33 | print('numexpr is not installed, optional but recommended.') 34 | if obj.__name__=='cupy' and dnn is None: 35 | print('cudnn is not installed, optional but recommended.') 36 | np.asnumpy = obj.asnumpy if 'asnumpy' in dir(obj) else obj.asarray 37 | if not silent: print('\nuser switch engine:', obj.__name__) 38 | return np 39 | 40 | core(np, True) 41 | 42 | def asnumpy(arr, **key): return np.asnumpy(arr, **key) 43 | 44 | def asarray(arr, **key): return np.asarray(arr, **key) 45 | 46 | # ========== planer zoo ========== 47 | import inspect, importlib, pathlib 48 | import urllib.request 49 | 50 | root = str(pathlib.Path.home())+'/.planer_zoo' 51 | if not os.path.exists(root): os.mkdir(root) 52 | 53 | def progress(i, n, bar=[None]): 54 | from tqdm import tqdm 55 | if bar[0] is None: 56 | bar[0] = tqdm() 57 | bar[0].total = n 58 | bar[0].update(i-bar[0].n) 59 | if n==i: bar[0] = None 60 | 61 | def download(url, path, info=print, progress=progress): 62 | info('download from %s'%url) 63 | f, rst = urllib.request.urlretrieve(url, path, 64 | lambda a,b,c: progress(int(100.0 * a * b/c), 100)) 65 | 66 | def source(mroot, lst): 67 | for i in lst: 68 | if len(i)==3: i.insert(2, False) 69 | i[2] = os.path.exists(mroot + '/' + i[0]) 70 | return lst 71 | 72 | def list_source(root, lst): 73 | print('%-20s%-10s%-10s\n'%('file name','required', 'installed')+'-'*40) 74 | for i in source(root, lst):print('%-20s%-10s%-10s'%(tuple(i[:3]))) 75 | 76 | def planer_catlog(): 77 | req = urllib.request.Request('http://planer.imagepy.org/catlog.txt', 78 | headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)\ 79 | AppleWebKit/537.36 (KHTML, like Gecko)\ 80 | Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62'}) 81 | return json.loads(urllib.request.urlopen(req).read()) 82 | 83 | def downloads(root, lst, names='required', force=False, info=print, progress=progress): 84 | source(root, lst) 85 | if names=='all': lst = [i for i in lst] 86 | elif names=='required': lst = [i for i in lst if i[1]] 87 | else: 88 | if isinstance(names, str): names = [names] 89 | lst = [i for i in lst if i[0] in names] 90 | if not force: lst = [i for i in lst if not i[2]] 91 | # name = model.__name__.replace('planer_zoo.', '') 92 | if not os.path.exists(root): os.makedirs(root) 93 | if len(lst)==0: return 94 | catlog = planer_catlog() 95 | for name, a, b, url in lst: 96 | if url[:4] != 'http': url = catlog[url] 97 | download(url, root+'/'+name, info, progress) 98 | 99 | # parse source from a markdown file 100 | def get_source(path): 101 | with open(path) as f: cont = f.read().split('\n') 102 | status, files = False, [] 103 | for i in range(len(cont)): 104 | if '|File|' in cont[i].replace(' ',''): break 105 | for i in range(i, len(cont)): 106 | if not '|' in cont[i]: break 107 | if not '](' in cont[i]: continue 108 | nameurl = cont[i].split('|')[1] 109 | req = cont[i].split('|')[2].strip()!='' 110 | name, url = nameurl.split('](') 111 | name = name.split('[')[1] 112 | url = url.split(')')[0] 113 | files.append([name, req, url]) 114 | return files 115 | 116 | def Model(model, auto=True): 117 | if hasattr(model, 'list_source'): return model 118 | name = model.__package__.replace('planer_zoo.', '') 119 | md = 
model.__file__.replace('__init__.py', 'readme.py')[:-2]+'md' 120 | mroot = root +'/' + '/'.join(name.split('.')) 121 | if hasattr(model, 'source'): 122 | lst = [list(i) for i in model.source] 123 | else: lst = get_source(md) 124 | for i in lst: 125 | i[-1] = model.__package__.replace('.', '/')+'/'+i[0] if i[-1]=='' else i[-1] 126 | model.source = lambda m=mroot: source(m, lst) 127 | model.root, oroot = mroot, model.root 128 | ms = [getattr(model, i) for i in dir(model)] 129 | for m in set([inspect.getmodule(i) for i in ms]): 130 | if hasattr(m, 'root') and m.root == oroot: m.root = mroot 131 | model.list_source = lambda root=mroot, lst=model.source(): list_source(root, lst) 132 | 133 | model.download = lambda name='required', force=False, info=print, \ 134 | progress=progress, m=mroot: downloads( 135 | m, model.source(), name, force, info, progress) 136 | if auto: [model.download(), model.load()] 137 | return model 138 | 139 | def load(name, auto=True): 140 | model = importlib.import_module(name) 141 | return Model(model, auto) 142 | -------------------------------------------------------------------------------- /planer/layer.py: -------------------------------------------------------------------------------- 1 | from .util import maxpool, upsample, avgpool, np, lstm 2 | from .util import conv_for, conv_stride, conv_dnn 3 | ep, dnn = None, None # numexpr is help for numpy backend 4 | import numpy 5 | 6 | def wrap(f, layername='layer'): 7 | class Layer: 8 | name = layername 9 | def __init__(self, **key): self.key = key 10 | def para(self): return self.key 11 | def forward(self, *x): return f(*x, **self.key) 12 | def __call__(self, *x): return self.forward(*x) 13 | return Layer 14 | 15 | def Dense(x, K, B, shp=None): 16 | y = np.matmul(x, K.T) 17 | y += B.reshape((1, -1)) 18 | return y 19 | 20 | def MatMul(x, y): return np.matmul(x, y) 21 | 22 | def Conv2d(x, K, B=None, group=1, strides=(1,1), dilations=(1,1), pads=(0,0,0,0)): 23 | if np is numpy: out = conv_for(x, K, group, pads, strides, dilations) 24 | elif not dnn is None: return conv_dnn(x, K, B, group, pads, strides, dilations) 25 | else: out = conv_stride(x, K, group, pads, strides, dilations) 26 | return out if B is None else np.add(out, B.reshape(1, -1, 1, 1), out=out) 27 | 28 | def ConvTranspose2d(x, K, B=None, strides=[2,2], dilations=[1,1], pads=[0,0,0,0], output_padding=[0,0], group=1): 29 | (n, c, h, w), (s1, s2), (d1, d2), (H, W) = x.shape, strides, dilations, K.shape[2:] 30 | low_h, high_h = ((H-1)*d1- pads[0]), ((H-1)*d1-pads[2]+output_padding[0]) 31 | low_w, high_w = ((W-1)*d2- pads[1]), ((W-1)*d2-pads[3]+output_padding[1]) 32 | buf = np.zeros((n, c, (h-1)*s1+low_h+high_h+1, (w-1)*s2+low_w+high_w+1), dtype=x.dtype) 33 | buf[:,:,low_h:buf.shape[2]-high_h:s1,low_w:buf.shape[3]-high_w:s2] = x 34 | return Conv2d(buf, K.transpose(1,0,2,3)[:,:,::-1,::-1], B, strides=[1,1], dilations=dilations, group=group) 35 | 36 | def LSTM(X, W, R, B=0, sequence_lens=0, initial_h=0, initial_c=0, hidden_size=None, direction='forward'): 37 | dirs = {'forward':[1], 'reverse':[-1], 'bidirectional':[1,-1]} 38 | (L, N, input_dim), hidden_size = X.shape, R.shape[-1] 39 | Y = np.zeros((L, len(dirs[direction]), N, hidden_size), dtype=X.dtype) 40 | for i, d in enumerate(dirs[direction]): 41 | _, H, C = lstm(X, Y[:,i], W[i], R[i], B[i], initial_h[i], initial_c[i], d) 42 | return Y, H, C 43 | 44 | def ReLU(x): 45 | if ep: return ep.evaluate('x * (x > 0)') 46 | return np.multiply(x, x>0, out=x) 47 | 48 | def LeakyReLU(x, alpha=0.2): 49 | a, b = 
np.array(alpha, x.dtype), np.array(1-alpha, x.dtype) 50 | if ep: return ep.evaluate('x*((x>0)*b+a)') 51 | y = (x>0) * b; y += a; y *= x; return y 52 | 53 | def Sqrt(x): return np.sqrt(x) 54 | 55 | def Identity(x): return x 56 | 57 | def Tile(x, repeat): return np.tile(x, repeat.tolist()) 58 | 59 | def Flatten(x): return x.reshape((x.shape[0], -1)) 60 | 61 | def Sigmoid(x): 62 | if ep: return ep.evaluate('1/(1+exp(-x))') 63 | x = -x; np.exp(x, out=x); x += 1 64 | return np.divide(1, x, out=x) 65 | 66 | def HardSigmoid(x, alpha=0.2, beta=0.5): 67 | x = x * alpha; x += beta 68 | x = np.minimum(x, 1, out=x) 69 | return np.maximum(x, 0, out=x) 70 | 71 | def Maxpool(x, w=(2,2), pads=(0,0,0,0), strides=(2,2)): 72 | return maxpool(x, w, pads, strides) 73 | 74 | def AveragePool(x, w=(2,2), pads=(0,0,0,0), strides=(2,2)): 75 | return avgpool(x, w, pads, strides) 76 | 77 | def GlobalAveragePool(x): 78 | return x.mean(axis=(-2, -1), keepdims=True) 79 | 80 | def UpSample(x, k, mode='nearest'): 81 | if k.size == 0: k = size[-2:] // np.array(x.shape[-2:]) 82 | return upsample(x, k[-2:].astype(int).tolist(), mode) 83 | 84 | def Resize(x, roi, k, size=None, mode='nearest', 85 | coordinate_transformation_mode='half_pixel', nearest_mode='round_prefer_floor'): 86 | if k.size == 0: k = size[-2:] / np.array(x.shape[-2:]) 87 | return upsample(x, k[-2:].tolist(), mode, 88 | coordinate_transformation_mode, nearest_mode) 89 | 90 | def Concatenate(*xs, axis=0): 91 | return np.concatenate(xs, axis=axis) 92 | 93 | def Add(x1, x2): 94 | if ep: return ep.evaluate('x1 + x2') 95 | return x1 + x2 96 | 97 | def Sub(x1, x2): 98 | if ep: return ep.evaluate('x1 - x2') 99 | return x1 - x2 100 | 101 | def Mul(x1, x2): 102 | if ep: return ep.evaluate('x1 * x2') 103 | return x1 * x2 104 | 105 | def Div(x1, x2): 106 | if ep: return ep.evaluate('x1 / x2') 107 | return x1 / x2 108 | 109 | def Pow(x, p): 110 | if ep: return ep.evaluate('x ** p') 111 | return np.power(x, p) 112 | 113 | def ReduceSum(x, axes=-1, keepdims=True): 114 | return x.sum(axis=tuple(axes), keepdims=keepdims) 115 | 116 | def ReduceMean(x, axes=-1, keepdims=True): 117 | return x.mean(axis=tuple(axes), keepdims=keepdims) 118 | 119 | def ReduceMax(x, axes=-1, keepdims=True): 120 | return x.max(axis=tuple(axes), keepdims=keepdims) 121 | 122 | def ReduceMin(x, axes=-1, keepdims=True): 123 | return x.min(axis=tuple(axes), keepdims=keepdims) 124 | 125 | def BatchNorm(x, K, B): 126 | if ep: return ep.evaluate('x * K + B') 127 | x = x * K; x += B; return x 128 | 129 | def Unsqueeze(x, axes=None): 130 | axis = np.array(axes).tolist() 131 | return np.expand_dims(x, tuple(axis)) 132 | 133 | def Squeeze(x, axes=[0]): 134 | return np.squeeze(x, axis=axes[0]) 135 | 136 | def Const(value=0, dtype='float32'): 137 | if isinstance(value, list): 138 | return np.array(value, dtype=dtype) 139 | return value 140 | 141 | def Softmax(x, axis=-1): 142 | y = x - np.max(x, axis=axis, keepdims=True) 143 | ey = ep.evaluate('exp(y)') if ep else np.exp(y) 144 | eX = np.sum(ey, axis=axis, keepdims=True) 145 | if ep: return ep.evaluate('exp(y-log(eX))') 146 | y -= np.log(eX, out=eX); return np.exp(y, out=y) 147 | 148 | def LogSoftmax(x, axis=-1): 149 | y = x - np.max(x, axis=axis, keepdims=True) 150 | ey = ep.evaluate('exp(y)') if ep else np.exp(y) 151 | eX = np.sum(ey, axis=axis, keepdims=True) 152 | if ep: return ep.evaluate('y-log(eX)') 153 | y -= np.log(eX, out=eX); return y 154 | 155 | def Shape(x): return np.array(x.shape) 156 | 157 | def Gather(x, idx, axis=0): return np.take(x, idx, 
axis=axis) 158 | 159 | def Reshape(x, shp): 160 | shp = shp.tolist() 161 | for i in range(len(shp)): 162 | shp[i] = shp[i] or x.shape[i] 163 | return x.reshape(shp) 164 | 165 | def Transpose(x, axis): return x.transpose(axis) 166 | 167 | def ConstantofShape(x, value=0, dtype='float32'): 168 | return np.full(x.ravel().tolist(), value, dtype=dtype) 169 | 170 | def Split(x, split=None, axis=0): 171 | seg = np.cumsum(np.array(split)).tolist() 172 | return np.split(x[:seg[-1]], seg[:-1], axis) 173 | 174 | def Tanh(x): 175 | if ep: return ep.evaluate('tanh(x)') 176 | return np.tanh(x) 177 | 178 | def Exp(x): 179 | if ep: return ep.evaluate('exp(x)') 180 | return np.exp(x) 181 | 182 | def Log(x): 183 | if ep: return ep.evaluate('log(x)') 184 | return np.log(x) 185 | 186 | def Reciprocal(x): return 1/x 187 | 188 | def Slice(x, start, end, axis=None, step=None): 189 | if step is None: step = np.ones(len(start), dtype=np.uint32) 190 | if axis is None: axis = np.arange(len(start)) 191 | seas = [start, end, axis, step] 192 | start, end, axis, step = [i.tolist() for i in seas] 193 | slis = [slice(None,None,None)] * x.ndim 194 | for s, e, a, st in zip(start, end, axis, step): 195 | slis[a] = slice(s, e, st) 196 | return x[tuple(slis)] 197 | 198 | def Expand(x, shp): 199 | ones = np.ones(shp.tolist(), dtype=x.dtype) 200 | return ones * x 201 | 202 | def Cast(x, dtype='flaot32'): return x.astype(dtype) 203 | 204 | def Range(start, end, delta): 205 | return np.arange(int(start), int(end), int(delta)) 206 | 207 | def Equal(x1, x2): return np.equal(x1, x2) 208 | 209 | def Where(msk, x1, x2): return np.where(msk, x1, x2) 210 | 211 | def Scatternd(data, indices, updates): 212 | data = data.copy() 213 | for i in range(len(indices[0])): 214 | data[tuple(indices[0,i])] = updates[0,i] 215 | return data 216 | 217 | def InstanceNormalization(x, s, bias, epsilon=1e-5): 218 | axis = tuple(range(2, x.ndim)) 219 | mean = np.mean(x, axis=axis, keepdims=True) 220 | var = x - mean; var **= 2; 221 | var = np.mean(var, axis=axis, keepdims=True) 222 | shapes = (-1,) + (1,) * (x.ndim - 2) 223 | s.shape = bias.shape = shapes 224 | var = (var + epsilon) ** 0.5 225 | x *= s/var; x += bias - s*mean/var 226 | return x 227 | 228 | def Greater(a, b): return np.greater(a, b) 229 | 230 | def NonZero(x): return np.array(np.nonzero(x)) 231 | 232 | def GreaterOrEqual(a, b): return a >= b 233 | 234 | def TopK(x, k, axis=-1, largest=1, sorted=1): 235 | idk = np.arange(k) * -largest - (largest>0) 236 | idx = np.argsort(x, axis=axis) 237 | idx = np.take(idx, idk, axis=axis) 238 | vs = np.take_along_axis(x, idx, axis=axis) 239 | return vs, idx 240 | 241 | def Pad(x, pads, constant_value=0, mode='constant'): 242 | pads = pads.reshape(2,-1).T.tolist() 243 | para = {'mode': mode}; 244 | if mode=='constant': para['constant_values'] = constant_value 245 | return np.pad(x, pads, **para) 246 | 247 | def Clip(x, min=0, max=1): 248 | minv, maxv = np.float16(min), np.float16(max) 249 | if ep: return ep.evaluate('(((x-minv)*(x>minv))-(maxv-minv))*(x-2)*256') 257 | else: x -= 2; x *= x<0; x += 4; x *= x>0; x *= 256; 258 | return np.array(erflut, x.dtype)[x.astype('int16')] 259 | 260 | def Return(*x): return x 261 | 262 | layer_map = {'dense': Dense, 'conv': Conv2d, 'relu': ReLU, 263 | 'leakyrelu': LeakyReLU, 'batchnorm': BatchNorm, 264 | 'flatten': Flatten, 'sigmoid': Sigmoid, 'softmax': Softmax, 265 | 'hardsigmoid': HardSigmoid, 'squeeze': Squeeze, 266 | 'maxpool': Maxpool, 'averagepool': AveragePool, 'const': Const, 267 | 'upsample': UpSample, 'concat': 
Concatenate, 'add': Add, 268 | 'resize': Resize, 'pad': Pad, 'convtranspose':ConvTranspose2d, 269 | 'sub': Sub, 'reducemean': ReduceMean, 'exp': Exp, 'log': Log, 270 | 'mul': Mul, 'gap': GlobalAveragePool, 'pow':Pow, 'matmul': MatMul, 271 | 'identity' : Identity, 'tile': Tile, 'lstm':LSTM, 272 | 'reducemax':ReduceMax, 'reducemin': ReduceMin, 273 | 'reducesum':ReduceSum, 'div':Div, 'unsqueeze':Unsqueeze, 274 | 'shape': Shape, 'gather':Gather, 'reshape':Reshape, 275 | 'split':Split, 'tanh':Tanh, 'constantofshape':ConstantofShape, 276 | 'slice':Slice, 'expand':Expand, 'cast':Cast, 'range':Range, 277 | 'equal':Equal, 'where':Where, 'scatternd':Scatternd, 278 | 'instancenormalization':InstanceNormalization, 'clip':Clip, 279 | 'greater':Greater, 'nonzero':NonZero, 'greaterorequal':GreaterOrEqual, 280 | 'topk':TopK, 'sqrt': Sqrt, 'erf': Erf, 'reciprocal': Reciprocal, 281 | 'transpose':Transpose, 'logsoftmax':LogSoftmax, 'return':Return} 282 | 283 | if __name__ == "__main__": pass 284 | -------------------------------------------------------------------------------- /planer/io.py: -------------------------------------------------------------------------------- 1 | import json, re, os 2 | import numpy, numpy as np 3 | from .net import Net 4 | from time import time 5 | import json, zipfile 6 | from io import BytesIO 7 | 8 | def read_net(path, debug=False): 9 | net = Net() 10 | path = path.replace('.onnx', '') 11 | if os.path.exists(path+'.pla'): 12 | with zipfile.ZipFile(path+'.pla') as f: 13 | path = os.path.split(path)[1] 14 | body = json.loads(f.read(path+'.json')) 15 | lay, flw = body['layers'], body['flow'] 16 | inputs, inits = body['input'], body['inits'] 17 | buf = BytesIO(f.read(path+'.npy')) 18 | weights = np.load(buf) 19 | elif os.path.exists(path+'.json'): 20 | with open(path+'.json') as f: 21 | body = json.load(f) 22 | lay, flw = body['layers'], body['flow'] 23 | inputs, inits = body['input'], body['inits'] 24 | weights = np.load(path+'.npy') 25 | elif os.path.exists(path+'.onnx'): 26 | body, weights = read_onnx(path+'.onnx') 27 | if body == 'lost': return weights 28 | lay, flw = body['layers'], body['flow'] 29 | inputs, inits = body['input'], body['inits'] 30 | else: 31 | return print('model %s not found!'%path) 32 | net.load_json(inputs, inits, lay, flw, debug) 33 | net.load_weights(weights) 34 | return net 35 | 36 | types = [None, 'float32', 'uint8', 'int8', 'uint16', 'int16', 'int32', 'int64', 37 | 'str', 'bool', 'float16', 'float64', 'uint32', 'uint64', 'complex64', 'complex128'] 38 | 39 | def node(attrs, name, k=None, para=None): 40 | node = None 41 | for i in attrs: 42 | if i.name==name: node = i 43 | if k is None or node is None: 44 | return node 45 | rst = getattr(node, k) 46 | if k=='ints': rst = list(rst) 47 | if k=='s': rst = rst.decode() 48 | if not para is None: 49 | para[name] = rst 50 | return rst 51 | 52 | 53 | def read_onnx(path): 54 | import onnx, onnx.numpy_helper 55 | graph = onnx.load(path).graph 56 | input_para = [i.name for i in graph.input] 57 | layers, inits, weights, flows, values = [], [], [], [], {} 58 | for i in graph.initializer: 59 | v = onnx.numpy_helper.to_array(i) 60 | values[i.name] = len(weights), v.shape 61 | inits.append([i.name, v.shape, str(v.dtype)]) 62 | if v.ndim==0: v = np.array([v]) 63 | weights.append(v) 64 | for i in graph.node: 65 | inpara = [j for j in i.input] 66 | outpara = [j for j in i.output] 67 | 68 | 69 | 70 | if len(inpara)==1: inpara = inpara[0] 71 | if len(outpara)==1: outpara = outpara[0] 72 | 73 | flows.append([inpara, 
[i.name], outpara]) 74 | # weights.extend([values[i] for i in initpara]) 75 | # print(i.op_type, '===') 76 | if i.op_type == 'BatchNormalization': 77 | cur = flows[-1] 78 | k, b, m, v = [weights[values[cur[0][j]][0]] for j in (1,2,3,4)] 79 | v_inv = 1/numpy.sqrt(v + 1e-5) 80 | kmv_inv_b = -k*m*v_inv + b 81 | kv_inv = k*v_inv 82 | kmv_inv_b.shape = kv_inv.shape = (1,-1,1,1) 83 | 84 | kname, bname = cur[0][1] + '_invK', cur[0][1] + '_invB' 85 | values[kname] = len(weights), kv_inv.shape 86 | values[bname] = len(weights)+1, kmv_inv_b.shape 87 | inits.append([kname, kv_inv.shape, str(kv_inv.dtype)]) 88 | inits.append([bname, kmv_inv_b.shape, str(kmv_inv_b.dtype)]) 89 | weights.extend([kv_inv, kmv_inv_b]) 90 | cur[0] = [cur[0][0], kname, bname] 91 | layers.append([i.name, 'batchnorm', {}]) 92 | elif i.op_type == 'Conv': 93 | # attr, w = i.attribute, values[i.input[1]][1] 94 | attr = i.attribute 95 | g = node(attr, 'group', 'i') or 1 96 | d = node(attr, 'dilations', 'ints') 97 | p = node(attr, 'pads', 'ints') 98 | s = node(attr, 'strides', 'ints') 99 | layers.append([i.name, 'conv', { 100 | 'group':g, 'strides':s, 'dilations':d, 'pads':p}]) 101 | elif i.op_type == 'ConvTranspose': 102 | attr = i.attribute 103 | para = {} 104 | g = node(attr, 'group', 'i', para) 105 | d = node(attr, 'dilations', 'ints', para) 106 | p = node(attr, 'pads', 'ints', para) 107 | s = node(attr, 'strides', 'ints', para) 108 | op = node(attr, 'output_padding', 'ints', para) 109 | layers.append([i.name, 'convtranspose', para]) 110 | elif i.op_type == 'Gemm': 111 | layers.append([i.name, 'dense', {'shp':list(values[i.input[1]][1][::-1])}]) 112 | elif i.op_type == 'MaxPool': 113 | w = node(i.attribute, 'kernel_shape', 'ints') 114 | m = node(i.attribute, 'pads', 'ints') 115 | s = node(i.attribute, 'strides', 'ints') 116 | layers.append([i.name, 'maxpool', {'w':w, 'pads':m, 'strides':s}]) 117 | elif i.op_type == 'GlobalAveragePool': 118 | layers.append([i.name, 'gap', {}]) 119 | elif i.op_type == 'Upsample': 120 | mode = node(i.attribute, 'mode', 's') 121 | layers.append([i.name, 'upsample', {'mode':mode}]) 122 | elif i.op_type == 'Resize': 123 | mode = node(i.attribute, 'mode', 's') 124 | nearest_mode = node(i.attribute, 'nearest_mode', 's') 125 | trans_mode = node(i.attribute, 'coordinate_transformation_mode', 's') 126 | layers.append([i.name, 'resize', {'mode':mode, 'nearest_mode':nearest_mode, 127 | 'coordinate_transformation_mode': trans_mode}]) 128 | elif i.op_type == 'Flatten': 129 | layers.append([i.name, 'flatten', {}]) 130 | elif i.op_type == 'Unsqueeze': 131 | axis = node(i.attribute, 'axes', 'ints') 132 | layers.append([i.name, 'unsqueeze', {} if axis is None else {'axes':axis}]) 133 | elif i.op_type == 'Squeeze': 134 | axis = node(i.attribute, 'axes', 'ints') 135 | layers.append([i.name, 'squeeze', {} if axis is None else {'axes':axis}]) 136 | elif i.op_type == 'Relu': 137 | layers.append([i.name, 'relu', {}]) 138 | elif i.op_type == 'LeakyRelu': 139 | alpha = i.attribute[0].f 140 | layers.append([i.name, 'leakyrelu', {'alpha':alpha}]) 141 | elif i.op_type == 'HardSigmoid': 142 | para = {} 143 | node(i.attribute, 'alpha', 'f', para) 144 | node(i.attribute, 'beta', 'f', para) 145 | layers.append([i.name, 'hardsigmoid', para]) 146 | elif i.op_type == 'Add': 147 | layers.append([i.name, 'add', {}]) 148 | elif i.op_type == 'Sub': 149 | layers.append([i.name, 'sub', {}]) 150 | elif i.op_type == 'Div': 151 | layers.append([i.name, 'div', {}]) 152 | elif i.op_type == 'Tile': 153 | layers.append([i.name, 'tile', {}]) 154 
| elif i.op_type == 'MatMul': 155 | layers.append([i.name, 'matmul', {}]) 156 | elif i.op_type == 'Constant': 157 | _, _, name = flows.pop(-1) 158 | dim = i.attribute[0].t.dims 159 | tp = types[i.attribute[0].t.data_type] 160 | 161 | v = onnx.numpy_helper.to_array(i.attribute[0].t) 162 | values[name] = len(weights), v.shape 163 | inits.append([name, v.shape, str(v.dtype)]) 164 | if v.ndim==0: v = np.array([v]) 165 | weights.append(v) 166 | #layers.append([i.name, 'const', {'value':v, 'dtype':tp}]) 167 | elif i.op_type == 'Identity': 168 | layers.append([i.name, 'identity', {}]) 169 | elif i.op_type == 'Pow': 170 | layers.append([i.name, 'pow', {}]) 171 | elif i.op_type == 'ReduceSum': 172 | para = {} 173 | node(i.attribute, 'axes', 'ints', para) 174 | node(i.attribute, 'keepdims', 'i', para) 175 | layers.append([i.name, 'reducesum', para]) 176 | elif i.op_type == 'ReduceMean': 177 | para = {} 178 | node(i.attribute, 'axes', 'ints', para) 179 | node(i.attribute, 'keepdims', 'i', para) 180 | layers.append([i.name, 'reducemean', para]) 181 | elif i.op_type == 'ReduceMax': 182 | para = {} 183 | node(i.attribute, 'axes', 'ints', para) 184 | node(i.attribute, 'keepdims', 'i', para) 185 | layers.append([i.name, 'reducemax', para]) 186 | elif i.op_type == 'ReduceMin': 187 | para = {} 188 | node(i.attribute, 'axes', 'ints', para) 189 | node(i.attribute, 'keepdims', 'i', para) 190 | layers.append([i.name, 'reducemin', para]) 191 | elif i.op_type == 'Concat': 192 | layers.append([i.name, 'concat', {'axis':i.attribute[0].i}]) 193 | elif i.op_type == 'Pad': 194 | para = {} 195 | node(i.attribute, 'mode', 's', para) 196 | node(i.attribute, 'constant_value', 'f', para) 197 | layers.append([i.name, 'pad', para]) 198 | elif i.op_type == 'Sigmoid': 199 | layers.append([i.name, 'sigmoid', {}]) 200 | elif i.op_type == 'AveragePool': 201 | w = node(i.attribute, 'kernel_shape', 'ints') 202 | m = node(i.attribute, 'pads', 'ints') 203 | s = node(i.attribute, 'strides', 'ints') 204 | layers.append([i.name, 'averagepool', {'w':w, 'pads':m, 'strides':s}]) 205 | elif i.op_type == 'LSTM': 206 | para = {'hidden_size': i.attribute[0].i} 207 | node(i.attribute, 'direction', 's', para) 208 | layers.append([i.name, 'lstm', para]) 209 | elif i.op_type == 'Shape': 210 | layers.append([i.name, 'shape', {}]) 211 | elif i.op_type == 'Gather': 212 | layers.append([i.name, 'gather', {'axis':node(i.attribute, 'axis', 'i') or 0}]) 213 | elif i.op_type == 'Mul': 214 | layers.append([i.name, 'mul', {}]) 215 | elif i.op_type == 'Reshape': 216 | layers.append([i.name, 'reshape', {}]) 217 | elif i.op_type == 'Transpose': 218 | layers.append([i.name, 'transpose', {'axis':node(i.attribute, 'perm', 'ints')}]) 219 | elif i.op_type == 'LogSoftmax': 220 | layers.append([i.name, 'logsoftmax', {'axis':i.attribute[0].i}]) 221 | elif i.op_type == 'Softmax': 222 | layers.append([i.name, 'softmax', {'axis':i.attribute[0].i}]) 223 | elif i.op_type == 'ConstantOfShape': 224 | v = onnx.numpy_helper.to_array(i.attribute[0].t) 225 | tp, v = str(v.dtype), v.tolist() 226 | v = v[0] if len(v)==1 else 0 227 | layers.append([i.name, 'constantofshape', {'value':v, 'dtype':tp}]) 228 | elif i.op_type == 'Greater': 229 | layers.append([i.name, 'greater', {}]) 230 | elif i.op_type == 'NonZero': 231 | layers.append([i.name, 'nonzero', {}]) 232 | elif i.op_type == 'GreaterOrEqual': 233 | layers.append([i.name, 'greaterorequal', {}]) 234 | elif i.op_type == 'TopK': 235 | para = {} 236 | node(i.attribute, 'axis', 'i', para) 237 | node(i.attribute, 'largest', 'i', 
para) 238 | node(i.attribute, 'sorted', 'i', para) 239 | layers.append([i.name, 'topk', para]) 240 | elif i.op_type == 'Split': 241 | split = node(i.attribute, 'split', 'ints') 242 | para = {'axis': node(i.attribute, 'axis', 'i')} 243 | if not split is None: para['split'] = split 244 | layers.append([i.name, 'split', para]) 245 | elif i.op_type == 'Tanh': 246 | layers.append([i.name, 'tanh', {}]) 247 | elif i.op_type == 'Exp': 248 | layers.append([i.name, 'exp', {}]) 249 | elif i.op_type == 'Log': 250 | layers.append([i.name, 'log', {}]) 251 | elif i.op_type == 'Slice': 252 | layers.append([i.name, 'slice', {}]) 253 | elif i.op_type == 'Expand': 254 | layers.append([i.name, 'expand', {}]) 255 | elif i.op_type == 'Equal': 256 | layers.append([i.name, 'equal', {}]) 257 | elif i.op_type == 'Cast': 258 | layers.append([i.name, 'cast', {'dtype':types[i.attribute[0].i]}]) 259 | elif i.op_type == 'Range': 260 | layers.append([i.name, 'range', {}]) 261 | elif i.op_type == 'Where': 262 | layers.append([i.name, 'where', {}]) 263 | elif i.op_type == 'ScatterND': 264 | layers.append([i.name, 'scatternd', {}]) 265 | elif i.op_type == 'InstanceNormalization': 266 | layers.append([i.name, 'instancenormalization', {'epsilon':i.attribute[0].f}]) 267 | elif i.op_type == 'Sqrt': 268 | layers.append([i.name, 'sqrt', {}]) 269 | elif i.op_type == 'Erf': 270 | layers.append([i.name, 'erf', {}]) 271 | elif i.op_type=='Reciprocal': 272 | layers.append([i.name, 'erf', {}]) 273 | elif i.op_type == 'Clip': 274 | minv = node(i.attribute, 'min', 'f') 275 | maxv = node(i.attribute, 'max', 'f') 276 | para = {} 277 | if minv: para['min']=minv 278 | if maxv: para['max']=maxv 279 | layers.append([i.name, 'clip', para]) 280 | else: 281 | print('lost layer:', i.op_type) 282 | return 'lost', i 283 | 284 | layers.append(['return', 'return', {}]) 285 | flows.append([[i.name for i in graph.output], ['return'], 'plrst']) 286 | weights = np.hstack([i.view(dtype=np.uint8).ravel() for i in weights]) 287 | return {'input':input_para, 'inits':inits, 'layers':layers, 'flow':flows}, weights 288 | 289 | def onnx2pla(path, zip=True): 290 | graph, weights = read_onnx(path) 291 | np.save(path.replace('onnx', 'npy'), weights) 292 | with open(path.replace('onnx', 'json'), 'w') as f: 293 | json.dump(graph, f) 294 | if zip: 295 | with zipfile.ZipFile(path.replace('onnx', 'pla'), 'w') as f: 296 | f.write(path[:-4]+'json', os.path.split(path)[1][:-4]+'json') 297 | f.write(path[:-4]+'npy', os.path.split(path)[1][:-4]+'npy') 298 | os.remove(path.replace('onnx','json')) 299 | os.remove(path.replace('onnx','npy')) 300 | 301 | if __name__ == '__main__': 302 | a, b = read_onnx('../demo/yolov3-planer-2/yolov3') 303 | -------------------------------------------------------------------------------- /planer/util.py: -------------------------------------------------------------------------------- 1 | import numpy, numpy as np 2 | from time import time 3 | 4 | def pad(img, shp, mode='constant', constant_values=0): 5 | if shp[2][0]==shp[2][1]==shp[3][0]==shp[3][1]==0: return img 6 | if mode != 'constant': return np.pad(img, shp, mode) 7 | (n, c, h, w), (mn, mc, mh, mw) = img.shape, shp 8 | newimg = np.zeros((n, c, h+mh[0]*2, w+mw[0]*2), dtype=img.dtype) 9 | newimg[:,:,mh[0]:h+mh[0],mw[0]:w+mw[0]] = img 10 | return newimg 11 | 12 | from concurrent.futures import ThreadPoolExecutor 13 | 14 | conv_buf = [] 15 | def clear_buf(): global conv_buf; conv_buf = [] 16 | 17 | def conv_for(img, core, group=1, pads=(1, 1, 1, 1), strides=(1, 1), dilation=(1, 1), 
mode='constant'): 18 | threadPool = ThreadPoolExecutor(max_workers=9) # for 3x3 core 19 | (strh, strw), (dh, dw) = strides, dilation 20 | (n, c, h, w), (ni, ci, hi, wi) = core.shape, img.shape 21 | cimg_w = c * h * w * group 22 | cimg_h, i = (hi//strh)*(wi//strw), 0 23 | shp = (0, 0), (0, 0), (pads[0], pads[2]), (pads[1], pads[3]) 24 | img = pad(img, shp, mode, constant_values=0) 25 | nh = (hi + sum(shp[2]) - (h-1)*dh-1 + strh)//strh 26 | nw = (wi + sum(shp[3]) - (w-1)*dw-1 + strw)//strw 27 | nsh, nsw = nh * strh, nw * strw 28 | # ================ img 2 col ================ 29 | global conv_buf 30 | img = img.transpose((1,0,2,3)) # nchw -> cnhw 31 | size = ci * w * h * ni * nh * nw 32 | if len(conv_buf) < size: conv_buf = np.zeros(size, dtype=img.dtype) 33 | col_img = conv_buf[:size].reshape(ci, w*h, ni, nh, nw) #(h*w, c, N, H, W) 34 | def set_value(img, i, v): img[:,i] = v 35 | for r in range(0, h*dh, dh): 36 | for c in range(0, w*dw, dw): 37 | im, i = img[:,:,0+r:nsh+r:strh, 0+c:nsw+c:strw], i+1 38 | threadPool.submit(set_value, col_img, i-1, im) 39 | threadPool.shutdown(wait=True) 40 | # ============================================ 41 | col_core = core.reshape((group, core.shape[0]//group, -1)[group==1:]) 42 | col_img = col_img.reshape((group, cimg_w//group, -1)[group==1:]) 43 | rst = np.matmul(col_core, col_img) 44 | return rst.reshape((n, ni, nh, nw)).transpose(1, 0, 2, 3) 45 | 46 | def conv_stride(img, core, group=1, pads=(1, 1, 1, 1), strides=(1, 1), dilation=(1, 1), mode='constant'): 47 | (strh, strw), (dh, dw) = strides, dilation 48 | (n, c, h, w), (ni, ci, hi, wi) = core.shape, img.shape 49 | cimg_w = c * h * w * group 50 | cimg_h, i = (hi//strh)*(wi//strw), 0 51 | shp = (0, 0), (0, 0), (pads[0], pads[2]), (pads[1], pads[3]) 52 | img = pad(img, shp, mode, constant_values=0) 53 | nh = (hi + sum(shp[2]) - (h-1)*dh-1 + strh)//strh 54 | nw = (wi + sum(shp[3]) - (w-1)*dw-1 + strw)//strw 55 | nsh, nsw = nh * strh, nw * strw 56 | # ================ img 2 col ================ 57 | ss, shape = img.strides, (ci, w, h, ni, nh, nw) 58 | strides = (ss[-3], ss[-2]*dh, ss[-1]*dw, ss[-4], ss[-2]*strh, ss[-1]*strw) 59 | col_img = np.lib.stride_tricks.as_strided(img, shape=shape, strides=strides) 60 | # ============================================ 61 | col_core = core.reshape((group, core.shape[0]//group, -1)[group==1:]) 62 | col_img = col_img.reshape((group, cimg_w//group, -1)[group==1:]) 63 | rst = np.matmul(col_core, col_img) 64 | return rst.reshape((n, ni, nh, nw)).transpose(1, 0, 2, 3) 65 | 66 | def conv_dnn(img, core, bias=None, group=1, pads=(1, 1, 1, 1), strides=(1, 1), dilation=(1, 1), mode='constant'): 67 | (strh, strw), (dh, dw) = strides, dilation 68 | (n, c, h, w), (ni, ci, hi, wi) = core.shape, img.shape 69 | shp = (0, 0), (0, 0), (pads[0], pads[2]), (pads[1], pads[3]) 70 | if pads[0]==pads[2] and pads[1]==pads[3]: pads = (pads[0], pads[2]) 71 | else: img, pads = pad(img, shp, mode, constant_values=0), (0,0) 72 | nh = (hi + sum(shp[2]) - (h-1)*dh-1 + strh)//strh 73 | nw = (wi + sum(shp[3]) - (w-1)*dw-1 + strw)//strw 74 | y = np.zeros((ni, n, nh, nw), dtype=img.dtype) 75 | dnn.convolution_forward(img, core, bias, y, pads, 76 | (strides[0], strides[1]), (dilation[0], dilation[1]), 1, auto_tune=True, tensor_core='always') 77 | return y 78 | 79 | def pool(img, f, core=(2, 2), pads=(0,0,0,0), stride=(2, 2), const=0): 80 | (n, c, h, w), (ch, cw), (strh, strw) = img.shape, core, stride 81 | shp = ((0, 0), (0, 0), (pads[0], pads[2]), (pads[1], pads[3])) 82 | img = pad(img, shp, 'constant', 
constant_values=0) 83 | (imn, ic, ih, iw), imgs = img.shape, [] 84 | dh = (h + sum(shp[2]) - core[0] + strh)//strh 85 | dw = (w + sum(shp[3]) - core[1] + strw)//strw 86 | nsh, nsw = dh * strh, dw * strw 87 | buf = np.zeros(img.shape[:2]+(dh, dw), img.dtype) 88 | if const != 0: buf[:] = const 89 | for r in range(0, ch, 1): 90 | for c in range(0, cw, 1): 91 | f(img[:,:,r:nsh+r:strh,c:nsw+c:strw], buf, out=buf) 92 | return buf 93 | 94 | def maxpool(i, c=(2, 2), mar=(0,0,0,0), s=(2, 2)): 95 | return pool(i, np.maximum, c, mar, s, -1e4) 96 | 97 | def avgpool(i, c=(2, 2), mar=(0,0,0,0), s=(2, 2)): 98 | rst = pool(i, np.add, c, mar, s, 0) 99 | rst /= c[0] * c[1] 100 | return rst 101 | 102 | def lstm(X, Y, w, r, b, ht, ct, dir=1): 103 | def tanh(x): return np.tanh(x, out=x) 104 | 105 | def sigmoid(x): 106 | x *= -1; np.exp(x, out=x); x += 1 107 | return np.divide(1, x, out=x) 108 | 109 | for t in list(range(X.shape[0]))[::dir]: 110 | gates = np.matmul(X[t], w.T) 111 | gates += np.matmul(ht, r.T) 112 | gates += b[:b.shape[0]//2] 113 | gates += b[b.shape[0]//2:] 114 | i, o, f, c = np.split(gates[None,:], 4, -1) 115 | sigmoid(i), sigmoid(f), tanh(c) 116 | f *= ct; i *= c; f += i; C = f 117 | o = sigmoid(o); o *= np.tanh(C) 118 | Y[t], ht, ct = o[0], o[0], C 119 | return Y, ht, ct 120 | 121 | def make_upmat(k): 122 | ys = np.linspace(0.5/k[0], 1-0.5/k[0], k[0]*1, dtype=np.float16) 123 | xs = np.linspace(0.5/k[1], 1-0.5/k[1], k[1]*1, dtype=np.float16) 124 | rs, cs = ys[:,None], xs[None,:] 125 | if k[0]==1: return np.vstack([1-xs, xs]) 126 | if k[1]==1: return np.vstack([1-ys, ys]) 127 | klt = ((1-cs)*(1-rs)).reshape((1,-1)) 128 | krt = (cs * (1-rs)).reshape((1,-1)) 129 | klb = ((1-cs) * rs).reshape((1,-1)) 130 | krb = (cs * rs).reshape((1,-1)) 131 | return np.vstack([klt, krt, klb, krb]) 132 | 133 | def upsample_blinear(img, k): 134 | n, c, h, w = img.shape 135 | if k[0] == k[1] == 1: return img 136 | if k[0]>1: 137 | img = (img[:,:,:1,:], img, img[:,:,-1:,:]) 138 | img = np.concatenate(img, axis=2) 139 | if k[1]>1: 140 | img = (img[:,:,:,:1], img, img[:,:,:,-1:]) 141 | img = np.concatenate(img, axis=3) 142 | imgs = [img[:,:,:-1,:-1], img[:,:,:-1,1:], 143 | img[:,:,1:,:-1], img[:,:,1:,1:]] 144 | if k[0]==1: imgs = [img[:,:,:,:-1], img[:,:,:,1:]] 145 | if k[1]==1: imgs = [img[:,:,:-1,:], img[:,:,1:,:]] 146 | imgs = [i[:,:,:,:,None] for i in imgs] 147 | rst = np.concatenate(imgs, axis=-1) 148 | rst = np.matmul(rst.reshape((-1,len(imgs))), make_upmat(k)) 149 | hh, ww = h + (k[0]>1), w + (k[1]>1) 150 | rst = rst.reshape((-1, ww, k[0], k[1])) 151 | rst = rst.transpose((0,2,1,3)) 152 | rst = rst.reshape((n, c ,hh*k[0], ww*k[1])) 153 | return rst[:,:,k[0]//2:h*k[0]+k[0]//2,k[1]//2:w*k[1]+k[1]//2] 154 | 155 | def offset(k, trans_mode, round_mode): 156 | idx = np.arange(-64, 64) 157 | if trans_mode == 'half_pixel': 158 | idx = (idx + 0.5)/k - 0.5 159 | if trans_mode == 'asymmetric': 160 | idx = idx / k 161 | if round_mode == 'round_prefer_floor': 162 | idx = np.round(idx-1e-3) 163 | if round_mode == 'round_prefer_ceil': 164 | idx = np.round(idx+1e-3) 165 | if round_mode == 'ceil': 166 | idx = np.ceil(idx) 167 | if round_mode == 'floor': 168 | idx = np.floor(idx) 169 | idx = idx.astype(np.int16) 170 | return np.argmax(idx==0)-64 171 | 172 | def pix_offset(img, dr, dc): 173 | n, c, h, w = img.shape 174 | if dr == dc == 0: return img 175 | if dr>=0: sr1s, sr1e, sr2s, sr2e, sr, rr = dr, h, 0, h-dr, (0, dr), 0 176 | else: sr1s, sr1e, sr2s, sr2e, sr, rr = 0, h+dr, -dr, h, (h+dr, h), h-1 177 | if dc>=0: sc1s, sc1e, 
sc2s, sc2e, sc, cc = dc, w, 0, w-dc, (0, dc), 0 178 | else: sc1s, sc1e, sc2s, sc2e, sc, cc = 0, w+dc, -dc, w, (w+dc, w), w-1 179 | img[:, :, sr1s:sr1e, sc1s:sc1e] = img[:, :, sr2s:sr2e, sc2s:sc2e] 180 | img[:, :, slice(*sr), :] = img[:, :, rr:rr+1, :] 181 | img[:, :, :, slice(*sc)] = img[:, :, :, cc:cc+1] 182 | return img 183 | 184 | def upsample_nearest(img, k, trans_mode='half-pixel', round_mode='round_prefer_ceil'): 185 | n, c, h, w = img.shape 186 | rst = np.zeros((n, c, h*k[0], w*k[1]), dtype=img.dtype) 187 | for r in range(k[0]): 188 | for c in range(k[1]): 189 | rst[:,:,r::k[0],c::k[1]]=img 190 | 191 | offr, offc = [offset(i, trans_mode, round_mode) for i in k] 192 | return pix_offset(rst, offr, offc) 193 | 194 | def upsample_size(img, size): 195 | nc, (h, w) = img.shape[:-2], img.shape[-2:] 196 | kh, kw = size[0]/h, size[1]/w 197 | slicer = -0.5+0.5/kh, h-0.5-0.5/kh, size[0] 198 | rs = np.linspace(*slicer, dtype=img.dtype) 199 | slicec = -0.5+0.5/kw, w-0.5-0.5/kw, size[1] 200 | cs = np.linspace(*slicec, dtype=img.dtype) 201 | rs = np.clip(rs, 0, h-1, out=rs) 202 | cs = np.clip(cs, 0, w-1, out=cs) 203 | ra = np.floor(np.clip(rs, 0, h-1.001)) 204 | ca = np.floor(np.clip(cs, 0, w-1.001)) 205 | ra, ca = ra.astype(int), ca.astype(int) 206 | rs -= ra; cs -= ca; rb = ra+1; cb = ca+1; 207 | rs.shape, img.shape = (-1,1), (-1, h, w) 208 | buf = img[:,:,ca]*(1-cs) + img[:,:,cb]*cs 209 | result = buf[:,ra,:]*(1-rs) + buf[:,rb,:]*rs 210 | return result.reshape(nc + size) 211 | 212 | def upsample(img, k, mode, trans_mode='half-pixcel', round_mode='round_prefer_ceil'): 213 | kint = [int(k[0]), int(k[1])] 214 | size = int(round(k[0]*img.shape[2])), int(round(k[1]*img.shape[3])) 215 | 216 | if mode=='nearest': return upsample_nearest(img, kint, trans_mode, round_mode) 217 | if mode=='linear': 218 | if k[0]==int(k[0]) and k[1]==int(k[1]): return upsample_blinear(img, kint) 219 | else: return upsample_size(img, size) 220 | 221 | # ===== below is some image process function ===== 222 | import math, itertools 223 | 224 | def conv_auto(img, core, mode='reflect', keeptp=True): 225 | shp, dim, (h, w) = img.shape, img.ndim, core.shape 226 | img = np.pad(img, ((h//2,h//2),(w//2,w//2),(0,0))[:dim], mode=mode) 227 | rst, buf = np.zeros((2,) + shp, dtype=np.float32) 228 | for r,c in np.mgrid[:h,:w].reshape(2,-1).T: 229 | buf[:] = img[r:r+shp[0],c:c+shp[1]] 230 | buf *= core[r,c]; rst += buf 231 | return rst.astype(img.dtype) if keeptp else rst 232 | 233 | def conv_rc(img, core_r, core_c, mode='reflect'): 234 | return conv_auto(conv_auto(img, core_r, keeptp=False), core_c) 235 | 236 | def uniform_filter(img, size=3, mode='reflect'): 237 | core = np.ones(size, dtype=np.float32)/size 238 | return conv_rc(img, core[None,:], core[:,None], mode) 239 | 240 | def gaussian_filter(img, sig=2, mode='reflect'): 241 | x = np.arange(-int(sig*2.5+0.5), int(sig*2.5+0.5)+1) 242 | core = np.exp(-x**2/2/sig**2)/sig/(2*np.pi)**0.5 243 | return conv_rc(img, core[None,:], core[:,None], mode) 244 | 245 | def make_slice(l, w, mar): 246 | r = np.linspace(0, l-w, math.ceil((l-mar)/(w-mar))) 247 | return [slice(i, i+w) for i in r.astype(int).tolist()] 248 | 249 | def grid_slice(H, W, h, w, mar): 250 | a, b = make_slice(H, h, mar), make_slice(W, w, mar) 251 | return list(itertools.product(a, b)) 252 | 253 | def resize(img, size, backend=None): 254 | nn = np if isinstance(img, np.ndarray) else numpy 255 | d, (h, w) = img.ndim, img.shape[:2] 256 | kh, kw = size[0]/h, size[1]/w 257 | slicer = -0.5+0.5/kh, h-0.5-0.5/kh, size[0] 258 | rs = 
nn.linspace(*slicer, dtype=nn.float32) 259 | slicec = -0.5+0.5/kw, w-0.5-0.5/kw, size[1] 260 | cs = nn.linspace(*slicec, dtype=nn.float32) 261 | rs = nn.clip(rs, 0, h-1, out=rs) 262 | cs = nn.clip(cs, 0, w-1, out=cs) 263 | ra = nn.floor(nn.clip(rs, 0, h-1.001)) 264 | ca = nn.floor(nn.clip(cs, 0, w-1.001)) 265 | ra, ca = ra.astype(int), ca.astype(int) 266 | rs -= ra; cs -= ca; rb = ra+1; cb = ca+1; 267 | rs.shape, cs.shape = (-1,1,1)[:d], (1,-1,1)[:d] 268 | buf = img[:,ca]*(1-cs) + img[:,cb]*cs 269 | return buf[ra,:]*(1-rs) + buf[rb,:]*rs 270 | 271 | def mapcoord(img, rs, cs, keeptp=True, backend=None): 272 | nn = np if isinstance(img, np.ndarray) else numpy 273 | d, (h, w) = img.ndim, img.shape[:2] 274 | nn.clip(rs, 0, h-1, out=rs) 275 | nn.clip(cs, 0, w-1, out=cs) 276 | ra = nn.floor(nn.clip(rs, 0, h-1.5)) 277 | ca = nn.floor(nn.clip(cs, 0, w-1.5)) 278 | ra, ca = ra.astype(int), ca.astype(int) 279 | rs -= ra; cs -= ca; rb = ra+1; cb = ca+1; 280 | if d==3: rs, cs = rs[:,:,None], cs[:,:,None] 281 | buf = img[ra,ca]*((1-cs) * (1-rs)) 282 | buf += img[rb,cb] * (cs * rs) 283 | buf += img[ra,cb] * ((1-rs) * cs) 284 | buf += img[rb,ca] * ((1-cs) * rs) 285 | return buf.astype(img.dtype) if keeptp else buf 286 | 287 | # sample:float or tuple, float means factor tuple means size 288 | # glob:force adjust image to glob's integral multiple. 289 | # window:after sample, if larger than window, then tiled by window 290 | # margin:overlay between window, float means factor and int means width 291 | def tile(sample=1, glob=1, window=1024, margin=0.1, astype='float32', progress=print): 292 | def wrapf(f): 293 | def wrap(*p, **key): 294 | (h, w), ori_img = p[0].shape[:2], p[0] 295 | samecore = isinstance(ori_img, np.ndarray) 296 | # img = np.asarray(ori_img, dtype=astype) 297 | img = ori_img.astype('float32') 298 | tps = {'sample', 'window', 'glob', 'margin', 'progress'} 299 | ftp = fp, tp = {}, {} 300 | for i in key: ftp[i in tps][i] = key[i] 301 | ssz = tp.get('sample', sample) 302 | wsz = wsh = wsw = tp.get('window', window) 303 | gsz = tp.get('glob', glob) 304 | mar = tp.get('margin', margin) 305 | info = tp.get('progress', progress) 306 | if isinstance(ssz, tuple): ssz = list(ssz) 307 | else: ssz = [int(h*ssz), int(w*ssz)] 308 | # smaller than window, then scale to glob 309 | from math import ceil 310 | if wsh>ssz[0]: wsh = ssz[0] = ceil(ssz[0]/gsz)*gsz 311 | if wsw>ssz[1]: wsw = ssz[1] = ceil(ssz[1]/gsz)*gsz 312 | if ssz!=[h, w]:img = resize(img, ssz) 313 | if isinstance(mar, float): mar = int(wsz*mar) 314 | rcs = grid_slice(*ssz, wsh, wsw, mar) 315 | if len(rcs)>1: info(1, len(rcs)) 316 | rst = f(img[rcs[0]], *p[1:], **fp) 317 | k = rst.shape[0]/(rcs[0][0].stop - rcs[0][0].start) 318 | if len(rcs)==1 and ssz!=[h, w]: 319 | rst = resize(rst, (int(h*k), int(w*k))) 320 | if len(rcs)==1: return rst 321 | def sk(ss, k): 322 | sr = slice(int(ss[0].start*k), int(ss[0].stop*k)) 323 | sc = slice(int(ss[1].start*k), int(ss[1].stop*k)) 324 | return sr, sc 325 | outshp = int(img.shape[0]*k), int(img.shape[1]*k) 326 | outshp = outshp + rst.shape[2:] 327 | weights = numpy.zeros(rst.shape[:2], dtype='uint16') 328 | if rst.ndim==3: weights = weights[:,:,None] 329 | weights += int(mar * k) + 1 330 | for i in range(int(mar*k), 0, -1): 331 | weights[i-1,:] = weights[-i,:] = i 332 | weights[:,i-1] = weights[:,-i] = i 333 | buf = numpy.zeros(outshp, dtype=np.float32) 334 | count = numpy.zeros(outshp[:2], dtype='uint16') 335 | if rst.ndim==3: count = count[:,:,None] 336 | buf[sk(rcs[0], k)] = rst * weights 337 | 
count[sk(rcs[0], k)] += weights 338 | for i in range(1, len(rcs)): 339 | info(i+1, len(rcs)) 340 | rst = f(img[rcs[i]], *p[1:], **fp) 341 | buf[sk(rcs[i], k)] += rst * weights 342 | count[sk(rcs[i], k)] += weights 343 | numpy.divide(buf, count, out=buf, casting='unsafe') 344 | if ssz!=[h, w]: 345 | buf = resize(buf, (int(h*k), int(w*k))) 346 | return buf.astype(rst.dtype) 347 | return wrap 348 | return wrapf 349 | 350 | if __name__ == '__main__': 351 | ''' 352 | img = np.zeros((1, 64, 512, 512), dtype=np.float32) 353 | core = np.zeros((32, 64, 3, 3), dtype=np.float32) 354 | conv_for(img, core) 355 | ''' 356 | import numpy as np 357 | a = np.arange(4).reshape(1,1,2,2) 358 | print(a) 359 | core = np.ones(3).reshape(1,1,1,3) 360 | print(conv_for(a, core, pads=(0,1,0,1))) # pads=(top, left, bottom, right) 361 | --------------------------------------------------------------------------------