├── _config.yml ├── .gitignore ├── Images ├── art1.gif ├── art2.gif └── lenet.gif ├── BrouhahaDemo ├── BrouhahaDemo │ ├── LeNet │ │ ├── params │ │ │ ├── b0 │ │ │ ├── b1 │ │ │ ├── b2 │ │ │ ├── b3 │ │ │ ├── w0 │ │ │ ├── w1 │ │ │ ├── w2 │ │ │ ├── w3 │ │ │ ├── testX100 │ │ │ └── testY100 │ │ ├── PaintView.h │ │ ├── LeNetViewController.h │ │ └── PaintView.m │ ├── ViewController.h │ ├── Art transform │ │ ├── images │ │ │ ├── tk.png │ │ │ ├── zgr.jpg │ │ │ ├── 800X800.png │ │ │ ├── 900X900.png │ │ │ └── 1024X1024.png │ │ ├── params │ │ │ ├── conv1_alpha │ │ │ ├── conv1_beta │ │ │ ├── conv1_weight │ │ │ ├── conv2_alpha │ │ │ ├── conv2_beta │ │ │ ├── conv2_weight │ │ │ ├── conv3_alpha │ │ │ ├── conv3_beta │ │ │ ├── conv3_weight │ │ │ ├── conv4_alpha │ │ │ ├── conv4_beta │ │ │ ├── conv4_weight │ │ │ ├── res1_conv1_alpha │ │ │ ├── res1_conv1_beta │ │ │ ├── res1_conv1_weight │ │ │ ├── res1_conv2_alpha │ │ │ ├── res1_conv2_beta │ │ │ ├── res1_conv2_weight │ │ │ ├── res2_conv1_alpha │ │ │ ├── res2_conv1_beta │ │ │ ├── res2_conv1_weight │ │ │ ├── res2_conv2_alpha │ │ │ ├── res2_conv2_beta │ │ │ ├── res2_conv2_weight │ │ │ ├── res3_conv1_alpha │ │ │ ├── res3_conv1_beta │ │ │ ├── res3_conv1_weight │ │ │ ├── res3_conv2_alpha │ │ │ ├── res3_conv2_beta │ │ │ ├── res3_conv2_weight │ │ │ ├── res4_conv1_alpha │ │ │ ├── res4_conv1_beta │ │ │ ├── res4_conv1_weight │ │ │ ├── res4_conv2_alpha │ │ │ ├── res4_conv2_beta │ │ │ ├── res4_conv2_weight │ │ │ ├── res5_conv1_alpha │ │ │ ├── res5_conv1_beta │ │ │ ├── res5_conv1_weight │ │ │ ├── res5_conv2_alpha │ │ │ ├── res5_conv2_beta │ │ │ ├── res5_conv2_weight │ │ │ ├── transpose_conv1_alpha │ │ │ ├── transpose_conv1_beta │ │ │ ├── transpose_conv1_weight │ │ │ ├── transpose_conv2_alpha │ │ │ ├── transpose_conv2_beta │ │ │ └── transpose_conv2_weight │ │ ├── ArtTransformViewController.h │ │ ├── ArtTransformHalfViewController.h │ │ ├── BrouResidualLayer_half.h │ │ ├── BrouResidualLayer_float.h │ │ └── BrouResidualLayer_float.m │ ├── Support │ │ ├── AppDelegate.h │ │ ├── main.m │ │ ├── Info.plist │ │ ├── Base.lproj │ │ │ ├── Main.storyboard │ │ │ └── LaunchScreen.storyboard │ │ ├── Assets.xcassets │ │ │ └── AppIcon.appiconset │ │ │ │ └── Contents.json │ │ └── AppDelegate.m │ └── ViewController.m └── BrouhahaDemo.xcodeproj │ └── xcuserdata │ └── yanyuanchi.xcuserdatad │ └── xcschemes │ ├── xcschememanagement.plist │ └── BrouhahaDemo.xcscheme ├── Brouhaha.xcworkspace ├── xcuserdata │ └── yanyuanchi.xcuserdatad │ │ ├── xcdebugger │ │ └── Breakpoints_v2.xcbkptlist │ │ └── UserInterfaceState.xcuserstate └── contents.xcworkspacedata ├── Brouhaha ├── Brouhaha │ ├── Utils │ │ ├── Generate │ │ │ ├── BrouGenerate.h │ │ │ └── BrouMatrix.c │ │ ├── BrouUtils.c │ │ ├── BrouUtils.h │ │ ├── BrouConvertType.h │ │ └── BrouConvertType.c │ ├── Layer │ │ ├── Generate │ │ │ ├── BrouReLuLayer.h │ │ │ ├── BrouTanHLayer.h │ │ │ ├── BrouConvertLayer.h │ │ │ ├── BrouPReLuLayer.h │ │ │ ├── BrouLinearLayer.h │ │ │ ├── BrouFullConnectLayer.h │ │ │ ├── BrouAddBiasLayer.h │ │ │ ├── BrouAddLayer.h │ │ │ ├── BrouMaxPoolingLayer.h │ │ │ ├── BrouAveragePoolingLayer.h │ │ │ ├── BrouConvolutionMMLayer.h │ │ │ ├── BrouTransposedConvolutionMMLayer.h │ │ │ ├── BrouBatchNormalizationLayer.h │ │ │ ├── BrouDilatedConvolutionLayer.h │ │ │ ├── BrouDilatedConvolutionMMLayer.h │ │ │ ├── BrouTransposedConvolutionLayer.h │ │ │ ├── BrouConvolutionLayer.h │ │ │ ├── BrouReLuLayer.m │ │ │ ├── BrouTanHLayer.m │ │ │ ├── BrouConvertLayer.m │ │ │ └── BrouPReLuLayer.m │ │ ├── BrouLayer.h │ │ └── BrouLayer.m │ ├── Basic │ │ ├── 
BrouShareBuffer.h │ │ ├── BrouTensor.h │ │ ├── BrouUniqueTensor.h │ │ ├── BrouTemporaryBuffer1.h │ │ ├── BrouMacro.h │ │ ├── BrouTemporaryTensor.h │ │ ├── BrouStruct.h │ │ ├── BrouTemporaryBuffer1.m │ │ ├── BrouShareBuffer.m │ │ └── BrouUniqueTensor.m │ ├── Brouhaha.m │ └── Brouhaha.h └── Brouhaha.xcodeproj │ └── xcuserdata │ └── yanyuanchi.xcuserdatad │ └── xcschemes │ ├── xcschememanagement.plist │ └── Brouhaha.xcscheme ├── BrouhahaMetal ├── BrouhahaMetal.xcodeproj │ └── xcuserdata │ │ └── yanyuanchi.xcuserdatad │ │ └── xcschemes │ │ ├── xcschememanagement.plist │ │ └── BrouhahaMetal.xcscheme └── BrouhahaMetal │ ├── BrouStruct.metal │ ├── BrouMaxPooling.metal │ ├── BrouReLu.metal │ ├── BrouConvert.metal │ ├── BrouTanH.metal │ ├── BrouAdd.metal │ ├── BrouGenerate.metal │ ├── BrouLinear.metal │ ├── BrouAddBias.metal │ ├── BrouPReLu.metal │ ├── BrouMatrixMultiply.metal │ ├── BrouFullconnect.metal │ ├── BrouSoftMax.metal │ ├── BrouDilatedConvolutionMM.metal │ ├── BrouAveragePooling.metal │ ├── BrouDilatedConvolution.metal │ ├── BrouConvolutionMM.metal │ └── BrouTransposedConvolution.metal ├── LICENSE ├── Readme_zh.md ├── README.md └── read.py /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /Build 2 | /Index 3 | /DerivedData 4 | -------------------------------------------------------------------------------- /Images/art1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/Images/art1.gif -------------------------------------------------------------------------------- /Images/art2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/Images/art2.gif -------------------------------------------------------------------------------- /Images/lenet.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/Images/lenet.gif -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/LeNet/params/b0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/LeNet/params/b0 -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/LeNet/params/b1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/LeNet/params/b1 -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/LeNet/params/b2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/LeNet/params/b2 -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/LeNet/params/b3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/LeNet/params/b3 
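The b0–b3 blobs above (and the w0–w3 blobs just below) are the LeNet demo's raw parameter files. As a minimal sketch of how one weight/bias pair could be handed to Brouhaha, the snippet loads the files with NSData and passes the float pointers to the generated full-connect layer (BrouFullConnectLayer_float, whose initializer is declared further down in this dump). The raw little-endian float32 file layout, the 256→10 layer size, and the BrouDemoLoadParam helper are assumptions made for illustration only.

#import <Foundation/Foundation.h>
#import <Metal/Metal.h>
#import "Brouhaha.h"

/* Hypothetical helper: load a raw parameter blob from the app bundle. */
NSData *BrouDemoLoadParam(NSString *name) {
    NSString *path = [[NSBundle mainBundle] pathForResource:name ofType:nil];
    return [NSData dataWithContentsOfFile:path];
}

void BrouDemoBuildLeNetOutputLayer(void) {
    id<MTLDevice> device = MTLCreateSystemDefaultDevice();
    /* assumes the Brouhaha kernels live in the app's default metallib */
    id<MTLLibrary> library = [device newDefaultLibrary];

    NSData *w3 = BrouDemoLoadParam(@"w3");   /* assumed: raw float32 weights */
    NSData *b3 = BrouDemoLoadParam(@"b3");   /* assumed: raw float32 bias    */

    /* 256 -> 10 is an assumed layer size, not taken from the repo.
       The intputChannel spelling matches the header as declared. */
    BrouFullConnectLayer_float *fc =
        [[BrouFullConnectLayer_float alloc] initWithDevice:device
                                                   library:library
                                              floatWeights:(void *)w3.bytes
                                                 floatBias:(void *)b3.bytes
                                             intputChannel:256
                                             outputChannel:10];
    (void)fc;
}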
-------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/LeNet/params/w0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/LeNet/params/w0 -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/LeNet/params/w1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/LeNet/params/w1 -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/LeNet/params/w2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/LeNet/params/w2 -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/LeNet/params/w3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/LeNet/params/w3 -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/LeNet/PaintView.h: -------------------------------------------------------------------------------- 1 | #import 2 | 3 | @interface PaintView : UIView 4 | 5 | - (void)clear; 6 | 7 | @end 8 | -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/ViewController.h: -------------------------------------------------------------------------------- 1 | #import 2 | 3 | @interface ViewController : UIViewController 4 | 5 | 6 | @end 7 | 8 | -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/LeNet/params/testX100: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/LeNet/params/testX100 -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/LeNet/params/testY100: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/LeNet/params/testY100 -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/LeNet/LeNetViewController.h: -------------------------------------------------------------------------------- 1 | #import 2 | 3 | @interface LeNetViewController : UIViewController 4 | 5 | @end 6 | -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/images/tk.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/images/tk.png -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/images/zgr.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/images/zgr.jpg -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art 
transform/images/800X800.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/images/800X800.png -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/images/900X900.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/images/900X900.png -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/conv1_alpha: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/conv1_alpha -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/conv1_beta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/conv1_beta -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/conv1_weight: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/conv1_weight -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/conv2_alpha: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/conv2_alpha -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/conv2_beta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/conv2_beta -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/conv2_weight: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/conv2_weight -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/conv3_alpha: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/conv3_alpha -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/conv3_beta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/conv3_beta -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/conv3_weight: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/conv3_weight -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/conv4_alpha: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/conv4_alpha -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/conv4_beta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/conv4_beta -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/conv4_weight: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/conv4_weight -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/ArtTransformViewController.h: -------------------------------------------------------------------------------- 1 | #import 2 | 3 | @interface ArtTransformViewController : UIViewController 4 | 5 | @end 6 | -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/images/1024X1024.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/images/1024X1024.png -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res1_conv1_alpha: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res1_conv1_alpha -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res1_conv1_beta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res1_conv1_beta -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res1_conv1_weight: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res1_conv1_weight -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res1_conv2_alpha: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res1_conv2_alpha -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res1_conv2_beta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art 
transform/params/res1_conv2_beta -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res1_conv2_weight: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res1_conv2_weight -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res2_conv1_alpha: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res2_conv1_alpha -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res2_conv1_beta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res2_conv1_beta -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res2_conv1_weight: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res2_conv1_weight -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res2_conv2_alpha: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res2_conv2_alpha -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res2_conv2_beta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res2_conv2_beta -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res2_conv2_weight: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res2_conv2_weight -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res3_conv1_alpha: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res3_conv1_alpha -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res3_conv1_beta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res3_conv1_beta -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res3_conv1_weight: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res3_conv1_weight 
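Each res&lt;N&gt; block above ships weight/alpha/beta blobs for two 3x3 convolutions, and they line up one-to-one with the arguments of the demo's BrouResidualLayer_float initializer (declared later in this dump). A hedged sketch for the first block, assuming the files are raw float32 and reusing the hypothetical BrouDemoLoadParam loader from the earlier LeNet sketch; channel 128 comes from the residual header's own comment.

#import <Foundation/Foundation.h>
#import <Metal/Metal.h>
#import "BrouResidualLayer_float.h"

/* hypothetical loader, defined in the LeNet sketch above */
NSData *BrouDemoLoadParam(NSString *name);

/* Sketch: build the first residual block of the style-transfer net. */
BrouResidualLayer_float *BrouDemoBuildRes1(id<MTLDevice> device,
                                           id<MTLLibrary> library) {
    NSData *w1 = BrouDemoLoadParam(@"res1_conv1_weight");
    NSData *w2 = BrouDemoLoadParam(@"res1_conv2_weight");
    NSData *a1 = BrouDemoLoadParam(@"res1_conv1_alpha");
    NSData *b1 = BrouDemoLoadParam(@"res1_conv1_beta");
    NSData *a2 = BrouDemoLoadParam(@"res1_conv2_alpha");
    NSData *b2 = BrouDemoLoadParam(@"res1_conv2_beta");

    /* channel = 128 matches the (height, width, 128) shape documented in
       BrouResidualLayer_float.h; the raw float32 layout is assumed. */
    return [[BrouResidualLayer_float alloc] initWithDevice:device
                                                   library:library
                                              floatWeight1:(void *)w1.bytes
                                              floatWeight2:(void *)w2.bytes
                                               floatAlpha1:(void *)a1.bytes
                                                floatBeta1:(void *)b1.bytes
                                               floatAlpha2:(void *)a2.bytes
                                                floatBeta2:(void *)b2.bytes
                                                   channel:128];
}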
-------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res3_conv2_alpha: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res3_conv2_alpha -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res3_conv2_beta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res3_conv2_beta -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res3_conv2_weight: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res3_conv2_weight -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res4_conv1_alpha: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res4_conv1_alpha -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res4_conv1_beta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res4_conv1_beta -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res4_conv1_weight: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res4_conv1_weight -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res4_conv2_alpha: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res4_conv2_alpha -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res4_conv2_beta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res4_conv2_beta -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res4_conv2_weight: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res4_conv2_weight -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res5_conv1_alpha: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res5_conv1_alpha 
-------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res5_conv1_beta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res5_conv1_beta -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res5_conv1_weight: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res5_conv1_weight -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res5_conv2_alpha: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res5_conv2_alpha -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res5_conv2_beta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res5_conv2_beta -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/res5_conv2_weight: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/res5_conv2_weight -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/ArtTransformHalfViewController.h: -------------------------------------------------------------------------------- 1 | #import 2 | 3 | @interface ArtTransformHalfViewController : UIViewController 4 | 5 | @end 6 | -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/transpose_conv1_alpha: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/transpose_conv1_alpha -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/transpose_conv1_beta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/transpose_conv1_beta -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/transpose_conv1_weight: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/transpose_conv1_weight -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/transpose_conv2_alpha: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/transpose_conv2_alpha 
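Once the parameter files above have been turned into layers, running the network is a matter of encoding each layer into a Metal command buffer through the common BrouLayer computeCommandBuffer:input:output: API (the layer base class and tensor classes appear later in this dump). A minimal sketch, assuming BrouUniqueTensor_float conforms to the BrouTensor protocol and that the bundled 800X800.png defines the intended input size.

#import <Metal/Metal.h>
#import "Brouhaha.h"

/* Sketch: encode one already-constructed layer into a command buffer and run it. */
void BrouDemoRunLayer(BrouLayer *layer, id<MTLDevice> device) {
    id<MTLCommandQueue> queue = [device newCommandQueue];
    id<MTLCommandBuffer> commandBuffer = [queue commandBuffer];

    /* 800 x 800 x 3 matches the bundled 800X800.png test image. */
    BrouUniqueTensor_float *input  = [BrouUniqueTensor_float initWithHeight:800
                                                                      width:800
                                                                    channel:3
                                                                     device:device];
    BrouUniqueTensor_float *output = [BrouUniqueTensor_float initWithHeight:800
                                                                      width:800
                                                                    channel:3
                                                                     device:device];

    /* ... fill input's underlying MTLBuffer with the image's float pixels here ... */

    [layer computeCommandBuffer:commandBuffer input:input output:output];

    [commandBuffer commit];
    [commandBuffer waitUntilCompleted];
}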
-------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/transpose_conv2_beta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/transpose_conv2_beta -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/params/transpose_conv2_weight: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/BrouhahaDemo/BrouhahaDemo/Art transform/params/transpose_conv2_weight -------------------------------------------------------------------------------- /Brouhaha.xcworkspace/xcuserdata/yanyuanchi.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | -------------------------------------------------------------------------------- /Brouhaha.xcworkspace/xcuserdata/yanyuanchi.xcuserdatad/UserInterfaceState.xcuserstate: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazingyyc/Brouhaha/HEAD/Brouhaha.xcworkspace/xcuserdata/yanyuanchi.xcuserdatad/UserInterfaceState.xcuserstate -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Utils/Generate/BrouGenerate.h: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU) 2 | 3 | /** 4 | * transpose the in matrix to out 5 | * requirt:outRow >= inCol outCol >= inRow 6 | */ 7 | void BROU(TransposeMatrix)(type *in, size_t inRow, size_t inCol, type *out, size_t outRow, size_t outCol); 8 | 9 | #endif 10 | 11 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/Generate/BrouReLuLayer.h: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU_METAL) && defined(BROU_OBJECT) 2 | 3 | @interface BROU_OBJECT(ReLuLayer) : BrouLayer 4 | 5 | - (instancetype)initWithDevice:(id)device 6 | library:(id)library 7 | dimensionType:(DimensionType)dimensionType; 8 | 9 | @end 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/Generate/BrouTanHLayer.h: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU_METAL) && defined(BROU_OBJECT) 2 | 3 | @interface BROU_OBJECT(TanHLayer) : BrouLayer 4 | 5 | - (instancetype)initWithDevice:(id)device 6 | library:(id)library 7 | dimensionType:(DimensionType)dimensionType; 8 | 9 | @end 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Support/AppDelegate.h: -------------------------------------------------------------------------------- 1 | // 2 | // AppDelegate.h 3 | // BrouhahaRealDemo 4 | // 5 | // Created by yanyuanchi on 2017/8/24. 6 | // Copyright © 2017年 yanyuanchi. All rights reserved. 
7 | // 8 | 9 | #import 10 | 11 | @interface AppDelegate : UIResponder 12 | 13 | @property (strong, nonatomic) UIWindow *window; 14 | 15 | 16 | @end 17 | 18 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/Generate/BrouConvertLayer.h: -------------------------------------------------------------------------------- 1 | #if defined(from) && defined(to) && defined(BROU_CONVERT_OBJECT) && defined(BROU_CONVERT_METAL) 2 | 3 | @interface BROU_CONVERT_OBJECT(from, to) : BrouLayer 4 | 5 | - (instancetype)initWithDevice:(id)device 6 | library:(id)library 7 | dimensionType:(DimensionType)dimensionType; 8 | 9 | @end 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /Brouhaha.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 9 | 10 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/Generate/BrouPReLuLayer.h: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU_METAL) && defined(BROU_OBJECT) 2 | 3 | @interface BROU_OBJECT(PReLuLayer) : BrouLayer 4 | 5 | - (instancetype)initWithDevice:(id)device 6 | library:(id)library 7 | a:(float)a 8 | dimensionType:(DimensionType)dimensionType; 9 | 10 | @end 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Support/main.m: -------------------------------------------------------------------------------- 1 | // 2 | // main.m 3 | // BrouhahaRealDemo 4 | // 5 | // Created by yanyuanchi on 2017/8/24. 6 | // Copyright © 2017年 yanyuanchi. All rights reserved. 
7 | // 8 | 9 | #import 10 | #import "AppDelegate.h" 11 | 12 | int main(int argc, char * argv[]) { 13 | @autoreleasepool { 14 | return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class])); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Utils/BrouUtils.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "BrouMacro.h" 4 | #include "BrouNeon.c" 5 | 6 | #define type uint16_t 7 | #define real half 8 | #define real_is_half 9 | #include "Generate/BrouMatrix.c" 10 | #undef real 11 | #undef real_is_half 12 | #undef type 13 | 14 | #define type float 15 | #define real float 16 | #define real_is_float 17 | #include "Generate/BrouMatrix.c" 18 | #undef real 19 | #undef real_is_float 20 | #undef type 21 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/Generate/BrouLinearLayer.h: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU_METAL) && defined(BROU_OBJECT) 2 | 3 | @interface BROU_OBJECT(LinearLayer) : BrouLayer 4 | 5 | - (instancetype)initWithDevice:(id)device 6 | library:(id)library 7 | a:(float)a 8 | b:(float)b 9 | dimensionType:(DimensionType)dimensionType; 10 | 11 | @end 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Basic/BrouShareBuffer.h: -------------------------------------------------------------------------------- 1 | #import 2 | #import 3 | 4 | /** 5 | this store a shared buffer of Metal 6 | */ 7 | @interface BrouShareBuffer : NSObject 8 | 9 | + (instancetype)defaultWithDevice:(id)device; 10 | + (instancetype)initWithLifeTime:(NSUInteger)time device:(id)device; 11 | 12 | - (NSNumber*)bindWithBytesCounts:(NSUInteger)bytesCount; 13 | - (id)getBindBufferById:(NSNumber*)bindId; 14 | 15 | @end 16 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Basic/BrouTensor.h: -------------------------------------------------------------------------------- 1 | #import 2 | 3 | /** 4 | this the delegate of BrouTensor and BrouTemporaryTensor 5 | */ 6 | 7 | @protocol BrouTensor 8 | 9 | - (NSUInteger)dimension; 10 | 11 | - (int)height; 12 | - (int)width; 13 | - (int)channel; 14 | 15 | - (int)dim0; 16 | - (int)dim1; 17 | - (int)dim2; 18 | 19 | - (int)dim:(int)dim; 20 | 21 | - (int)innermostDim; 22 | - (int)innermostDimX4; 23 | 24 | - (id)tensorBuffer; 25 | 26 | @end 27 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/BrouLayer.h: -------------------------------------------------------------------------------- 1 | #import 2 | #import 3 | 4 | #import "BrouStruct.h" 5 | #import "BrouTensor.h" 6 | 7 | @interface BrouLayer : NSObject 8 | 9 | @property(nonatomic, strong) NSString *name; 10 | 11 | - (instancetype)initWithName:(NSString *)name; 12 | 13 | - (void)computeCommandBuffer:(id)commandBuffer 14 | input:(id)input 15 | output:(id)output; 16 | @end 17 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Utils/BrouUtils.h: -------------------------------------------------------------------------------- 1 | #ifndef BrouUitls_h 2 | #define BrouUitls_h 3 | 4 | #include 5 | #include "BrouMacro.h" 6 | 7 | #define type uint16_t 8 | #define real half 9 | #define real_is_half 10 | #include 
"Generate/BrouGenerate.h" 11 | #undef real 12 | #undef real_is_half 13 | #undef type 14 | 15 | #define type float 16 | #define real float 17 | #define real_is_float 18 | #include "Generate/BrouGenerate.h" 19 | #undef real 20 | #undef real_is_float 21 | #undef type 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Utils/BrouConvertType.h: -------------------------------------------------------------------------------- 1 | #ifndef BrouConvertType_h 2 | #define BrouConvertType_h 3 | 4 | #include 5 | 6 | /** 7 | * convert float and half 8 | */ 9 | void convertFloat16ToFloat32(uint16_t *half, uint32_t *single, int length); 10 | void convertFloat32ToFloat16(uint32_t *single, uint16_t *half, int length); 11 | void convertFloat32ToFloat16Two(uint32_t *s1, uint16_t *h1, int l1, uint32_t *s2, uint16_t *h2, int l2); 12 | 13 | uint16_t convertFloat32ToFloat16OneNumber(uint32_t *single); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Basic/BrouUniqueTensor.h: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU_METAL) && defined(BROU_OBJECT) 2 | 3 | @interface BROU_OBJECT(UniqueTensor) : NSObject 4 | 5 | /** 6 | the init function 7 | */ 8 | + (instancetype)initWithLength:(int)length device:(id)device; 9 | + (instancetype)initWithHeight:(int)height width:(int)width device:(id)device; 10 | + (instancetype)initWithHeight:(int)height width:(int)width channel:(int)channel device:(id)device; 11 | 12 | @end 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/BrouLayer.m: -------------------------------------------------------------------------------- 1 | #import "BrouLayer.h" 2 | 3 | @implementation BrouLayer 4 | 5 | - (instancetype)initWithName:(NSString *)name { 6 | self = [super init]; 7 | 8 | if (self) { 9 | _name = name; 10 | } 11 | 12 | return self; 13 | } 14 | 15 | - (void)computeCommandBuffer:(id)commandBuffer 16 | input:(id)input 17 | output:(id)output { 18 | NSAssert(false, @"BrouLayer is base class, can't be inited directly"); 19 | } 20 | 21 | @end 22 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/Generate/BrouFullConnectLayer.h: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU_METAL) && defined(BROU_OBJECT) 2 | 3 | @interface BROU_OBJECT(FullConnectLayer) : BrouLayer 4 | 5 | - (instancetype)initWithDevice:(id)device 6 | library:(id)library 7 | floatWeights:(void*)floatWeight 8 | floatBias:(void*)floatBias 9 | intputChannel:(int)inputChannel 10 | outputChannel:(int)outputChannel; 11 | 12 | @end 13 | 14 | #endif 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/Generate/BrouAddBiasLayer.h: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU_METAL) && defined(BROU_OBJECT) 2 | 3 | @interface BROU_OBJECT(AddBiasLayer) : BrouLayer 4 | /** 5 | * add a bias to a tensor, the tensor can be 1D, 2D, 3D 6 | * the bias will be added to the inner dimension 7 | */ 8 | - (instancetype)initWithDevice:(id)device 9 | library:(id)library 10 | floatBias:(void*)floatBias 11 | 
biasLength:(int)biasLength 12 | dimensionType:(DimensionType)dimensionType; 13 | 14 | @end 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/Generate/BrouAddLayer.h: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU_METAL) && defined(BROU_OBJECT) 2 | 3 | @interface BROU_OBJECT(AddLayer) : BrouLayer 4 | 5 | - (instancetype)initWithDevice:(id)device 6 | library:(id)library 7 | dimensionType:(DimensionType)dimensionType; 8 | 9 | - (void)computeCommandBuffer:(id)commandBuffer 10 | input1:(id)input1 11 | input2:(id)input2 12 | output:(id)output; 13 | 14 | @end 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/Generate/BrouMaxPoolingLayer.h: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU_METAL) && defined(BROU_OBJECT) 2 | 3 | @interface BROU_OBJECT(MaxPoolingLayer) : BrouLayer 4 | 5 | - (instancetype)initWithDevice:(id)device 6 | library:(id)library 7 | kernelHeight:(int)kernelHeight 8 | kernelWidth:(int)kernelWidth 9 | padTop:(int)padTop 10 | padLeft:(int)padLeft 11 | strideY:(int)strideY 12 | strideX:(int)strideX; 13 | 14 | @end 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha.xcodeproj/xcuserdata/yanyuanchi.xcuserdatad/xcschemes/xcschememanagement.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | SchemeUserState 6 | 7 | Brouhaha.xcscheme 8 | 9 | orderHint 10 | 3 11 | 12 | 13 | SuppressBuildableAutocreation 14 | 15 | 3498F0A11F493713006DF7D1 16 | 17 | primary 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /BrouhahaMetal/BrouhahaMetal.xcodeproj/xcuserdata/yanyuanchi.xcuserdatad/xcschemes/xcschememanagement.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | SchemeUserState 6 | 7 | BrouhahaMetal.xcscheme 8 | 9 | orderHint 10 | 1 11 | 12 | 13 | SuppressBuildableAutocreation 14 | 15 | 3463FF631ED18A2D003D7F2C 16 | 17 | primary 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/Generate/BrouAveragePoolingLayer.h: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU_METAL) && defined(BROU_OBJECT) 2 | 3 | @interface BROU_OBJECT(AveragePoolingLayer) : BrouLayer 4 | 5 | - (instancetype)initWithDevice:(id)device 6 | library:(id)library 7 | kernelHeight:(int)kernelHeight 8 | kernelWidth:(int)kernelWidth 9 | padTop:(int)padTop 10 | padLeft:(int)padLeft 11 | strideY:(int)strideY 12 | strideX:(int)strideX 13 | withPad:(BOOL)withPad; 14 | 15 | @end 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo.xcodeproj/xcuserdata/yanyuanchi.xcuserdatad/xcschemes/xcschememanagement.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | SchemeUserState 6 | 7 | BrouhahaDemo.xcscheme 8 | 9 | orderHint 10 | 2 11 | 12 | BrouhahaRealDemo.xcscheme 13 | 14 | orderHint 15 | 4 16 | 17 | 18 | SuppressBuildableAutocreation 19 | 20 | 34F99F0D1F4ED04700261B45 21 | 22 | 
primary 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/Generate/BrouConvolutionMMLayer.h: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU_METAL) && defined(BROU_OBJECT) 2 | 3 | @interface BROU_OBJECT(ConvolutionMMLayer) : BrouLayer 4 | 5 | - (instancetype)initWithDevice:(id)device 6 | library:(id)library 7 | floatKernel:(void*)floatKernel 8 | floatBias:(void*)floatBias 9 | inputChannel:(int)inputChannel 10 | outputChannel:(int)outputChannel 11 | kernelHeight:(int)kernelHeight 12 | kernelWidth:(int)kernelWidth 13 | padTop:(int)padTop 14 | padLeft:(int)padLeft 15 | strideY:(int)strideY 16 | strideX:(int)strideX; 17 | 18 | @end 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/Generate/BrouTransposedConvolutionMMLayer.h: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU_METAL) && defined(BROU_OBJECT) 2 | 3 | @interface BROU_OBJECT(TransposedConvolutionMMLayer) : BrouLayer 4 | 5 | - (instancetype)initWithDevice:(id)device 6 | library:(id)library 7 | floatKernel:(void*)floatKernel 8 | floatBias:(void*)floatBias 9 | originInputChannel:(int)originInputChannel 10 | originOutputChannel:(int)originOutputChannel 11 | originKernelHeight:(int)originKernelHeight 12 | originKernelWidth:(int)originKernelWidth 13 | originPadTop:(int)originPadTop 14 | originPadLeft:(int)originPadLeft 15 | originStrideY:(int)originStrideY 16 | originStrideX:(int)originStrideX; 17 | 18 | @end 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/Generate/BrouBatchNormalizationLayer.h: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU_METAL) && defined(BROU_OBJECT) 2 | 3 | /** 4 | * in testing the mean and variance will be knowed 5 | * if don't the mean and variance will be calculate by 6 | * brouCalculateMeanAndVariance3D 7 | * 8 | * the alpha and beta is knowed 9 | * 10 | * output = alpha * (input - mean) / (sqrt(variance + epsilon)) + beta 11 | * 12 | * this layer is just for the CNN batch normalization for now!!! 
13 | */ 14 | @interface BROU_OBJECT(BatchNormalizationLayer) : BrouLayer 15 | 16 | - (instancetype)initWithDevice:(id)device 17 | library:(id)library 18 | epsilon:(float)epsilon 19 | floatAlpha:(void*)floatAlpha 20 | floatBeta:(void*)floatBeta 21 | channel:(int)channel; 22 | 23 | @end 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/BrouResidualLayer_half.h: -------------------------------------------------------------------------------- 1 | #import "Brouhaha.h" 2 | 3 | @interface BrouResidualLayer_half : BrouLayer 4 | /** 5 | * this residual layer ref:https://github.com/lengstrom/fast-style-transfer#video-stylization 6 | * the input diemsion is (height, width, 128) 7 | * the output dimension is (height, width, 128) 8 | * the kernel dimension is (128, 3, 3, 128) 9 | * the stride is (1, 1) 10 | * the pad is (1, 1) 11 | */ 12 | 13 | - (instancetype)initWithDevice:(id)device 14 | library:(id)library 15 | floatWeight1:(void*)floatWeight1 16 | floatWeight2:(void*)floatWeight2 17 | floatAlpha1:(void*)floatAlpha1 18 | floatBeta1:(void*)floatBeta1 19 | floatAlpha2:(void*)floatAlpha2 20 | floatBeta2:(void*)floatBeta2 21 | channel:(int)channel; 22 | 23 | @end 24 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/Generate/BrouDilatedConvolutionLayer.h: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU_METAL) && defined(BROU_OBJECT) 2 | 3 | @interface BROU_OBJECT(DilatedConvolutionLayer) : BrouLayer 4 | 5 | - (instancetype)initWithDevice:(id)device 6 | library:(id)library 7 | floatKernel:(void *)floatKernel 8 | floatBias:(void *)floatBias 9 | inputChannel:(int)inputChannel 10 | outputChannel:(int)outputChannel 11 | kernelHeight:(int)kernelHeight 12 | kernelWidth:(int)kernelWidth 13 | padTop:(int)padTop 14 | padLeft:(int)padLeft 15 | strideY:(int)strideY 16 | strideX:(int)strideX 17 | dilateY:(int)dilatedY 18 | dilatedX:(int)dilatedX; 19 | 20 | @end 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/BrouResidualLayer_float.h: -------------------------------------------------------------------------------- 1 | #import "Brouhaha.h" 2 | 3 | @interface BrouResidualLayer_float : BrouLayer 4 | 5 | /** 6 | * this residual layer ref:https://github.com/lengstrom/fast-style-transfer#video-stylization 7 | * the input diemsion is (height, width, 128) 8 | * the output dimension is (height, width, 128) 9 | * the kernel dimension is (128, 3, 3, 128) 10 | * the stride is (1, 1) 11 | * the pad is (1, 1) 12 | */ 13 | 14 | - (instancetype)initWithDevice:(id)device 15 | library:(id)library 16 | floatWeight1:(void*)floatWeight1 17 | floatWeight2:(void*)floatWeight2 18 | floatAlpha1:(void*)floatAlpha1 19 | floatBeta1:(void*)floatBeta1 20 | floatAlpha2:(void*)floatAlpha2 21 | floatBeta2:(void*)floatBeta2 22 | channel:(int)channel; 23 | 24 | @end 25 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/Generate/BrouDilatedConvolutionMMLayer.h: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU_METAL) && defined(BROU_OBJECT) 2 | 3 | @interface BROU_OBJECT(DilatedConvolutionMMLayer): BrouLayer 4 | 5 | - (instancetype)initWithDevice:(id)device 6 | library:(id)library 
7 | floatKernel:(void *)floatKernel 8 | floatBias:(void *)floatBias 9 | inputChannel:(int)inputChannel 10 | outputChannel:(int)outputChannel 11 | kernelHeight:(int)kernelHeight 12 | kernelWidth:(int)kernelWidth 13 | padTop:(int)padTop 14 | padLeft:(int)padLeft 15 | strideY:(int)strideY 16 | strideX:(int)strideX 17 | dilateY:(int)dilatedY 18 | dilatedX:(int)dilatedX; 19 | 20 | @end 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/Generate/BrouTransposedConvolutionLayer.h: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU_METAL) && defined(BROU_OBJECT) 2 | 3 | @interface BROU_OBJECT(TransposedConvolutionLayer) : BrouLayer 4 | 5 | /** 6 | * this init function can init float32 and float16 layer 7 | */ 8 | - (instancetype)initWithDevice:(id)device 9 | library:(id)library 10 | floatKernel:(void*)floatKernel 11 | floatBias:(void*)floatBias 12 | originInputChannel:(int)originInputChannel 13 | originOutputChannel:(int)originOutputChannel 14 | originKernelHeight:(int)originKernelHeight 15 | originKernelWidth:(int)originKernelWidth 16 | originPadTop:(int)originPadTop 17 | originPadLeft:(int)originPadLeft 18 | originStrideY:(int)originStrideY 19 | originStrideX:(int)originStrideX; 20 | 21 | @end 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/Generate/BrouConvolutionLayer.h: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU_METAL) && defined(BROU_OBJECT) 2 | 3 | @interface BROU_OBJECT(ConvolutionLayer) : BrouLayer 4 | 5 | /** 6 | * this init function can init float32 and float16 layer 7 | */ 8 | - (instancetype)initWithDevice:(id)device 9 | library:(id)library 10 | floatKernel:(void*)floatKernel 11 | floatBias:(void*)floatBias 12 | inputChannel:(int)inputChannel 13 | outputChannel:(int)outputChannel 14 | kernelHeight:(int)kernelHeight 15 | kernelWidth:(int)kernelWidth 16 | padTop:(int)padTop 17 | padLeft:(int)padLeft 18 | strideY:(int)strideY 19 | strideX:(int)strideX; 20 | @end 21 | 22 | #endif 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Basic/BrouTemporaryBuffer1.h: -------------------------------------------------------------------------------- 1 | //#import 2 | //#import 3 | // 4 | //@interface BrouTemporaryBuffer : NSObject { 5 | //} 6 | // 7 | //@property(nonatomic, strong) id buffer; 8 | //@property(nonatomic, assign) int length; 9 | // 10 | //- (instancetype)init; 11 | // 12 | //- (void)configWithFloatLength:(int)length; 13 | //- (void)configWithFloatHeight:(int)height width:(int)width; 14 | //- (void)configWithFloatHeight:(int)height width:(int)width channel:(int)channel; 15 | // 16 | //- (void)configWithHalfLength:(int)length; 17 | //- (void)configWithHalfHeight:(int)height width:(int)width; 18 | //- (void)configWithHalfHeight:(int)height width:(int)width channel:(int)channel; 19 | // 20 | ///** 21 | // * for the ConvlutionMM, TransposedConvolutionMM, DilatedConvolutionMM 22 | // * the output (height, width, channel) must be subject to: 0 == (height * width) % 4 and 0 == channel % 4 23 | // */ 24 | //- (void)configConvolutionMMWithFloatHeight:(int)height width:(int)width channel:(int)channel; 25 | //- 
(void)configConvolutionMMWithHalfHeight:(int)height width:(int)width channel:(int)channel; 26 | // 27 | // 28 | //- (void)configWithDevice:(id)device; 29 | // 30 | //@end 31 | 32 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Basic/BrouMacro.h: -------------------------------------------------------------------------------- 1 | /** 2 | * ref: Torch 3 | */ 4 | 5 | #ifndef BrouMacro_h 6 | #define BrouMacro_h 7 | 8 | #define BROU_CONCAT_2_EXPAND(a, b) a ## b 9 | #define BROU_CONCAT_2(a, b) BROU_CONCAT_2_EXPAND(a, b) 10 | 11 | #define BROU_CONCAT_3_EXPAND(a, b, c) a ## b ## c 12 | #define BROU_CONCAT_3(a, b, c) BROU_CONCAT_3_EXPAND(a, b, c) 13 | 14 | #define BROU_CONCAT_4_EXPAND(a, b, c, d) a ## b ## c ## d 15 | #define BROU_CONCAT_4(a, b, c, d) BROU_CONCAT_4_EXPAND(a, b, c, d) 16 | 17 | #define BROU_CONCAT_5_EXPAND(a, b, c, d, e) a ## b ## c ## d ## e 18 | #define BROU_CONCAT_5(a, b, c, d, e) BROU_CONCAT_5_EXPAND(a, b, c, d, e) 19 | 20 | #define BROU_STR_EXPAND(name) #name 21 | #define BROU_STR(name) BROU_STR_EXPAND(name) 22 | 23 | #define BROU(name) BROU_CONCAT_4(brou, name, _, real) 24 | 25 | #define BROU_OBJECT(name) BROU_CONCAT_4(Brou, name, _, real) 26 | #define BROU_OBJECT_NAME(name) BROU_STR(BROU_CONCAT_4(Brou, name, _, real)) 27 | 28 | #define BROU_METAL(name) BROU_STR(BROU(name)) 29 | 30 | #define BROU_CONVERT_OBJECT(from, to) BROU_CONCAT_5(BrouConvertFrom, from, 2, to, Layer) 31 | #define BROU_CONVERT_OBJECT_NAME(from, to) BROU_STR(BROU_CONCAT_5(BrouConvertFrom, from, 2, to, Layer)) 32 | #define BROU_CONVERT_METAL(from, to, dim) BROU_STR(BROU_CONCAT_5(brouConvertFrom, from, 2, to, dim)) 33 | 34 | #define BROU_MAX(a, b) (((a)>(b)) ? (a):(b)) 35 | #define BROU_MIN(a, b) (((a)>(b)) ? (b):(a)) 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2017, 惊奇漫画/amazingyyc (amazingyyc@outlook.com) 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
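BrouMacro.h above is the code-generation core of the library: each "Generate" header is compiled once per precision with type/real defined, and BROU, BROU_OBJECT and BROU_METAL paste the precision suffix onto C function names, Objective-C class names and Metal kernel-name strings. The sketch below only makes those expansions explicit, mirroring the define/include/undef pattern that BrouUtils.h in this dump already uses; the expanded names are read directly off the macros, not from extra documentation.

/* How the per-precision functions and classes get stamped out (mirrors BrouUtils.h). */
#include "BrouMacro.h"

/* half-precision pass */
#define type uint16_t
#define real half
#define real_is_half
/* BROU(TransposeMatrix)  expands to  brouTransposeMatrix_half    (C function)         */
/* BROU_OBJECT(ReLuLayer) expands to  BrouReLuLayer_half          (Objective-C class)  */
/* BROU_METAL(ReLu)       expands to  "brouReLu_half"             (kernel-name string  */
/*                        used to fetch the MTLFunction from the Metal library)        */
#include "Generate/BrouGenerate.h"
#undef real
#undef real_is_half
#undef type

/* float pass: the same generic header now yields brouTransposeMatrix_float,
   BrouReLuLayer_float, "brouReLu_float", ... */
#define type float
#define real float
#define real_is_float
#include "Generate/BrouGenerate.h"
#undef real
#undef real_is_float
#undef type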
26 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Basic/BrouTemporaryTensor.h: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU_METAL) && defined(BROU_OBJECT) 2 | 3 | @interface BROU_OBJECT(TemporaryTensor) : NSObject 4 | 5 | /** 6 | the init function 7 | */ 8 | + (instancetype)initWithLength:(int)length temporaryBufer:(BrouShareBuffer*)temporaryBuffer; 9 | + (instancetype)initWithHeight:(int)height width:(int)width temporaryBufer:(BrouShareBuffer*)temporaryBuffer; 10 | + (instancetype)initWithHeight:(int)height width:(int)width channel:(int)channel temporaryBufer:(BrouShareBuffer*)temporaryBuffer; 11 | 12 | /** 13 | init a TemporaryTensor with another TemporaryTensor 14 | this Tensor and another TemporaryTensor will point to the same MTLBuffer 15 | and the bytesCount must be less than another TemporaryTensor's bytesCount 16 | */ 17 | + (instancetype)initWithLength:(int)length 18 | anotherTemporaryTensor:(BROU_OBJECT(TemporaryTensor)*)anotherTensor; 19 | 20 | + (instancetype)initWithHeight:(int)height 21 | width:(int)width 22 | anotherTemporaryTensor:(BROU_OBJECT(TemporaryTensor)*)anotherTensor; 23 | 24 | + (instancetype)initWithHeight:(int)height 25 | width:(int)width 26 | channel:(int)channel 27 | anotherTemporaryTensor:(BROU_OBJECT(TemporaryTensor)*)anotherTensor; 28 | 29 | - (NSUInteger)bytesCount; 30 | - (NSNumber*)temporaryBufferId; 31 | - (BrouShareBuffer*)temporaryBuffer; 32 | 33 | @end 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /BrouhahaMetal/BrouhahaMetal/BrouStruct.metal: -------------------------------------------------------------------------------- 1 | /** 2 | * define the structs 3 | * 4 | * Created by yanyuanchi on 2017/7/23. 5 | * Copyright © 2017年 yanyuanchi. All rights reserved. 
6 | */ 7 | 8 | #include 9 | using namespace metal; 10 | 11 | /** 12 | * a tensor dimension struct 13 | * (dim0 dim1 dim2) represent a 3d tensor 14 | */ 15 | typedef struct { 16 | int dim0; 17 | int dim1; 18 | int dim2; 19 | int dim3; 20 | } TensorShape; 21 | 22 | /** 23 | * uae a struct to store the params of a convolution 24 | */ 25 | typedef struct { 26 | /**the kernel size*/ 27 | int kernelHeight; 28 | int kernelWidth; 29 | 30 | /**the pad of input*/ 31 | int padTop; 32 | int padLeft; 33 | 34 | /**the stride of kernel, for transposed convolution always be 1*/ 35 | int strideY; 36 | int strideX; 37 | 38 | /**the 0 units inserted to input of transposed convolution*/ 39 | int insertY; 40 | int insertX; 41 | 42 | /**for dilated convolution*/ 43 | int dilatedY; 44 | int dilatedX; 45 | 46 | /**if the convoluton has bias*/ 47 | bool haveBias; 48 | } ConvolutionShape; 49 | 50 | /** 51 | * a struct to store the BatchNormalization params 52 | */ 53 | typedef struct { 54 | /**the epsilon of BN*/ 55 | float epsilon; 56 | 57 | /**every thread deal with (perThreadWidth, perThreadHeight) input*/ 58 | int perThreadWidth; 59 | int perThreadHeight; 60 | } BatchNormalizationShape; 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Support/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | APPL 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleVersion 20 | 1 21 | LSRequiresIPhoneOS 22 | 23 | UILaunchStoryboardName 24 | LaunchScreen 25 | UIMainStoryboardFile 26 | Main 27 | UIRequiredDeviceCapabilities 28 | 29 | armv7 30 | 31 | UISupportedInterfaceOrientations 32 | 33 | UIInterfaceOrientationPortrait 34 | UIInterfaceOrientationLandscapeLeft 35 | UIInterfaceOrientationLandscapeRight 36 | 37 | UISupportedInterfaceOrientations~ipad 38 | 39 | UIInterfaceOrientationPortrait 40 | UIInterfaceOrientationPortraitUpsideDown 41 | UIInterfaceOrientationLandscapeLeft 42 | UIInterfaceOrientationLandscapeRight 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Basic/BrouStruct.h: -------------------------------------------------------------------------------- 1 | #ifndef BrouStruct_h 2 | #define BrouStruct_h 3 | 4 | /** 5 | * the data's dimension type 6 | */ 7 | typedef NS_ENUM(NSInteger, DimensionType) { 8 | Dimension1D = 1, 9 | Dimension2D = 2, 10 | Dimension3D = 3, 11 | Dimension4D = 4 12 | }; 13 | 14 | /** 15 | * store the shpe of data 16 | * (dim0,dim1, dim2) represent a 3d dimension 17 | */ 18 | typedef struct _TensorShape { 19 | int32_t dim0; 20 | int32_t dim1; 21 | int32_t dim2; 22 | int32_t dim3; 23 | } TensorShape; 24 | 25 | /** 26 | * uae a struct to store the params of a convolution 27 | */ 28 | typedef struct _ConvolutionShape { 29 | /**the kernel size*/ 30 | int32_t kernelHeight; 31 | int32_t kernelWidth; 32 | 33 | /**the pad of input*/ 34 | int32_t padTop; 35 | int32_t padLeft; 36 | 37 | /**the stride of kernel, for transposed convolution always be 1*/ 38 | int32_t strideY; 39 | int32_t strideX; 40 | 41 | /**the 0 units inserted to input of transposed convolution*/ 42 | int32_t insertY; 43 | int32_t 
insertX; 44 | 45 | /**for dilated convolution*/ 46 | int32_t dilatedY; 47 | int32_t dilatedX; 48 | 49 | /**if the convoluton has bias, 0 false, !0 true*/ 50 | bool haveBias; 51 | } ConvolutionShape; 52 | 53 | /** 54 | * a struct to store the BatchNormalization params 55 | */ 56 | typedef struct _BatchNormalizationShape { 57 | /**the epsilon of BN*/ 58 | float epsilon; 59 | 60 | /**every thread deal with (perThreadWidth, perThreadHeight) input*/ 61 | int32_t perThreadWidth; 62 | int32_t perThreadHeight; 63 | } BatchNormalizationShape; 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /Readme_zh.md: -------------------------------------------------------------------------------- 1 | # Brouhaha:基于iOS Metal的深度学习运算库 2 | 3 | Brouhah是一个基于iOS Metal的深度学习运算库。这个库可以方便的调用iOS Metal Shader执行深度学习算法。 4 | 5 | ## 地址 6 | github:https://github.com/amazingyyc/Brouhaha 7 |
8 | 码云:https://gitee.com/JingQiManHua/Brouhaha 9 | 10 | ## 更新 11 | ### 最新 12 | 1. 精简API,调用更方便。 13 | 2. 添加一个临时MTLBuffer的类,在运行期间更加节省内存。 14 | 15 | ### 2017 16 | 1. 添加Float32支持,现在所有的“层”均支持Float16和Float32。 17 | 2. 添加基于Float32的风格转换Demo。 18 | 3. 修改API,更灵活的使用。 19 | 20 | ## 介绍 21 | Brouhaha只包含深度学习的前向运算,并不能用于训练一个深度学习模型。在使用Brouhaha之前必须有一个使用其他的训练库(比如:Caffe,Tensorflow,Torch)训练好的深度学习model。Brouhaha包含常用的卷积(包括转置卷积,Dilated卷积),池化,激活,全联接,BatchNormalize和方便图片转换的转换层。主要包括以下三个部分: 22 | 1. **BrouhahaMetal:** 使用Metal Shader编写的核心运算函数,用于加速计算。 23 | 2. **Brouhaha:** 包含常用的神经网络层的抽象,使用Objective-c开发,为了加速引入了一些汇编。 24 | 3. **BrouhahaDemo:** 包含3个Demo,演示怎么使用这个库。LeNet是一个使用卷积神经网络识别图片中的数字的模型。ArtTransform类似于Prisma,用于图片风格的转换,包含基于Float16和Float32的实现。 25 | 26 | ## Demo 27 | **Build:** 在运行Brouhaha-Demo之前需要首先编译BrouhahaMetal,然后将生成的文件BrouhahaMetal.metallib拷贝到Brouhaha-Demo的bundle中。 28 |
29 | **LeNet:** 这个Demo是使用神经网络识别图片中的数字。具体的算法参考:http://yann.lecun.com/exdb/lenet/。模型文件来源于网路,抱歉忘记了出处。 30 |
31 | ![](Images/lenet.gif) 32 | 33 | **ArtTransform:** 这个Demo使用卷积神经网络进行图片风格的转换。算法参考:https://arxiv.org/abs/1603.08155,模型文件来源于:https://github.com/lengstrom/fast-style-transfer#video-stylization。包含了两个实现一个基于Float32一个基于Float16。 34 |
35 | 36 | ***基于Float32***
37 | ![](Images/art1.gif) 38 |
39 | 40 | ***基于Float16***
41 | ![](Images/art2.gif) 42 | 43 | 44 | ## Brouhaha的优势 45 | 1. 使用GPU代替CPU速度更快。 46 | 2. 每个层均包含两种实现Float16和Float32,便于选择。Float16可以用于提速,Float32用于提高精度。 47 | 3. 包含最先的“层”实现比如:TransposedConvolution, DilatedConvolution 48 | 4. 不想Apple的Core ML和其他的第三方的库需要iOS10+的系统,Brouhaha只需要iOS8+。 49 | 50 | ## 未来工作 51 | 1. 支持RNN。 -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Support/Base.lproj/Main.storyboard: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Support/Base.lproj/LaunchScreen.storyboard: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Support/Assets.xcassets/AppIcon.appiconset/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "images" : [ 3 | { 4 | "idiom" : "iphone", 5 | "size" : "20x20", 6 | "scale" : "2x" 7 | }, 8 | { 9 | "idiom" : "iphone", 10 | "size" : "20x20", 11 | "scale" : "3x" 12 | }, 13 | { 14 | "idiom" : "iphone", 15 | "size" : "29x29", 16 | "scale" : "2x" 17 | }, 18 | { 19 | "idiom" : "iphone", 20 | "size" : "29x29", 21 | "scale" : "3x" 22 | }, 23 | { 24 | "idiom" : "iphone", 25 | "size" : "40x40", 26 | "scale" : "2x" 27 | }, 28 | { 29 | "idiom" : "iphone", 30 | "size" : "40x40", 31 | "scale" : "3x" 32 | }, 33 | { 34 | "idiom" : "iphone", 35 | "size" : "60x60", 36 | "scale" : "2x" 37 | }, 38 | { 39 | "idiom" : "iphone", 40 | "size" : "60x60", 41 | "scale" : "3x" 42 | }, 43 | { 44 | "idiom" : "ipad", 45 | "size" : "20x20", 46 | "scale" : "1x" 47 | }, 48 | { 49 | "idiom" : "ipad", 50 | "size" : "20x20", 51 | "scale" : "2x" 52 | }, 53 | { 54 | "idiom" : "ipad", 55 | "size" : "29x29", 56 | "scale" : "1x" 57 | }, 58 | { 59 | "idiom" : "ipad", 60 | "size" : "29x29", 61 | "scale" : "2x" 62 | }, 63 | { 64 | "idiom" : "ipad", 65 | "size" : "40x40", 66 | "scale" : "1x" 67 | }, 68 | { 69 | "idiom" : "ipad", 70 | "size" : "40x40", 71 | "scale" : "2x" 72 | }, 73 | { 74 | "idiom" : "ipad", 75 | "size" : "76x76", 76 | "scale" : "1x" 77 | }, 78 | { 79 | "idiom" : "ipad", 80 | "size" : "76x76", 81 | "scale" : "2x" 82 | }, 83 | { 84 | "idiom" : "ipad", 85 | "size" : "83.5x83.5", 86 | "scale" : "2x" 87 | }, 88 | { 89 | "idiom" : "ios-marketing", 90 | "size" : "1024x1024", 91 | "scale" : "1x" 92 | } 93 | ], 94 | "info" : { 95 | "version" : 1, 96 | "author" : "xcode" 97 | } 98 | } -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/LeNet/PaintView.m: -------------------------------------------------------------------------------- 1 | #import "PaintView.h" 2 | 3 | static const CGFloat RADIUS = 40; 4 | 5 | @interface PaintView() 6 | 7 | @property(nonatomic, strong) NSMutableArray *points; 8 | 9 | @end 10 | 11 | @implementation PaintView 12 | 13 | - (instancetype)initWithFrame:(CGRect)frame { 14 | self = [super initWithFrame:frame]; 15 | 16 | if (self) { 17 | self.backgroundColor = [UIColor whiteColor]; 18 | _points = [[NSMutableArray alloc] init]; 19 | } 20 | 21 | 
return self; 22 | } 23 | 24 | - (void)touchesBegan:(NSSet *)touches withEvent:(UIEvent *)event { 25 | NSMutableArray *path = [[NSMutableArray alloc] init]; 26 | 27 | CGPoint p = [[touches anyObject]locationInView:self]; 28 | [path addObject:[NSValue valueWithCGPoint:p]]; 29 | 30 | [_points addObject:path]; 31 | 32 | [self setNeedsDisplay]; 33 | } 34 | 35 | - (void)touchesMoved:(NSSet *)touches withEvent:(nullable UIEvent *)event { 36 | NSMutableArray *path = [_points lastObject]; 37 | 38 | CGPoint p = [[touches anyObject]locationInView:self]; 39 | [path addObject:[NSValue valueWithCGPoint:p]]; 40 | 41 | [self setNeedsDisplay]; 42 | } 43 | 44 | - (void)touchesEnded:(NSSet *)touches withEvent:(nullable UIEvent *)event { 45 | } 46 | 47 | - (void)clear { 48 | [_points removeAllObjects]; 49 | 50 | [self setNeedsDisplay]; 51 | } 52 | 53 | - (void)drawRect:(CGRect)rect { 54 | [super drawRect:rect]; 55 | 56 | CGContextRef ctx = UIGraphicsGetCurrentContext(); 57 | 58 | [[UIColor blackColor] set]; 59 | CGContextSetLineCap(ctx, kCGLineCapRound); 60 | CGContextSetLineWidth(ctx, RADIUS); 61 | 62 | for (NSMutableArray *path in _points) { 63 | CGMutablePathRef pathRef = CGPathCreateMutable(); 64 | 65 | for (int i = 0; i < path.count; ++i) { 66 | CGPoint p = [path[i] CGPointValue]; 67 | 68 | if (0 == i) { 69 | CGPathMoveToPoint(pathRef, &CGAffineTransformIdentity, p.x, p.y); 70 | } else { 71 | CGPathAddLineToPoint(pathRef, &CGAffineTransformIdentity, p.x, p.y); 72 | } 73 | } 74 | 75 | CGContextAddPath(ctx, pathRef); 76 | CGContextStrokePath(ctx); 77 | } 78 | } 79 | 80 | @end 81 | -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Support/AppDelegate.m: -------------------------------------------------------------------------------- 1 | #import "AppDelegate.h" 2 | #import "ViewController.h" 3 | 4 | @interface AppDelegate () 5 | 6 | @end 7 | 8 | @implementation AppDelegate 9 | 10 | 11 | - (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions { 12 | ViewController *ctrl = [[ViewController alloc] init]; 13 | 14 | self.window = [[UIWindow alloc] initWithFrame:[[UIScreen mainScreen] bounds]]; 15 | self.window.rootViewController = ctrl; 16 | self.window.backgroundColor = [UIColor clearColor]; 17 | [self.window makeKeyAndVisible]; 18 | 19 | return YES; 20 | } 21 | 22 | 23 | - (void)applicationWillResignActive:(UIApplication *)application { 24 | // Sent when the application is about to move from active to inactive state. This can occur for certain types of temporary interruptions (such as an incoming phone call or SMS message) or when the user quits the application and it begins the transition to the background state. 25 | // Use this method to pause ongoing tasks, disable timers, and invalidate graphics rendering callbacks. Games should use this method to pause the game. 26 | } 27 | 28 | 29 | - (void)applicationDidEnterBackground:(UIApplication *)application { 30 | // Use this method to release shared resources, save user data, invalidate timers, and store enough application state information to restore your application to its current state in case it is terminated later. 31 | // If your application supports background execution, this method is called instead of applicationWillTerminate: when the user quits. 
32 | } 33 | 34 | 35 | - (void)applicationWillEnterForeground:(UIApplication *)application { 36 | // Called as part of the transition from the background to the active state; here you can undo many of the changes made on entering the background. 37 | } 38 | 39 | 40 | - (void)applicationDidBecomeActive:(UIApplication *)application { 41 | // Restart any tasks that were paused (or not yet started) while the application was inactive. If the application was previously in the background, optionally refresh the user interface. 42 | } 43 | 44 | 45 | - (void)applicationWillTerminate:(UIApplication *)application { 46 | // Called when the application is about to terminate. Save data if appropriate. See also applicationDidEnterBackground:. 47 | } 48 | 49 | 50 | @end 51 | -------------------------------------------------------------------------------- /BrouhahaMetal/BrouhahaMetal/BrouMaxPooling.metal: -------------------------------------------------------------------------------- 1 | /** 2 | * Brouhaha 3 | * convolution.metal 4 | * Created by yanyuanchi on 2017/5/15. 5 | * Copyright © 2017年 yanyuanchi. All rights reserved. 6 | * 7 | * the max pooling operate 8 | */ 9 | #if defined(real) && defined(real4) && defined(BROU) 10 | 11 | /** 12 | * every thread deal with 4 output 13 | * the input dimesnion is (inputHeight, inputWidth, channelX4) 14 | * the ouput dimesnion is (outputHeight, outputWidth, channelX4) 15 | */ 16 | kernel void BROU(MaxPooling)(device real *input [[buffer(0)]], 17 | device real *output [[buffer(1)]], 18 | constant TensorShape& inputShape [[buffer(2)]], 19 | constant TensorShape& outputShape [[buffer(3)]], 20 | constant ConvolutionShape& convolutionShape [[buffer(4)]], 21 | ushort3 grid [[thread_position_in_grid]]) { 22 | int outputHeight = outputShape.dim0; 23 | int outputWidth = outputShape.dim1; 24 | int outputChannel = outputShape.dim2; 25 | 26 | int x = grid.x; 27 | int y = grid.y; 28 | int z = grid.z << 2; 29 | 30 | if (x >= outputWidth || y >= outputHeight || z >= outputChannel) { 31 | return; 32 | } 33 | 34 | int inputHeight = inputShape.dim0; 35 | int inputWidth = inputShape.dim1; 36 | int inputChannel = inputShape.dim2; 37 | 38 | int inputTop = -convolutionShape.padTop + convolutionShape.strideY * y; 39 | int inputLeft = -convolutionShape.padLeft + convolutionShape.strideX * x; 40 | 41 | int inputBottom = inputTop + convolutionShape.kernelHeight; 42 | int inputRight = inputLeft + convolutionShape.kernelWidth; 43 | 44 | inputTop = max(0, inputTop); 45 | inputLeft = max(0, inputLeft); 46 | 47 | inputBottom = min(inputHeight, inputBottom); 48 | inputRight = min(inputWidth, inputRight); 49 | 50 | device real4 *inputV = (device real4*)(input + (inputTop * inputWidth + inputLeft) * inputChannel + z); 51 | 52 | real4 out = inputV[0]; 53 | 54 | for (int inY = inputTop; inY < inputBottom; ++inY) { 55 | for (int inX = inputLeft; inX < inputRight; ++inX) { 56 | inputV = (device real4*)(input + (inY * inputWidth + inX) * inputChannel + z); 57 | 58 | out = max(out, inputV[0]); 59 | } 60 | } 61 | 62 | device real4 *outputV = (device real4*)(output + (y * outputWidth + x) * outputChannel + z); 63 | 64 | outputV[0] = out; 65 | } 66 | 67 | #endif 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /BrouhahaMetal/BrouhahaMetal/BrouReLu.metal: -------------------------------------------------------------------------------- 1 | #if defined(real) && defined(real4) && defined(BROU) 2 | 3 | kernel void BROU(ReLu1D)(device real 
*input [[buffer(0)]], 4 | device real *output [[buffer(1)]], 5 | constant TensorShape& shape [[buffer(2)]], 6 | ushort grid [[thread_position_in_grid]]) { 7 | int index = grid << 2; 8 | 9 | if (index >= shape.dim0) { 10 | return; 11 | } 12 | 13 | device real4 *inputV = (device real4*)(input + index); 14 | device real4 *outputV = (device real4*)(output + index); 15 | 16 | outputV[0] = max(0, inputV[0]); 17 | } 18 | 19 | kernel void BROU(ReLu2D)(device real *input [[buffer(0)]], 20 | device real *output [[buffer(1)]], 21 | constant TensorShape& shape [[buffer(2)]], 22 | ushort2 grid [[thread_position_in_grid]]) { 23 | int height = shape.dim0; 24 | int width = shape.dim1; 25 | 26 | int x = grid.x << 2; 27 | int y = grid.y << 2; 28 | 29 | if (y >= height || x >= width) { 30 | return; 31 | } 32 | 33 | int maxJ = min(y + 4, height); 34 | 35 | for (int j = y; j < maxJ; ++j) { 36 | int offset = j * width + x; 37 | 38 | device real4 *inputV = (device real4*)(input + offset); 39 | device real4 *outputV = (device real4*)(output + offset); 40 | 41 | outputV[0] = max(0, inputV[0]); 42 | } 43 | } 44 | 45 | kernel void BROU(ReLu3D)(device real *input [[buffer(0)]], 46 | device real *output [[buffer(1)]], 47 | constant TensorShape& shape [[buffer(2)]], 48 | ushort3 grid [[thread_position_in_grid]]) { 49 | int height = shape.dim0; 50 | int width = shape.dim1; 51 | int channel = shape.dim2; 52 | 53 | int y = grid.y << 2; 54 | int x = grid.x << 2; 55 | int z = grid.z << 2; 56 | 57 | if (y >= height || x >= width || z >= channel) { 58 | return; 59 | } 60 | 61 | int maxJ = min(y + 4, height); 62 | int maxI = min(x + 4, width); 63 | 64 | for (int j = y; j < maxJ; ++j) { 65 | for (int i = x; i < maxI; ++i) { 66 | int offset = (j * width + i) * channel + z; 67 | 68 | device real4 *inputV = (device real4*)(input + offset); 69 | device real4 *outputV = (device real4*)(output + offset); 70 | 71 | outputV[0] = max(0, inputV[0]); 72 | } 73 | } 74 | } 75 | 76 | #endif 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/ViewController.m: -------------------------------------------------------------------------------- 1 | #import "ViewController.h" 2 | #import "LeNetViewController.h" 3 | #import "ArtTransformViewController.h" 4 | #import "ArtTransformHalfViewController.h" 5 | 6 | @interface ViewController () 7 | 8 | @property (nonatomic, strong) UITableView *tableView; 9 | 10 | @end 11 | 12 | @implementation ViewController 13 | 14 | - (void)viewDidLoad { 15 | [super viewDidLoad]; 16 | 17 | CGRect bounds = self.view.bounds; 18 | 19 | _tableView = [[UITableView alloc] initWithFrame:CGRectMake(0, 20, bounds.size.width, bounds.size.height - 20) 20 | style:UITableViewStylePlain]; 21 | _tableView.dataSource = self; 22 | _tableView.delegate = self; 23 | 24 | [_tableView registerClass:[UITableViewCell class] forCellReuseIdentifier:@"cellId"]; 25 | 26 | [self.view addSubview:_tableView]; 27 | [self.view setBackgroundColor:[UIColor whiteColor]]; 28 | } 29 | 30 | - (NSInteger)tableView:(UITableView *)tableView numberOfRowsInSection:(NSInteger)section { 31 | if (0 == section) { 32 | return 3; 33 | } 34 | 35 | return 0; 36 | } 37 | 38 | - (UITableViewCell *)tableView:(UITableView *)tableView cellForRowAtIndexPath:(NSIndexPath *)indexPath { 39 | UITableViewCell *cell = [_tableView dequeueReusableCellWithIdentifier:@"cellId"]; 40 | 41 | if (0 == indexPath.row) { 42 | cell.textLabel.text = @"LeNet Float32"; 43 | } else if (1 == 
indexPath.row) { 44 | cell.textLabel.text = @"Artistic Style Transform Float32"; 45 | } else if (2 == indexPath.row) { 46 | cell.textLabel.text = @"Artistic Style Transform Float16"; 47 | } 48 | 49 | return cell; 50 | } 51 | 52 | - (CGFloat)tableView:(UITableView *)tableView heightForRowAtIndexPath:(NSIndexPath *)indexPath { 53 | return 100; 54 | } 55 | 56 | - (void)tableView:(UITableView *)tableView didSelectRowAtIndexPath:(NSIndexPath *)indexPath { 57 | [tableView deselectRowAtIndexPath:indexPath animated:YES]; 58 | 59 | if (0 == indexPath.row) { 60 | LeNetViewController *ctrl = [[LeNetViewController alloc] init]; 61 | 62 | [self presentViewController:ctrl animated:YES completion:nil]; 63 | } else if (1 == indexPath.row) { 64 | ArtTransformViewController *ctrl = [[ArtTransformViewController alloc] init]; 65 | 66 | [self presentViewController:ctrl animated:YES completion:nil]; 67 | } else if (2 == indexPath.row) { 68 | ArtTransformHalfViewController *ctrl = [[ArtTransformHalfViewController alloc] init]; 69 | 70 | [self presentViewController:ctrl animated:YES completion:nil]; 71 | } 72 | } 73 | 74 | - (void)didReceiveMemoryWarning { 75 | [super didReceiveMemoryWarning]; 76 | } 77 | 78 | @end 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /BrouhahaMetal/BrouhahaMetal/BrouConvert.metal: -------------------------------------------------------------------------------- 1 | #if defined(from) && defined(to) && defined(from4) && defined(to4) && defined(BROU_CONVERT) 2 | 3 | kernel void BROU_CONVERT(from, to, 1D)(device from *input [[buffer(0)]], 4 | device to *output [[buffer(1)]], 5 | constant TensorShape& shape [[buffer(2)]], 6 | ushort grid [[thread_position_in_grid]]) { 7 | int index = grid << 2; 8 | 9 | if (index >= shape.dim0) { 10 | return; 11 | } 12 | 13 | device from4 *inputV = (device from4*)(input + index); 14 | device to4 *outputV = (device to4*)(output + index); 15 | 16 | outputV[0] = static_cast(inputV[0]); 17 | } 18 | 19 | kernel void BROU_CONVERT(from, to, 2D)(device from *input [[buffer(0)]], 20 | device to *output [[buffer(1)]], 21 | constant TensorShape& shape [[buffer(2)]], 22 | ushort2 grid [[thread_position_in_grid]]) { 23 | int height = shape.dim0; 24 | int width = shape.dim1; 25 | 26 | int y = grid.y << 2; 27 | int x = grid.x << 2; 28 | 29 | if (y >= height || x >= width) { 30 | return; 31 | } 32 | 33 | int maxY = min(y + 4, height); 34 | 35 | for (int j = y; j < maxY; ++j) { 36 | int offset = j * width + x; 37 | 38 | device from4 *inputV = (device from4*)(input + offset); 39 | device to4 *outputV = (device to4*)(output + offset); 40 | 41 | outputV[0] = static_cast(inputV[0]); 42 | } 43 | } 44 | 45 | kernel void BROU_CONVERT(from, to, 3D)(device from *input [[buffer(0)]], 46 | device to *output [[buffer(1)]], 47 | constant TensorShape& shape [[buffer(2)]], 48 | ushort3 grid [[thread_position_in_grid]]) { 49 | int height = shape.dim0; 50 | int width = shape.dim1; 51 | int channel = shape.dim2; 52 | 53 | int y = grid.y << 2; 54 | int x = grid.x << 2; 55 | int z = grid.z << 2; 56 | 57 | if (y >= height || x >= width || z >= channel) { 58 | return; 59 | } 60 | 61 | int maxY = min(y + 4, height); 62 | int maxX = min(x + 4, width); 63 | 64 | for (int j = y; j < maxY; ++j) { 65 | for (int i = x; i < maxX; ++i) { 66 | int offset = (j * width + i) * channel + z; 67 | 68 | device from4 *inputV = (device from4*)(input + offset); 69 | device to4 *outputV = (device to4*)(output + offset); 70 | 
71 | outputV[0] = static_cast(inputV[0]); 72 | } 73 | } 74 | } 75 | 76 | #endif 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Utils/Generate/BrouMatrix.c: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU) 2 | 3 | void BROU(MatrixTranspose4X4)(type *src, size_t srcRowBytes, type *dst, size_t dstRowBytes) { 4 | type *realDst = dst; 5 | 6 | dst[0] = src[0]; dst = ((void*)dst) + dstRowBytes; 7 | dst[0] = src[1]; dst = ((void*)dst) + dstRowBytes; 8 | dst[0] = src[2]; dst = ((void*)dst) + dstRowBytes; 9 | dst[0] = src[3]; 10 | 11 | src = ((void*)src) + srcRowBytes; 12 | dst = realDst + 1; 13 | 14 | dst[0] = src[0]; dst = ((void*)dst) + dstRowBytes; 15 | dst[0] = src[1]; dst = ((void*)dst) + dstRowBytes; 16 | dst[0] = src[2]; dst = ((void*)dst) + dstRowBytes; 17 | dst[0] = src[3]; 18 | 19 | src = ((void*)src) + srcRowBytes; 20 | dst = realDst + 2; 21 | 22 | dst[0] = src[0]; dst = ((void*)dst) + dstRowBytes; 23 | dst[0] = src[1]; dst = ((void*)dst) + dstRowBytes; 24 | dst[0] = src[2]; dst = ((void*)dst) + dstRowBytes; 25 | dst[0] = src[3]; 26 | 27 | src = ((void*)src) + srcRowBytes; 28 | dst = realDst + 3; 29 | 30 | dst[0] = src[0]; dst = ((void*)dst) + dstRowBytes; 31 | dst[0] = src[1]; dst = ((void*)dst) + dstRowBytes; 32 | dst[0] = src[2]; dst = ((void*)dst) + dstRowBytes; 33 | dst[0] = src[3]; 34 | } 35 | 36 | /** 37 | * transpose 4X4 matrix every time 38 | * todo: add multi threads 39 | */ 40 | void BROU(TransposeMatrixBlock)(type *in, size_t inRow, size_t inCol, type *out, size_t outRow, size_t outCol) { 41 | size_t inRowBytes = sizeof(type) * inCol; 42 | size_t outRowBytes = sizeof(type) * outCol; 43 | 44 | for (size_t y = 0; y < inRow; y += 4) { 45 | y = BROU_MIN(inRow - 4, y); 46 | 47 | for (size_t x = 0; x < inCol; x += 4) { 48 | x = BROU_MIN(inCol - 4, x); 49 | 50 | #if defined(__ARM_NEON) 51 | BROU(MatrixTranspose4X4Neon)(in + y * inCol + x, inRowBytes, out + x * outCol + y, outRowBytes); 52 | #else 53 | BROU(MatrixTranspose4X4)(in + y * inCol + x, inRowBytes, out + x * outCol + y, outRowBytes); 54 | #endif 55 | } 56 | } 57 | } 58 | 59 | /** 60 | * transpose the matrix use 2 loop 61 | */ 62 | void BROU(TransposeMatrixDirectly)(type *in, size_t inRow, size_t inCol, type *out, size_t outRow, size_t outCol) { 63 | for (size_t y = 0; y < inCol; ++y) { 64 | for (size_t x = 0; x < inRow; ++x) { 65 | out[y * outCol + x] = in[x * inCol + y]; 66 | } 67 | } 68 | } 69 | 70 | /** 71 | * transpose the in matrix to out 72 | * outRow >= inCol outCol >= inRow 73 | */ 74 | void BROU(TransposeMatrix)(type *in, size_t inRow, size_t inCol, type *out, size_t outRow, size_t outCol) { 75 | if (4 > inRow || 4 > inCol) { 76 | BROU(TransposeMatrixDirectly)(in, inRow, inCol, out, outRow, outCol); 77 | } else { 78 | BROU(TransposeMatrixBlock)(in, inRow, inCol, out, outRow, outCol); 79 | } 80 | } 81 | 82 | #endif 83 | 84 | 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Brouhaha.m: -------------------------------------------------------------------------------- 1 | #import "Brouhaha.h" 2 | 3 | #define type uint16_t 4 | #define real half 5 | #define real_is_half 6 | 7 | #include "Basic/BrouUniqueTensor.m" 8 | #include "Basic/BrouTemporaryTensor.m" 9 | 10 | #include "Layer/Generate/BrouConvolutionLayer.m" 11 | #include 
"Layer/Generate/BrouConvolutionMMLayer.m" 12 | #include "Layer/Generate/BrouTransposedConvolutionLayer.m" 13 | #include "Layer/Generate/BrouTransposedConvolutionMMLayer.m" 14 | #include "Layer/Generate/BrouDilatedConvolutionLayer.m" 15 | #include "Layer/Generate/BrouDilatedConvolutionMMLayer.m" 16 | #include "Layer/Generate/BrouMaxPoolingLayer.m" 17 | #include "Layer/Generate/BrouAveragePoolingLayer.m" 18 | #include "Layer/Generate/BrouTanHLayer.m" 19 | #include "Layer/Generate/BrouReLuLayer.m" 20 | #include "Layer/Generate/BrouPReLuLayer.m" 21 | #include "Layer/Generate/BrouLinearLayer.m" 22 | #include "Layer/Generate/BrouFullConnectLayer.m" 23 | #include "Layer/Generate/BrouAddLayer.m" 24 | #include "Layer/Generate/BrouAddBiasLayer.m" 25 | #include "Layer/Generate/BrouBatchNormalizationLayer.m" 26 | #undef real 27 | #undef real_is_half 28 | #undef type 29 | 30 | #define type float 31 | #define real float 32 | #define real_is_float 33 | 34 | #include "Basic/BrouUniqueTensor.m" 35 | #include "Basic/BrouTemporaryTensor.m" 36 | 37 | #include "Layer/Generate/BrouConvolutionLayer.m" 38 | #include "Layer/Generate/BrouConvolutionMMLayer.m" 39 | #include "Layer/Generate/BrouTransposedConvolutionLayer.m" 40 | #include "Layer/Generate/BrouTransposedConvolutionMMLayer.m" 41 | #include "Layer/Generate/BrouDilatedConvolutionLayer.m" 42 | #include "Layer/Generate/BrouDilatedConvolutionMMLayer.m" 43 | #include "Layer/Generate/BrouMaxPoolingLayer.m" 44 | #include "Layer/Generate/BrouAveragePoolingLayer.m" 45 | #include "Layer/Generate/BrouTanHLayer.m" 46 | #include "Layer/Generate/BrouReLuLayer.m" 47 | #include "Layer/Generate/BrouPReLuLayer.m" 48 | #include "Layer/Generate/BrouLinearLayer.m" 49 | #include "Layer/Generate/BrouFullConnectLayer.m" 50 | #include "Layer/Generate/BrouAddLayer.m" 51 | #include "Layer/Generate/BrouAddBiasLayer.m" 52 | #include "Layer/Generate/BrouBatchNormalizationLayer.m" 53 | #undef real 54 | #undef real_is_float 55 | #undef type 56 | 57 | /** 58 | * a convert layer used to convert number type 59 | */ 60 | #define from uchar 61 | #define to half 62 | #include "Layer/Generate/BrouConvertLayer.m" 63 | #undef to 64 | #undef from 65 | 66 | #define from half 67 | #define to uchar 68 | #include "Layer/Generate/BrouConvertLayer.m" 69 | #undef to 70 | #undef from 71 | 72 | #define from half 73 | #define to float 74 | #include "Layer/Generate/BrouConvertLayer.m" 75 | #undef to 76 | #undef from 77 | 78 | #define from float 79 | #define to half 80 | #include "Layer/Generate/BrouConvertLayer.m" 81 | #undef to 82 | #undef from 83 | 84 | #define from uchar 85 | #define to float 86 | #include "Layer/Generate/BrouConvertLayer.m" 87 | #undef to 88 | #undef from 89 | 90 | #define from float 91 | #define to uchar 92 | #include "Layer/Generate/BrouConvertLayer.m" 93 | #undef to 94 | #undef from 95 | 96 | 97 | -------------------------------------------------------------------------------- /BrouhahaMetal/BrouhahaMetal/BrouTanH.metal: -------------------------------------------------------------------------------- 1 | /** 2 | * BrouhahaMetal 3 | * 4 | * Created by yanyuanchi on 2017/8/14. 5 | * Copyright © 2017年 yanyuanchi. All rights reserved. 
6 | * 7 | * the TanH operate, 8 | */ 9 | 10 | #if defined(real) && defined(real4) && defined(BROU) 11 | 12 | /** 13 | * for 1d shape every thread output 1X4 14 | */ 15 | kernel void BROU(TanH1D)(device real *input [[buffer(0)]], 16 | device real *output [[buffer(1)]], 17 | constant TensorShape& shape [[buffer(2)]], 18 | ushort grid [[thread_position_in_grid]]) { 19 | int index = grid << 2; 20 | 21 | if (index >= shape.dim0) { 22 | return; 23 | } 24 | 25 | device real4 *inputV = (device real4*)(input + index); 26 | device real4 *outputV = (device real4*)(output + index); 27 | 28 | outputV[0] = tanh(inputV[0]); 29 | } 30 | 31 | /** 32 | * every thread output 4X4 block 33 | * the width is timed by 4 34 | */ 35 | kernel void BROU(TanH2D)(device real *input [[buffer(0)]], 36 | device real *output [[buffer(1)]], 37 | constant TensorShape& shape [[buffer(2)]], 38 | ushort2 grid [[thread_position_in_grid]]) { 39 | int height = shape.dim0; 40 | int width = shape.dim1; 41 | 42 | int x = grid.x << 2; 43 | int y = grid.y << 2; 44 | 45 | if (y >= height || x >= width) { 46 | return; 47 | } 48 | 49 | int maxJ = min(y + 4, height); 50 | 51 | for (int j = y; j < maxJ; ++j) { 52 | int offset = j * width + x; 53 | 54 | device real4 *inputV = (device real4*)(input + offset); 55 | device real4 *outputV = (device real4*)(output + offset); 56 | 57 | outputV[0] = tanh(inputV[0]); 58 | } 59 | } 60 | 61 | /** 62 | * every thread output 4X4X4 block 63 | * the channel must be timed by 4 64 | */ 65 | kernel void BROU(TanH3D)(device real *input [[buffer(0)]], 66 | device real *output [[buffer(1)]], 67 | constant TensorShape& shape [[buffer(2)]], 68 | ushort3 grid [[thread_position_in_grid]]) { 69 | int height = shape.dim0; 70 | int width = shape.dim1; 71 | int channel = shape.dim2; 72 | 73 | int y = grid.y << 2; 74 | int x = grid.x << 2; 75 | int z = grid.z << 2; 76 | 77 | if (y >= height || x >= width || z >= channel) { 78 | return; 79 | } 80 | 81 | int maxJ = min(y + 4, height); 82 | int maxI = min(x + 4, width); 83 | 84 | for (int j = y; j < maxJ; ++j) { 85 | for (int i = x; i < maxI; ++i) { 86 | int offset = (j * width + i) * channel + z; 87 | 88 | device real4 *inputV = (device real4*)(input + offset); 89 | device real4 *outputV = (device real4*)(output + offset); 90 | 91 | outputV[0] = tanh(inputV[0]); 92 | } 93 | } 94 | } 95 | 96 | #endif 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /BrouhahaMetal/BrouhahaMetal/BrouAdd.metal: -------------------------------------------------------------------------------- 1 | #if defined(real) && defined(real4) && defined(BROU) 2 | 3 | kernel void BROU(Add1D)(device real *in1 [[buffer(0)]], 4 | device real *in2 [[buffer(1)]], 5 | device real *out [[buffer(2)]], 6 | constant TensorShape& shape [[buffer(3)]], 7 | ushort grid [[thread_position_in_grid]]) { 8 | int len = shape.dim0; 9 | 10 | int index = grid << 2; 11 | 12 | if (index >= len) { 13 | return; 14 | } 15 | 16 | device real4 *in1V = (device real4*)(in1 + index); 17 | device real4 *in2V = (device real4*)(in2 + index); 18 | device real4 *outV = (device real4*)(out + index); 19 | 20 | outV[0] = in1V[0] + in2V[0]; 21 | } 22 | 23 | kernel void BROU(Add2D)(device real *in1 [[buffer(0)]], 24 | device real *in2 [[buffer(1)]], 25 | device real *out [[buffer(2)]], 26 | constant TensorShape& shape [[buffer(3)]], 27 | ushort2 grid [[thread_position_in_grid]]) { 28 | int height = shape.dim0; 29 | int width = shape.dim1; 30 | 31 | int x = 
grid.x << 2; 32 | int y = grid.y << 2; 33 | 34 | if (y >= height || x >= width) { 35 | return; 36 | } 37 | 38 | int maxJ = min(y + 4, height); 39 | 40 | for (int j = y; j < maxJ; ++j) { 41 | int offset = j * width + x; 42 | 43 | device real4 *in1V = (device real4*)(in1 + offset); 44 | device real4 *in2V = (device real4*)(in2 + offset); 45 | device real4 *outV = (device real4*)(out + offset); 46 | 47 | outV[0] = in1V[0] + in2V[0]; 48 | } 49 | } 50 | 51 | kernel void BROU(Add3D)(device real *in1 [[buffer(0)]], 52 | device real *in2 [[buffer(1)]], 53 | device real *out [[buffer(2)]], 54 | constant TensorShape& shape [[buffer(3)]], 55 | ushort3 grid [[thread_position_in_grid]]) { 56 | int height = shape.dim0; 57 | int width = shape.dim1; 58 | int channel = shape.dim2; 59 | 60 | int x = grid.x << 2; 61 | int y = grid.y << 2; 62 | int z = grid.z << 2; 63 | 64 | if (y >= height || x >= width || z >= channel) { 65 | return; 66 | } 67 | 68 | int maxJ = min(y + 4, height); 69 | int maxI = min(x + 4, width); 70 | 71 | for (int j = y; j < maxJ; ++j) { 72 | for (int i = x; i < maxI; ++i) { 73 | int offset = (j * width + i) * channel + z; 74 | 75 | device real4 *in1V = (device real4*)(in1 + offset); 76 | device real4 *in2V = (device real4*)(in2 + offset); 77 | device real4 *outV = (device real4*)(out + offset); 78 | 79 | outV[0] = in1V[0] + in2V[0]; 80 | } 81 | } 82 | } 83 | 84 | #endif 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha.xcodeproj/xcuserdata/yanyuanchi.xcuserdatad/xcschemes/Brouhaha.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 31 | 32 | 33 | 34 | 35 | 36 | 47 | 48 | 54 | 55 | 56 | 57 | 58 | 59 | 65 | 66 | 72 | 73 | 74 | 75 | 77 | 78 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DEPRECATED 2 | 3 | # Brouhaha: a Deep Learning toolkit for iOS 4 | 5 | Brouhaha is a Deep Learning toolkit based on iOS Metal. It makes it easy to use iOS GPU shaders to run Deep Learning algorithms. The toolkit includes not only the Metal shaders but also an abstraction layer of neural-network layers written in Objective-C. 6 | 7 | China site:https://gitee.com/JingQiManHua/Brouhaha 8 | 9 | ## Update Note 10 | ### Latest 11 | 1. Simplify the API and make it easier to use. 12 | 2. Add a temporary MTLBuffer class to save memory at run time. 13 | 14 | ### 2017 15 | 1. Add float32 support; every layer now supports float16 and float32 at the same time. 16 | 2. Add an art-transform demo based on float32. 17 | 3. Change the API to make it more flexible. 18 | 19 | ## Introduction 20 | Brouhaha only runs the forward (prediction) pass of a Deep Learning algorithm; it cannot be used to train a model. Before using Brouhaha, you must have a model pretrained with another toolkit such as Caffe, Torch or Tensorflow. 21 | Brouhaha has the common layers: Convolution (including Transposed and Dilated convolution), Pooling, Activation, FullConnect, BatchNormalize and some special layers for image conversion. It includes 3 parts: 22 | 1. **BrouhahaMetal:** written in Metal shaders, using the GPU to speed up the layers' calculations. 23 | 2. **Brouhaha:** includes the abstract layers of Deep Learning, written in Objective-C. For speed, some functions are coded in asm.
Brouhaha is built on top of BrouhahaMetal. 24 | 3. **BrouhahaDemo:** includes 3 demos that show how to use this toolkit. The LeNet demo is a Convolutional Neural Network that recognizes a digit in an image. The ArtTransform demo is a Convolutional Neural Network for “Artistic Style Transform” like Prisma; it includes two implementations, one based on float16 and another on float32. 25 | 26 | ## Demo 27 | **Build:** Before building BrouhahaDemo, build BrouhahaMetal first and copy the generated "BrouhahaMetal.metallib" file into BrouhahaDemo's bundle. 28 |
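A minimal sketch of what the setup might look like once the metallib is in the bundle (this is not the project's own demo code): `BrouConvolutionLayer_float` is the class name produced by the `BROU_OBJECT` macro in `BrouMacro.h`, and the init selector is the one declared in `BrouConvolutionLayer.h`; the `brouDemoSetupSketch` helper, the channel/kernel sizes and the zero-filled weight buffers are illustrative placeholders only.

```objective-c
// Hypothetical setup sketch, assuming BrouhahaMetal.metallib was copied into
// the app bundle and that real weights would be loaded from a pretrained model.
#import <Foundation/Foundation.h>
#import <Metal/Metal.h>
#import "Brouhaha.h"

static void brouDemoSetupSketch(void) {
    id<MTLDevice> device = MTLCreateSystemDefaultDevice();

    // Load the precompiled Metal library from the demo's bundle.
    NSString *path = [[NSBundle mainBundle] pathForResource:@"BrouhahaMetal"
                                                     ofType:@"metallib"];
    NSError *error = nil;
    id<MTLLibrary> library = [device newLibraryWithFile:path error:&error];

    // Placeholder parameters: a 3x3 convolution, 4 input and 8 output channels,
    // stride 1, pad 1. Zero-filled buffers stand in for real trained weights.
    NSMutableData *kernel = [NSMutableData dataWithLength:sizeof(float) * 8 * 3 * 3 * 4];
    NSMutableData *bias   = [NSMutableData dataWithLength:sizeof(float) * 8];

    BrouConvolutionLayer_float *conv =
        [[BrouConvolutionLayer_float alloc] initWithDevice:device
                                                   library:library
                                               floatKernel:kernel.mutableBytes
                                                 floatBias:bias.mutableBytes
                                              inputChannel:4
                                             outputChannel:8
                                              kernelHeight:3
                                               kernelWidth:3
                                                    padTop:1
                                                   padLeft:1
                                                   strideY:1
                                                   strideX:1];
    (void)conv; // encoding the layer into a command buffer is omitted here
}
```

Running the layer goes through the common `BrouLayer` interface, whose encoding API is not shown in this listing, so it is left out of the sketch.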
29 | **LeNet:** This demo is a Neural Network that recognizes digits in images. For the details of the algorithm see: http://yann.lecun.com/exdb/lenet/. The model file comes from the internet; unfortunately the original source has been forgotten. 30 |
31 | ![](Images/lenet.gif) 32 | 33 | **ArtTransform:** This demo is a Convolutional Neural Network for “Artistic Style Transform” like Prisma. For the algorithm details see: https://arxiv.org/abs/1603.08155; the model file is from: https://github.com/lengstrom/fast-style-transfer#video-stylization. It includes two implementations, one based on float16 and another on float32. 34 |
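The float16 and float32 variants of every layer are generated from one source file by the token-pasting macros in `BrouMacro.h` (`real` is defined as `half` or `float` before the layer sources are included, as in `Brouhaha.m` and `BrouGenerate.metal`). Below is a small stand-alone illustration of that naming scheme; it uses demo macros that mirror `BROU_CONCAT_4`/`BROU_STR` rather than the project's own headers, and the `main` program exists only for demonstration.

```objective-c
#include <stdio.h>

// Demo macros mirroring BROU_CONCAT_4 / BROU_STR from BrouMacro.h.
#define DEMO_CONCAT_4_EXPAND(a, b, c, d) a ## b ## c ## d
#define DEMO_CONCAT_4(a, b, c, d)        DEMO_CONCAT_4_EXPAND(a, b, c, d)
#define DEMO_STR_EXPAND(x)               #x
#define DEMO_STR(x)                      DEMO_STR_EXPAND(x)

// With real defined as half, the Objective-C class name and the Metal kernel
// name for one layer both get the matching "_half" suffix.
#define real half
#define DEMO_OBJECT_NAME(name) DEMO_STR(DEMO_CONCAT_4(Brou, name, _, real))
#define DEMO_KERNEL_NAME(name) DEMO_STR(DEMO_CONCAT_4(brou, name, _, real))

int main(void) {
    printf("%s\n", DEMO_OBJECT_NAME(ConvolutionLayer)); // BrouConvolutionLayer_half
    printf("%s\n", DEMO_KERNEL_NAME(ConvolutionMM));    // brouConvolutionMM_half
    return 0;
}
```

Compiling the same sources again with `real` defined as `float` yields the matching `_float` class and kernel names, which is how the float16 and float32 demos above share one code base.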
35 | 36 | ***Based on Float32***
37 | ![](Images/art1.gif) 38 |
39 | 40 | ***Based on Float16***
41 | ![](Images/art2.gif) 42 | 43 | ## Why Brouhaha? 44 | 1. Brouhaha uses the GPU instead of the CPU, so it has high performance. 45 | 2. In Brouhaha every layer has two implementations, one in float16 and one in float32, so it is easy to choose for a specific purpose: float16 for speed and float32 for precision. 46 | 3. Brouhaha includes many newer layers such as TransposedConvolution and DilatedConvolution. 47 | 4. Unlike Apple's Core ML and other third-party toolkits based on Metal, Brouhaha does not need iOS 10+; it only needs iOS 8+. 48 | 49 | ## Future Work 50 | 1. Support RNN. 51 | -------------------------------------------------------------------------------- /BrouhahaMetal/BrouhahaMetal.xcodeproj/xcuserdata/yanyuanchi.xcuserdatad/xcschemes/BrouhahaMetal.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 31 | 32 | 33 | 34 | 35 | 36 | 47 | 48 | 54 | 55 | 56 | 57 | 58 | 59 | 65 | 66 | 72 | 73 | 74 | 75 | 77 | 78 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /BrouhahaMetal/BrouhahaMetal/BrouGenerate.metal: -------------------------------------------------------------------------------- 1 | /** 2 | * BrouhahaMetal 3 | * 4 | * Created by yanyuanchi on 2017/8/14. 5 | * Copyright © 2017年 yanyuanchi. All rights reserved. 6 | * 7 | * ref: Torch 8 | */ 9 | 10 | #include 11 | #include 12 | 13 | #include "BrouMacro.metal" 14 | #include "BrouStruct.metal" 15 | 16 | using namespace metal; 17 | 18 | #define real half 19 | #define real4 half4 20 | #define real_is_half 21 | 22 | #include "BrouTanH.metal" 23 | #include "BrouAdd.metal" 24 | #include "BrouAddBias.metal" 25 | #include "BrouMaxPooling.metal" 26 | #include "BrouAveragePooling.metal" 27 | #include "BrouMatrixMultiply.metal" 28 | #include "BrouConvolution.metal" 29 | #include "BrouConvolutionMM.metal" 30 | #include "BrouTransposedConvolution.metal" 31 | #include "BrouTransposedConvolutionMM.metal" 32 | #include "BrouDilatedConvolution.metal" 33 | #include "BrouDilatedConvolutionMM.metal" 34 | #include "BrouFullconnect.metal" 35 | #include "BrouLinear.metal" 36 | #include "BrouReLu.metal" 37 | #include "BrouPReLu.metal" 38 | #include "BrouBatchNormalization.metal" 39 | #include "BrouSoftMax.metal" 40 | 41 | #undef real_is_half 42 | #undef real4 43 | #undef real 44 | 45 | #define real float 46 | #define real4 float4 47 | #define real_is_float 48 | 49 | #include "BrouTanH.metal" 50 | #include "BrouAdd.metal" 51 | #include "BrouAddBias.metal" 52 | #include "BrouMaxPooling.metal" 53 | #include "BrouAveragePooling.metal" 54 | #include "BrouMatrixMultiply.metal" 55 | #include "BrouConvolution.metal" 56 | #include "BrouConvolutionMM.metal" 57 | #include "BrouTransposedConvolution.metal" 58 | #include "BrouTransposedConvolutionMM.metal" 59 | #include "BrouDilatedConvolution.metal" 60 | #include "BrouDilatedConvolutionMM.metal" 61 | #include "BrouFullconnect.metal" 62 | #include "BrouLinear.metal" 63 | #include "BrouReLu.metal" 64 | #include "BrouPReLu.metal" 65 | #include "BrouBatchNormalization.metal" 66 | #include "BrouSoftMax.metal" 67 | 68 | #undef real_is_float 69 | #undef real4 70 | #undef real 71 | 72 | #define from uchar 73 | #define from4 uchar4 74 | #define to half 75 | #define to4 half4 76 | #include "BrouConvert.metal" 77 | #undef to4 78 | #undef to 79 | #undef from4 80 | #undef from 81 | 82 | #define from half 83 | #define from4 half4 84 | #define to uchar 85 | #define to4 uchar4 86 | #include
"BrouConvert.metal" 87 | #undef to4 88 | #undef to 89 | #undef from4 90 | #undef from 91 | 92 | #define from uchar 93 | #define from4 uchar4 94 | #define to float 95 | #define to4 float4 96 | #include "BrouConvert.metal" 97 | #undef to4 98 | #undef to 99 | #undef from4 100 | #undef from 101 | 102 | #define from float 103 | #define from4 float4 104 | #define to uchar 105 | #define to4 uchar4 106 | #include "BrouConvert.metal" 107 | #undef to4 108 | #undef to 109 | #undef from4 110 | #undef from 111 | 112 | #define from half 113 | #define from4 half4 114 | #define to float 115 | #define to4 float4 116 | #include "BrouConvert.metal" 117 | #undef to4 118 | #undef to 119 | #undef from4 120 | #undef from 121 | 122 | #define from float 123 | #define from4 float4 124 | #define to half 125 | #define to4 half4 126 | #include "BrouConvert.metal" 127 | #undef to4 128 | #undef to 129 | #undef from4 130 | #undef from 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | -------------------------------------------------------------------------------- /BrouhahaMetal/BrouhahaMetal/BrouLinear.metal: -------------------------------------------------------------------------------- 1 | /** 2 | * Brouhaha 3 | * convolution.metal 4 | * Created by yanyuanchi on 2017/5/15. 5 | * Copyright © 2017年 yanyuanchi. All rights reserved. 6 | * 7 | * the linear operate 8 | */ 9 | #if defined(real) && defined(real4) && defined(BROU) 10 | 11 | /** 12 | * f(x) = a * x + b 13 | */ 14 | kernel void BROU(Linear1D)(device real *input [[buffer(0)]], 15 | device real *output [[buffer(1)]], 16 | device real *ab [[buffer(2)]], 17 | constant TensorShape& shape [[buffer(3)]], 18 | ushort grid [[thread_position_in_grid]]) { 19 | int len = shape.dim0; 20 | 21 | int index = grid << 2; 22 | 23 | if (index >= len) { 24 | return; 25 | } 26 | 27 | device real4 *inputV = (device real4*)(input + index); 28 | device real4 *outputV = (device real4*)(output + index); 29 | 30 | outputV[0] = ab[0] * inputV[0] + ab[1]; 31 | } 32 | 33 | kernel void BROU(Linear2D)(device real *input [[buffer(0)]], 34 | device real *output [[buffer(1)]], 35 | device real *ab [[buffer(2)]], 36 | constant TensorShape& shape [[buffer(3)]], 37 | ushort2 grid [[thread_position_in_grid]]) { 38 | /**the width must be timed by 4*/ 39 | int height = shape.dim0; 40 | int width = shape.dim1; 41 | 42 | int x = grid.x << 2; 43 | int y = grid.y << 2; 44 | 45 | if (x >= width || y >= height) { 46 | return; 47 | } 48 | 49 | int maxJ = min(y + 4, height); 50 | 51 | for (int j = y; j < maxJ; ++j) { 52 | device real4 *inputV = (device real4*)(input + j * width + x); 53 | device real4 *outputV = (device real4*)(output + j * width + x); 54 | 55 | outputV[0] = ab[0] * inputV[0] + ab[1]; 56 | } 57 | } 58 | 59 | kernel void BROU(Linear3D)(device real *input [[buffer(0)]], 60 | device real *output [[buffer(1)]], 61 | device real *ab [[buffer(2)]], 62 | constant TensorShape& shape [[buffer(3)]], 63 | ushort3 grid [[thread_position_in_grid]]) { 64 | /**the channel must be timed by 4*/ 65 | int height = shape.dim0; 66 | int width = shape.dim1; 67 | int channel = shape.dim2; 68 | 69 | int x = grid.x << 2; 70 | int y = grid.y << 2; 71 | int z = grid.z << 2; 72 | 73 | if (y >= height || x >= width || z >= channel) { 74 | return; 75 | } 76 | 77 | int maxJ = min(y + 4, height); 78 | int maxI = min(x + 4, width); 79 | 80 | for (int j = y; j < maxJ; ++j) { 81 | for (int i = x; i < maxI; ++i) { 82 | int offset = (j * width + i) * channel + z; 83 | 84 | device real4 *inputV = (device 
real4*)(input + offset); 85 | device real4 *outputV = (device real4*)(output + offset); 86 | 87 | outputV[0] = ab[0] * inputV[0] + ab[1]; 88 | } 89 | } 90 | } 91 | 92 | #endif 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Brouhaha.h: -------------------------------------------------------------------------------- 1 | #import 2 | #import 3 | 4 | #import "BrouMacro.h" 5 | #import "BrouStruct.h" 6 | #import "BrouUtils.h" 7 | #import "BrouConvertType.h" 8 | #import "BrouShareBuffer.h" 9 | #import "BrouTensor.h" 10 | #import "BrouLayer.h" 11 | 12 | #define type uint16_t 13 | #define real half 14 | #define real_is_half 15 | 16 | #include "Basic/BrouUniqueTensor.h" 17 | #include "Basic/BrouTemporaryTensor.h" 18 | 19 | #include "Layer/Generate/BrouConvolutionLayer.h" 20 | #include "Layer/Generate/BrouConvolutionMMLayer.h" 21 | #include "Layer/Generate/BrouTransposedConvolutionLayer.h" 22 | #include "Layer/Generate/BrouTransposedConvolutionMMLayer.h" 23 | #include "Layer/Generate/BrouDilatedConvolutionLayer.h" 24 | #include "Layer/Generate/BrouDilatedConvolutionMMLayer.h" 25 | #include "Layer/Generate/BrouMaxPoolingLayer.h" 26 | #include "Layer/Generate/BrouAveragePoolingLayer.h" 27 | #include "Layer/Generate/BrouTanHLayer.h" 28 | #include "Layer/Generate/BrouReLuLayer.h" 29 | #include "Layer/Generate/BrouPReLuLayer.h" 30 | #include "Layer/Generate/BrouLinearLayer.h" 31 | #include "Layer/Generate/BrouFullConnectLayer.h" 32 | #include "Layer/Generate/BrouAddLayer.h" 33 | #include "Layer/Generate/BrouAddBiasLayer.h" 34 | #include "Layer/Generate/BrouBatchNormalizationLayer.h" 35 | #undef real 36 | #undef real_is_half 37 | #undef type 38 | 39 | #define type float 40 | #define real float 41 | #define real_is_float 42 | 43 | #include "Basic/BrouUniqueTensor.h" 44 | #include "Basic/BrouTemporaryTensor.h" 45 | 46 | #include "Layer/Generate/BrouConvolutionLayer.h" 47 | #include "Layer/Generate/BrouConvolutionMMLayer.h" 48 | #include "Layer/Generate/BrouTransposedConvolutionLayer.h" 49 | #include "Layer/Generate/BrouTransposedConvolutionMMLayer.h" 50 | #include "Layer/Generate/BrouDilatedConvolutionLayer.h" 51 | #include "Layer/Generate/BrouDilatedConvolutionMMLayer.h" 52 | #include "Layer/Generate/BrouMaxPoolingLayer.h" 53 | #include "Layer/Generate/BrouAveragePoolingLayer.h" 54 | #include "Layer/Generate/BrouTanHLayer.h" 55 | #include "Layer/Generate/BrouReLuLayer.h" 56 | #include "Layer/Generate/BrouPReLuLayer.h" 57 | #include "Layer/Generate/BrouLinearLayer.h" 58 | #include "Layer/Generate/BrouFullConnectLayer.h" 59 | #include "Layer/Generate/BrouAddLayer.h" 60 | #include "Layer/Generate/BrouAddBiasLayer.h" 61 | #include "Layer/Generate/BrouBatchNormalizationLayer.h" 62 | #undef real 63 | #undef real_is_float 64 | #undef type 65 | 66 | /** 67 | * a convert layer used to convert number type 68 | */ 69 | #define from uchar 70 | #define to half 71 | #include "Layer/Generate/BrouConvertLayer.h" 72 | #undef to 73 | #undef from 74 | 75 | #define from half 76 | #define to uchar 77 | #include "Layer/Generate/BrouConvertLayer.h" 78 | #undef to 79 | #undef from 80 | 81 | #define from half 82 | #define to float 83 | #include "Layer/Generate/BrouConvertLayer.h" 84 | #undef to 85 | #undef from 86 | 87 | #define from float 88 | #define to half 89 | #include "Layer/Generate/BrouConvertLayer.h" 90 | #undef to 91 | #undef from 92 | 93 | #define from uchar 94 | #define to float 95 | #include 
"Layer/Generate/BrouConvertLayer.h" 96 | #undef to 97 | #undef from 98 | 99 | #define from float 100 | #define to uchar 101 | #include "Layer/Generate/BrouConvertLayer.h" 102 | #undef to 103 | #undef from 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /BrouhahaMetal/BrouhahaMetal/BrouAddBias.metal: -------------------------------------------------------------------------------- 1 | #if defined(real) && defined(real4) && defined(BROU) 2 | 3 | kernel void BROU(AddBias1D)(device real *in [[buffer(0)]], 4 | device real *bia [[buffer(1)]], 5 | device real *out [[buffer(2)]], 6 | constant TensorShape& shape [[buffer(3)]], 7 | ushort grid [[thread_position_in_grid]]) { 8 | int len = shape.dim0; 9 | int index = grid << 2; 10 | 11 | if (index >= len) { 12 | return; 13 | } 14 | 15 | device real4 *inV = (device real4*)(in + index); 16 | device real4 *biaV = (device real4*)(bia + index); 17 | device real4 *outV = (device real4*)(out + index); 18 | 19 | outV[0] = inV[0] + biaV[0]; 20 | } 21 | 22 | 23 | /** 24 | * the in and out's dimension is (height, width) 25 | * the bias's dimension is (width, 1) 26 | * width is timed by 4 27 | */ 28 | kernel void BROU(AddBias2D)(device real *in [[buffer(0)]], 29 | device real *bia [[buffer(1)]], 30 | device real *out [[buffer(2)]], 31 | constant TensorShape& shape [[buffer(3)]], 32 | ushort2 grid [[thread_position_in_grid]]) { 33 | int height = shape.dim0; 34 | int width = shape.dim1; 35 | 36 | int y = grid.y << 2; 37 | int x = grid.x << 2; 38 | 39 | if (y >= height || x >= width) { 40 | return; 41 | } 42 | 43 | real4 biaV = ((device real4*)(bia + x))[0]; 44 | 45 | int maxJ = min(y + 4, height); 46 | 47 | for (int j = y; j < maxJ; ++j) { 48 | int offset = j * width + x; 49 | 50 | device real4 *inV = (device real4*)(in + offset); 51 | device real4 *outV = (device real4*)(out + offset); 52 | 53 | outV[0] = inV[0] + biaV; 54 | } 55 | } 56 | 57 | /** 58 | * the in and out's dimension is (height, width, channel) 59 | * the bia diemnsion is (channel, 1) 60 | * the channel is timed by 4 61 | */ 62 | kernel void BROU(AddBias3D)(device real *in [[buffer(0)]], 63 | device real *bia [[buffer(1)]], 64 | device real *out [[buffer(2)]], 65 | constant TensorShape& shape [[buffer(3)]], 66 | ushort3 grid [[thread_position_in_grid]]) { 67 | int height = shape.dim0; 68 | int width = shape.dim1; 69 | int channel = shape.dim2; 70 | 71 | int y = grid.y << 2; 72 | int x = grid.x << 2; 73 | int z = grid.z << 2; 74 | 75 | if (y >= height || x >= width || z >= channel) { 76 | return; 77 | } 78 | 79 | real4 biaV = ((device real4*)(bia + z))[0]; 80 | 81 | int maxJ = min(y + 4, height); 82 | int maxI = min(x + 4, width); 83 | 84 | for (int j = y; j < maxJ; ++j) { 85 | for (int i = x; i < maxI; ++i) { 86 | int offset = (j * width + i) * channel + z; 87 | 88 | device real4 *inV = (device real4*)(in + offset); 89 | device real4 *outV = (device real4*)(out + offset); 90 | 91 | outV[0] = inV[0] + biaV; 92 | } 93 | } 94 | } 95 | 96 | #endif 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Basic/BrouTemporaryBuffer1.m: -------------------------------------------------------------------------------- 1 | //#import "BrouTemporaryBuffer.h" 2 | // 3 | //@implementation BrouTemporaryBuffer 4 | // 5 | //- (instancetype)init { 6 | // self = [super init]; 7 | // 8 | // if (self) { 9 | // 
_length = 0; 10 | // } 11 | // 12 | // return self; 13 | //} 14 | // 15 | //- (void)configWithFloatLength:(int)length { 16 | // NSAssert(length > 0, @"length must > 0"); 17 | // 18 | // int lengthX4 = (length + 3) / 4 * 4; 19 | // 20 | // [self configWithLength:sizeof(float) * lengthX4]; 21 | //} 22 | // 23 | //- (void)configWithFloatHeight:(int)height width:(int)width { 24 | // NSAssert(height > 0, @"height must > 0"); 25 | // NSAssert(width > 0, @"width must > 0"); 26 | // 27 | // int widthX4 = (width + 3) / 4 * 4; 28 | // 29 | // [self configWithLength:sizeof(float) * height * widthX4]; 30 | //} 31 | // 32 | //- (void)configWithFloatHeight:(int)height width:(int)width channel:(int)channel { 33 | // NSAssert(height > 0, @"height must > 0"); 34 | // NSAssert(width > 0, @"width must > 0"); 35 | // NSAssert(channel > 0, @"channel must > 0"); 36 | // 37 | // int channelX4 = (channel + 3) / 4 * 4; 38 | // 39 | // [self configWithLength:sizeof(float) * height * width * channelX4]; 40 | //} 41 | // 42 | //- (void)configWithHalfLength:(int)length { 43 | // NSAssert(length > 0, @"length must > 0"); 44 | // 45 | // int lengthX4 = (length + 3) / 4 * 4; 46 | // 47 | // [self configWithLength:sizeof(uint16_t) * lengthX4]; 48 | //} 49 | // 50 | //- (void)configWithHalfHeight:(int)height width:(int)width { 51 | // NSAssert(height > 0, @"height must > 0"); 52 | // NSAssert(width > 0, @"width must > 0"); 53 | // 54 | // int widthX4 = (width + 3) / 4 * 4; 55 | // 56 | // [self configWithLength:sizeof(uint16_t) * height * widthX4]; 57 | //} 58 | // 59 | //- (void)configWithHalfHeight:(int)height width:(int)width channel:(int)channel { 60 | // NSAssert(height > 0, @"height must > 0"); 61 | // NSAssert(width > 0, @"width must > 0"); 62 | // NSAssert(channel > 0, @"channel must > 0"); 63 | // 64 | // int channelX4 = (channel + 3) / 4 * 4; 65 | // 66 | // [self configWithLength:sizeof(uint16_t) * height * width * channelX4]; 67 | //} 68 | // 69 | //- (void)configConvolutionMMWithFloatHeight:(int)height width:(int)width channel:(int)channel { 70 | // NSAssert(height > 0, @"height must > 0"); 71 | // NSAssert(width > 0, @"width must > 0"); 72 | // NSAssert(channel > 0, @"channel must > 0"); 73 | // 74 | // int heightXwidthX4 = (height * width + 3) / 4 * 4; 75 | // int channelX4 = (channel + 3) / 4 * 4; 76 | // 77 | // [self configWithLength:sizeof(float) * heightXwidthX4 * channelX4]; 78 | //} 79 | // 80 | //- (void)configConvolutionMMWithHalfHeight:(int)height width:(int)width channel:(int)channel { 81 | // NSAssert(height > 0, @"height must > 0"); 82 | // NSAssert(width > 0, @"width must > 0"); 83 | // NSAssert(channel > 0, @"channel must > 0"); 84 | // 85 | // int heightXwidthX4 = (height * width + 3) / 4 * 4; 86 | // int channelX4 = (channel + 3) / 4 * 4; 87 | // 88 | // [self configWithLength:sizeof(uint16_t) * heightXwidthX4 * channelX4]; 89 | //} 90 | // 91 | //- (void)configWithLength:(int)length { 92 | // NSAssert(length > 0, @"length must > 0"); 93 | // 94 | // if (_length < length) { 95 | // _length = length; 96 | // } 97 | //} 98 | // 99 | //- (void)configWithDevice:(id)device { 100 | // if (!_buffer || _buffer.length < _length) { 101 | // _buffer = [device newBufferWithLength:_length 102 | // options:MTLResourceCPUCacheModeDefaultCache | MTLResourceStorageModeShared]; 103 | // } 104 | //} 105 | // 106 | //@end 107 | 108 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Basic/BrouShareBuffer.m: 
-------------------------------------------------------------------------------- 1 | #import "BrouShareBuffer.h" 2 | 3 | /** 4 | BrouTemporaryBuffer will init a lot of MTLBuffer 5 | and some BrouTensor will point to a smae Buffer 6 | the step is: 7 | BrouTemporaryBuffer will init size-MTLBuffers. 8 | and when a BrouTemporaryTensor bind to a BrouTemporaryBuffer it will bind to one of the size-MTLBuffers 9 | so the two BrouTemporaryTensor may point to the same MTLBuffer 10 | */ 11 | @interface BrouShareBuffer() { 12 | id _device; 13 | 14 | /**_lifeTime means the temp MTLBuffer's life, default is 1*/ 15 | NSUInteger _lifeTime; 16 | 17 | /**_size = _lifeTime + 1*/ 18 | NSUInteger _size; 19 | 20 | /**the cur index of the buffer*/ 21 | NSUInteger _curIndex; 22 | 23 | /**store the bytes count of MTLBuffer*/ 24 | NSMutableArray *_bufferBytesCounts; 25 | 26 | /**store the real MTLBuffer*/ 27 | NSMutableDictionary> *_temporaryBuffers; 28 | } 29 | 30 | @end 31 | 32 | @implementation BrouShareBuffer 33 | 34 | + (instancetype)defaultWithDevice:(id)device { 35 | return [BrouShareBuffer initWithLifeTime:1 device:device]; 36 | } 37 | 38 | + (instancetype)initWithLifeTime:(NSUInteger)time device:(id)device { 39 | return [[BrouShareBuffer alloc] initWithLifeTime:time device:device]; 40 | } 41 | 42 | - (instancetype)initWithLifeTime:(NSUInteger)lifeTime device:(id)device { 43 | NSAssert(lifeTime > 0, @"the life time must be > 0"); 44 | 45 | self = [super init]; 46 | 47 | if (self) { 48 | _device = device; 49 | _lifeTime = lifeTime; 50 | _size = _lifeTime + 1; 51 | _curIndex = 0; 52 | 53 | _temporaryBuffers = [[NSMutableDictionary> alloc] init]; 54 | _bufferBytesCounts = [[NSMutableArray alloc] init]; 55 | 56 | for (NSUInteger i = 0; i < _size; ++i) { 57 | [_bufferBytesCounts addObject:[NSNumber numberWithUnsignedInteger:0]]; 58 | } 59 | } 60 | 61 | return self; 62 | } 63 | 64 | - (NSNumber*)bindWithBytesCounts:(NSUInteger)bytesCount { 65 | if (_bufferBytesCounts[_curIndex].unsignedIntegerValue < bytesCount) { 66 | _bufferBytesCounts[_curIndex] = [NSNumber numberWithUnsignedInteger:bytesCount]; 67 | } 68 | 69 | NSNumber *bindId = [NSNumber numberWithUnsignedInteger:_curIndex]; 70 | 71 | _curIndex = (_curIndex + 1) % _size; 72 | 73 | return bindId; 74 | } 75 | 76 | - (id)getBindBufferById:(NSNumber*)bindId { 77 | NSAssert(bindId.unsignedIntegerValue >= 0 && bindId.unsignedIntegerValue < _size, @"the bindId is error"); 78 | 79 | if (nil == [_temporaryBuffers objectForKey:bindId]) { 80 | if (@available(iOS 9.0, *)) { 81 | id buffer = [_device newBufferWithLength:_bufferBytesCounts[bindId.unsignedIntegerValue].unsignedIntegerValue 82 | options:MTLResourceCPUCacheModeDefaultCache | MTLResourceStorageModeShared]; 83 | 84 | [_temporaryBuffers setObject:buffer forKey:bindId]; 85 | } else { 86 | id buffer = [_device newBufferWithLength:_bufferBytesCounts[bindId.unsignedIntegerValue].unsignedIntegerValue 87 | options:MTLResourceCPUCacheModeDefaultCache]; 88 | 89 | [_temporaryBuffers setObject:buffer forKey:bindId]; 90 | } 91 | } 92 | 93 | return [_temporaryBuffers objectForKey:bindId]; 94 | } 95 | 96 | @end 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo.xcodeproj/xcuserdata/yanyuanchi.xcuserdatad/xcschemes/BrouhahaDemo.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 31 | 32 | 33 | 34 | 40 | 41 | 42 | 43 | 44 | 
45 | 56 | 58 | 64 | 65 | 66 | 67 | 68 | 69 | 75 | 77 | 83 | 84 | 85 | 86 | 88 | 89 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /BrouhahaMetal/BrouhahaMetal/BrouPReLu.metal: -------------------------------------------------------------------------------- 1 | #if defined(real) && defined(real4) && defined(BROU) 2 | 3 | kernel void BROU(PReLu1D)(device real *input [[buffer(0)]], 4 | device real *output [[buffer(1)]], 5 | device real *a [[buffer(2)]], 6 | constant TensorShape& shape [[buffer(3)]], 7 | ushort grid [[thread_position_in_grid]]) { 8 | int index = grid << 2; 9 | 10 | if (index >= shape.dim0) { 11 | return; 12 | } 13 | 14 | real realA = a[0]; 15 | 16 | real4 in = ((device real4*)(input + index))[0]; 17 | real4 out; 18 | 19 | out.x = (in.x > 0) ? (in.x) : (realA * in.x); 20 | out.y = (in.y > 0) ? (in.y) : (realA * in.y); 21 | out.z = (in.z > 0) ? (in.z) : (realA * in.z); 22 | out.w = (in.w > 0) ? (in.w) : (realA * in.w); 23 | 24 | device real4 *outputV = (device real4*)(output + index); 25 | 26 | outputV[0] = out; 27 | } 28 | 29 | kernel void BROU(PReLu2D)(device real *input [[buffer(0)]], 30 | device real *output [[buffer(1)]], 31 | device real *a [[buffer(2)]], 32 | constant TensorShape& shape [[buffer(3)]], 33 | ushort2 grid [[thread_position_in_grid]]) { 34 | int height = shape.dim0; 35 | int width = shape.dim1; 36 | 37 | int x = grid.x << 2; 38 | int y = grid.y << 2; 39 | 40 | if (y >= height || x >= width) { 41 | return; 42 | } 43 | 44 | real realA = a[0]; 45 | 46 | int maxJ = min(y + 4, height); 47 | 48 | for (int j = y; j < maxJ; ++j) { 49 | int offset = j * width + x; 50 | 51 | real4 in = ((device real4*)(input + offset))[0]; 52 | real4 out; 53 | 54 | out.x = (in.x > 0) ? (in.x) : (realA * in.x); 55 | out.y = (in.y > 0) ? (in.y) : (realA * in.y); 56 | out.z = (in.z > 0) ? (in.z) : (realA * in.z); 57 | out.w = (in.w > 0) ? (in.w) : (realA * in.w); 58 | 59 | device real4 *outputV = (device real4*)(output + offset); 60 | 61 | outputV[0] = out; 62 | } 63 | } 64 | 65 | /** 66 | * every thread output 4X4X4 block 67 | * the channel must be timed by 4 68 | */ 69 | kernel void BROU(PReLu3D)(device real *input [[buffer(0)]], 70 | device real *output [[buffer(1)]], 71 | device real *a [[buffer(2)]], 72 | constant TensorShape& shape [[buffer(3)]], 73 | ushort3 grid [[thread_position_in_grid]]) { 74 | int height = shape.dim0; 75 | int width = shape.dim1; 76 | int channel = shape.dim2; 77 | 78 | int y = grid.y << 2; 79 | int x = grid.x << 2; 80 | int z = grid.z << 2; 81 | 82 | if (y >= height || x >= width || z >= channel) { 83 | return; 84 | } 85 | 86 | real realA = a[0]; 87 | 88 | int maxJ = min(y + 4, height); 89 | int maxI = min(x + 4, width); 90 | 91 | for (int j = y; j < maxJ; ++j) { 92 | for (int i = x; i < maxI; ++i) { 93 | int offset = (j * width + i) * channel + z; 94 | 95 | real4 in = ((device real4*)(input + offset))[0]; 96 | real4 out; 97 | 98 | out.x = (in.x > 0) ? (in.x) : (realA * in.x); 99 | out.y = (in.y > 0) ? (in.y) : (realA * in.y); 100 | out.z = (in.z > 0) ? (in.z) : (realA * in.z); 101 | out.w = (in.w > 0) ? 
(in.w) : (realA * in.w); 102 | 103 | device real4 *outputV = (device real4*)(output + offset); 104 | 105 | outputV[0] = out; 106 | } 107 | } 108 | } 109 | #endif 110 | -------------------------------------------------------------------------------- /BrouhahaMetal/BrouhahaMetal/BrouMatrixMultiply.metal: -------------------------------------------------------------------------------- 1 | /** 2 | * Brouhaha 3 | * convolution.metal 4 | * Created by yanyuanchi on 2017/5/15. 5 | * Copyright © 2017年 yanyuanchi. All rights reserved. 6 | * 7 | * matrix multiply 8 | */ 9 | #if defined(real) && defined(real4) && defined(BROU) 10 | 11 | /** 12 | * A dimension is (shape.dim0, shape.dim1) 13 | * B dimesnion is (shape.dim1, shape.dim2) 14 | * C dimension is (shape.dim0, shape.dim2) 15 | * the shape.dim0 and shape.dim2 must be timed by 4 16 | * 17 | * the A is col-major 18 | * the B is row-major 19 | * the C is row-major 20 | */ 21 | kernel void BROU(MatrixMultiply)(device real *A [[buffer(0)]], 22 | device real *B [[buffer(1)]], 23 | device real *C [[buffer(2)]], 24 | constant TensorShape& shape [[buffer(3)]], 25 | ushort2 grid [[thread_position_in_grid]]) { 26 | int m = shape.dim0; 27 | int k = shape.dim1; 28 | int n = shape.dim2; 29 | 30 | int row = grid.y << 2; 31 | int col = grid.x << 2; 32 | 33 | if (row >= m || col >= n) { 34 | return; 35 | } 36 | 37 | device real4 *aV = (device real4*)(A + row); 38 | device real4 *bV = (device real4*)(B + col); 39 | 40 | real4 a, b; 41 | real4 c0 = 0, c1 = 0, c2 = 0, c3 = 0; 42 | 43 | int loopCount = k; 44 | 45 | do { 46 | a = aV[0]; 47 | b = bV[0]; 48 | 49 | c0 += a.x * b; 50 | c1 += a.y * b; 51 | c2 += a.z * b; 52 | c3 += a.w * b; 53 | 54 | aV = (device real4*)((device real*)aV + m); 55 | bV = (device real4*)((device real*)bV + n); 56 | } while(--loopCount); 57 | 58 | device real4 *cV = (device real4*)(C + row * n + col); 59 | 60 | cV[0] = c0; cV = (device real4*)((device real*)cV + n); 61 | cV[0] = c1; cV = (device real4*)((device real*)cV + n); 62 | cV[0] = c2; cV = (device real4*)((device real*)cV + n); 63 | cV[0] = c3; 64 | } 65 | 66 | /** 67 | * the output c will add bias 68 | * the bias dimension is (shape.dim2) 69 | */ 70 | kernel void BROU(MatrixMultiplyWithBias)(device real *A [[buffer(0)]], 71 | device real *B [[buffer(1)]], 72 | device real *C [[buffer(2)]], 73 | device real *bia [[buffer(3)]], 74 | constant TensorShape& shape [[buffer(4)]], 75 | ushort2 grid [[thread_position_in_grid]]) { 76 | int m = shape.dim0; 77 | int k = shape.dim1; 78 | int n = shape.dim2; 79 | 80 | int row = grid.y << 2; 81 | int col = grid.x << 2; 82 | 83 | if (row >= m || col >= n) { 84 | return; 85 | } 86 | 87 | device real4 *aV = (device real4*)(A + row); 88 | device real4 *bV = (device real4*)(B + col); 89 | 90 | real4 a, b; 91 | real4 c0 = 0, c1 = 0, c2 = 0, c3 = 0; 92 | 93 | int loopCount = k; 94 | 95 | do { 96 | a = aV[0]; 97 | b = bV[0]; 98 | 99 | c0 += a.x * b; 100 | c1 += a.y * b; 101 | c2 += a.z * b; 102 | c3 += a.w * b; 103 | 104 | aV = (device real4*)((device real*)aV + m); 105 | bV = (device real4*)((device real*)bV + n); 106 | } while(--loopCount); 107 | 108 | real4 biaV = ((device real4*)(bia + col))[0]; 109 | device real4 *cV = (device real4*)(C + row * n + col); 110 | 111 | cV[0] = c0 + biaV; cV = (device real4*)((device real*)cV + n); 112 | cV[0] = c1 + biaV; cV = (device real4*)((device real*)cV + n); 113 | cV[0] = c2 + biaV; cV = (device real4*)((device real*)cV + n); 114 | cV[0] = c3 + biaV; 115 | } 116 | 117 | #endif 118 | 119 | 120 | 121 | 122 | 123 | 
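/**
 * A minimal CPU reference for the MatrixMultiply kernel above (an illustrative sketch,
 * not part of the library; the function name is invented for this note). It reproduces
 * the layout the kernel assumes, so the GPU output can be checked on small matrices:
 *   A is (m, k), col-major -> A[i * m + row]
 *   B is (k, n), row-major -> B[i * n + col]
 *   C is (m, n), row-major -> C[row * n + col]
 * Keeping A col-major is what lets each step of the do-while loop above load four
 * consecutive rows of A as a single real4.
 *
 *   static void brou_matmul_reference(const float *A, const float *B, float *C,
 *                                     int m, int k, int n) {
 *       for (int row = 0; row < m; ++row) {
 *           for (int col = 0; col < n; ++col) {
 *               float sum = 0.0f;
 *               for (int i = 0; i < k; ++i) {
 *                   sum += A[i * m + row] * B[i * n + col];
 *               }
 *               C[row * n + col] = sum;
 *           }
 *       }
 *   }
 */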
124 | 125 | 126 | 127 | 128 | 129 | -------------------------------------------------------------------------------- /BrouhahaMetal/BrouhahaMetal/BrouFullconnect.metal: -------------------------------------------------------------------------------- 1 | /** 2 | * Brouhaha 3 | * convolution.metal 4 | * Created by yanyuanchi on 2017/5/15. 5 | * Copyright © 2017年 yanyuanchi. All rights reserved. 6 | * 7 | * the fullconnet operate 8 | */ 9 | #if defined(real) && defined(real4) && defined(BROU) 10 | 11 | /** 12 | * every thread will deal with 4 output 13 | * the input's dimesnion is (inputChannel, 1) 14 | * the output's dimension is (outputChannel, 1) 15 | * the weigths's dimesnion is (outputChannel, inputChannel) 16 | * the bias's dimension is (outputChannel, 1) 17 | * 18 | * inputchannel and outputchannel time by 4 19 | */ 20 | kernel void BROU(Fullconnect)(device real *input [[buffer(0)]], 21 | device real *weights [[buffer(1)]], 22 | device real *bia [[buffer(2)]], 23 | device real *output [[buffer(3)]], 24 | constant TensorShape& shape [[buffer(4)]], 25 | ushort grid [[thread_position_in_grid]]) { 26 | int inputChannel = shape.dim0; 27 | int outputChannel = shape.dim1; 28 | 29 | int index = grid << 2; 30 | 31 | if (index >= outputChannel) { 32 | return; 33 | } 34 | 35 | real4 out = 0; 36 | real4 in; 37 | 38 | device real4 *inputV = (device real4*)input; 39 | 40 | device real4 *offset0 = (device real4*)(weights + inputChannel * index); 41 | device real4 *offset1 = (device real4*)(((device real*)offset0) + inputChannel); 42 | device real4 *offset2 = (device real4*)(((device real*)offset1) + inputChannel); 43 | device real4 *offset3 = (device real4*)(((device real*)offset2) + inputChannel); 44 | 45 | int loop = inputChannel / 4; 46 | 47 | do { 48 | in = inputV[0]; 49 | 50 | out.x += dot(in, offset0[0]); 51 | out.y += dot(in, offset1[0]); 52 | out.z += dot(in, offset2[0]); 53 | out.w += dot(in, offset3[0]); 54 | 55 | inputV = (device real4*)(((device real*)inputV) + 4); 56 | 57 | offset0 = (device real4*)(((device real*)offset0) + 4); 58 | offset1 = (device real4*)(((device real*)offset1) + 4); 59 | offset2 = (device real4*)(((device real*)offset2) + 4); 60 | offset3 = (device real4*)(((device real*)offset3) + 4); 61 | } while(--loop); 62 | 63 | device real4 *outputV = (device real4*)(output + index); 64 | device real4 *biaV = (device real4*)(bia + index); 65 | 66 | outputV[0] = out + biaV[0]; 67 | } 68 | 69 | kernel void BROU(FullconnectWithoutBias)(device real *input [[buffer(0)]], 70 | device real *weights [[buffer(1)]], 71 | device real *output [[buffer(2)]], 72 | constant TensorShape& shape [[buffer(3)]], 73 | ushort grid [[thread_position_in_grid]]) { 74 | int inputChannel = shape.dim0; 75 | int outputChannel = shape.dim1; 76 | 77 | int index = grid << 2; 78 | 79 | if (index >= outputChannel) { 80 | return; 81 | } 82 | 83 | real4 out = 0; 84 | real4 in; 85 | 86 | device real4 *inputV = (device real4*)input; 87 | 88 | device real4 *offset0 = (device real4*)(weights + inputChannel * index); 89 | device real4 *offset1 = (device real4*)(((device real*)offset0) + inputChannel); 90 | device real4 *offset2 = (device real4*)(((device real*)offset1) + inputChannel); 91 | device real4 *offset3 = (device real4*)(((device real*)offset2) + inputChannel); 92 | 93 | int loop = inputChannel / 4; 94 | 95 | do { 96 | in = inputV[0]; 97 | 98 | out.x += dot(in, offset0[0]); 99 | out.y += dot(in, offset0[0]); 100 | out.z += dot(in, offset0[0]); 101 | out.w += dot(in, offset0[0]); 102 | 103 | inputV = (device 
real4*)(((device real*)inputV) + 4); 104 | 105 | offset0 = (device real4*)(((device real*)offset0) + 4); 106 | offset1 = (device real4*)(((device real*)offset1) + 4); 107 | offset2 = (device real4*)(((device real*)offset2) + 4); 108 | offset3 = (device real4*)(((device real*)offset3) + 4); 109 | } while(--loop); 110 | 111 | device real4 *outputV = (device real4*)(output + index); 112 | 113 | outputV[0] = out; 114 | } 115 | 116 | #endif 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /BrouhahaMetal/BrouhahaMetal/BrouSoftMax.metal: -------------------------------------------------------------------------------- 1 | #if defined(real) && defined(real4) && defined(BROU) 2 | 3 | /** 4 | for 1D softmax, a thread group contains 32 threads 5 | the output channel will be diviede into 32 blocks 6 | one thread output a block 7 | the input/output's dimension is (channel) 8 | intput: the input data 9 | output: the output data 10 | shape: 11 | shape.dim0 is smallest number that not less than channel and must be divided by 4 without remainder 12 | shape dim1 is channel of input/output 13 | 14 | creat 1 thread group and 32 threads in a group 15 | */ 16 | kernel void BROU(SoftMax1D)(device real *input [[buffer(0)]], 17 | device real *output [[buffer(1)]], 18 | constant TensorShape& shape [[buffer(2)]], 19 | ushort index [[thread_index_in_threadgroup]]) { 20 | int channel = shape.dim1; 21 | int blockSize = (channel + 31) / 32; 22 | 23 | int minI = index * blockSize; 24 | int maxI = min((index + 1) * blockSize, channel); 25 | 26 | threadgroup real sum = 0; 27 | threadgroup real sharedSum[32]; 28 | 29 | real curSum = 0; 30 | for (int i = minI; i < maxI; ++i) { 31 | real e = exp(input[i]); 32 | output[i] = e; 33 | 34 | curSum += e; 35 | } 36 | 37 | sharedSum[index] = curSum; 38 | 39 | threadgroup_barrier(mem_flags::mem_threadgroup); 40 | 41 | if (0 == index) { 42 | sum = (sharedSum[0] + sharedSum[1] + sharedSum[2] + sharedSum[3] + 43 | sharedSum[4] + sharedSum[5] + sharedSum[6] + sharedSum[7] + 44 | sharedSum[8] + sharedSum[9] + sharedSum[10] + sharedSum[11] + 45 | sharedSum[12] + sharedSum[13] + sharedSum[14] + sharedSum[15] + 46 | sharedSum[16] + sharedSum[17] + sharedSum[18] + sharedSum[19] + 47 | sharedSum[20] + sharedSum[21] + sharedSum[22] + sharedSum[23] + 48 | sharedSum[24] + sharedSum[25] + sharedSum[26] + sharedSum[27] + 49 | sharedSum[28] + sharedSum[29] + sharedSum[30] + sharedSum[31]); 50 | } 51 | 52 | threadgroup_barrier(mem_flags::mem_threadgroup); 53 | 54 | for (int i = minI; i < maxI; ++i) { 55 | output[i] /= sum; 56 | } 57 | } 58 | 59 | /** 60 | for 2D the input/output data dimesion is (height, width) 61 | shape.dim0 is height 62 | shape.dim1 is widthX4 63 | shape.dim2 is width 64 | 65 | a thread will handle with a dim 66 | */ 67 | kernel void BROU(SoftMax2D)(device real *input [[buffer(0)]], 68 | device real *output [[buffer(1)]], 69 | constant TensorShape& shape [[buffer(2)]], 70 | ushort index [[thread_position_in_grid]]) { 71 | int height = shape.dim0; 72 | int widthX4 = shape.dim1; 73 | int width = shape.dim2; 74 | 75 | if (index >= height) { 76 | return; 77 | } 78 | 79 | device real *inputPtr = input + index * widthX4; 80 | device real *outputPtr = output + index * widthX4; 81 | 82 | real sum = 0; 83 | for (int i = 0; i < width; ++i) { 84 | real e = exp(inputPtr[i]); 85 | outputPtr[i] = e; 86 | sum += e; 87 | } 88 | 89 | for (int i = 0; i < width; ++i) { 90 | outputPtr[i] 
/= sum; 91 | } 92 | } 93 | 94 | /** 95 | for 3D input/output data the dimension is (height, width, channel) 96 | the memory's dimension is (height, width, channelX4) 97 | 98 | shape.dim0 is height 99 | shape.dim1 is width 100 | shape.dim2 is channelX4 101 | shape.dim3 is channel 102 | 103 | a thread will handle with a dim 104 | */ 105 | kernel void BROU(SoftMax3D)(device real *input [[buffer(0)]], 106 | device real *output [[buffer(1)]], 107 | constant TensorShape& shape [[buffer(2)]], 108 | ushort2 grid [[thread_position_in_grid]]) { 109 | int height = shape.dim0; 110 | int width = shape.dim1; 111 | int channelX4 = shape.dim2; 112 | int channel = shape.dim3; 113 | 114 | int x = grid.x; 115 | int y = grid.y; 116 | 117 | if (x >= width || y >= height) { 118 | return; 119 | } 120 | 121 | device real *inputPtr = input + (y * width + x) * channelX4; 122 | device real *outputPtr = output + (y * width + x) * channelX4; 123 | 124 | real sum = 0; 125 | for (int i = 0; i < channel; ++i) { 126 | real e = exp(inputPtr[i]); 127 | outputPtr[i] = e; 128 | sum += e; 129 | } 130 | 131 | for (int i = 0; i < width; ++i) { 132 | outputPtr[i] /= sum; 133 | } 134 | } 135 | 136 | #endif 137 | 138 | 139 | 140 | -------------------------------------------------------------------------------- /BrouhahaMetal/BrouhahaMetal/BrouDilatedConvolutionMM.metal: -------------------------------------------------------------------------------- 1 | #if defined(real) && defined(real4) && defined(BROU) 2 | 3 | /** 4 | * the input's dimension is (inputHeight, intputWidth, intputChannelX4) 5 | * it will be convert to matrix that is (kernelHeight * kernelWidth * inputChannelX4, [outputHeight * outputWidth]X4) 6 | * the matrix will be col-major 7 | * 8 | * the convolutoin is not equal the real convolution in math 9 | * like input is (a, b, c) the kernel is (i, j, k) 10 | * the convolution in math is output = a*k + b*j + c*i 11 | * but in brouhaha the convolution will be output = a*i + b*j + c*k 12 | */ 13 | inline real4 BROU(GetDilatedConvolutionVector4FromInput)(device real *data, int height, int width, int channel, int y, int x, int z) { 14 | if (0 > y || 0 > x || 0 > z || y >= height || x >= width || z >= channel) { 15 | return 0; 16 | } 17 | 18 | device real4 *dataV = (device real4*)(data + (y * width + x) * channel + z); 19 | 20 | return dataV[0]; 21 | } 22 | 23 | kernel void BROU(DilatedConvolutionInput2Matrix)(device real *input [[buffer(0)]], 24 | device real *matrix [[buffer(1)]], 25 | constant TensorShape& inputShape [[buffer(2)]], 26 | constant TensorShape& outputShape [[buffer(3)]], 27 | constant ConvolutionShape& convolutionShape [[buffer(4)]], 28 | ushort grid [[thread_position_in_grid]]) { 29 | int outputHeight = outputShape.dim0; 30 | int outputWidth = outputShape.dim1; 31 | 32 | /**every thread handle 4 col output*/ 33 | int col = grid << 2; 34 | 35 | if (col >= outputHeight * outputWidth) { 36 | return; 37 | } 38 | 39 | int inputHeight = inputShape.dim0; 40 | int inputWidth = inputShape.dim1; 41 | int inputChannel = inputShape.dim2; 42 | 43 | int padTop = convolutionShape.padTop; 44 | int padLeft = convolutionShape.padLeft; 45 | 46 | int strideY = convolutionShape.strideY; 47 | int strideX = convolutionShape.strideX; 48 | 49 | int kernelHeight = convolutionShape.kernelHeight; 50 | int kernelWidth = convolutionShape.kernelWidth; 51 | 52 | int dilatedX = convolutionShape.dilatedX; 53 | int dilatedY = convolutionShape.dilatedY; 54 | 55 | int inputX0 = (col % outputWidth) * strideX - padLeft; 56 | int inputY0 = (col / 
outputWidth) * strideY - padTop; 57 | int inputX1 = ((col + 1) % outputWidth) * strideX - padLeft; 58 | int inputY1 = ((col + 1) / outputWidth) * strideY - padTop; 59 | int inputX2 = ((col + 2) % outputWidth) * strideX - padLeft; 60 | int inputY2 = ((col + 2) / outputWidth) * strideY - padTop; 61 | int inputX3 = ((col + 3) % outputWidth) * strideX - padLeft; 62 | int inputY3 = ((col + 3) / outputWidth) * strideY - padTop; 63 | 64 | int matrixCol = (outputHeight * outputWidth + 3) / 4 * 4; 65 | device real4 *matrixV = (device real4*)(matrix + col); 66 | 67 | real4 inputV0, inputV1, inputV2, inputV3; 68 | 69 | for (int y = 0; y < kernelHeight; ++y) { 70 | for (int x = 0; x < kernelWidth; ++x) { 71 | for (int c = 0; c < inputChannel; c += 4) { 72 | inputV0 = BROU(GetDilatedConvolutionVector4FromInput)(input,inputHeight,inputWidth,inputChannel,inputY0+y*dilatedY,inputX0+x*dilatedX,c); 73 | inputV1 = BROU(GetDilatedConvolutionVector4FromInput)(input,inputHeight,inputWidth,inputChannel,inputY1+y*dilatedY,inputX1+x*dilatedX,c); 74 | inputV2 = BROU(GetDilatedConvolutionVector4FromInput)(input,inputHeight,inputWidth,inputChannel,inputY2+y*dilatedY,inputX2+x*dilatedX,c); 75 | inputV3 = BROU(GetDilatedConvolutionVector4FromInput)(input,inputHeight,inputWidth,inputChannel,inputY3+y*dilatedY,inputX3+x*dilatedX,c); 76 | 77 | matrixV[0] = {inputV0.x, inputV1.x, inputV2.x, inputV3.x}; 78 | 79 | matrixV = (device real4*)((device real*)matrixV + matrixCol); 80 | matrixV[0] = {inputV0.y, inputV1.y, inputV2.y, inputV3.y}; 81 | 82 | matrixV = (device real4*)((device real*)matrixV + matrixCol); 83 | matrixV[0] = {inputV0.z, inputV1.z, inputV2.z, inputV3.z}; 84 | 85 | matrixV = (device real4*)((device real*)matrixV + matrixCol); 86 | matrixV[0] = {inputV0.w, inputV1.w, inputV2.w, inputV3.w}; 87 | } 88 | } 89 | } 90 | } 91 | 92 | #endif 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /BrouhahaMetal/BrouhahaMetal/BrouAveragePooling.metal: -------------------------------------------------------------------------------- 1 | #if defined(real) && defined(real4) && defined(BROU) 2 | 3 | /** 4 | * every thread deal with 4 output 5 | * the input dimesnion is (inputHeight, inputWidth, channel) 6 | * the ouput dimesnion is (outputHeight, outputWidth, channel) 7 | */ 8 | kernel void BROU(AveragePooling)(device real *input [[buffer(0)]], 9 | device real *output [[buffer(1)]], 10 | constant TensorShape& inputShape [[buffer(2)]], 11 | constant TensorShape& outputShape [[buffer(3)]], 12 | constant ConvolutionShape& convolutionShape [[buffer(4)]], 13 | ushort3 grid [[thread_position_in_grid]]) { 14 | int outputHeight = outputShape.dim0; 15 | int outputWidth = outputShape.dim1; 16 | int outputChannel = outputShape.dim2; 17 | 18 | int x = grid.x; 19 | int y = grid.y; 20 | int z = grid.z << 2; 21 | 22 | if (x >= outputWidth || y >= outputHeight || z >= outputChannel) { 23 | return; 24 | } 25 | 26 | int inputHeight = inputShape.dim0; 27 | int inputWidth = inputShape.dim1; 28 | int inputChannel = inputShape.dim2; 29 | 30 | int inputLeft = x * convolutionShape.strideX - convolutionShape.padLeft; 31 | int inputTop = y * convolutionShape.strideY - convolutionShape.padTop; 32 | 33 | int kernelHeight = convolutionShape.kernelHeight; 34 | int kernelWidth = convolutionShape.kernelWidth; 35 | 36 | int inputRight = inputLeft + kernelWidth; 37 | int inputBottom = inputTop + kernelHeight; 38 | 39 | inputTop = max(0, 
inputTop); 40 | inputLeft = max(0, inputLeft); 41 | 42 | inputBottom = min(inputHeight, inputBottom); 43 | inputRight = min(inputWidth, inputRight); 44 | 45 | real4 sum = 0; 46 | 47 | for (int inY = inputTop; inY < inputBottom; ++inY) { 48 | for (int inX = inputLeft; inX < inputRight; ++inX) { 49 | device real4 *inputV = (device real4*)(input + (inY * inputWidth + inX) * inputChannel + z); 50 | 51 | sum += inputV[0]; 52 | } 53 | } 54 | 55 | device real4 *outputV = (device real4*)(output + (y * outputWidth + x) * outputChannel + z); 56 | 57 | outputV[0] = static_cast(sum / (1.0 * kernelHeight * kernelWidth)); 58 | } 59 | 60 | kernel void BROU(AveragePoolingWithoutPad)(device real *input [[buffer(0)]], 61 | device real *output [[buffer(1)]], 62 | constant TensorShape& inputShape [[buffer(2)]], 63 | constant TensorShape& outputShape [[buffer(3)]], 64 | constant ConvolutionShape& convolutionShape [[buffer(4)]], 65 | ushort3 grid [[thread_position_in_grid]]) { 66 | int outputHeight = outputShape.dim0; 67 | int outputWidth = outputShape.dim1; 68 | int outputChannel = outputShape.dim2; 69 | 70 | int x = grid.x; 71 | int y = grid.y; 72 | int z = grid.z << 2; 73 | 74 | if (x >= outputWidth || y >= outputHeight || z >= outputChannel) { 75 | return; 76 | } 77 | 78 | int inputHeight = inputShape.dim0; 79 | int inputWidth = inputShape.dim1; 80 | int inputChannel = inputShape.dim2; 81 | 82 | int inputLeft = x * convolutionShape.strideX - convolutionShape.padLeft; 83 | int inputTop = y * convolutionShape.strideY - convolutionShape.padTop; 84 | 85 | int kernelHeight = convolutionShape.kernelHeight; 86 | int kernelWidth = convolutionShape.kernelWidth; 87 | 88 | int inputRight = inputLeft + kernelWidth; 89 | int inputBottom = inputTop + kernelHeight; 90 | 91 | inputTop = max(0, inputTop); 92 | inputLeft = max(0, inputLeft); 93 | 94 | inputBottom = min(inputHeight, inputBottom); 95 | inputRight = min(inputWidth, inputRight); 96 | 97 | real4 sum = 0; 98 | 99 | for (int inY = inputTop; inY < inputBottom; ++inY) { 100 | for (int inX = inputLeft; inX < inputRight; ++inX) { 101 | device real4 *inputV = (device real4*)(input + (inY * inputWidth + inX) * inputChannel + z); 102 | 103 | sum += inputV[0]; 104 | } 105 | } 106 | 107 | device real4 *outputV = (device real4*)(output + (y * outputWidth + x) * outputChannel + z); 108 | 109 | outputV[0] = static_cast(sum / (1.0 * (inputBottom - inputTop) * (inputRight - inputLeft))); 110 | } 111 | 112 | #endif 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /BrouhahaMetal/BrouhahaMetal/BrouDilatedConvolution.metal: -------------------------------------------------------------------------------- 1 | /** 2 | * handle the dialted convolution 3 | */ 4 | 5 | #if defined(real) && defined(real4) && defined(BROU) 6 | 7 | kernel void BROU(DilatedConvolution)(device real *input [[buffer(0)]], 8 | device real *kerne [[buffer(1)]], 9 | device real *bia [[buffer(2)]], 10 | device real *output [[buffer(3)]], 11 | constant TensorShape& inputShape [[buffer(4)]], 12 | constant TensorShape& outputShape [[buffer(5)]], 13 | constant ConvolutionShape& convolutionShape [[buffer(6)]], 14 | ushort3 grid [[thread_position_in_grid]]) { 15 | int outputHeight = outputShape.dim0; 16 | int outputWidth = outputShape.dim1; 17 | int outputChannel = outputShape.dim2; 18 | 19 | int x = grid.x << 2; 20 | int y = grid.y << 2; 21 | int z = grid.z << 2; 22 | 23 | if (x >= outputWidth || y >= outputHeight 
|| z >= outputChannel) { 24 | return; 25 | } 26 | 27 | int inputHeight = inputShape.dim0; 28 | int inputWidth = inputShape.dim1; 29 | int inputChannel = inputShape.dim2; 30 | 31 | int kernelHeight = convolutionShape.kernelHeight; 32 | int kernelWidth = convolutionShape.kernelWidth; 33 | 34 | int padTop = convolutionShape.padTop; 35 | int padLeft = convolutionShape.padLeft; 36 | 37 | int strideY = convolutionShape.strideY; 38 | int strideX = convolutionShape.strideX; 39 | 40 | int dilatedY = convolutionShape.dilatedY; 41 | int dilatedX = convolutionShape.dilatedX; 42 | 43 | int maxOutY = min(y + 4, outputHeight); 44 | int maxOutX = min(x + 4, outputWidth); 45 | 46 | real4 biasV = convolutionShape.haveBias ? ((device real4*)(bia + z))[0] : 0; 47 | 48 | for (int outY = y; outY < maxOutY; ++outY) { 49 | for (int outX = x; outX < maxOutX; ++outX) { 50 | real4 out = biasV; 51 | 52 | int inputTop = -padTop + outY * strideY; 53 | int inputLeft = -padLeft + outX * strideX; 54 | 55 | int inputBottom = inputTop + dilatedY * (kernelHeight - 1); 56 | int inputRight = inputLeft + dilatedX * (kernelWidth - 1); 57 | 58 | int kernelTop = (inputTop >= 0) ? 0 : -inputTop; 59 | int kenelLeft = (inputLeft >= 0) ? 0 : -inputLeft; 60 | 61 | inputTop = max(0, inputTop); 62 | inputLeft = max(0, inputLeft); 63 | 64 | inputBottom = min(inputHeight - 1, inputBottom); 65 | inputRight = min(inputWidth - 1, inputRight); 66 | 67 | for (int inY = inputTop, kernelY = kernelTop; inY <= inputBottom; inY += dilatedY, ++kernelY) { 68 | for (int inX = inputLeft, kernelX = kenelLeft; inX <= inputRight; inX += dilatedX, ++kernelX) { 69 | device real *inputOffset = input + (inY * inputWidth + inX) * inputChannel; 70 | 71 | device real *kernelOffset0 = kerne + (((z ) * kernelHeight + kernelY) * kernelWidth + kernelX) * inputChannel; 72 | device real *kernelOffset1 = kerne + (((z+1) * kernelHeight + kernelY) * kernelWidth + kernelX) * inputChannel; 73 | device real *kernelOffset2 = kerne + (((z+2) * kernelHeight + kernelY) * kernelWidth + kernelX) * inputChannel; 74 | device real *kernelOffset3 = kerne + (((z+3) * kernelHeight + kernelY) * kernelWidth + kernelX) * inputChannel; 75 | 76 | for (int c = 0; c < inputChannel; c += 4) { 77 | real4 inV = ((device real4*)(inputOffset))[0]; 78 | 79 | real4 kernelV0 = ((device real4*)(kernelOffset0))[0]; 80 | real4 kernelV1 = ((device real4*)(kernelOffset1))[0]; 81 | real4 kernelV2 = ((device real4*)(kernelOffset2))[0]; 82 | real4 kernelV3 = ((device real4*)(kernelOffset3))[0]; 83 | 84 | out.x += dot(inV, kernelV0); 85 | out.y += dot(inV, kernelV1); 86 | out.z += dot(inV, kernelV2); 87 | out.w += dot(inV, kernelV3); 88 | 89 | inputOffset += 4; 90 | 91 | kernelOffset0 += 4; 92 | kernelOffset1 += 4; 93 | kernelOffset2 += 4; 94 | kernelOffset3 += 4; 95 | } 96 | } 97 | } 98 | 99 | device real4 *outputV = (device real4*)(output + (outY * outputWidth + outX) * outputChannel + z); 100 | 101 | outputV[0] = out; 102 | } 103 | } 104 | } 105 | 106 | #endif 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Basic/BrouUniqueTensor.m: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU_METAL) && defined(BROU_OBJECT) 2 | 3 | /** 4 | the BrouTesnor a simple wrapper of memory 5 | */ 6 | @interface BROU_OBJECT(UniqueTensor)() { 7 | /**the MTL buffer, point to a memory*/ 8 | id _buffer; 9 | 10 | /**the 
dimension if the tensor*/ 11 | NSUInteger _dimension; 12 | 13 | /**store the dim of the tensor's dimension*/ 14 | NSArray *_dims; 15 | 16 | /**the length of the Tensor*/ 17 | NSUInteger _length; 18 | 19 | /** 20 | for the preformance of Metal, the dimension's shape will be changed 21 | like a 1D Tensor and the dim[0] is 15 22 | The Tensor will malloc 16-memory to store the data 23 | so the length of this Tensor is 15, but the lengthX4 is 16 24 | 25 | When init the Tensor, innermost dimension(d) will be normalizated to dX4 the dX4 is not less than d and must be divided by 4 without remainder 26 | like a Tensor's diemension is (3, 5, 15) 27 | It will malloc a memeory is (2, 5, 16) 28 | so the length is 3 * 5 * 15 29 | but the lengthX4 is 3 * 5 * 16 30 | */ 31 | NSUInteger _lengthX4; 32 | 33 | NSUInteger _bytesCount; 34 | } 35 | 36 | @end 37 | 38 | @implementation BROU_OBJECT(UniqueTensor) 39 | 40 | + (instancetype)initWithLength:(int)length device:(id)device { 41 | NSMutableArray *dims = [[NSMutableArray alloc] init]; 42 | [dims addObject:[NSNumber numberWithInt:length]]; 43 | 44 | return [[BROU_OBJECT(UniqueTensor) alloc] initWithDimsArray:dims device:device]; 45 | } 46 | 47 | + (instancetype)initWithHeight:(int)height width:(int)width device:(id)device { 48 | NSMutableArray *dims = [[NSMutableArray alloc] init]; 49 | [dims addObject:[NSNumber numberWithInt:height]]; 50 | [dims addObject:[NSNumber numberWithInt:width]]; 51 | 52 | return [[BROU_OBJECT(UniqueTensor) alloc] initWithDimsArray:dims device:device]; 53 | } 54 | 55 | + (instancetype)initWithHeight:(int)height width:(int)width channel:(int)channel device:(id)device { 56 | NSMutableArray *dims = [[NSMutableArray alloc] init]; 57 | [dims addObject:[NSNumber numberWithInt:height]]; 58 | [dims addObject:[NSNumber numberWithInt:width]]; 59 | [dims addObject:[NSNumber numberWithInt:channel]]; 60 | 61 | return [[BROU_OBJECT(UniqueTensor) alloc] initWithDimsArray:dims device:device]; 62 | } 63 | 64 | - (instancetype)initWithDimsArray:(NSMutableArray*)dimArray device:(id)device { 65 | NSAssert(dimArray.count > 0, @"the dimension of Tensor must be > 0"); 66 | 67 | self = [super init]; 68 | 69 | if (self) { 70 | /**get the dim*/ 71 | _dimension = dimArray.count; 72 | 73 | /**calcualte the length*/ 74 | _length = 1; 75 | 76 | for (int i = 0; i < _dimension; ++i) { 77 | NSAssert(dimArray[i].intValue > 0, @"the dimension of Tensor must be > 0"); 78 | 79 | _length *= dimArray[i].intValue; 80 | } 81 | 82 | int lastDim = [dimArray lastObject].intValue; 83 | int lastDimX4 = (lastDim + 3) / 4 * 4; 84 | 85 | _lengthX4 = _length / lastDim * lastDimX4; 86 | 87 | /**add to the last*/ 88 | [dimArray addObject:[[NSNumber alloc] initWithInt:lastDimX4]]; 89 | 90 | _dims = [[NSArray alloc] initWithArray:dimArray]; 91 | 92 | _bytesCount = _lengthX4 * sizeof(type); 93 | 94 | /**malloc the memory*/ 95 | if (@available(iOS 9.0, *)) { 96 | _buffer = [device newBufferWithLength:_bytesCount 97 | options:MTLResourceCPUCacheModeDefaultCache | MTLResourceStorageModeShared]; 98 | } else { 99 | _buffer = [device newBufferWithLength:_bytesCount 100 | options:MTLResourceCPUCacheModeDefaultCache]; 101 | } 102 | } 103 | 104 | return self; 105 | } 106 | 107 | - (NSUInteger)dimension { 108 | return _dimension; 109 | } 110 | 111 | - (int)height { 112 | return [self dim0]; 113 | } 114 | 115 | - (int)width { 116 | return [self dim1]; 117 | } 118 | 119 | - (int)channel { 120 | return [self dim2]; 121 | } 122 | 123 | - (int)dim0 { 124 | NSAssert(_dimension >= 1, @"the tensor has no 
dim0"); 125 | 126 | return [_dims objectAtIndex:0].intValue; 127 | } 128 | 129 | - (int)dim1 { 130 | NSAssert(_dimension >= 2, @"the tensor has no dim1"); 131 | 132 | return [_dims objectAtIndex:1].intValue; 133 | } 134 | 135 | - (int)dim2 { 136 | NSAssert(_dimension >= 3, @"the tensor has no dim2"); 137 | 138 | return [_dims objectAtIndex:2].intValue; 139 | } 140 | 141 | - (int)dim:(int)dim { 142 | NSAssert(_dimension > dim, @"the dimension is error"); 143 | 144 | return [_dims objectAtIndex:dim].intValue; 145 | } 146 | 147 | - (int)innermostDim { 148 | return [_dims objectAtIndex:_dimension - 1].intValue; 149 | } 150 | 151 | - (int)innermostDimX4 { 152 | return [_dims objectAtIndex:_dimension].intValue; 153 | } 154 | 155 | - (id)tensorBuffer { 156 | return _buffer; 157 | } 158 | 159 | @end 160 | 161 | #endif 162 | -------------------------------------------------------------------------------- /BrouhahaMetal/BrouhahaMetal/BrouConvolutionMM.metal: -------------------------------------------------------------------------------- 1 | /** 2 | * Brouhaha 3 | * convolution.metal 4 | * Created by yanyuanchi on 2017/5/15. 5 | * Copyright © 2017年 yanyuanchi. All rights reserved. 6 | * 7 | * the convolution opetator use matrix multiply 8 | */ 9 | 10 | /** 11 | * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 12 | * 13 | * this method is:convert the convolution to a matrix multiply, so it needs temp memeory to store the matrix. 14 | * but the memory limit of Metal is 256MB, it means it may be out of memory. 15 | * the temp matrix is (kernelHeight * kernelWidth * inputChannel, [outputHeight * outputWidth]X4) 16 | * like a input is (512, 512, 32) kernel is (3, 9, 9, 32) the output is (512, 512, 3) 17 | * the the memory of temp matrix of half is 512 * 512 * 9 * 9 * 32 * 2byte = 1296mb is much bigger than 256mb 18 | * so if the image is big, the BrouConvolutionLayer is a better choice 19 | * 20 | * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
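 *
 * a rough estimate of the temp matrix size (assuming half precision, 2 bytes per element):
 *     bytes = kernelHeight * kernelWidth * inputChannel * (outputHeight * outputWidth, rounded up to a multiple of 4) * 2
 * for the example above: 9 * 9 * 32 * 512 * 512 * 2 = 1,358,954,496 bytes, about 1296MB,
 * so this estimate should be compared with the 256MB limit before choosing this layer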
21 | */ 22 | 23 | #if defined(real) && defined(real4) && defined(BROU) 24 | 25 | /** 26 | * the input's dimension is (inputHeight, intputWidth, intputChannelX4) 27 | * it will be convert to matrix that is (kernelHeight * kernelWidth * inputChannelX4, [outputHeight * outputWidth]X4) 28 | * 29 | * the convolutoin is not equal the real convolution in math 30 | * like input is (a, b, c) the kernel is (i, j, k) 31 | * the convolution in math is output = a*k + b*j + c*i 32 | * but in brouhaha the convolution will be output = a*i + b*j + c*k 33 | */ 34 | inline real4 BROU(GetConvolutionVector4FromInput)(device real *data, int height, int width, int channel, int y, int x, int z) { 35 | if (0 > y || 0 > x || 0 > z || y >= height || x >= width || z >= channel) { 36 | return 0; 37 | } 38 | 39 | device real4 *dataV = (device real4*)(data + (y * width + x) * channel + z); 40 | 41 | return dataV[0]; 42 | } 43 | 44 | kernel void BROU(ConvolutionInput2Matrix)(device real *input [[buffer(0)]], 45 | device real *matrix [[buffer(1)]], 46 | constant TensorShape& inputShape [[buffer(2)]], 47 | constant TensorShape& outputShape [[buffer(3)]], 48 | constant ConvolutionShape& convolutionShape [[buffer(4)]], 49 | ushort grid [[thread_position_in_grid]]) { 50 | int outputHeight = outputShape.dim0; 51 | int outputWidth = outputShape.dim1; 52 | 53 | /**every thread handle 4 col output*/ 54 | int col = grid << 2; 55 | 56 | if (col >= outputHeight * outputWidth) { 57 | return; 58 | } 59 | 60 | int inputHeight = inputShape.dim0; 61 | int inputWidth = inputShape.dim1; 62 | int inputChannel = inputShape.dim2; 63 | 64 | int padTop = convolutionShape.padTop; 65 | int padLeft = convolutionShape.padLeft; 66 | 67 | int strideY = convolutionShape.strideY; 68 | int strideX = convolutionShape.strideX; 69 | 70 | int kernelHeight = convolutionShape.kernelHeight; 71 | int kernelWidth = convolutionShape.kernelWidth; 72 | 73 | int inputX0 = (col % outputWidth) * strideX - padLeft; 74 | int inputY0 = (col / outputWidth) * strideY - padTop; 75 | int inputX1 = ((col + 1) % outputWidth) * strideX - padLeft; 76 | int inputY1 = ((col + 1) / outputWidth) * strideY - padTop; 77 | int inputX2 = ((col + 2) % outputWidth) * strideX - padLeft; 78 | int inputY2 = ((col + 2) / outputWidth) * strideY - padTop; 79 | int inputX3 = ((col + 3) % outputWidth) * strideX - padLeft; 80 | int inputY3 = ((col + 3) / outputWidth) * strideY - padTop; 81 | 82 | int matrixCol = (outputHeight * outputWidth + 3) / 4 * 4; 83 | device real4 *matrixV = (device real4*)(matrix + col); 84 | 85 | real4 inputV0, inputV1, inputV2, inputV3; 86 | 87 | for (int y = 0; y < kernelHeight; ++y) { 88 | for (int x = 0; x < kernelWidth; ++x) { 89 | for (int c = 0; c < inputChannel; c += 4) { 90 | inputV0 = BROU(GetConvolutionVector4FromInput)(input, inputHeight, inputWidth, inputChannel, inputY0+y, inputX0+x, c); 91 | inputV1 = BROU(GetConvolutionVector4FromInput)(input, inputHeight, inputWidth, inputChannel, inputY1+y, inputX1+x, c); 92 | inputV2 = BROU(GetConvolutionVector4FromInput)(input, inputHeight, inputWidth, inputChannel, inputY2+y, inputX2+x, c); 93 | inputV3 = BROU(GetConvolutionVector4FromInput)(input, inputHeight, inputWidth, inputChannel, inputY3+y, inputX3+x, c); 94 | 95 | matrixV[0] = {inputV0.x, inputV1.x, inputV2.x, inputV3.x}; 96 | matrixV = (device real4*)((device real*)matrixV + matrixCol); 97 | 98 | matrixV[0] = {inputV0.y, inputV1.y, inputV2.y, inputV3.y}; 99 | matrixV = (device real4*)((device real*)matrixV + matrixCol); 100 | 101 | matrixV[0] = {inputV0.z, 
inputV1.z, inputV2.z, inputV3.z}; 102 | matrixV = (device real4*)((device real*)matrixV + matrixCol); 103 | 104 | matrixV[0] = {inputV0.w, inputV1.w, inputV2.w, inputV3.w}; 105 | matrixV = (device real4*)((device real*)matrixV + matrixCol); 106 | } 107 | } 108 | } 109 | } 110 | 111 | #endif 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/Generate/BrouReLuLayer.m: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU_METAL) && defined(BROU_OBJECT) 2 | 3 | @interface BROU_OBJECT(ReLuLayer)() { 4 | DimensionType _dimensionType; 5 | 6 | id _shape; 7 | 8 | NSString *_functionName; 9 | 10 | /**the Metal computePipelineState*/ 11 | id _computePipelineState; 12 | } 13 | 14 | @end 15 | 16 | @implementation BROU_OBJECT(ReLuLayer) 17 | 18 | - (instancetype)initWithDevice:(id)device 19 | library:(id)library 20 | dimensionType:(DimensionType)dimensionType { 21 | self = [super initWithName:@BROU_OBJECT_NAME(ReLuLayer)]; 22 | 23 | if (!self) { 24 | return self; 25 | } 26 | 27 | _dimensionType = dimensionType; 28 | 29 | if (@available(iOS 9.0, *)) { 30 | _shape = [device newBufferWithLength:sizeof(TensorShape) 31 | options:MTLResourceCPUCacheModeDefaultCache | MTLResourceStorageModeShared]; 32 | } else { 33 | _shape = [device newBufferWithLength:sizeof(TensorShape) 34 | options:MTLResourceCPUCacheModeDefaultCache]; 35 | } 36 | 37 | [self configComputePipelinesStateWithDevice:device library:library]; 38 | 39 | return self; 40 | } 41 | 42 | - (void)configComputePipelinesStateWithDevice:(id)device 43 | library:(id)library { 44 | if (Dimension1D == _dimensionType) { 45 | _functionName = @BROU_METAL(ReLu1D); 46 | } else if (Dimension2D == _dimensionType) { 47 | _functionName = @BROU_METAL(ReLu2D); 48 | } else if (Dimension3D == _dimensionType) { 49 | _functionName = @BROU_METAL(ReLu3D); 50 | } else { 51 | NSAssert(false, @"the dimension type is error"); 52 | } 53 | 54 | id function = [library newFunctionWithName:_functionName]; 55 | 56 | NSAssert(function, @"init %@ function:%@ error!", self.name, _functionName); 57 | 58 | /**get the function*/ 59 | NSError *error = nil; 60 | 61 | _computePipelineState = [device newComputePipelineStateWithFunction:function error:&error]; 62 | 63 | NSAssert(_computePipelineState, @"init %@ ComputePipelineState error:%@", self.name, error); 64 | } 65 | 66 | - (void)checkParamsWithInput:(id)input 67 | output:(id)output { 68 | if (Dimension1D == _dimensionType) { 69 | NSAssert(input.dim0 == output.dim0, 70 | @"the input dim must be equal to output"); 71 | NSAssert(input.dim0 > 0, 72 | @"the input and output dimension must be > 0"); 73 | } else if (Dimension2D == _dimensionType) { 74 | NSAssert( input.dim0 == output.dim0 75 | && input.dim1 == output.dim1, 76 | @"the input dim must be equal to output"); 77 | NSAssert( input.dim0 > 0 78 | && input.dim1 > 0, 79 | @"the input and output dimension must be > 0"); 80 | } else if (Dimension3D == _dimensionType) { 81 | NSAssert( input.dim0 == output.dim0 82 | && input.dim1 == output.dim1 83 | && input.dim2 == output.dim2, 84 | @"the input dim must be equal to output"); 85 | NSAssert( input.dim0 > 0 86 | && input.dim1 > 0 87 | && input.dim2 > 0, 88 | @"the input and output dimension must be > 0"); 89 | } else { 90 | NSAssert(false, @"the dimension type is error"); 91 | } 92 | } 93 | 94 | - 
(void)computeCommandBuffer:(id)commandBuffer 95 | input:(id)input 96 | output:(id)output { 97 | [self checkParamsWithInput:input output:output]; 98 | 99 | TensorShape *shapeRef = (TensorShape*)_shape.contents; 100 | MTLSize group = MTLSizeMake(1, 1, 1); 101 | MTLSize grid = MTLSizeMake(1, 1, 1); 102 | 103 | if (Dimension1D == _dimensionType) { 104 | shapeRef->dim0 = input.innermostDimX4; 105 | 106 | group = MTLSizeMake(32, 1, 1); 107 | grid = MTLSizeMake((shapeRef->dim0 + 32 * 4 - 1) / (32 * 4), 108 | 1, 109 | 1); 110 | } else if (Dimension2D == _dimensionType) { 111 | shapeRef->dim0 = input.dim0; 112 | shapeRef->dim1 = input.innermostDimX4; 113 | 114 | group = MTLSizeMake(8, 4, 1); 115 | grid = MTLSizeMake((shapeRef->dim1 + 31) / 32, 116 | (shapeRef->dim0 + 15) / 16, 117 | 1); 118 | } else if (Dimension3D == _dimensionType) { 119 | shapeRef->dim0 = input.dim0; 120 | shapeRef->dim1 = input.dim1; 121 | shapeRef->dim2 = input.innermostDimX4; 122 | 123 | group = MTLSizeMake(8, 4, 1); 124 | grid = MTLSizeMake((shapeRef->dim1 + 31) / 32, 125 | (shapeRef->dim0 + 15) / 16, 126 | (shapeRef->dim2 / 4)); 127 | } else { 128 | NSAssert(false, @"The input/output dimension is error"); 129 | } 130 | 131 | id encoder = [commandBuffer computeCommandEncoder]; 132 | [encoder setComputePipelineState:_computePipelineState]; 133 | [encoder setBuffer:input.tensorBuffer offset:0 atIndex:0]; 134 | [encoder setBuffer:output.tensorBuffer offset:0 atIndex:1]; 135 | [encoder setBuffer:_shape offset:0 atIndex:2]; 136 | 137 | [encoder dispatchThreadgroups:grid threadsPerThreadgroup:group]; 138 | [encoder endEncoding]; 139 | } 140 | 141 | @end 142 | 143 | #endif 144 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/Generate/BrouTanHLayer.m: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU_METAL) && defined(BROU_OBJECT) 2 | 3 | @interface BROU_OBJECT(TanHLayer)() { 4 | DimensionType _dimensionType; 5 | 6 | id _shape; 7 | 8 | NSString *_functionName; 9 | 10 | /**the Metal computePipelineState*/ 11 | id _computePipelineState; 12 | } 13 | 14 | @end 15 | 16 | @implementation BROU_OBJECT(TanHLayer) 17 | 18 | - (instancetype)initWithDevice:(id)device 19 | library:(id)library 20 | dimensionType:(DimensionType)dimensionType { 21 | self = [super initWithName:@BROU_OBJECT_NAME(TanHLayer)]; 22 | 23 | if (!self) { 24 | return self; 25 | } 26 | 27 | _dimensionType = dimensionType; 28 | 29 | if (@available(iOS 9.0, *)) { 30 | _shape = [device newBufferWithLength:sizeof(TensorShape) 31 | options:MTLResourceCPUCacheModeDefaultCache | MTLResourceStorageModeShared]; 32 | } else { 33 | _shape = [device newBufferWithLength:sizeof(TensorShape) 34 | options:MTLResourceCPUCacheModeDefaultCache]; 35 | } 36 | 37 | [self configComputePipelinesStateWithDevice:device library:library]; 38 | 39 | return self; 40 | } 41 | 42 | - (void)configComputePipelinesStateWithDevice:(id)device 43 | library:(id)library { 44 | if (Dimension1D == _dimensionType) { 45 | _functionName = @BROU_METAL(TanH1D); 46 | } else if (Dimension2D == _dimensionType) { 47 | _functionName = @BROU_METAL(TanH2D); 48 | } else if (Dimension3D == _dimensionType) { 49 | _functionName = @BROU_METAL(TanH3D); 50 | } else { 51 | NSAssert(false, @"the dimension type is error"); 52 | } 53 | 54 | id function = [library newFunctionWithName:_functionName]; 55 | 56 | NSAssert(function, @"init %@ function:%@ error!", self.name, _functionName); 57 | 58 | 
/**get the function*/ 59 | NSError *error = nil; 60 | 61 | _computePipelineState = [device newComputePipelineStateWithFunction:function error:&error]; 62 | 63 | NSAssert(_computePipelineState, @"init %@ ComputePipelineState error:%@", self.name, error); 64 | } 65 | 66 | - (void)checkParamsWithInput:(id)input 67 | output:(id)output { 68 | if (Dimension1D == _dimensionType) { 69 | NSAssert(input.dim0 == output.dim0, 70 | @"the input dim must be equal to output"); 71 | NSAssert(input.dim0 > 0, 72 | @"the input and output dimension must be > 0"); 73 | } else if (Dimension2D == _dimensionType) { 74 | NSAssert( input.dim0 == output.dim0 75 | && input.dim1 == output.dim1, 76 | @"the input dim must be equal to output"); 77 | NSAssert( input.dim0 > 0 78 | && input.dim1 > 0, 79 | @"the input and output dimension must be > 0"); 80 | } else if (Dimension3D == _dimensionType) { 81 | NSAssert( input.dim0 == output.dim0 82 | && input.dim1 == output.dim1 83 | && input.dim2 == output.dim2, 84 | @"the input dim must be equal to output"); 85 | NSAssert( input.dim0 > 0 86 | && input.dim1 > 0 87 | && input.dim2 > 0, 88 | @"the input and output dimension must be > 0"); 89 | } else { 90 | NSAssert(false, @"the dimension type is error"); 91 | } 92 | } 93 | 94 | - (void)computeCommandBuffer:(id)commandBuffer 95 | input:(id)input 96 | output:(id)output { 97 | [self checkParamsWithInput:input output:output]; 98 | 99 | TensorShape *shapeRef = (TensorShape*)_shape.contents; 100 | MTLSize group = MTLSizeMake(1, 1, 1); 101 | MTLSize grid = MTLSizeMake(1, 1, 1); 102 | 103 | if (Dimension1D == _dimensionType) { 104 | shapeRef->dim0 = input.innermostDimX4; 105 | 106 | group = MTLSizeMake(32, 1, 1); 107 | grid = MTLSizeMake((shapeRef->dim0 + 32 * 4 - 1) / (32 * 4), 108 | 1, 109 | 1); 110 | } else if (Dimension2D == _dimensionType) { 111 | shapeRef->dim0 = input.dim0; 112 | shapeRef->dim1 = input.innermostDimX4; 113 | 114 | group = MTLSizeMake(8, 4, 1); 115 | grid = MTLSizeMake((shapeRef->dim1 + 31) / 32, 116 | (shapeRef->dim0 + 15) / 16, 117 | 1); 118 | } else if (Dimension3D == _dimensionType) { 119 | shapeRef->dim0 = input.dim0; 120 | shapeRef->dim1 = input.dim1; 121 | shapeRef->dim2 = input.innermostDimX4; 122 | 123 | group = MTLSizeMake(8, 4, 1); 124 | grid = MTLSizeMake((shapeRef->dim1 + 31) / 32, 125 | (shapeRef->dim0 + 15) / 16, 126 | (shapeRef->dim2 / 4)); 127 | } else { 128 | NSAssert(false, @"The input/output dimension is error"); 129 | } 130 | 131 | id encoder = [commandBuffer computeCommandEncoder]; 132 | [encoder setComputePipelineState:_computePipelineState]; 133 | [encoder setBuffer:input.tensorBuffer offset:0 atIndex:0]; 134 | [encoder setBuffer:output.tensorBuffer offset:0 atIndex:1]; 135 | [encoder setBuffer:_shape offset:0 atIndex:2]; 136 | 137 | [encoder dispatchThreadgroups:grid threadsPerThreadgroup:group]; 138 | [encoder endEncoding]; 139 | } 140 | 141 | @end 142 | 143 | #endif 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/Generate/BrouConvertLayer.m: -------------------------------------------------------------------------------- 1 | #if defined(from) && defined(to) && defined(BROU_CONVERT_OBJECT) && defined(BROU_CONVERT_METAL) 2 | 3 | @interface BROU_CONVERT_OBJECT(from, to)() { 4 | DimensionType _dimensionType; 5 | 6 | id _shape; 7 | 8 | NSString *_functionName; 9 | 10 | id _computePipelineState; 11 | } 12 | 13 | @end 14 | 15 | @implementation BROU_CONVERT_OBJECT(from, 
to) 16 | 17 | - (instancetype)initWithDevice:(id)device 18 | library:(id)library 19 | dimensionType:(DimensionType)dimensionType { 20 | self = [super initWithName:@BROU_CONVERT_OBJECT_NAME(from, to)]; 21 | 22 | if (!self) { 23 | return self; 24 | } 25 | 26 | _dimensionType = dimensionType; 27 | 28 | if (@available(iOS 9.0, *)) { 29 | _shape = [device newBufferWithLength:sizeof(TensorShape) 30 | options:MTLResourceCPUCacheModeDefaultCache | MTLResourceStorageModeShared]; 31 | } else { 32 | _shape = [device newBufferWithLength:sizeof(TensorShape) 33 | options:MTLResourceCPUCacheModeDefaultCache]; 34 | } 35 | 36 | [self configComputePipelinesStateWithDevice:device library:library]; 37 | 38 | return self; 39 | } 40 | 41 | - (void)configComputePipelinesStateWithDevice:(id)device 42 | library:(id)library { 43 | if (Dimension1D == _dimensionType) { 44 | _functionName = @BROU_CONVERT_METAL(from, to, 1D); 45 | } else if (Dimension2D == _dimensionType) { 46 | _functionName = @BROU_CONVERT_METAL(from, to, 2D); 47 | } else if (Dimension3D == _dimensionType) { 48 | _functionName = @BROU_CONVERT_METAL(from, to, 3D); 49 | } else { 50 | NSAssert(false, @"the dimension is error"); 51 | } 52 | 53 | id function = [library newFunctionWithName:_functionName]; 54 | 55 | NSAssert(function, @"init %@ function:%@ error!", self.name, _functionName); 56 | 57 | /**get the function*/ 58 | NSError *error = nil; 59 | 60 | _computePipelineState = [device newComputePipelineStateWithFunction:function error:&error]; 61 | 62 | NSAssert(_computePipelineState, @"init %@ ComputePipelineState error:%@", self.name, error); 63 | } 64 | 65 | - (void)checkParamsWithInput:(id)input 66 | output:(id)output { 67 | if (Dimension1D == _dimensionType) { 68 | NSAssert(input.dim0 == output.dim0, 69 | @"the input dim must be equal to output"); 70 | NSAssert(input.dim0 > 0, 71 | @"the input and output dimension must be > 0"); 72 | } else if (Dimension2D == _dimensionType) { 73 | NSAssert( input.dim0 == output.dim0 74 | && input.dim1 == output.dim1, 75 | @"the input dim must be equal to output"); 76 | NSAssert( input.dim0 > 0 77 | && input.dim1 > 0, 78 | @"the input and output dimension must be > 0"); 79 | } else if (Dimension3D == _dimensionType) { 80 | NSAssert( input.dim0 == output.dim0 81 | && input.dim1 == output.dim1 82 | && input.dim2 == output.dim2, 83 | @"the input dim must be equal to output"); 84 | NSAssert( input.dim0 > 0 85 | && input.dim1 > 0 86 | && input.dim2 > 0, 87 | @"the input and output dimension must be > 0"); 88 | } else { 89 | NSAssert(false, @"the dimension type is error"); 90 | } 91 | } 92 | 93 | - (void)computeCommandBuffer:(id)commandBuffer 94 | input:(id)input 95 | output:(id)output { 96 | [self checkParamsWithInput:input output:output]; 97 | 98 | TensorShape *shapeRef = (TensorShape*)_shape.contents; 99 | MTLSize group = MTLSizeMake(1, 1, 1); 100 | MTLSize grid = MTLSizeMake(1, 1, 1); 101 | 102 | if (Dimension1D == _dimensionType) { 103 | shapeRef->dim0 = input.innermostDimX4; 104 | 105 | group = MTLSizeMake(32, 1, 1); 106 | grid = MTLSizeMake((shapeRef->dim0 + 32 * 4 - 1) / (32 * 4), 107 | 1, 108 | 1); 109 | } else if (Dimension2D == _dimensionType) { 110 | shapeRef->dim0 = input.dim0; 111 | shapeRef->dim1 = input.innermostDimX4; 112 | 113 | group = MTLSizeMake(8, 4, 1); 114 | grid = MTLSizeMake((shapeRef->dim1 + 31) / 32, 115 | (shapeRef->dim0 + 15) / 16, 116 | 1); 117 | } else if (Dimension3D == _dimensionType) { 118 | shapeRef->dim0 = input.dim0; 119 | shapeRef->dim1 = input.dim1; 120 | shapeRef->dim2 = 
input.innermostDimX4; 121 | 122 | group = MTLSizeMake(8, 4, 1); 123 | grid = MTLSizeMake((shapeRef->dim1 + 31) / 32, 124 | (shapeRef->dim0 + 15) / 16, 125 | (shapeRef->dim2 / 4)); 126 | } else { 127 | NSAssert(false, @"The input/output dimension is error"); 128 | } 129 | 130 | id encoder = [commandBuffer computeCommandEncoder]; 131 | [encoder setComputePipelineState:_computePipelineState]; 132 | 133 | [encoder setBuffer:input.tensorBuffer offset:0 atIndex:0]; 134 | [encoder setBuffer:output.tensorBuffer offset:0 atIndex:1]; 135 | [encoder setBuffer:_shape offset:0 atIndex:2]; 136 | 137 | [encoder dispatchThreadgroups:grid threadsPerThreadgroup:group]; 138 | [encoder endEncoding]; 139 | } 140 | 141 | @end 142 | 143 | #endif 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | -------------------------------------------------------------------------------- /read.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import os 4 | import numpy as np 5 | import tensorflow as tf 6 | import transform 7 | 8 | # 打开tensorflow的可视化工具 9 | # tensorboard --logdir "/Users/yanyuanchi/code/python/readtffile/see" 10 | 11 | # 参数存储路径 12 | params_path = "/Users/yanyuanchi/code/python/readtffile/params/" 13 | 14 | def save_np_array(array, path): 15 | print "参数:", path 16 | print "数据shape:", np.shape(array) 17 | print "-------------------------------------------------------------------" 18 | 19 | f = file(path, "wb") 20 | 21 | array.flatten().astype("float32").tofile(f) 22 | 23 | f.close() 24 | 25 | # tf 的卷积的权值存储为(h, w, input_channel, output_channel) 26 | # 需要转换成(output_channel, h, w, input_channel) 27 | def save_tf_conv_np_array(array, path): 28 | array = np.moveaxis(array, -1, 0) 29 | save_np_array(array, path) 30 | 31 | # tf 的卷积的权值存储为(h, w, output_channel, input_channel) 32 | # 需要转换成(output_channel, h, w, input_channel) 33 | def save_tf_tranpose_conv_np_array(array, path): 34 | array = np.moveaxis(array, 2, 0) 35 | 36 | outChannel, h, w, inChannel = np.shape(array) 37 | 38 | for out in range(outChannel): 39 | temp = array[out].copy() 40 | 41 | for y in range(h): 42 | for x in range(w): 43 | in1 = temp[h - y - 1][w - x - 1] 44 | out1 = array[out][y][x] 45 | 46 | for l in range(inChannel): 47 | out1[l] = in1[l] 48 | 49 | save_np_array(array, path) 50 | 51 | image_height = 228 52 | image_width = 228 53 | 54 | checkpoint_dir = "/Users/yanyuanchi/code/python/readtffile/model/la_muse.ckpt" 55 | 56 | g = tf.Graph() 57 | # allow_soft_placement=True : 如果你指定的设备不存在,允许TF自动分配设备 58 | soft_config = tf.ConfigProto(allow_soft_placement=True) 59 | soft_config.gpu_options.allow_growth = True 60 | 61 | with g.as_default(), g.device("/cpu:0"), tf.Session(config=soft_config) as sess: 62 | batch_shape = (1, image_height, image_width, 3) 63 | img_placeholder = tf.placeholder(tf.float32, shape=batch_shape, name='img_placeholder') 64 | preds = transform.net(img_placeholder) 65 | 66 | saver = tf.train.Saver() 67 | saver.restore(sess, checkpoint_dir) 68 | 69 | variables = tf.trainable_variables() 70 | # variables = tf.model_variables() 71 | 72 | # for var in variables: 73 | # print var.name 74 | # print variables 75 | 76 | # for var in variables: 77 | # name = var.name 78 | # realValue = var._variable.eval() 79 | # 80 | # print name 81 | # print np.shape(realValue) 82 | # print "-------------------------------------" 83 | 84 | 85 | # 这个项目对应的结构是 86 | 87 | # 3个卷积 88 | # 1:卷积(weight),batchnormalization(alpha, beta) 3个变量 89 | # 2:卷积(weight),batchnormalization(alpha, beta) 3个变量 90 | # 
3: convolution(weight), batchnormalization(alpha, beta) 3 variables 91 | 92 | # 5 res layers; their bn variables are stored in the order beta, alpha 93 | # 4: res layer convolution(weights), batchnormalization(alpha,beta), convolution(weights), batchnormalization(alpha,beta) 6 variables 94 | # 5: res layer convolution(weights), batchnormalization(alpha,beta), convolution(weights), batchnormalization(alpha,beta) 6 variables 95 | # 6: res layer convolution(weights), batchnormalization(alpha,beta), convolution(weights), batchnormalization(alpha,beta) 6 variables 96 | # 7: res layer convolution(weights), batchnormalization(alpha,beta), convolution(weights), batchnormalization(alpha,beta) 6 variables 97 | # 8: res layer convolution(weights), batchnormalization(alpha,beta), convolution(weights), batchnormalization(alpha,beta) 6 variables 98 | 99 | # two transposed convolutions 100 | # 9: transposed convolution(weights), batchnormalization(alpha, beta) 3 variables 101 | # 10: transposed convolution(weights), batchnormalization(alpha, beta) 3 variables 102 | 103 | # convolution 104 | # 11: convolution(weights), batchnormalization(alpha, beta) 3 variables 105 | 106 | i = 0 107 | for var in variables: 108 | name = var.name 109 | realValue = var._variable.eval() 110 | 111 | if i < 9: 112 | name = "conv" + str(i / 3 + 1) 113 | if 0 == i % 3: 114 | save_tf_conv_np_array(realValue, params_path + name + "_weight") 115 | elif 1 == i % 3: 116 | save_np_array(realValue, params_path + name + "_beta") 117 | else: 118 | save_np_array(realValue, params_path + name + "_alpha") 119 | elif i < 39: 120 | # res layers 121 | j = i - 9 122 | name = "res" + str(j / 6 + 1) 123 | if j % 6 < 3: 124 | name += "_conv1" 125 | else: 126 | name += "_conv2" 127 | 128 | z = (j % 6) 129 | 130 | if 0 == z % 3: 131 | save_tf_conv_np_array(realValue, params_path + name + "_weight") 132 | elif 1 == z % 3: 133 | save_np_array(realValue, params_path + name + "_beta") 134 | else: 135 | save_np_array(realValue, params_path + name + "_alpha") 136 | elif i < 45: 137 | j = i - 39 138 | 139 | name = "transpose_conv" + str(j / 3 + 1) 140 | 141 | z = j % 6 142 | 143 | if 0 == z % 3: 144 | # (h, w, outputchannel, inputchannel) 145 | # to (outputchannel, h, w, inputchannel) 146 | save_tf_tranpose_conv_np_array(realValue, params_path + name + "_weight") 147 | elif 1 == z % 3: 148 | save_np_array(realValue, params_path + name + "_beta") 149 | else: 150 | save_np_array(realValue, params_path + name + "_alpha") 151 | elif i < 48: 152 | j = i - 45 153 | name = "conv4" 154 | 155 | if 0 == j % 3: 156 | save_tf_conv_np_array(realValue, params_path + name + "_weight") 157 | elif 1 == j % 3: 158 | save_np_array(realValue, params_path + name + "_beta") 159 | else: 160 | save_np_array(realValue, params_path + name + "_alpha") 161 | else: 162 | raise Exception("error") 163 | 164 | i += 1 165 | 166 | 167 | 168 | 169 | -------------------------------------------------------------------------------- /BrouhahaMetal/BrouhahaMetal/BrouTransposedConvolution.metal: -------------------------------------------------------------------------------- 1 | /** 2 | * BrouTransposedConvolution.metal 3 | * BrouhahaMetal 4 | * 5 | * Created by yanyuanchi on 2017/7/30. 6 | * Copyright © 2017 yanyuanchi. All rights reserved. 
7 | */ 8 | 9 | #if defined(real) && defined(real4) && defined(BROU) 10 | 11 | /** 12 | * ref: A guide to convolution arithmetic for deep learning 13 | * 14 | * a convolution with pad, stride, kernel, input, and output then has a corresponding transposed convolution 15 | * whose kernel is applied to the original convolution's output ((stride - 1) zeros are inserted between the output units) 16 | * and pad' = kernel - 1 - pad, stride' = 1, kernel' = kernel 17 | */ 18 | 19 | /** 20 | * for the input, output, and kernel dimensions, the innermost dimension must be a multiple of 4 21 | */ 22 | kernel void BROU(TransposedConvolution)(device real *input [[buffer(0)]], 23 | device real *kerne [[buffer(1)]], 24 | device real *bia [[buffer(2)]], 25 | device real *output [[buffer(3)]], 26 | constant TensorShape& inputShape [[buffer(4)]], 27 | constant TensorShape& outputShape [[buffer(5)]], 28 | constant ConvolutionShape& convolutionShape [[buffer(6)]], 29 | ushort3 grid [[thread_position_in_grid]]) { 30 | int outputHeight = outputShape.dim0; 31 | int outputWidth = outputShape.dim1; 32 | int outputChannel = outputShape.dim2; 33 | 34 | int x = grid.x << 2; 35 | int y = grid.y << 2; 36 | int z = grid.z << 2; 37 | 38 | if (x >= outputWidth || y >= outputHeight || z >= outputChannel) { 39 | return; 40 | } 41 | 42 | int inputHeight = inputShape.dim0; 43 | int inputWidth = inputShape.dim1; 44 | int inputChannel = inputShape.dim2; 45 | 46 | int kernelHeight = convolutionShape.kernelHeight; 47 | int kernelWidth = convolutionShape.kernelWidth; 48 | 49 | int padTop = convolutionShape.padTop; 50 | int padLeft = convolutionShape.padLeft; 51 | 52 | int insertY = convolutionShape.insertY; 53 | int insertX = convolutionShape.insertX; 54 | 55 | int insertYAdd1 = insertY + 1; 56 | int insertXAdd1 = insertX + 1; 57 | 58 | int fakeInputHeight = inputHeight + (inputHeight - 1) * insertY; 59 | int fakeInputWidth = inputWidth + (inputWidth - 1) * insertX; 60 | 61 | int maxOutY = min(y + 4, outputHeight); 62 | int maxOutX = min(x + 4, outputWidth); 63 | 64 | /**load the bias if there is one*/ 65 | real4 biasV = convolutionShape.haveBias ? ((device real4*)(bia + z))[0] : 0; 66 | 67 | for (int outY = y; outY < maxOutY; ++outY) { 68 | for (int outX = x; outX < maxOutX; ++outX) { 69 | /**accumulator for the output*/ 70 | real4 out = biasV; 71 | 72 | int inputTop = -padTop + outY; 73 | int inputLeft = -padLeft + outX; 74 | 75 | int inputBottom = min(inputTop + kernelHeight, fakeInputHeight); 76 | int inputRight = min(inputLeft + kernelWidth, fakeInputWidth); 77 | 78 | int realInputTop = (0 > inputTop) ? 0 : ((inputTop + insertY) / insertYAdd1 * insertYAdd1); 79 | int realInputLeft = (0 > inputLeft) ? 
0 : ((inputLeft + insertX) / insertXAdd1 * insertXAdd1); 80 | 81 | int kernelTop = realInputTop - inputTop; 82 | int kernelLeft = realInputLeft - inputLeft; 83 | 84 | for (int inY = realInputTop, kernelY = kernelTop; inY < inputBottom; inY += insertYAdd1, kernelY += insertYAdd1) { 85 | for (int inX = realInputLeft, kernelX = kernelLeft; inX < inputRight; inX += insertXAdd1, kernelX += insertXAdd1) { 86 | int realInY = inY / insertYAdd1; 87 | int realInX = inX / insertXAdd1; 88 | 89 | device real *inOffset = input + (realInY * inputWidth + realInX) * inputChannel; 90 | 91 | device real *kernelOffset0 = kerne + ((z * kernelHeight + kernelY) * kernelWidth + kernelX) * inputChannel; 92 | device real *kernelOffset1 = kerne + (((z + 1) * kernelHeight + kernelY) * kernelWidth + kernelX) * inputChannel; 93 | device real *kernelOffset2 = kerne + (((z + 2) * kernelHeight + kernelY) * kernelWidth + kernelX) * inputChannel; 94 | device real *kernelOffset3 = kerne + (((z + 3) * kernelHeight + kernelY) * kernelWidth + kernelX) * inputChannel; 95 | 96 | for (int c = 0; c < inputChannel; c += 4) { 97 | real4 inV = ((device real4*)(inOffset))[0]; 98 | 99 | real4 kernelV0 = ((device real4*)(kernelOffset0))[0]; 100 | real4 kernelV1 = ((device real4*)(kernelOffset1))[0]; 101 | real4 kernelV2 = ((device real4*)(kernelOffset2))[0]; 102 | real4 kernelV3 = ((device real4*)(kernelOffset3))[0]; 103 | 104 | out.x += dot(inV, kernelV0); 105 | out.y += dot(inV, kernelV1); 106 | out.z += dot(inV, kernelV2); 107 | out.w += dot(inV, kernelV3); 108 | 109 | inOffset += 4; 110 | 111 | kernelOffset0 += 4; 112 | kernelOffset1 += 4; 113 | kernelOffset2 += 4; 114 | kernelOffset3 += 4; 115 | } 116 | } 117 | } 118 | 119 | device real4 *outputV = (device real4*)(output + (outY * outputWidth + outX) * outputChannel + z); 120 | 121 | outputV[0] = out; 122 | } 123 | } 124 | } 125 | 126 | #endif 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Utils/BrouConvertType.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "BrouConvertType.h" 4 | 5 | uint32_t convertmantissa(uint32_t i) { 6 | uint32_t m = i << 13; 7 | uint32_t e = 0; 8 | 9 | while (!(m & 0x00800000)) { 10 | e -= 0x00800000; 11 | m <<= 1; 12 | } 13 | 14 | m &= ~(0x00800000); 15 | e += 0x38800000; 16 | 17 | return m | e; 18 | } 19 | 20 | /** 21 | * convert the float16 to float32 22 | * ref:Fast Half Float Conversions Jeroen van der Zijp November 2008 (Revised September 2010) 23 | */ 24 | void convertFloat16ToFloat32(uint16_t *half, uint32_t *single, int length) { 25 | uint32_t exponenttable[64]; 26 | uint32_t offsettable[64]; 27 | uint32_t mantissatable[2048]; 28 | 29 | exponenttable[0] = 0; 30 | exponenttable[32] = 0x80000000; 31 | exponenttable[31] = 0x47800000; 32 | exponenttable[63] = 0xC7800000; 33 | 34 | for (uint32_t i = 1; i <= 30; ++i) { 35 | exponenttable[i] = i << 23; 36 | } 37 | 38 | for (uint32_t i = 33; i <= 62; ++i) { 39 | exponenttable[i] = 0x80000000 + ((i - 32) << 23); 40 | } 41 | 42 | for (uint32_t i = 0; i < 64; ++i) { 43 | offsettable[i] = 1024; 44 | } 45 | 46 | offsettable[0] = 0; 47 | offsettable[32] = 0; 48 | 49 | mantissatable[0] = 0; 50 | 51 | for (int i = 1; i < 1024; ++i) { 52 | mantissatable[i] = convertmantissa(i); 53 | } 54 | 55 | for (int i = 1024; i < 2048; ++i) { 56 | mantissatable[i] = 0x38000000 + ((i - 1024) << 13); 57 | } 58 | 59 | int limit = length - 3; 60 
| int i = 0; 61 | for (; i < limit; i += 4) { 62 | single[i] = mantissatable[offsettable[half[i] >> 10] + (half[i] & 0x3ff)] + exponenttable[half[i] >> 10]; 63 | single[i+1] = mantissatable[offsettable[half[i+1] >> 10] + (half[i+1] & 0x3ff)] + exponenttable[half[i+1] >> 10]; 64 | single[i+2] = mantissatable[offsettable[half[i+2] >> 10] + (half[i+2] & 0x3ff)] + exponenttable[half[i+2] >> 10]; 65 | single[i+3] = mantissatable[offsettable[half[i+3] >> 10] + (half[i+3] & 0x3ff)] + exponenttable[half[i+3] >> 10]; 66 | } 67 | 68 | for (; i < length; ++i) { 69 | single[i] = mantissatable[offsettable[half[i] >> 10] + (half[i] & 0x3ff)] + exponenttable[half[i] >> 10]; 70 | } 71 | } 72 | 73 | void initFloat32ToFloat16Table(uint16_t *basetable, int16_t *shifttable) { 74 | int e; 75 | for (uint32_t i = 0; i < 256; ++i) { 76 | e = i - 127; 77 | if(e < -24) { 78 | basetable[i|0x000]=0x0000; 79 | basetable[i|0x100]=0x8000; 80 | shifttable[i|0x000]=24; 81 | shifttable[i|0x100]=24; 82 | } else if(e < -14) { 83 | basetable[i|0x000]=(0x0400>>(-e-14)); 84 | basetable[i|0x100]=(0x0400>>(-e-14)) | 0x8000; 85 | shifttable[i|0x000]=-e-1; 86 | shifttable[i|0x100]=-e-1; 87 | } else if(e <= 15) { 88 | basetable[i|0x000]=((e+15)<<10); 89 | basetable[i|0x100]=((e+15)<<10) | 0x8000; 90 | shifttable[i|0x000]=13; 91 | shifttable[i|0x100]=13; 92 | } else if(e < 128) { 93 | basetable[i|0x000]=0x7C00; 94 | basetable[i|0x100]=0xFC00; 95 | shifttable[i|0x000]=24; 96 | shifttable[i|0x100]=24; 97 | } else { 98 | basetable[i|0x000]=0x7C00; 99 | basetable[i|0x100]=0xFC00; 100 | shifttable[i|0x000]=13; 101 | shifttable[i|0x100]=13; 102 | } 103 | } 104 | } 105 | 106 | void convertFloat32ToFloat16WithTable(uint32_t *single, uint16_t *half, int length, 107 | uint16_t *basetable, int16_t *shifttable) { 108 | int limit = length - 3; 109 | int i = 0; 110 | 111 | for (; i < limit; i += 4) { 112 | half[i] = basetable[(single[i] >> 23)&0x1ff] + ((single[i] & 0x007fffff) >> shifttable[(single[i] >> 23) & 0x1ff]); 113 | half[i+1] = basetable[(single[i+1]>>23)&0x1ff] + ((single[i+1] & 0x007fffff) >> shifttable[(single[i+1] >> 23) & 0x1ff]); 114 | half[i+2] = basetable[(single[i+2]>>23)&0x1ff] + ((single[i+2] & 0x007fffff) >> shifttable[(single[i+2] >> 23) & 0x1ff]); 115 | half[i+3] = basetable[(single[i+3]>>23)&0x1ff] + ((single[i+3] & 0x007fffff) >> shifttable[(single[i+3] >> 23) & 0x1ff]); 116 | } 117 | 118 | for (; i < length; ++i) { 119 | half[i] = basetable[(single[i] >> 23) & 0x1ff] + ((single[i] & 0x007fffff) >> shifttable[(single[i] >> 23) & 0x1ff]); 120 | } 121 | } 122 | 123 | /** 124 | * convert float32 to float16 125 | */ 126 | void convertFloat32ToFloat16(uint32_t *single, uint16_t *half, int length) { 127 | uint16_t basetable[512]; 128 | int16_t shifttable[512]; 129 | 130 | initFloat32ToFloat16Table(basetable, shifttable); 131 | convertFloat32ToFloat16WithTable(single, half, length, basetable, shifttable); 132 | } 133 | 134 | void convertFloat32ToFloat16Two(uint32_t *s1, uint16_t *h1, int l1, uint32_t *s2, uint16_t *h2, int l2) { 135 | uint16_t basetable[512]; 136 | int16_t shifttable[512]; 137 | 138 | initFloat32ToFloat16Table(basetable, shifttable); 139 | convertFloat32ToFloat16WithTable(s1, h1, l1, basetable, shifttable); 140 | convertFloat32ToFloat16WithTable(s2, h2, l2, basetable, shifttable); 141 | } 142 | 143 | uint16_t convertFloat32ToFloat16OneNumber(uint32_t *single) { 144 | uint32_t f = single[0]; 145 | 146 | int index = (f >> 23) & 0x1ff; 147 | int i = index & 0xff; 148 | int e = i -127; 149 | 150 | uint16_t base; 151 
| uint16_t shift; 152 | if (e < -24) { 153 | base = ((index >> 8) & 1) ? 0x8000 : 0x0000; 154 | shift = 24; 155 | } else if (e < -14) { 156 | base = ((index >> 8) & 1) ? ((0x0400>>(-e-14)) | 0x8000) : (0x0400>>(-e-14)); 157 | shift = -e - 1; 158 | } else if (e <= 15) { 159 | base = ((index >> 8) & 1) ? (((e+15)<<10) | 0x8000) : ((e+15)<<10); 160 | shift = 13; 161 | } else if (e < 128) { 162 | base = ((index >> 8) & 1) ? 0xFC00 : 0x7C00; 163 | shift = 24; 164 | } else { 165 | base = ((index >> 8) & 1) ? 0xFC00 : 0x7C00; 166 | shift = 13; 167 | } 168 | 169 | return base + ((f & 0x007fffff) >> shift); 170 | } 171 | -------------------------------------------------------------------------------- /Brouhaha/Brouhaha/Layer/Generate/BrouPReLuLayer.m: -------------------------------------------------------------------------------- 1 | #if defined(type) && defined(real) && defined(BROU_METAL) && defined(BROU_OBJECT) 2 | 3 | @interface BROU_OBJECT(PReLuLayer)() { 4 | type _typeA; 5 | float _floatA; 6 | 7 | DimensionType _dimensionType; 8 | 9 | id _aBuffer; 10 | id _shape; 11 | 12 | NSString *_functionName; 13 | 14 | id _computePipelineState; 15 | } 16 | 17 | @end 18 | 19 | @implementation BROU_OBJECT(PReLuLayer) 20 | 21 | - (instancetype)initWithDevice:(id)device 22 | library:(id)library 23 | a:(float)a 24 | dimensionType:(DimensionType)dimensionType { 25 | self = [super initWithName:@BROU_OBJECT_NAME(PReLuLayer)]; 26 | 27 | if (!self) { 28 | return self; 29 | } 30 | _dimensionType = dimensionType; 31 | if (@available(iOS 9.0, *)) { 32 | _shape = [device newBufferWithLength:sizeof(TensorShape) 33 | options:MTLResourceCPUCacheModeDefaultCache | MTLResourceStorageModeShared]; 34 | 35 | _aBuffer = [device newBufferWithLength:sizeof(type) 36 | options:MTLResourceCPUCacheModeDefaultCache | MTLResourceStorageModeShared]; 37 | } else { 38 | _shape = [device newBufferWithLength:sizeof(TensorShape) 39 | options:MTLResourceCPUCacheModeDefaultCache]; 40 | 41 | _aBuffer = [device newBufferWithLength:sizeof(type) 42 | options:MTLResourceCPUCacheModeDefaultCache]; 43 | } 44 | 45 | _floatA = a; 46 | 47 | #if defined(real_is_half) 48 | _typeA = convertFloat32ToFloat16OneNumber((uint32_t*)(&a)); 49 | #elif defined(real_is_float) 50 | _typeA = a; 51 | #endif 52 | 53 | *((type *)(_aBuffer.contents)) = _typeA; 54 | 55 | [self configComputePipelinesStateWithDevice:device library:library]; 56 | 57 | return self; 58 | } 59 | 60 | - (void)configComputePipelinesStateWithDevice:(id)device 61 | library:(id)library { 62 | if (Dimension1D == _dimensionType) { 63 | _functionName = @BROU_METAL(PReLu1D); 64 | } else if (Dimension2D == _dimensionType) { 65 | _functionName = @BROU_METAL(PReLu2D); 66 | } else if (Dimension3D == _dimensionType) { 67 | _functionName = @BROU_METAL(PReLu3D); 68 | } else { 69 | NSAssert(false, @"the dimension type is error"); 70 | } 71 | 72 | id function = [library newFunctionWithName:_functionName]; 73 | 74 | NSAssert(function, @"init %@ function:%@ error!", self.name, _functionName); 75 | 76 | /**get the function*/ 77 | NSError *error = nil; 78 | 79 | _computePipelineState = [device newComputePipelineStateWithFunction:function error:&error]; 80 | 81 | NSAssert(_computePipelineState, @"init %@ ComputePipelineState error:%@", self.name, error); 82 | } 83 | 84 | - (void)checkParamsWithInput:(id)input 85 | output:(id)output { 86 | if (Dimension1D == _dimensionType) { 87 | NSAssert(input.dim0 == output.dim0, 88 | @"the input dim must be equal to output"); 89 | NSAssert(input.dim0 > 0, 90 | @"the input and output dimension must be > 0"); 91 
| } else if (Dimension2D == _dimensionType) { 92 | NSAssert( input.dim0 == output.dim0 93 | && input.dim1 == output.dim1, 94 | @"the input dim must be equal to output"); 95 | NSAssert( input.dim0 > 0 96 | && input.dim1 > 0, 97 | @"the input and output dimension must be > 0"); 98 | } else if (Dimension3D == _dimensionType) { 99 | NSAssert( input.dim0 == output.dim0 100 | && input.dim1 == output.dim1 101 | && input.dim2 == output.dim2, 102 | @"the input dim must be equal to output"); 103 | NSAssert( input.dim0 > 0 104 | && input.dim1 > 0 105 | && input.dim2 > 0, 106 | @"the input and output dimension must be > 0"); 107 | } else { 108 | NSAssert(false, @"the dimension type is error"); 109 | } 110 | } 111 | 112 | - (void)computeCommandBuffer:(id)commandBuffer 113 | input:(id)input 114 | output:(id)output { 115 | [self checkParamsWithInput:input output:output]; 116 | 117 | TensorShape *shapeRef = (TensorShape*)_shape.contents; 118 | MTLSize group = MTLSizeMake(1, 1, 1); 119 | MTLSize grid = MTLSizeMake(1, 1, 1); 120 | 121 | if (Dimension1D == _dimensionType) { 122 | shapeRef->dim0 = input.innermostDimX4; 123 | 124 | group = MTLSizeMake(32, 1, 1); 125 | grid = MTLSizeMake((shapeRef->dim0 + 32 * 4 - 1) / (32 * 4), 126 | 1, 127 | 1); 128 | } else if (Dimension2D == _dimensionType) { 129 | shapeRef->dim0 = input.dim0; 130 | shapeRef->dim1 = input.innermostDimX4; 131 | 132 | group = MTLSizeMake(8, 4, 1); 133 | grid = MTLSizeMake((shapeRef->dim1 + 31) / 32, 134 | (shapeRef->dim0 + 15) / 16, 135 | 1); 136 | } else if (Dimension3D == _dimensionType) { 137 | shapeRef->dim0 = input.dim0; 138 | shapeRef->dim1 = input.dim1; 139 | shapeRef->dim2 = input.innermostDimX4; 140 | 141 | group = MTLSizeMake(8, 4, 1); 142 | grid = MTLSizeMake((shapeRef->dim1 + 31) / 32, 143 | (shapeRef->dim0 + 15) / 16, 144 | (shapeRef->dim2 / 4)); 145 | } else { 146 | NSAssert(false, @"The input/output dimension is error"); 147 | } 148 | 149 | id encoder = [commandBuffer computeCommandEncoder]; 150 | [encoder setComputePipelineState:_computePipelineState]; 151 | [encoder setBuffer:input.tensorBuffer offset:0 atIndex:0]; 152 | [encoder setBuffer:output.tensorBuffer offset:0 atIndex:1]; 153 | [encoder setBuffer:_aBuffer offset:0 atIndex:2]; 154 | [encoder setBuffer:_shape offset:0 atIndex:3]; 155 | 156 | [encoder dispatchThreadgroups:grid threadsPerThreadgroup:group]; 157 | [encoder endEncoding]; 158 | } 159 | 160 | @end 161 | 162 | #endif 163 | -------------------------------------------------------------------------------- /BrouhahaDemo/BrouhahaDemo/Art transform/BrouResidualLayer_float.m: -------------------------------------------------------------------------------- 1 | #import "BrouResidualLayer_float.h" 2 | 3 | @interface BrouResidualLayer_float() { 4 | BrouConvolutionMMLayer_float *_conv1; 5 | BrouBatchNormalizationLayer_float *_batchNorm1; 6 | BrouReLuLayer_float *_relu1; 7 | 8 | BrouConvolutionMMLayer_float *_conv2; 9 | BrouBatchNormalizationLayer_float *_batchNorm2; 10 | 11 | BrouAddLayer_float *_add; 12 | 13 | int _channel; 14 | int _channelX4; 15 | 16 | id _buffer1; 17 | id _buffer2; 18 | } 19 | 20 | @end 21 | 22 | @implementation BrouResidualLayer_float 23 | 24 | - (instancetype)initWithDevice:(id)device 25 | library:(id)library 26 | floatWeight1:(void*)floatWeight1 27 | floatWeight2:(void*)floatWeight2 28 | floatAlpha1:(void*)floatAlpha1 29 | floatBeta1:(void*)floatBeta1 30 | floatAlpha2:(void*)floatAlpha2 31 | floatBeta2:(void*)floatBeta2 32 | channel:(int)channel { 33 | self = [super 
initWithName:@"BrouResidualLayer_float"]; 34 | 35 | if (!self) { 36 | return self; 37 | } 38 | 39 | _channel = channel; 40 | _channelX4 = (_channel + 3) / 4 * 4; 41 | 42 | _conv1 = [[BrouConvolutionMMLayer_float alloc] initWithDevice:device 43 | library:library 44 | floatKernel:floatWeight1 45 | floatBias:nil 46 | inputChannel:_channel 47 | outputChannel:_channel 48 | kernelHeight:3 49 | kernelWidth:3 50 | padTop:1 51 | padLeft:1 52 | strideY:1 53 | strideX:1]; 54 | 55 | _batchNorm1 = [[BrouBatchNormalizationLayer_float alloc] initWithDevice:device 56 | library:library 57 | epsilon:0.001 58 | floatAlpha:floatAlpha1 59 | floatBeta:floatBeta1 60 | channel:_channel]; 61 | 62 | _relu1 = [[BrouReLuLayer_float alloc] initWithDevice:device library:library dimensionType:Dimension3D]; 63 | 64 | _conv2 = [[BrouConvolutionMMLayer_float alloc] initWithDevice:device 65 | library:library 66 | floatKernel:floatWeight2 67 | floatBias:nil 68 | inputChannel:_channel 69 | outputChannel:_channel 70 | kernelHeight:3 71 | kernelWidth:3 72 | padTop:1 73 | padLeft:1 74 | strideY:1 75 | strideX:1]; 76 | 77 | _batchNorm2 = [[BrouBatchNormalizationLayer_float alloc] initWithDevice:device 78 | library:library 79 | epsilon:0.001 80 | floatAlpha:floatAlpha2 81 | floatBeta:floatBeta2 82 | channel:_channel]; 83 | 84 | _add = [[BrouAddLayer_float alloc] initWithDevice:device library:library dimensionType:Dimension3D]; 85 | 86 | return self; 87 | } 88 | 89 | - (void)checkParamsWithInput:(id)input 90 | output:(id)output { 91 | NSAssert(input.dim0 == output.dim0 && input.dim0 > 0 && 92 | input.dim1 == output.dim1 && input.dim1 > 0 && 93 | input.dim2 == output.dim2 && input.dim2 > 0, 94 | @"the shape is error!"); 95 | 96 | NSAssert(input.innermostDimX4 == _channelX4, @"the shape is error!"); 97 | } 98 | 99 | - (void)computeCommandBuffer:(id)commandBuffer 100 | input:(id)input 101 | output:(id)output { 102 | [self checkParamsWithInput:input output:output]; 103 | 104 | NSUInteger length = sizeof(float) * input.dim0 * input.dim1 * input.innermostDimX4; 105 | 106 | if (!_buffer1 || _buffer1.tensorBuffer.length < length) { 107 | _buffer1 = [BrouUniqueTensor_float initWithHeight:input.dim0 108 | width:input.dim1 109 | channel:input.innermostDimX4 110 | device:commandBuffer.device]; 111 | } 112 | 113 | if (!_buffer2 || _buffer2.tensorBuffer.length < length) { 114 | _buffer2 = [BrouUniqueTensor_float initWithHeight:input.dim0 115 | width:input.dim1 116 | channel:input.innermostDimX4 117 | device:commandBuffer.device]; 118 | } 119 | 120 | [_conv1 computeCommandBuffer:commandBuffer input:input output:_buffer1]; 121 | [_batchNorm1 computeCommandBuffer:commandBuffer input:_buffer1 output:_buffer2]; 122 | [_relu1 computeCommandBuffer:commandBuffer input:_buffer2 output:_buffer1]; 123 | [_conv2 computeCommandBuffer:commandBuffer input:_buffer1 output:_buffer2]; 124 | [_batchNorm2 computeCommandBuffer:commandBuffer input:_buffer2 output:_buffer1]; 125 | [_add computeCommandBuffer:commandBuffer input1:_buffer1 input2:input output:output]; 126 | } 127 | 128 | @end 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | --------------------------------------------------------------------------------