├── LICENSE ├── MatMul ├── MAC_128x32_8_bias_32 │ ├── MAC_128x32_8.vhd │ ├── MAC_128x32_8_reg.vhd │ ├── MAC_bias_128x32_8.vhd │ ├── MAC_bias_128x32_8_reg.vhd │ ├── bias_sum_128_32.vhd │ └── mux_32to1_nbit.vhd ├── MAC_32x16_bias_32 │ ├── MAC_32x16_8.vhd │ ├── MAC_32x16_8_reg.vhd │ ├── MAC_bias_32x16_8.vhd │ ├── MAC_bias_32x16_8_reg.vhd │ ├── bias_sum_32_32.vhd │ └── mux_16to1_nbit.vhd ├── MAC_32x32_8_bias_32 │ ├── MAC_32x32_8.vhd │ ├── MAC_32x32_8_reg.vhd │ ├── MAC_bias_32x32_8.vhd │ ├── MAC_bias_32x32_8_reg.vhd │ ├── bias_sum_32_32.vhd │ └── mux_32to1_nbit.vhd ├── MAC_32x8_8_bias_32 │ ├── MAC_32x8_8.vhd │ ├── MAC_32x8_8_reg.vhd │ ├── MAC_bias_32x8_8.vhd │ ├── MAC_bias_32x8_8_reg.vhd │ ├── bias_sum_32_32.vhd │ └── mux_8to1_nbit.vhd ├── MAC_64x16_8_bias_32 │ ├── MAC_64x16_8.vhd │ ├── MAC_64x16_8_reg.vhd │ ├── MAC_bias_64x16_8.vhd │ ├── MAC_bias_64x16_8_reg.vhd │ ├── bias_sum_64_32.vhd │ └── mux_16to1_nbit.vhd ├── MAC_64x32_8_bias_32 │ ├── MAC_64x32_8.vhd │ ├── MAC_64x32_8_reg.vhd │ ├── MAC_bias_64x32_8.vhd │ ├── MAC_bias_64x32_8_reg.vhd │ ├── bias_sum_64_32.vhd │ └── mux_32to1_nbit.vhd ├── MAC_64x64_8_bias_32 │ ├── MAC_64x64_8.vhd │ ├── MAC_64x64_8_reg.vhd │ ├── MAC_bias_64x64_8.vhd │ ├── MAC_bias_64x64_8_reg.vhd │ ├── bias_sum_64_32.vhd │ └── mux_64to1_nbit.vhd ├── MAC_8x768_8_bias_32 │ ├── MAC_8x768_8.vhd │ ├── MAC_8x768_8_reg.vhd │ ├── MAC_bias_8x768_8.vhd │ ├── MAC_bias_8x768_8_reg.vhd │ ├── bias_sum_8_32.vhd │ └── mux_768to1_nbit.vhd ├── bias │ ├── MAC_bias_base_8x8_8.vhd │ └── bias_sum_16_32.vhd ├── common │ ├── MAC.vhd │ ├── clk_gen.vhd │ ├── ff.vhd │ ├── mux_2to1_nbit_base.vhd │ ├── mux_4to1_nbit_base.vhd │ └── regnbit.vhd ├── mux_768to1_nbit.vhd └── readme.txt ├── NonLinear ├── I_ERF.vhd ├── I_EXP.vhd ├── I_GELU.vhd ├── I_POLY.vhd ├── I_POLY_exp.vhd ├── I_SOFTMAX.vhd ├── I_SQRT.vhd ├── I_SQRT_2.vhd ├── I_SQRT_test.vhd ├── LayerNorm.vhd ├── REQUANTIZATION.vhd ├── ff.vhd └── regnbit.vhd └── README.md /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Alberto Marchisio 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /MatMul/MAC_128x32_8_bias_32/bias_sum_128_32.vhd: --------------------------------------------------------------------------------

-- bias_sum_128_32
-- Adds one shared 32-bit bias word (b_col) to each of 128 32-bit rows.
-- Only concurrent assignments are used: there is no clock or reset.

LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

ENTITY bias_sum_128_32 IS
    PORT (
        -- Row operands; the architecture reinterprets them as SIGNED.
        input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7, input_row_8, input_row_9,
        input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15, input_row_16, input_row_17, input_row_18, input_row_19,
        input_row_20, input_row_21, input_row_22, input_row_23, input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29,
        input_row_30, input_row_31, input_row_32, input_row_33, input_row_34, input_row_35, input_row_36, input_row_37, input_row_38, input_row_39,
        input_row_40, input_row_41, input_row_42, input_row_43, input_row_44, input_row_45, input_row_46, input_row_47, input_row_48, input_row_49,
        input_row_50, input_row_51, input_row_52, input_row_53, input_row_54, input_row_55, input_row_56, input_row_57, input_row_58, input_row_59,
        input_row_60, input_row_61, input_row_62, input_row_63, input_row_64, input_row_65, input_row_66, input_row_67, input_row_68, input_row_69,
        input_row_70, input_row_71, input_row_72, input_row_73, input_row_74, input_row_75, input_row_76, input_row_77, input_row_78, input_row_79,
        input_row_80, input_row_81, input_row_82, input_row_83, input_row_84, input_row_85, input_row_86, input_row_87, input_row_88, input_row_89,
        input_row_90, input_row_91, input_row_92, input_row_93, input_row_94, input_row_95, input_row_96, input_row_97, input_row_98, input_row_99,
        input_row_100, input_row_101, input_row_102, input_row_103, input_row_104, input_row_105, input_row_106, input_row_107, input_row_108, input_row_109,
        input_row_110, input_row_111, input_row_112, input_row_113, input_row_114, input_row_115, input_row_116, input_row_117, input_row_118, input_row_119,
        input_row_120, input_row_121, input_row_122, input_row_123, input_row_124, input_row_125, input_row_126, input_row_127 : IN STD_LOGIC_VECTOR(31 downto 0);
        -- Bias word added to every row.
        b_col : IN STD_LOGIC_VECTOR(31 downto 0);
        -- output_row_k = input_row_k + b_col.
        output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7, output_row_8, output_row_9,
        output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15, output_row_16, output_row_17, output_row_18, output_row_19,
        output_row_20, output_row_21, output_row_22, output_row_23, output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29,
        output_row_30, output_row_31, output_row_32, output_row_33, output_row_34, output_row_35, output_row_36, output_row_37, output_row_38, output_row_39,
        output_row_40, output_row_41, output_row_42, output_row_43, output_row_44, output_row_45, output_row_46, output_row_47, output_row_48, output_row_49,
        output_row_50, output_row_51, output_row_52, output_row_53, output_row_54, output_row_55, output_row_56, output_row_57, output_row_58, output_row_59,
        output_row_60, output_row_61, output_row_62, output_row_63, output_row_64, output_row_65, output_row_66, output_row_67, output_row_68, output_row_69,
        output_row_70, output_row_71, output_row_72, output_row_73, output_row_74, output_row_75, output_row_76, output_row_77, output_row_78, output_row_79,
        output_row_80, output_row_81, output_row_82, output_row_83, output_row_84, output_row_85, output_row_86, output_row_87, output_row_88, output_row_89,
        output_row_90, output_row_91, output_row_92, output_row_93, output_row_94, output_row_95, output_row_96, output_row_97, output_row_98, output_row_99,
        output_row_100, output_row_101, output_row_102, output_row_103, output_row_104, output_row_105, output_row_106, output_row_107, output_row_108, output_row_109,
        output_row_110, output_row_111, output_row_112, output_row_113, output_row_114, output_row_115, output_row_116, output_row_117, output_row_118, output_row_119,
        output_row_120, output_row_121, output_row_122, output_row_123, output_row_124, output_row_125, output_row_126, output_row_127 : OUT STD_LOGIC_VECTOR(31 downto 0)
    );
END bias_sum_128_32;

ARCHITECTURE behaviour OF bias_sum_128_32 IS

    -- Signed sum of one row and the bias word. With two 32-bit operands the
    -- numeric_std "+" result is also 32 bits wide, so any carry out is dropped.
    FUNCTION add_bias(row, bias : STD_LOGIC_VECTOR) RETURN STD_LOGIC_VECTOR IS
    BEGIN
        RETURN STD_LOGIC_VECTOR(SIGNED(row) + SIGNED(bias));
    END add_bias;

BEGIN

    output_row_0   <= add_bias(input_row_0, b_col);
    output_row_1   <= add_bias(input_row_1, b_col);
    output_row_2   <= add_bias(input_row_2, b_col);
    output_row_3   <= add_bias(input_row_3, b_col);
    output_row_4   <= add_bias(input_row_4, b_col);
    output_row_5   <= add_bias(input_row_5, b_col);
    output_row_6   <= add_bias(input_row_6, b_col);
    output_row_7   <= add_bias(input_row_7, b_col);
    output_row_8   <= add_bias(input_row_8, b_col);
    output_row_9   <= add_bias(input_row_9, b_col);
    output_row_10  <= add_bias(input_row_10, b_col);
    output_row_11  <= add_bias(input_row_11, b_col);
    output_row_12  <= add_bias(input_row_12, b_col);
    output_row_13  <= add_bias(input_row_13, b_col);
    output_row_14  <= add_bias(input_row_14, b_col);
    output_row_15  <= add_bias(input_row_15, b_col);
    output_row_16  <= add_bias(input_row_16, b_col);
    output_row_17  <= add_bias(input_row_17, b_col);
    output_row_18  <= add_bias(input_row_18, b_col);
    output_row_19  <= add_bias(input_row_19, b_col);
    output_row_20  <= add_bias(input_row_20, b_col);
    output_row_21  <= add_bias(input_row_21, b_col);
    output_row_22  <= add_bias(input_row_22, b_col);
    output_row_23  <= add_bias(input_row_23, b_col);
    output_row_24  <= add_bias(input_row_24, b_col);
    output_row_25  <= add_bias(input_row_25, b_col);
    output_row_26  <= add_bias(input_row_26, b_col);
    output_row_27  <= add_bias(input_row_27, b_col);
    output_row_28  <= add_bias(input_row_28, b_col);
    output_row_29  <= add_bias(input_row_29, b_col);
    output_row_30  <= add_bias(input_row_30, b_col);
    output_row_31  <= add_bias(input_row_31, b_col);
    output_row_32  <= add_bias(input_row_32, b_col);
    output_row_33  <= add_bias(input_row_33, b_col);
    output_row_34  <= add_bias(input_row_34, b_col);
    output_row_35  <= add_bias(input_row_35, b_col);
    output_row_36  <= add_bias(input_row_36, b_col);
    output_row_37  <= add_bias(input_row_37, b_col);
    output_row_38  <= add_bias(input_row_38, b_col);
    output_row_39  <= add_bias(input_row_39, b_col);
    output_row_40  <= add_bias(input_row_40, b_col);
    output_row_41  <= add_bias(input_row_41, b_col);
    output_row_42  <= add_bias(input_row_42, b_col);
    output_row_43  <= add_bias(input_row_43, b_col);
    output_row_44  <= add_bias(input_row_44, b_col);
    output_row_45  <= add_bias(input_row_45, b_col);
    output_row_46  <= add_bias(input_row_46, b_col);
    output_row_47  <= add_bias(input_row_47, b_col);
    output_row_48  <= add_bias(input_row_48, b_col);
    output_row_49  <= add_bias(input_row_49, b_col);
    output_row_50  <= add_bias(input_row_50, b_col);
    output_row_51  <= add_bias(input_row_51, b_col);
    output_row_52  <= add_bias(input_row_52, b_col);
    output_row_53  <= add_bias(input_row_53, b_col);
    output_row_54  <= add_bias(input_row_54, b_col);
    output_row_55  <= add_bias(input_row_55, b_col);
    output_row_56  <= add_bias(input_row_56, b_col);
    output_row_57  <= add_bias(input_row_57, b_col);
    output_row_58  <= add_bias(input_row_58, b_col);
    output_row_59  <= add_bias(input_row_59, b_col);
    output_row_60  <= add_bias(input_row_60, b_col);
    output_row_61  <= add_bias(input_row_61, b_col);
    output_row_62  <= add_bias(input_row_62, b_col);
    output_row_63  <= add_bias(input_row_63, b_col);
    output_row_64  <= add_bias(input_row_64, b_col);
    output_row_65  <= add_bias(input_row_65, b_col);
    output_row_66  <= add_bias(input_row_66, b_col);
    output_row_67  <= add_bias(input_row_67, b_col);
    output_row_68  <= add_bias(input_row_68, b_col);
    output_row_69  <= add_bias(input_row_69, b_col);
    output_row_70  <= add_bias(input_row_70, b_col);
    output_row_71  <= add_bias(input_row_71, b_col);
    output_row_72  <= add_bias(input_row_72, b_col);
    output_row_73  <= add_bias(input_row_73, b_col);
    output_row_74  <= add_bias(input_row_74, b_col);
    output_row_75  <= add_bias(input_row_75, b_col);
    output_row_76  <= add_bias(input_row_76, b_col);
    output_row_77  <= add_bias(input_row_77, b_col);
    output_row_78  <= add_bias(input_row_78, b_col);
    output_row_79  <= add_bias(input_row_79, b_col);
    output_row_80  <= add_bias(input_row_80, b_col);
    output_row_81  <= add_bias(input_row_81, b_col);
    output_row_82  <= add_bias(input_row_82, b_col);
    output_row_83  <= add_bias(input_row_83, b_col);
    output_row_84  <= add_bias(input_row_84, b_col);
    output_row_85  <= add_bias(input_row_85, b_col);
    output_row_86  <= add_bias(input_row_86, b_col);
    output_row_87  <= add_bias(input_row_87, b_col);
    output_row_88  <= add_bias(input_row_88, b_col);
    output_row_89  <= add_bias(input_row_89, b_col);
    output_row_90  <= add_bias(input_row_90, b_col);
    output_row_91  <= add_bias(input_row_91, b_col);
    output_row_92  <= add_bias(input_row_92, b_col);
    output_row_93  <= add_bias(input_row_93, b_col);
    output_row_94  <= add_bias(input_row_94, b_col);
    output_row_95  <= add_bias(input_row_95, b_col);
    output_row_96  <= add_bias(input_row_96, b_col);
    output_row_97  <= add_bias(input_row_97, b_col);
    output_row_98  <= add_bias(input_row_98, b_col);
    output_row_99  <= add_bias(input_row_99, b_col);
    output_row_100 <= add_bias(input_row_100, b_col);
    output_row_101 <= add_bias(input_row_101, b_col);
    output_row_102 <= add_bias(input_row_102, b_col);
    output_row_103 <= add_bias(input_row_103, b_col);
    output_row_104 <= add_bias(input_row_104, b_col);
    output_row_105 <= add_bias(input_row_105, b_col);
    output_row_106 <= add_bias(input_row_106, b_col);
    output_row_107 <= add_bias(input_row_107, b_col);
    output_row_108 <= add_bias(input_row_108, b_col);
    output_row_109 <= add_bias(input_row_109, b_col);
    output_row_110 <= add_bias(input_row_110, b_col);
    output_row_111 <= add_bias(input_row_111, b_col);
    output_row_112 <= add_bias(input_row_112, b_col);
    output_row_113 <= add_bias(input_row_113, b_col);
    output_row_114 <= add_bias(input_row_114, b_col);
    output_row_115 <= add_bias(input_row_115, b_col);
    output_row_116 <= add_bias(input_row_116, b_col);
    output_row_117 <= add_bias(input_row_117, b_col);
    output_row_118 <= add_bias(input_row_118, b_col);
    output_row_119 <= add_bias(input_row_119, b_col);
    output_row_120 <= add_bias(input_row_120, b_col);
    output_row_121 <= add_bias(input_row_121, b_col);
    output_row_122 <= add_bias(input_row_122, b_col);
    output_row_123 <= add_bias(input_row_123, b_col);
    output_row_124 <= add_bias(input_row_124, b_col);
    output_row_125 <= add_bias(input_row_125, b_col);
    output_row_126 <= add_bias(input_row_126, b_col);
    output_row_127 <= add_bias(input_row_127, b_col);

END behaviour;
-------------------------------------------------------------------------------- /MatMul/MAC_128x32_8_bias_32/mux_32to1_nbit.vhd: --------------------------------------------------------------------------------

-- mux_32to1_nbit
-- 32-input, N-bit-wide multiplexer assembled from 4:1 and 2:1 base muxes.

LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

ENTITY mux_32to1_nbit IS
    GENERIC (
        N : POSITIVE := 2   -- width in bits of every data input and of the output
    );
    PORT (
        I0, I1, I2, I3, I4, I5, I6, I7, I8, I9,
        I10, I11, I12, I13, I14, I15, I16, I17, I18, I19,
        I20, I21, I22, I23, I24, I25, I26, I27, I28, I29,
        I30, I31 : IN STD_LOGIC_VECTOR(N-1 downto 0);
        SEL_mux  : IN STD_LOGIC_VECTOR(4 downto 0);
        O        : OUT STD_LOGIC_VECTOR(N-1 downto 0)
    );
END mux_32to1_nbit;

ARCHITECTURE behaviour OF mux_32to1_nbit IS

    -- Base cells; their entities live outside this file.
    COMPONENT mux_4to1_nbit_base IS
        GENERIC ( N : POSITIVE := 1 );
        PORT (
            I0, I1, I2, I3 : IN STD_LOGIC_VECTOR(N-1 downto 0);
            SEL            : IN STD_LOGIC_VECTOR(1 downto 0);
            O              : OUT STD_LOGIC_VECTOR(N-1 downto 0)
        );
    END COMPONENT;

    COMPONENT mux_2to1_nbit_base IS
        GENERIC ( N : POSITIVE := 1 );
        PORT (
            I0, I1 : IN STD_LOGIC_VECTOR(N-1 downto 0);
            SEL    : IN STD_LOGIC;
            O      : OUT STD_LOGIC_VECTOR(N-1 downto 0)
        );
    END COMPONENT;

    -- Outputs of the eight first-level 4:1 muxes (one per group of four inputs).
    SIGNAL grp4_out_0, grp4_out_1, grp4_out_2, grp4_out_3,
           grp4_out_4, grp4_out_5, grp4_out_6, grp4_out_7 : STD_LOGIC_VECTOR(N-1 downto 0);
    -- Outputs of the two second-level 4:1 muxes (one per half of the inputs).
    SIGNAL grp16_out_0, grp16_out_1 : STD_LOGIC_VECTOR(N-1 downto 0);

BEGIN

    -- Level 0: SEL_mux(1 downto 0) picks one input inside each group of four.
    -- NOTE(review): presumably SEL = k selects Ik in the base muxes, making
    -- SEL_mux a plain binary index into I0..I31 - confirm against the base cells.
    mux_0_0: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0 => I0,  I1 => I1,  I2 => I2,  I3 => I3,  SEL => SEL_mux(1 downto 0), O => grp4_out_0);
    mux_0_1: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0 => I4,  I1 => I5,  I2 => I6,  I3 => I7,  SEL => SEL_mux(1 downto 0), O => grp4_out_1);
    mux_0_2: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0 => I8,  I1 => I9,  I2 => I10, I3 => I11, SEL => SEL_mux(1 downto 0), O => grp4_out_2);
    mux_0_3: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0 => I12, I1 => I13, I2 => I14, I3 => I15, SEL => SEL_mux(1 downto 0), O => grp4_out_3);
    mux_0_4: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0 => I16, I1 => I17, I2 => I18, I3 => I19, SEL => SEL_mux(1 downto 0), O => grp4_out_4);
    mux_0_5: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0 => I20, I1 => I21, I2 => I22, I3 => I23, SEL => SEL_mux(1 downto 0), O => grp4_out_5);
    mux_0_6: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0 => I24, I1 => I25, I2 => I26, I3 => I27, SEL => SEL_mux(1 downto 0), O => grp4_out_6);
    mux_0_7: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0 => I28, I1 => I29, I2 => I30, I3 => I31, SEL => SEL_mux(1 downto 0), O => grp4_out_7);

    -- Level 1: SEL_mux(3 downto 2) picks one group of four inside each half.
    mux_1_0: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0 => grp4_out_0, I1 => grp4_out_1, I2 => grp4_out_2, I3 => grp4_out_3,
                  SEL => SEL_mux(3 downto 2), O => grp16_out_0);
    mux_1_1: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0 => grp4_out_4, I1 => grp4_out_5, I2 => grp4_out_6, I3 => grp4_out_7,
                  SEL => SEL_mux(3 downto 2), O => grp16_out_1);

    -- Level 2: SEL_mux(4) picks between the two halves and drives the output.
    mux_2_out: mux_2to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0 => grp16_out_0, I1 => grp16_out_1, SEL => SEL_mux(4), O => O);

END behaviour;
-------------------------------------------------------------------------------- /MatMul/MAC_32x16_bias_32/MAC_bias_32x16_8.vhd: --------------------------------------------------------------------------------

-- MAC_bias_32x16_8
-- Interface of the 32-row / 16-column MAC block with bias: 8-bit row and
-- column operands, a 32-bit bias word and 32 result rows of 32 bits each.

LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

ENTITY MAC_bias_32x16_8 IS
    PORT (
        -- 32 row operands, 8 bits each.
        input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7, input_row_8, input_row_9,
        input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15, input_row_16, input_row_17, input_row_18, input_row_19,
        input_row_20, input_row_21, input_row_22, input_row_23, input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29,
        input_row_30, input_row_31 : IN STD_LOGIC_VECTOR(7 downto 0);
        -- 16 column operands, 8 bits each.
        input_col_0, input_col_1, input_col_2, input_col_3, input_col_4, input_col_5, input_col_6, input_col_7, input_col_8, input_col_9,
        input_col_10, input_col_11, input_col_12, input_col_13, input_col_14, input_col_15 : IN STD_LOGIC_VECTOR(7 downto 0);
        -- 32-bit bias word.
        b_col : IN STD_LOGIC_VECTOR(31 downto 0);
        -- 4-bit select; NOTE(review): presumably chooses one of the 16 columns - confirm in MAC_32x16_8.
        SEL_mux : IN STD_LOGIC_VECTOR(3 downto 0);
        CLK, RST_n, ENABLE : IN STD_LOGIC;
        -- 32 result rows, 32 bits each.
        output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7, output_row_8, output_row_9,
        output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15, output_row_16, output_row_17, output_row_18, output_row_19,
        output_row_20, output_row_21, output_row_22, output_row_23, output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29,
        output_row_30, output_row_31 : OUT STD_LOGIC_VECTOR(31 downto 0)
    );
END MAC_bias_32x16_8;
-- Structural architecture of MAC_bias_32x16_8: a MAC_32x16_8 core produces
-- 32 intermediate 32-bit rows, which bias_sum_32_32 combines with b_col to
-- drive the entity outputs. CLK, RST_n, ENABLE and SEL_mux reach only the
-- MAC core; the bias stage has no clock or control ports.
ARCHITECTURE behaviour OF MAC_bias_32x16_8 IS

    COMPONENT MAC_32x16_8 IS
        PORT (
            input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7, input_row_8, input_row_9,
            input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15, input_row_16, input_row_17, input_row_18, input_row_19,
            input_row_20, input_row_21, input_row_22, input_row_23, input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29,
            input_row_30, input_row_31 : IN STD_LOGIC_VECTOR(7 downto 0);
            input_col_0, input_col_1, input_col_2, input_col_3, input_col_4, input_col_5, input_col_6, input_col_7, input_col_8, input_col_9,
            input_col_10, input_col_11, input_col_12, input_col_13, input_col_14, input_col_15 : IN STD_LOGIC_VECTOR(7 downto 0);
            SEL_mux : IN STD_LOGIC_VECTOR(3 downto 0);
            CLK, RST_n, ENABLE : IN STD_LOGIC;
            output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7, output_row_8, output_row_9,
            output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15, output_row_16, output_row_17, output_row_18, output_row_19,
            output_row_20, output_row_21, output_row_22, output_row_23, output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29,
            output_row_30, output_row_31 : OUT STD_LOGIC_VECTOR(31 downto 0)
        );
    END COMPONENT;

    COMPONENT bias_sum_32_32 IS
        PORT (
            input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7, input_row_8, input_row_9,
            input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15, input_row_16, input_row_17, input_row_18, input_row_19,
            input_row_20, input_row_21, input_row_22, input_row_23, input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29,
            input_row_30, input_row_31 : IN STD_LOGIC_VECTOR(31 downto 0);
            b_col : IN STD_LOGIC_VECTOR(31 downto 0);
            output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7, output_row_8, output_row_9,
            output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15, output_row_16, output_row_17, output_row_18, output_row_19,
            output_row_20, output_row_21, output_row_22, output_row_23, output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29,
            output_row_30, output_row_31 : OUT STD_LOGIC_VECTOR(31 downto 0)
        );
    END COMPONENT;

    -- Row results of the MAC core, before the bias stage.
    SIGNAL mac_row_0,  mac_row_1,  mac_row_2,  mac_row_3,  mac_row_4,  mac_row_5,  mac_row_6,  mac_row_7,
           mac_row_8,  mac_row_9,  mac_row_10, mac_row_11, mac_row_12, mac_row_13, mac_row_14, mac_row_15,
           mac_row_16, mac_row_17, mac_row_18, mac_row_19, mac_row_20, mac_row_21, mac_row_22, mac_row_23,
           mac_row_24, mac_row_25, mac_row_26, mac_row_27, mac_row_28, mac_row_29, mac_row_30, mac_row_31 : STD_LOGIC_VECTOR(31 downto 0);

BEGIN

    -- MAC core: entity inputs pass straight through, outputs land on mac_row_*.
    MAC_base: MAC_32x16_8
        PORT MAP (
            CLK => CLK, RST_n => RST_n, ENABLE => ENABLE, SEL_mux => SEL_mux,
            input_row_0  => input_row_0,  input_row_1  => input_row_1,  input_row_2  => input_row_2,  input_row_3  => input_row_3,
            input_row_4  => input_row_4,  input_row_5  => input_row_5,  input_row_6  => input_row_6,  input_row_7  => input_row_7,
            input_row_8  => input_row_8,  input_row_9  => input_row_9,  input_row_10 => input_row_10, input_row_11 => input_row_11,
            input_row_12 => input_row_12, input_row_13 => input_row_13, input_row_14 => input_row_14, input_row_15 => input_row_15,
            input_row_16 => input_row_16, input_row_17 => input_row_17, input_row_18 => input_row_18, input_row_19 => input_row_19,
            input_row_20 => input_row_20, input_row_21 => input_row_21, input_row_22 => input_row_22, input_row_23 => input_row_23,
            input_row_24 => input_row_24, input_row_25 => input_row_25, input_row_26 => input_row_26, input_row_27 => input_row_27,
            input_row_28 => input_row_28, input_row_29 => input_row_29, input_row_30 => input_row_30, input_row_31 => input_row_31,
            input_col_0  => input_col_0,  input_col_1  => input_col_1,  input_col_2  => input_col_2,  input_col_3  => input_col_3,
            input_col_4  => input_col_4,  input_col_5  => input_col_5,  input_col_6  => input_col_6,  input_col_7  => input_col_7,
            input_col_8  => input_col_8,  input_col_9  => input_col_9,  input_col_10 => input_col_10, input_col_11 => input_col_11,
            input_col_12 => input_col_12, input_col_13 => input_col_13, input_col_14 => input_col_14, input_col_15 => input_col_15,
            output_row_0  => mac_row_0,  output_row_1  => mac_row_1,  output_row_2  => mac_row_2,  output_row_3  => mac_row_3,
            output_row_4  => mac_row_4,  output_row_5  => mac_row_5,  output_row_6  => mac_row_6,  output_row_7  => mac_row_7,
            output_row_8  => mac_row_8,  output_row_9  => mac_row_9,  output_row_10 => mac_row_10, output_row_11 => mac_row_11,
            output_row_12 => mac_row_12, output_row_13 => mac_row_13, output_row_14 => mac_row_14, output_row_15 => mac_row_15,
            output_row_16 => mac_row_16, output_row_17 => mac_row_17, output_row_18 => mac_row_18, output_row_19 => mac_row_19,
            output_row_20 => mac_row_20, output_row_21 => mac_row_21, output_row_22 => mac_row_22, output_row_23 => mac_row_23,
            output_row_24 => mac_row_24, output_row_25 => mac_row_25, output_row_26 => mac_row_26, output_row_27 => mac_row_27,
            output_row_28 => mac_row_28, output_row_29 => mac_row_29, output_row_30 => mac_row_30, output_row_31 => mac_row_31
        );

    -- Bias stage: consumes mac_row_* and b_col, drives the entity outputs.
    bias_sum: bias_sum_32_32
        PORT MAP (
            b_col => b_col,
            input_row_0  => mac_row_0,  input_row_1  => mac_row_1,  input_row_2  => mac_row_2,  input_row_3  => mac_row_3,
            input_row_4  => mac_row_4,  input_row_5  => mac_row_5,  input_row_6  => mac_row_6,  input_row_7  => mac_row_7,
            input_row_8  => mac_row_8,  input_row_9  => mac_row_9,  input_row_10 => mac_row_10, input_row_11 => mac_row_11,
            input_row_12 => mac_row_12, input_row_13 => mac_row_13, input_row_14 => mac_row_14, input_row_15 => mac_row_15,
            input_row_16 => mac_row_16, input_row_17 => mac_row_17, input_row_18 => mac_row_18, input_row_19 => mac_row_19,
            input_row_20 => mac_row_20, input_row_21 => mac_row_21, input_row_22 => mac_row_22, input_row_23 => mac_row_23,
            input_row_24 => mac_row_24, input_row_25 => mac_row_25, input_row_26 => mac_row_26, input_row_27 => mac_row_27,
            input_row_28 => mac_row_28, input_row_29 => mac_row_29, input_row_30 => mac_row_30, input_row_31 => mac_row_31,
            output_row_0  => output_row_0,  output_row_1  => output_row_1,  output_row_2  => output_row_2,  output_row_3  => output_row_3,
            output_row_4  => output_row_4,  output_row_5  => output_row_5,  output_row_6  => output_row_6,  output_row_7  => output_row_7,
            output_row_8  => output_row_8,  output_row_9  => output_row_9,  output_row_10 => output_row_10, output_row_11 => output_row_11,
            output_row_12 => output_row_12, output_row_13 => output_row_13, output_row_14 => output_row_14, output_row_15 => output_row_15,
            output_row_16 => output_row_16, output_row_17 => output_row_17, output_row_18 => output_row_18, output_row_19 => output_row_19,
            output_row_20 => output_row_20, output_row_21 => output_row_21, output_row_22 => output_row_22, output_row_23 => output_row_23,
            output_row_24 => output_row_24, output_row_25 => output_row_25, output_row_26 => output_row_26, output_row_27 => output_row_27,
            output_row_28 => output_row_28, output_row_29 => output_row_29, output_row_30 => output_row_30, output_row_31 => output_row_31
        );

END behaviour;
-------------------------------------------------------------------------------- /MatMul/MAC_32x16_bias_32/MAC_bias_32x16_8_reg.vhd: -------------------------------------------------------------------------------- 1 | 2 | LIBRARY ieee; 3 | USE ieee.std_logic_1164.all; 4 | USE
ieee.numeric_std.all;
5 |
6 | -- MAC_bias_32x16_8_reg
7 | -- Structural wrapper: instantiates MAC_32x16_8_reg (MAC_base), registers the
8 | -- bias word with regnbit (bias_reg) and adds it to every MAC_base result row
9 | -- through bias_sum_32_32 (bias_sum).
10 | --   input_row_0..31  : 8-bit operands forwarded unchanged to MAC_base.
11 | --   input_col_0..15  : 8-bit operands forwarded unchanged to MAC_base.
12 | --   b_col            : 32-bit bias, captured by bias_reg before it is added.
13 | --   SEL_mux          : 4-bit select forwarded to MAC_base.
14 | --   CLK, RST_n       : forwarded to MAC_base and to bias_reg.
15 | --   ENABLE           : forwarded to MAC_base only.
16 | --   output_row_0..31 : 32-bit outputs of bias_sum.
17 | ENTITY MAC_bias_32x16_8_reg IS
18 |   PORT (
19 |     input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7,
20 |     input_row_8, input_row_9, input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15,
21 |     input_row_16, input_row_17, input_row_18, input_row_19, input_row_20, input_row_21, input_row_22, input_row_23,
22 |     input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, input_row_30, input_row_31 : IN STD_LOGIC_VECTOR(7 downto 0);
23 |     input_col_0, input_col_1, input_col_2, input_col_3, input_col_4, input_col_5, input_col_6, input_col_7,
24 |     input_col_8, input_col_9, input_col_10, input_col_11, input_col_12, input_col_13, input_col_14, input_col_15 : IN STD_LOGIC_VECTOR(7 downto 0);
25 |     b_col : IN STD_LOGIC_VECTOR(31 downto 0);
26 |     SEL_mux : IN STD_LOGIC_VECTOR(3 downto 0);
27 |     CLK, RST_n, ENABLE : IN STD_LOGIC;
28 |     output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7,
29 |     output_row_8, output_row_9, output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15,
30 |     output_row_16, output_row_17, output_row_18, output_row_19, output_row_20, output_row_21, output_row_22, output_row_23,
31 |     output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, output_row_30, output_row_31 : OUT STD_LOGIC_VECTOR(31 downto 0)
32 |   );
33 | END MAC_bias_32x16_8_reg;
34 |
35 | ARCHITECTURE behaviour OF MAC_bias_32x16_8_reg IS
36 |
37 |   -- Multiply-accumulate core; its behaviour is defined outside this listing.
38 |   COMPONENT MAC_32x16_8_reg IS
39 |     PORT (
40 |       input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7,
41 |       input_row_8, input_row_9, input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15,
42 |       input_row_16, input_row_17, input_row_18, input_row_19, input_row_20, input_row_21, input_row_22, input_row_23,
43 |       input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, input_row_30, input_row_31 : IN STD_LOGIC_VECTOR(7 downto 0);
44 |       input_col_0, input_col_1, input_col_2, input_col_3, input_col_4, input_col_5, input_col_6, input_col_7,
45 |       input_col_8, input_col_9, input_col_10, input_col_11, input_col_12, input_col_13, input_col_14, input_col_15 : IN STD_LOGIC_VECTOR(7 downto 0);
46 |       SEL_mux : IN STD_LOGIC_VECTOR(3 downto 0);
47 |       CLK, RST_n, ENABLE : IN STD_LOGIC;
48 |       output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7,
49 |       output_row_8, output_row_9, output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15,
50 |       output_row_16, output_row_17, output_row_18, output_row_19, output_row_20, output_row_21, output_row_22, output_row_23,
51 |       output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, output_row_30, output_row_31 : OUT STD_LOGIC_VECTOR(31 downto 0)
52 |     );
53 |   END COMPONENT;
54 |
55 |   -- Adds b_col to each of its 32 input rows.
56 |   COMPONENT bias_sum_32_32 IS
57 |     PORT (
58 |       input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7,
59 |       input_row_8, input_row_9, input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15,
60 |       input_row_16, input_row_17, input_row_18, input_row_19, input_row_20, input_row_21, input_row_22, input_row_23,
61 |       input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, input_row_30, input_row_31 : IN STD_LOGIC_VECTOR(31 downto 0);
62 |       b_col : IN STD_LOGIC_VECTOR(31 downto 0);
63 |       output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7,
64 |       output_row_8, output_row_9, output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15,
65 |       output_row_16, output_row_17, output_row_18, output_row_19, output_row_20, output_row_21, output_row_22, output_row_23,
66 |       output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, output_row_30, output_row_31 : OUT STD_LOGIC_VECTOR(31 downto 0)
67 |     );
68 |   END COMPONENT;
69 |
70 |   -- N-bit register with clock, active-low-named reset and enable inputs.
71 |   COMPONENT regnbit IS
72 |     GENERIC (N : POSITIVE := 2);
73 |     PORT (
74 |       D : IN STD_LOGIC_VECTOR(N-1 downto 0);
75 |       CLK, RST_n, ENABLE : IN STD_LOGIC;
76 |       Q : OUT STD_LOGIC_VECTOR(N-1 downto 0)
77 |     );
78 |   END COMPONENT;
79 |
80 |   -- MAC_base results: element k carries MAC_base.output_row_k into bias_sum.input_row_k.
81 |   TYPE mac_rows_t IS ARRAY (0 TO 31) OF STD_LOGIC_VECTOR(31 downto 0);
82 |   SIGNAL mac_rows : mac_rows_t;
83 |   -- b_col as seen at the output of bias_reg.
84 |   SIGNAL bias_q : STD_LOGIC_VECTOR(31 downto 0);
85 |
86 | BEGIN
87 |
88 |   -- Core array: every operand, the select and the control inputs pass straight through.
89 |   MAC_base : MAC_32x16_8_reg
90 |     PORT MAP (
91 |       CLK => CLK, RST_n => RST_n, ENABLE => ENABLE, SEL_mux => SEL_mux,
92 |       input_row_0 => input_row_0, input_row_1 => input_row_1, input_row_2 => input_row_2, input_row_3 => input_row_3,
93 |       input_row_4 => input_row_4, input_row_5 => input_row_5, input_row_6 => input_row_6, input_row_7 => input_row_7,
94 |       input_row_8 => input_row_8, input_row_9 => input_row_9, input_row_10 => input_row_10, input_row_11 => input_row_11,
95 |       input_row_12 => input_row_12, input_row_13 => input_row_13, input_row_14 => input_row_14, input_row_15 => input_row_15,
96 |       input_row_16 => input_row_16, input_row_17 => input_row_17, input_row_18 => input_row_18, input_row_19 => input_row_19,
97 |       input_row_20 => input_row_20, input_row_21 => input_row_21, input_row_22 => input_row_22, input_row_23 => input_row_23,
98 |       input_row_24 => input_row_24, input_row_25 => input_row_25, input_row_26 => input_row_26, input_row_27 => input_row_27,
99 |       input_row_28 => input_row_28, input_row_29 => input_row_29, input_row_30 => input_row_30, input_row_31 => input_row_31,
100 |       input_col_0 => input_col_0, input_col_1 => input_col_1, input_col_2 => input_col_2, input_col_3 => input_col_3,
101 |       input_col_4 => input_col_4, input_col_5 => input_col_5, input_col_6 => input_col_6, input_col_7 => input_col_7,
102 |       input_col_8 => input_col_8, input_col_9 => input_col_9, input_col_10 => input_col_10, input_col_11 => input_col_11,
103 |       input_col_12 => input_col_12, input_col_13 => input_col_13, input_col_14 => input_col_14, input_col_15 => input_col_15,
104 |       output_row_0 => mac_rows(0), output_row_1 => mac_rows(1), output_row_2 => mac_rows(2), output_row_3 => mac_rows(3),
105 |       output_row_4 => mac_rows(4), output_row_5 => mac_rows(5), output_row_6 => mac_rows(6), output_row_7 => mac_rows(7),
106 |       output_row_8 => mac_rows(8), output_row_9 => mac_rows(9), output_row_10 => mac_rows(10), output_row_11 => mac_rows(11),
107 |       output_row_12 => mac_rows(12), output_row_13 => mac_rows(13), output_row_14 => mac_rows(14), output_row_15 => mac_rows(15),
108 |       output_row_16 => mac_rows(16), output_row_17 => mac_rows(17), output_row_18 => mac_rows(18), output_row_19 => mac_rows(19),
109 |       output_row_20 => mac_rows(20), output_row_21 => mac_rows(21), output_row_22 => mac_rows(22), output_row_23 => mac_rows(23),
110 |       output_row_24 => mac_rows(24), output_row_25 => mac_rows(25), output_row_26 => mac_rows(26), output_row_27 => mac_rows(27),
111 |       output_row_28 => mac_rows(28), output_row_29 => mac_rows(29), output_row_30 => mac_rows(30), output_row_31 => mac_rows(31)
112 |     );
113 |
114 |   -- Bias register. NOTE(review): its ENABLE is tied to '1' while MAC_base is
115 |   -- gated by the ENABLE port - confirm this difference is intentional.
116 |   bias_reg : regnbit
117 |     GENERIC MAP (N => 32)
118 |     PORT MAP (D => b_col, CLK => CLK, RST_n => RST_n, ENABLE => '1', Q => bias_q);
119 |
120 |   -- Bias adder: drives the entity outputs from the MAC_base rows and the registered bias.
121 |   bias_sum : bias_sum_32_32
122 |     PORT MAP (
123 |       input_row_0 => mac_rows(0), input_row_1 => mac_rows(1), input_row_2 => mac_rows(2), input_row_3 => mac_rows(3),
124 |       input_row_4 => mac_rows(4), input_row_5 => mac_rows(5), input_row_6 => mac_rows(6), input_row_7 => mac_rows(7),
125 |       input_row_8 => mac_rows(8), input_row_9 => mac_rows(9), input_row_10 => mac_rows(10), input_row_11 => mac_rows(11),
126 |       input_row_12 => mac_rows(12), input_row_13 => mac_rows(13), input_row_14 => mac_rows(14), input_row_15 => mac_rows(15),
127 |       input_row_16 => mac_rows(16), input_row_17 => mac_rows(17), input_row_18 => mac_rows(18), input_row_19 => mac_rows(19),
128 |       input_row_20 => mac_rows(20), input_row_21 => mac_rows(21), input_row_22 => mac_rows(22), input_row_23 => mac_rows(23),
129 |       input_row_24 => mac_rows(24), input_row_25 => mac_rows(25), input_row_26 => mac_rows(26), input_row_27 => mac_rows(27),
130 |       input_row_28 => mac_rows(28), input_row_29 => mac_rows(29), input_row_30 => mac_rows(30), input_row_31 => mac_rows(31),
131 |       b_col => bias_q,
132 |       output_row_0 => output_row_0, output_row_1 => output_row_1, output_row_2 => output_row_2, output_row_3 => output_row_3,
133 |       output_row_4 => output_row_4, output_row_5 => output_row_5, output_row_6 => output_row_6, output_row_7 => output_row_7,
134 |       output_row_8 => output_row_8, output_row_9 => output_row_9, output_row_10 => output_row_10, output_row_11 => output_row_11,
135 |       output_row_12 => output_row_12, output_row_13 => output_row_13, output_row_14 => output_row_14, output_row_15 => output_row_15,
136 |       output_row_16 => output_row_16, output_row_17 => output_row_17, output_row_18 => output_row_18, output_row_19 => output_row_19,
137 |       output_row_20 => output_row_20, output_row_21 => output_row_21, output_row_22 => output_row_22, output_row_23 => output_row_23,
138 |       output_row_24 => output_row_24, output_row_25 => output_row_25, output_row_26 => output_row_26, output_row_27 => output_row_27,
139 |       output_row_28 => output_row_28, output_row_29 => output_row_29, output_row_30 => output_row_30, output_row_31 => output_row_31
140 |     );
141 |
142 | END behaviour;
143 |
--------------------------------------------------------------------------------
/MatMul/MAC_32x16_bias_32/bias_sum_32_32.vhd:
--------------------------------------------------------------------------------
1 |
2 | LIBRARY ieee;
3 | USE ieee.std_logic_1164.all;
4 | USE ieee.numeric_std.all;
5 |
6 | ENTITY
bias_sum_32_32 IS
7 |   -- Adds one shared 32-bit bias word to each of 32 independent 32-bit rows.
8 |   --   input_row_0..31  : row operands, interpreted as signed.
9 |   --   b_col            : bias operand, interpreted as signed, common to all rows.
10 |   --   output_row_k     : input_row_k + b_col as a 32-bit vector; numeric_std "+"
11 |   --                      keeps the operand width, so overflow wraps silently.
12 |   -- The entity has no clock or reset port; every output is a concurrent assignment.
13 |   PORT (
14 |     input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7,
15 |     input_row_8, input_row_9, input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15,
16 |     input_row_16, input_row_17, input_row_18, input_row_19, input_row_20, input_row_21, input_row_22, input_row_23,
17 |     input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, input_row_30, input_row_31 : IN STD_LOGIC_VECTOR(31 downto 0);
18 |     b_col : IN STD_LOGIC_VECTOR(31 downto 0);
19 |     output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7,
20 |     output_row_8, output_row_9, output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15,
21 |     output_row_16, output_row_17, output_row_18, output_row_19, output_row_20, output_row_21, output_row_22, output_row_23,
22 |     output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, output_row_30, output_row_31 : OUT STD_LOGIC_VECTOR(31 downto 0)
23 |   );
24 | END bias_sum_32_32;
25 |
26 | ARCHITECTURE behaviour OF bias_sum_32_32 IS
27 |
28 |   -- Signed sum of one row and the bias, returned as a plain logic vector.
29 |   FUNCTION plus_bias (row, bias : STD_LOGIC_VECTOR) RETURN STD_LOGIC_VECTOR IS
30 |   BEGIN
31 |     RETURN STD_LOGIC_VECTOR(SIGNED(row) + SIGNED(bias));
32 |   END FUNCTION plus_bias;
33 |
34 | BEGIN
35 |
36 |   -- One independent adder per row; all of them share b_col.
37 |   output_row_0 <= plus_bias(input_row_0, b_col);
38 |   output_row_1 <= plus_bias(input_row_1, b_col);
39 |   output_row_2 <= plus_bias(input_row_2, b_col);
40 |   output_row_3 <= plus_bias(input_row_3, b_col);
41 |   output_row_4 <= plus_bias(input_row_4, b_col);
42 |   output_row_5 <= plus_bias(input_row_5, b_col);
43 |   output_row_6 <= plus_bias(input_row_6, b_col);
44 |   output_row_7 <= plus_bias(input_row_7, b_col);
45 |   output_row_8 <= plus_bias(input_row_8, b_col);
46 |   output_row_9 <= plus_bias(input_row_9, b_col);
47 |   output_row_10 <= plus_bias(input_row_10, b_col);
48 |   output_row_11 <= plus_bias(input_row_11, b_col);
49 |   output_row_12 <= plus_bias(input_row_12, b_col);
50 |   output_row_13 <= plus_bias(input_row_13, b_col);
51 |   output_row_14 <= plus_bias(input_row_14, b_col);
52 |   output_row_15 <= plus_bias(input_row_15, b_col);
53 |   output_row_16 <= plus_bias(input_row_16, b_col);
54 |   output_row_17 <= plus_bias(input_row_17, b_col);
55 |   output_row_18 <= plus_bias(input_row_18, b_col);
56 |   output_row_19 <= plus_bias(input_row_19, b_col);
57 |   output_row_20 <= plus_bias(input_row_20, b_col);
58 |   output_row_21 <= plus_bias(input_row_21, b_col);
59 |   output_row_22 <= plus_bias(input_row_22, b_col);
60 |   output_row_23 <= plus_bias(input_row_23, b_col);
61 |   output_row_24 <= plus_bias(input_row_24, b_col);
62 |   output_row_25 <= plus_bias(input_row_25, b_col);
63 |   output_row_26 <= plus_bias(input_row_26, b_col);
64 |   output_row_27 <= plus_bias(input_row_27, b_col);
65 |   output_row_28 <= plus_bias(input_row_28, b_col);
66 |   output_row_29 <= plus_bias(input_row_29, b_col);
67 |   output_row_30 <= plus_bias(input_row_30, b_col);
68 |   output_row_31 <= plus_bias(input_row_31, b_col);
69 |
70 | END behaviour;
71 |
--------------------------------------------------------------------------------
/MatMul/MAC_32x16_bias_32/mux_16to1_nbit.vhd:
--------------------------------------------------------------------------------
1 |
2 | LIBRARY ieee;
3 | USE ieee.std_logic_1164.all;
4 | USE
ieee.numeric_std.all;
5 |
6 | -- mux_16to1_nbit: N-bit wide, 16-input multiplexer assembled as a two-level
7 | -- tree of five mux_4to1_nbit_base instances.
8 | --   N       : bit width shared by every data input and by the output.
9 | --   I0..I15 : data inputs.
10 | --   SEL_mux : bits 1..0 drive the four first-level muxes; bits 3..2 drive the
11 | --             final mux that chooses among their outputs.
12 | --   O       : output of the final mux.
13 | -- NOTE(review): presumably SEL_mux = k routes Ik to O; that depends on how
14 | -- mux_4to1_nbit_base decodes SEL, which is not visible here - confirm.
15 | ENTITY mux_16to1_nbit IS
16 |   GENERIC (
17 |     N : POSITIVE := 2
18 |   );
19 |   PORT (
20 |     I0, I1, I2, I3, I4, I5, I6, I7,
21 |     I8, I9, I10, I11, I12, I13, I14, I15 : IN STD_LOGIC_VECTOR(N-1 downto 0);
22 |     SEL_mux : IN STD_LOGIC_VECTOR(3 downto 0);
23 |     O : OUT STD_LOGIC_VECTOR(N-1 downto 0)
24 |   );
25 | END mux_16to1_nbit;
26 |
27 | ARCHITECTURE behaviour OF mux_16to1_nbit IS
28 |
29 |   -- 4-to-1 building block; its implementation lives outside this listing.
30 |   COMPONENT mux_4to1_nbit_base IS
31 |     GENERIC (N : POSITIVE := 1);
32 |     PORT (
33 |       I0, I1, I2, I3 : IN STD_LOGIC_VECTOR(N-1 downto 0);
34 |       SEL : IN STD_LOGIC_VECTOR(1 downto 0);
35 |       O : OUT STD_LOGIC_VECTOR(N-1 downto 0)
36 |     );
37 |   END COMPONENT;
38 |
39 |   -- Named views of the two halves of the select word.
40 |   ALIAS sel_low : STD_LOGIC_VECTOR(1 downto 0) IS SEL_mux(1 downto 0);
41 |   ALIAS sel_high : STD_LOGIC_VECTOR(1 downto 0) IS SEL_mux(3 downto 2);
42 |
43 |   -- Output of each first-level mux (group g covers I(4g) .. I(4g+3)).
44 |   SIGNAL grp0_out, grp1_out, grp2_out, grp3_out : STD_LOGIC_VECTOR(N-1 downto 0);
45 |
46 | BEGIN
47 |
48 |   -- Second level: choose one of the four group outputs with the high select bits.
49 |   mux_1_out : mux_4to1_nbit_base
50 |     GENERIC MAP (N => N)
51 |     PORT MAP (I0 => grp0_out, I1 => grp1_out, I2 => grp2_out, I3 => grp3_out, SEL => sel_high, O => O);
52 |
53 |   -- First level: four muxes in parallel, all steered by the low select bits.
54 |   mux_0_0 : mux_4to1_nbit_base
55 |     GENERIC MAP (N => N)
56 |     PORT MAP (I0 => I0, I1 => I1, I2 => I2, I3 => I3, SEL => sel_low, O => grp0_out);
57 |   mux_0_1 : mux_4to1_nbit_base
58 |     GENERIC MAP (N => N)
59 |     PORT MAP (I0 => I4, I1 => I5, I2 => I6, I3 => I7, SEL => sel_low, O => grp1_out);
60 |   mux_0_2 : mux_4to1_nbit_base
61 |     GENERIC MAP (N => N)
62 |     PORT MAP (I0 => I8, I1 => I9, I2 => I10, I3 => I11, SEL => sel_low, O => grp2_out);
63 |   mux_0_3 : mux_4to1_nbit_base
64 |     GENERIC MAP (N => N)
65 |     PORT MAP (I0 => I12, I1 => I13, I2 => I14, I3 => I15, SEL => sel_low, O => grp3_out);
66 |
67 | END behaviour;
68 |
--------------------------------------------------------------------------------
/MatMul/MAC_32x32_8_bias_32/MAC_bias_32x32_8.vhd:
--------------------------------------------------------------------------------
1 |
2 | LIBRARY ieee;
3 | USE ieee.std_logic_1164.all;
4 | USE ieee.numeric_std.all;
5 |
6 | ENTITY
MAC_bias_32x32_8 IS
7 |   -- Structural wrapper: instantiates MAC_32x32_8 (MAC_base) and adds the bias
8 |   -- word to every MAC_base result row through bias_sum_32_32 (bias_sum).
9 |   --   input_row_0..31  : 8-bit operands forwarded unchanged to MAC_base.
10 |   --   input_col_0..31  : 8-bit operands forwarded unchanged to MAC_base.
11 |   --   b_col            : 32-bit bias, wired directly to bias_sum (no register in this entity).
12 |   --   SEL_mux          : 5-bit select forwarded to MAC_base.
13 |   --   CLK, RST_n, ENABLE : forwarded to MAC_base; not used anywhere else here.
14 |   --   output_row_0..31 : 32-bit outputs of bias_sum.
15 |   PORT (
16 |     input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7,
17 |     input_row_8, input_row_9, input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15,
18 |     input_row_16, input_row_17, input_row_18, input_row_19, input_row_20, input_row_21, input_row_22, input_row_23,
19 |     input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, input_row_30, input_row_31 : IN STD_LOGIC_VECTOR(7 downto 0);
20 |     input_col_0, input_col_1, input_col_2, input_col_3, input_col_4, input_col_5, input_col_6, input_col_7,
21 |     input_col_8, input_col_9, input_col_10, input_col_11, input_col_12, input_col_13, input_col_14, input_col_15,
22 |     input_col_16, input_col_17, input_col_18, input_col_19, input_col_20, input_col_21, input_col_22, input_col_23,
23 |     input_col_24, input_col_25, input_col_26, input_col_27, input_col_28, input_col_29, input_col_30, input_col_31 : IN STD_LOGIC_VECTOR(7 downto 0);
24 |     b_col : IN STD_LOGIC_VECTOR(31 downto 0);
25 |     SEL_mux : IN STD_LOGIC_VECTOR(4 downto 0);
26 |     CLK, RST_n, ENABLE : IN STD_LOGIC;
27 |     output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7,
28 |     output_row_8, output_row_9, output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15,
29 |     output_row_16, output_row_17, output_row_18, output_row_19, output_row_20, output_row_21, output_row_22, output_row_23,
30 |     output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, output_row_30, output_row_31 : OUT STD_LOGIC_VECTOR(31 downto 0)
31 |   );
32 | END MAC_bias_32x32_8;
33 |
34 | ARCHITECTURE behaviour OF MAC_bias_32x32_8 IS
35 |
36 |   -- Multiply-accumulate core; its behaviour is defined outside this listing.
37 |   COMPONENT MAC_32x32_8 IS
38 |     PORT (
39 |       input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7,
40 |       input_row_8, input_row_9, input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15,
41 |       input_row_16, input_row_17, input_row_18, input_row_19, input_row_20, input_row_21, input_row_22, input_row_23,
42 |       input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, input_row_30, input_row_31 : IN STD_LOGIC_VECTOR(7 downto 0);
43 |       input_col_0, input_col_1, input_col_2, input_col_3, input_col_4, input_col_5, input_col_6, input_col_7,
44 |       input_col_8, input_col_9, input_col_10, input_col_11, input_col_12, input_col_13, input_col_14, input_col_15,
45 |       input_col_16, input_col_17, input_col_18, input_col_19, input_col_20, input_col_21, input_col_22, input_col_23,
46 |       input_col_24, input_col_25, input_col_26, input_col_27, input_col_28, input_col_29, input_col_30, input_col_31 : IN STD_LOGIC_VECTOR(7 downto 0);
47 |       SEL_mux : IN STD_LOGIC_VECTOR(4 downto 0);
48 |       CLK, RST_n, ENABLE : IN STD_LOGIC;
49 |       output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7,
50 |       output_row_8, output_row_9, output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15,
51 |       output_row_16, output_row_17, output_row_18, output_row_19, output_row_20, output_row_21, output_row_22, output_row_23,
52 |       output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, output_row_30, output_row_31 : OUT STD_LOGIC_VECTOR(31 downto 0)
53 |     );
54 |   END COMPONENT;
55 |
56 |   -- Adds b_col to each of its 32 input rows.
57 |   COMPONENT bias_sum_32_32 IS
58 |     PORT (
59 |       input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7,
60 |       input_row_8, input_row_9, input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15,
61 |       input_row_16, input_row_17, input_row_18, input_row_19, input_row_20, input_row_21, input_row_22, input_row_23,
62 |       input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, input_row_30, input_row_31 : IN STD_LOGIC_VECTOR(31 downto 0);
63 |       b_col : IN STD_LOGIC_VECTOR(31 downto 0);
64 |       output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7,
65 |       output_row_8, output_row_9, output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15,
66 |       output_row_16, output_row_17, output_row_18, output_row_19, output_row_20, output_row_21, output_row_22, output_row_23,
67 |       output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, output_row_30, output_row_31 : OUT STD_LOGIC_VECTOR(31 downto 0)
68 |     );
69 |   END COMPONENT;
70 |
71 |   -- MAC_base results: element k carries MAC_base.output_row_k into bias_sum.input_row_k.
72 |   TYPE mac_rows_t IS ARRAY (0 TO 31) OF STD_LOGIC_VECTOR(31 downto 0);
73 |   SIGNAL mac_rows : mac_rows_t;
74 |
75 | BEGIN
76 |
77 |   -- Core array: every operand, the select and the control inputs pass straight through.
78 |   MAC_base : MAC_32x32_8
79 |     PORT MAP (
80 |       CLK => CLK, RST_n => RST_n, ENABLE => ENABLE, SEL_mux => SEL_mux,
81 |       input_row_0 => input_row_0, input_row_1 => input_row_1, input_row_2 => input_row_2, input_row_3 => input_row_3,
82 |       input_row_4 => input_row_4, input_row_5 => input_row_5, input_row_6 => input_row_6, input_row_7 => input_row_7,
83 |       input_row_8 => input_row_8, input_row_9 => input_row_9, input_row_10 => input_row_10, input_row_11 => input_row_11,
84 |       input_row_12 => input_row_12, input_row_13 => input_row_13, input_row_14 => input_row_14, input_row_15 => input_row_15,
85 |       input_row_16 => input_row_16, input_row_17 => input_row_17, input_row_18 => input_row_18, input_row_19 => input_row_19,
86 |       input_row_20 => input_row_20, input_row_21 => input_row_21, input_row_22 => input_row_22, input_row_23 => input_row_23,
87 |       input_row_24 => input_row_24, input_row_25 => input_row_25, input_row_26 => input_row_26, input_row_27 => input_row_27,
88 |       input_row_28 => input_row_28, input_row_29 => input_row_29, input_row_30 => input_row_30, input_row_31 => input_row_31,
89 |       input_col_0 => input_col_0, input_col_1 => input_col_1, input_col_2 => input_col_2, input_col_3 => input_col_3,
90 |       input_col_4 => input_col_4, input_col_5 => input_col_5, input_col_6 => input_col_6, input_col_7 => input_col_7,
91 |       input_col_8 => input_col_8, input_col_9 => input_col_9, input_col_10 => input_col_10, input_col_11 => input_col_11,
92 |       input_col_12 => input_col_12, input_col_13 => input_col_13, input_col_14 => input_col_14, input_col_15 => input_col_15,
93 |       input_col_16 => input_col_16, input_col_17 => input_col_17, input_col_18 => input_col_18, input_col_19 => input_col_19,
94 |       input_col_20 => input_col_20, input_col_21 => input_col_21, input_col_22 => input_col_22, input_col_23 => input_col_23,
95 |       input_col_24 => input_col_24, input_col_25 => input_col_25, input_col_26 => input_col_26, input_col_27 => input_col_27,
96 |       input_col_28 => input_col_28, input_col_29 => input_col_29, input_col_30 => input_col_30, input_col_31 => input_col_31,
97 |       output_row_0 => mac_rows(0), output_row_1 => mac_rows(1), output_row_2 => mac_rows(2), output_row_3 => mac_rows(3),
98 |       output_row_4 => mac_rows(4), output_row_5 => mac_rows(5), output_row_6 => mac_rows(6), output_row_7 => mac_rows(7),
99 |       output_row_8 => mac_rows(8), output_row_9 => mac_rows(9), output_row_10 => mac_rows(10), output_row_11 => mac_rows(11),
100 |       output_row_12 => mac_rows(12), output_row_13 => mac_rows(13), output_row_14 => mac_rows(14), output_row_15 => mac_rows(15),
101 |       output_row_16 => mac_rows(16), output_row_17 => mac_rows(17), output_row_18 => mac_rows(18), output_row_19 => mac_rows(19),
102 |       output_row_20 => mac_rows(20), output_row_21 => mac_rows(21), output_row_22 => mac_rows(22), output_row_23 => mac_rows(23),
103 |       output_row_24 => mac_rows(24), output_row_25 => mac_rows(25), output_row_26 => mac_rows(26), output_row_27 => mac_rows(27),
104 |       output_row_28 => mac_rows(28), output_row_29 => mac_rows(29), output_row_30 => mac_rows(30), output_row_31 => mac_rows(31)
105 |     );
106 |
107 |   -- Bias adder: drives the entity outputs from the MAC_base rows and b_col.
108 |   bias_sum : bias_sum_32_32
109 |     PORT MAP (
110 |       input_row_0 => mac_rows(0), input_row_1 => mac_rows(1), input_row_2 => mac_rows(2), input_row_3 => mac_rows(3),
111 |       input_row_4 => mac_rows(4), input_row_5 => mac_rows(5), input_row_6 => mac_rows(6), input_row_7 => mac_rows(7),
112 |       input_row_8 => mac_rows(8), input_row_9 => mac_rows(9), input_row_10 => mac_rows(10), input_row_11 => mac_rows(11),
113 |       input_row_12 => mac_rows(12), input_row_13 => mac_rows(13), input_row_14 => mac_rows(14), input_row_15 => mac_rows(15),
114 |       input_row_16 => mac_rows(16), input_row_17 => mac_rows(17), input_row_18 => mac_rows(18), input_row_19 => mac_rows(19),
115 |       input_row_20 => mac_rows(20), input_row_21 => mac_rows(21), input_row_22 => mac_rows(22), input_row_23 => mac_rows(23),
116 |       input_row_24 => mac_rows(24), input_row_25 => mac_rows(25), input_row_26 => mac_rows(26), input_row_27 => mac_rows(27),
117 |       input_row_28 => mac_rows(28), input_row_29 => mac_rows(29), input_row_30 => mac_rows(30), input_row_31 => mac_rows(31),
118 |       b_col => b_col,
119 |       output_row_0 => output_row_0, output_row_1 => output_row_1, output_row_2 => output_row_2, output_row_3 => output_row_3,
120 |       output_row_4 => output_row_4, output_row_5 => output_row_5, output_row_6 => output_row_6, output_row_7 => output_row_7,
121 |       output_row_8 => output_row_8, output_row_9 => output_row_9, output_row_10 => output_row_10, output_row_11 => output_row_11,
122 |       output_row_12 => output_row_12, output_row_13 => output_row_13, output_row_14 => output_row_14, output_row_15 => output_row_15,
123 |       output_row_16 => output_row_16, output_row_17 => output_row_17, output_row_18 => output_row_18, output_row_19 => output_row_19,
124 |       output_row_20 => output_row_20, output_row_21 => output_row_21, output_row_22 => output_row_22, output_row_23 => output_row_23,
125 |       output_row_24 => output_row_24, output_row_25 => output_row_25, output_row_26 => output_row_26, output_row_27 => output_row_27,
126 |       output_row_28 => output_row_28, output_row_29 => output_row_29, output_row_30 => output_row_30, output_row_31 => output_row_31
127 |     );
128 |
129 | END behaviour;
130 |
--------------------------------------------------------------------------------
/MatMul/MAC_32x32_8_bias_32/MAC_bias_32x32_8_reg.vhd:
--------------------------------------------------------------------------------
1 |
2 | LIBRARY ieee;
3 | USE ieee.std_logic_1164.all;
4 | USE ieee.numeric_std.all;
5 |
6 | ENTITY MAC_bias_32x32_8_reg IS
7 | PORT (input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7, input_row_8, input_row_9,
8 | input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15, input_row_16, input_row_17, input_row_18, input_row_19,
9 | input_row_20, input_row_21, input_row_22, input_row_23, input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29,
10 | input_row_30, input_row_31: IN STD_LOGIC_VECTOR(7 downto 0);
11 | input_col_0, input_col_1, input_col_2, input_col_3, input_col_4, input_col_5, input_col_6, input_col_7, input_col_8, input_col_9,
12 | input_col_10, input_col_11, input_col_12, input_col_13, input_col_14, input_col_15, input_col_16, input_col_17, input_col_18, input_col_19,
13 | input_col_20, input_col_21, input_col_22, input_col_23, input_col_24, input_col_25, input_col_26, input_col_27, input_col_28, input_col_29,
14 | input_col_30, input_col_31: IN STD_LOGIC_VECTOR(7 downto 0);
15 | b_col : IN STD_LOGIC_VECTOR(31 downto 0);
16 | SEL_mux: IN STD_LOGIC_VECTOR(4 downto 0);
17 | CLK, RST_n, ENABLE : IN STD_LOGIC;
18 | output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7, output_row_8, output_row_9,
19 | output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15, output_row_16, output_row_17, output_row_18, output_row_19,
20 | output_row_20, output_row_21, output_row_22, output_row_23, output_row_24, output_row_25, output_row_26,
output_row_27, output_row_28, output_row_29, 21 | output_row_30, output_row_31: OUT STD_LOGIC_VECTOR(31 downto 0) 22 | ); 23 | END MAC_bias_32x32_8_reg; 24 | 25 | ARCHITECTURE behaviour OF MAC_bias_32x32_8_reg IS 26 | 27 | 28 | COMPONENT MAC_32x32_8_reg IS 29 | PORT (input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7, input_row_8, input_row_9, 30 | input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15, input_row_16, input_row_17, input_row_18, input_row_19, 31 | input_row_20, input_row_21, input_row_22, input_row_23, input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, 32 | input_row_30, input_row_31: IN STD_LOGIC_VECTOR(7 downto 0); 33 | input_col_0, input_col_1, input_col_2, input_col_3, input_col_4, input_col_5, input_col_6, input_col_7, input_col_8, input_col_9, 34 | input_col_10, input_col_11, input_col_12, input_col_13, input_col_14, input_col_15, input_col_16, input_col_17, input_col_18, input_col_19, 35 | input_col_20, input_col_21, input_col_22, input_col_23, input_col_24, input_col_25, input_col_26, input_col_27, input_col_28, input_col_29, 36 | input_col_30, input_col_31: IN STD_LOGIC_VECTOR(7 downto 0); 37 | SEL_mux: IN STD_LOGIC_VECTOR(4 downto 0); 38 | CLK, RST_n, ENABLE : IN STD_LOGIC; 39 | output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7, output_row_8, output_row_9, 40 | output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15, output_row_16, output_row_17, output_row_18, output_row_19, 41 | output_row_20, output_row_21, output_row_22, output_row_23, output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, 42 | output_row_30, output_row_31: OUT STD_LOGIC_VECTOR(31 downto 0) 43 | ); 44 | END COMPONENT; 45 | 46 | 47 | COMPONENT bias_sum_32_32 IS 48 | PORT (input_row_0, input_row_1, input_row_2, input_row_3, 
input_row_4, input_row_5, input_row_6, input_row_7, input_row_8, input_row_9, 49 | input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15, input_row_16, input_row_17, input_row_18, input_row_19, 50 | input_row_20, input_row_21, input_row_22, input_row_23, input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, 51 | input_row_30, input_row_31: IN STD_LOGIC_VECTOR(31 downto 0); 52 | b_col : IN STD_LOGIC_VECTOR(31 downto 0); 53 | output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7, output_row_8, output_row_9, 54 | output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15, output_row_16, output_row_17, output_row_18, output_row_19, 55 | output_row_20, output_row_21, output_row_22, output_row_23, output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, 56 | output_row_30, output_row_31: OUT STD_LOGIC_VECTOR(31 downto 0) 57 | ); 58 | END COMPONENT; 59 | 60 | COMPONENT regnbit IS 61 | GENERIC ( N : POSITIVE := 2); 62 | PORT( 63 | D : IN STD_LOGIC_VECTOR(N-1 downto 0); 64 | CLK, RST_n, ENABLE : IN STD_LOGIC; 65 | Q : OUT STD_LOGIC_VECTOR(N-1 downto 0) 66 | ); 67 | END COMPONENT; 68 | 69 | 70 | SIGNAL output_row_MAC_base_0: STD_LOGIC_VECTOR(31 downto 0); 71 | SIGNAL output_row_MAC_base_1: STD_LOGIC_VECTOR(31 downto 0); 72 | SIGNAL output_row_MAC_base_2: STD_LOGIC_VECTOR(31 downto 0); 73 | SIGNAL output_row_MAC_base_3: STD_LOGIC_VECTOR(31 downto 0); 74 | SIGNAL output_row_MAC_base_4: STD_LOGIC_VECTOR(31 downto 0); 75 | SIGNAL output_row_MAC_base_5: STD_LOGIC_VECTOR(31 downto 0); 76 | SIGNAL output_row_MAC_base_6: STD_LOGIC_VECTOR(31 downto 0); 77 | SIGNAL output_row_MAC_base_7: STD_LOGIC_VECTOR(31 downto 0); 78 | SIGNAL output_row_MAC_base_8: STD_LOGIC_VECTOR(31 downto 0); 79 | SIGNAL output_row_MAC_base_9: STD_LOGIC_VECTOR(31 downto 0); 80 | SIGNAL output_row_MAC_base_10: STD_LOGIC_VECTOR(31 downto 
0); 81 | SIGNAL output_row_MAC_base_11: STD_LOGIC_VECTOR(31 downto 0); 82 | SIGNAL output_row_MAC_base_12: STD_LOGIC_VECTOR(31 downto 0); 83 | SIGNAL output_row_MAC_base_13: STD_LOGIC_VECTOR(31 downto 0); 84 | SIGNAL output_row_MAC_base_14: STD_LOGIC_VECTOR(31 downto 0); 85 | SIGNAL output_row_MAC_base_15: STD_LOGIC_VECTOR(31 downto 0); 86 | SIGNAL output_row_MAC_base_16: STD_LOGIC_VECTOR(31 downto 0); 87 | SIGNAL output_row_MAC_base_17: STD_LOGIC_VECTOR(31 downto 0); 88 | SIGNAL output_row_MAC_base_18: STD_LOGIC_VECTOR(31 downto 0); 89 | SIGNAL output_row_MAC_base_19: STD_LOGIC_VECTOR(31 downto 0); 90 | SIGNAL output_row_MAC_base_20: STD_LOGIC_VECTOR(31 downto 0); 91 | SIGNAL output_row_MAC_base_21: STD_LOGIC_VECTOR(31 downto 0); 92 | SIGNAL output_row_MAC_base_22: STD_LOGIC_VECTOR(31 downto 0); 93 | SIGNAL output_row_MAC_base_23: STD_LOGIC_VECTOR(31 downto 0); 94 | SIGNAL output_row_MAC_base_24: STD_LOGIC_VECTOR(31 downto 0); 95 | SIGNAL output_row_MAC_base_25: STD_LOGIC_VECTOR(31 downto 0); 96 | SIGNAL output_row_MAC_base_26: STD_LOGIC_VECTOR(31 downto 0); 97 | SIGNAL output_row_MAC_base_27: STD_LOGIC_VECTOR(31 downto 0); 98 | SIGNAL output_row_MAC_base_28: STD_LOGIC_VECTOR(31 downto 0); 99 | SIGNAL output_row_MAC_base_29: STD_LOGIC_VECTOR(31 downto 0); 100 | SIGNAL output_row_MAC_base_30: STD_LOGIC_VECTOR(31 downto 0); 101 | SIGNAL output_row_MAC_base_31: STD_LOGIC_VECTOR(31 downto 0); 102 | SIGNAL b_col_reg: STD_LOGIC_VECTOR(31 downto 0); 103 | 104 | 105 | BEGIN 106 | 107 | MAC_base: MAC_32x32_8_reg PORT MAP(CLK=>CLK, RST_n=>RST_n, ENABLE=>ENABLE, 108 | input_row_0=>input_row_0, input_row_1=>input_row_1, input_row_2=>input_row_2, input_row_3=>input_row_3, input_row_4=>input_row_4, input_row_5=>input_row_5, input_row_6=>input_row_6, input_row_7=>input_row_7, input_row_8=>input_row_8, input_row_9=>input_row_9, input_row_10=>input_row_10, input_row_11=>input_row_11, input_row_12=>input_row_12, input_row_13=>input_row_13, input_row_14=>input_row_14, 
input_row_15=>input_row_15, input_row_16=>input_row_16, input_row_17=>input_row_17, input_row_18=>input_row_18, input_row_19=>input_row_19, input_row_20=>input_row_20, input_row_21=>input_row_21, input_row_22=>input_row_22, input_row_23=>input_row_23, input_row_24=>input_row_24, input_row_25=>input_row_25, input_row_26=>input_row_26, input_row_27=>input_row_27, input_row_28=>input_row_28, input_row_29=>input_row_29, input_row_30=>input_row_30, input_row_31=>input_row_31, 109 | input_col_0=>input_col_0, input_col_1=>input_col_1, input_col_2=>input_col_2, input_col_3=>input_col_3, input_col_4=>input_col_4, input_col_5=>input_col_5, input_col_6=>input_col_6, input_col_7=>input_col_7, input_col_8=>input_col_8, input_col_9=>input_col_9, input_col_10=>input_col_10, input_col_11=>input_col_11, input_col_12=>input_col_12, input_col_13=>input_col_13, input_col_14=>input_col_14, input_col_15=>input_col_15, input_col_16=>input_col_16, input_col_17=>input_col_17, input_col_18=>input_col_18, input_col_19=>input_col_19, input_col_20=>input_col_20, input_col_21=>input_col_21, input_col_22=>input_col_22, input_col_23=>input_col_23, input_col_24=>input_col_24, input_col_25=>input_col_25, input_col_26=>input_col_26, input_col_27=>input_col_27, input_col_28=>input_col_28, input_col_29=>input_col_29, input_col_30=>input_col_30, input_col_31=>input_col_31, 110 | output_row_0=>output_row_MAC_base_0, output_row_1=>output_row_MAC_base_1, output_row_2=>output_row_MAC_base_2, output_row_3=>output_row_MAC_base_3, output_row_4=>output_row_MAC_base_4, output_row_5=>output_row_MAC_base_5, output_row_6=>output_row_MAC_base_6, output_row_7=>output_row_MAC_base_7, output_row_8=>output_row_MAC_base_8, output_row_9=>output_row_MAC_base_9, output_row_10=>output_row_MAC_base_10, output_row_11=>output_row_MAC_base_11, output_row_12=>output_row_MAC_base_12, output_row_13=>output_row_MAC_base_13, output_row_14=>output_row_MAC_base_14, output_row_15=>output_row_MAC_base_15, 
output_row_16=>output_row_MAC_base_16, output_row_17=>output_row_MAC_base_17, output_row_18=>output_row_MAC_base_18, output_row_19=>output_row_MAC_base_19, output_row_20=>output_row_MAC_base_20, output_row_21=>output_row_MAC_base_21, output_row_22=>output_row_MAC_base_22, output_row_23=>output_row_MAC_base_23, output_row_24=>output_row_MAC_base_24, output_row_25=>output_row_MAC_base_25, output_row_26=>output_row_MAC_base_26, output_row_27=>output_row_MAC_base_27, output_row_28=>output_row_MAC_base_28, output_row_29=>output_row_MAC_base_29, output_row_30=>output_row_MAC_base_30, output_row_31=>output_row_MAC_base_31, 111 | SEL_mux=>SEL_mux); 112 | 113 | bias_reg: regnbit GENERIC MAP(N=>32) PORT MAP(CLK=>CLK, RST_n=>RST_n, ENABLE=>'1', D=>b_col, Q=>b_col_reg); 114 | bias_sum: bias_sum_32_32 PORT MAP(input_row_0=>output_row_MAC_base_0, input_row_1=>output_row_MAC_base_1, input_row_2=>output_row_MAC_base_2, input_row_3=>output_row_MAC_base_3, input_row_4=>output_row_MAC_base_4, input_row_5=>output_row_MAC_base_5, input_row_6=>output_row_MAC_base_6, input_row_7=>output_row_MAC_base_7, input_row_8=>output_row_MAC_base_8, input_row_9=>output_row_MAC_base_9, input_row_10=>output_row_MAC_base_10, input_row_11=>output_row_MAC_base_11, input_row_12=>output_row_MAC_base_12, input_row_13=>output_row_MAC_base_13, input_row_14=>output_row_MAC_base_14, input_row_15=>output_row_MAC_base_15, input_row_16=>output_row_MAC_base_16, input_row_17=>output_row_MAC_base_17, input_row_18=>output_row_MAC_base_18, input_row_19=>output_row_MAC_base_19, input_row_20=>output_row_MAC_base_20, input_row_21=>output_row_MAC_base_21, input_row_22=>output_row_MAC_base_22, input_row_23=>output_row_MAC_base_23, input_row_24=>output_row_MAC_base_24, input_row_25=>output_row_MAC_base_25, input_row_26=>output_row_MAC_base_26, input_row_27=>output_row_MAC_base_27, input_row_28=>output_row_MAC_base_28, input_row_29=>output_row_MAC_base_29, input_row_30=>output_row_MAC_base_30, 
input_row_31=>output_row_MAC_base_31, 115 | b_col=>b_col_reg, output_row_0=>output_row_0, output_row_1=>output_row_1, output_row_2=>output_row_2, output_row_3=>output_row_3, output_row_4=>output_row_4, output_row_5=>output_row_5, output_row_6=>output_row_6, output_row_7=>output_row_7, output_row_8=>output_row_8, output_row_9=>output_row_9, output_row_10=>output_row_10, output_row_11=>output_row_11, output_row_12=>output_row_12, output_row_13=>output_row_13, output_row_14=>output_row_14, output_row_15=>output_row_15, output_row_16=>output_row_16, output_row_17=>output_row_17, output_row_18=>output_row_18, output_row_19=>output_row_19, output_row_20=>output_row_20, output_row_21=>output_row_21, output_row_22=>output_row_22, output_row_23=>output_row_23, output_row_24=>output_row_24, output_row_25=>output_row_25, output_row_26=>output_row_26, output_row_27=>output_row_27, output_row_28=>output_row_28, output_row_29=>output_row_29, output_row_30=>output_row_30, output_row_31=>output_row_31); 116 | 117 | 118 | END behaviour; 119 | -------------------------------------------------------------------------------- /MatMul/MAC_32x32_8_bias_32/bias_sum_32_32.vhd: -------------------------------------------------------------------------------- 1 | -- bias_sum_32_32: clockless bias adder made of 32 concurrent assignments. Each output_row_i is input_row_i + b_col, with both operands interpreted as 32-bit SIGNED (ieee.numeric_std); the sum keeps the 32-bit operand width, so an overflowing result wraps instead of saturating. 2 | LIBRARY ieee; 3 | USE ieee.std_logic_1164.all; 4 | USE ieee.numeric_std.all; 5 | 6 | ENTITY bias_sum_32_32 IS 7 | PORT (input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7, input_row_8, input_row_9, 8 | input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15, input_row_16, input_row_17, input_row_18, input_row_19, 9 | input_row_20, input_row_21, input_row_22, input_row_23, input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, 10 | input_row_30, input_row_31: IN STD_LOGIC_VECTOR(31 downto 0); 11 | b_col : IN STD_LOGIC_VECTOR(31 downto 0); -- single bias value, added to every one of the 32 rows 12 | output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, 
output_row_5, output_row_6, output_row_7, output_row_8, output_row_9, 13 | output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15, output_row_16, output_row_17, output_row_18, output_row_19, 14 | output_row_20, output_row_21, output_row_22, output_row_23, output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, 15 | output_row_30, output_row_31: OUT STD_LOGIC_VECTOR(31 downto 0) 16 | ); 17 | END bias_sum_32_32; 18 | 19 | ARCHITECTURE behaviour OF bias_sum_32_32 IS 20 | 21 | 22 | 23 | BEGIN 24 | 25 | output_row_0 <= STD_LOGIC_VECTOR(SIGNED(input_row_0) + SIGNED(b_col)); 26 | output_row_1 <= STD_LOGIC_VECTOR(SIGNED(input_row_1) + SIGNED(b_col)); 27 | output_row_2 <= STD_LOGIC_VECTOR(SIGNED(input_row_2) + SIGNED(b_col)); 28 | output_row_3 <= STD_LOGIC_VECTOR(SIGNED(input_row_3) + SIGNED(b_col)); 29 | output_row_4 <= STD_LOGIC_VECTOR(SIGNED(input_row_4) + SIGNED(b_col)); 30 | output_row_5 <= STD_LOGIC_VECTOR(SIGNED(input_row_5) + SIGNED(b_col)); 31 | output_row_6 <= STD_LOGIC_VECTOR(SIGNED(input_row_6) + SIGNED(b_col)); 32 | output_row_7 <= STD_LOGIC_VECTOR(SIGNED(input_row_7) + SIGNED(b_col)); 33 | output_row_8 <= STD_LOGIC_VECTOR(SIGNED(input_row_8) + SIGNED(b_col)); 34 | output_row_9 <= STD_LOGIC_VECTOR(SIGNED(input_row_9) + SIGNED(b_col)); 35 | output_row_10 <= STD_LOGIC_VECTOR(SIGNED(input_row_10) + SIGNED(b_col)); 36 | output_row_11 <= STD_LOGIC_VECTOR(SIGNED(input_row_11) + SIGNED(b_col)); 37 | output_row_12 <= STD_LOGIC_VECTOR(SIGNED(input_row_12) + SIGNED(b_col)); 38 | output_row_13 <= STD_LOGIC_VECTOR(SIGNED(input_row_13) + SIGNED(b_col)); 39 | output_row_14 <= STD_LOGIC_VECTOR(SIGNED(input_row_14) + SIGNED(b_col)); 40 | output_row_15 <= STD_LOGIC_VECTOR(SIGNED(input_row_15) + SIGNED(b_col)); 41 | output_row_16 <= STD_LOGIC_VECTOR(SIGNED(input_row_16) + SIGNED(b_col)); 42 | output_row_17 <= STD_LOGIC_VECTOR(SIGNED(input_row_17) + SIGNED(b_col)); 43 | output_row_18 <= 
STD_LOGIC_VECTOR(SIGNED(input_row_18) + SIGNED(b_col)); 44 | output_row_19 <= STD_LOGIC_VECTOR(SIGNED(input_row_19) + SIGNED(b_col)); 45 | output_row_20 <= STD_LOGIC_VECTOR(SIGNED(input_row_20) + SIGNED(b_col)); 46 | output_row_21 <= STD_LOGIC_VECTOR(SIGNED(input_row_21) + SIGNED(b_col)); 47 | output_row_22 <= STD_LOGIC_VECTOR(SIGNED(input_row_22) + SIGNED(b_col)); 48 | output_row_23 <= STD_LOGIC_VECTOR(SIGNED(input_row_23) + SIGNED(b_col)); 49 | output_row_24 <= STD_LOGIC_VECTOR(SIGNED(input_row_24) + SIGNED(b_col)); 50 | output_row_25 <= STD_LOGIC_VECTOR(SIGNED(input_row_25) + SIGNED(b_col)); 51 | output_row_26 <= STD_LOGIC_VECTOR(SIGNED(input_row_26) + SIGNED(b_col)); 52 | output_row_27 <= STD_LOGIC_VECTOR(SIGNED(input_row_27) + SIGNED(b_col)); 53 | output_row_28 <= STD_LOGIC_VECTOR(SIGNED(input_row_28) + SIGNED(b_col)); 54 | output_row_29 <= STD_LOGIC_VECTOR(SIGNED(input_row_29) + SIGNED(b_col)); 55 | output_row_30 <= STD_LOGIC_VECTOR(SIGNED(input_row_30) + SIGNED(b_col)); 56 | output_row_31 <= STD_LOGIC_VECTOR(SIGNED(input_row_31) + SIGNED(b_col)); 57 | 58 | 59 | END behaviour; 60 | -------------------------------------------------------------------------------- /MatMul/MAC_32x32_8_bias_32/mux_32to1_nbit.vhd: -------------------------------------------------------------------------------- 1 | -- mux_32to1_nbit: 32-input, N-bit-wide multiplexer built as a three-level tree: eight mux_4to1_nbit_base on SEL_mux(1 downto 0), two mux_4to1_nbit_base on SEL_mux(3 downto 2) and a final mux_2to1_nbit_base on SEL_mux(4). NOTE(review): assuming each base mux routes input Ik when its SEL equals k (they are defined in another file; confirm), O carries the input whose index equals SEL_mux. 2 | LIBRARY ieee; 3 | USE ieee.std_logic_1164.all; 4 | USE ieee.numeric_std.all; 5 | 6 | ENTITY mux_32to1_nbit IS 7 | GENERIC( 8 | N : POSITIVE := 2 -- width in bits of every data input and of O 9 | ); 10 | PORT (I0, I1, I2, I3, I4, I5, I6, I7, I8, I9, 11 | I10, I11, I12, I13, I14, I15, I16, I17, I18, I19, 12 | I20, I21, I22, I23, I24, I25, I26, I27, I28, I29, 13 | I30, I31: IN STD_LOGIC_VECTOR(N-1 downto 0); 14 | SEL_mux: IN STD_LOGIC_VECTOR(4 downto 0); -- bits 1..0 drive the first mux level, bits 3..2 the second, bit 4 the last 15 | O: OUT STD_LOGIC_VECTOR(N-1 downto 0) 16 | ); 17 | END mux_32to1_nbit; 18 | 19 | ARCHITECTURE behaviour OF mux_32to1_nbit IS 20 | 21 | COMPONENT mux_4to1_nbit_base IS 22 | GENERIC ( N : POSITIVE :=1); 23 | PORT( I0, I1, I2, I3: IN STD_LOGIC_VECTOR(N-1 downto 
0); 24 | SEL : IN STD_LOGIC_VECTOR(1 downto 0); 25 | O : OUT STD_LOGIC_VECTOR(N-1 downto 0) 26 | ); 27 | END COMPONENT; 28 | 29 | COMPONENT mux_2to1_nbit_base IS 30 | GENERIC ( N : POSITIVE :=1); 31 | PORT( I0, I1: IN STD_LOGIC_VECTOR(N-1 downto 0); 32 | SEL : IN STD_LOGIC; 33 | O : OUT STD_LOGIC_VECTOR(N-1 downto 0) 34 | ); 35 | END COMPONENT; 36 | 37 | 38 | SIGNAL output_mux_0_0: STD_LOGIC_VECTOR(N-1 downto 0); 39 | SIGNAL output_mux_0_1: STD_LOGIC_VECTOR(N-1 downto 0); 40 | SIGNAL output_mux_0_2: STD_LOGIC_VECTOR(N-1 downto 0); 41 | SIGNAL output_mux_0_3: STD_LOGIC_VECTOR(N-1 downto 0); 42 | SIGNAL output_mux_0_4: STD_LOGIC_VECTOR(N-1 downto 0); 43 | SIGNAL output_mux_0_5: STD_LOGIC_VECTOR(N-1 downto 0); 44 | SIGNAL output_mux_0_6: STD_LOGIC_VECTOR(N-1 downto 0); 45 | SIGNAL output_mux_0_7: STD_LOGIC_VECTOR(N-1 downto 0); 46 | SIGNAL output_mux_1_0: STD_LOGIC_VECTOR(N-1 downto 0); 47 | SIGNAL output_mux_1_1: STD_LOGIC_VECTOR(N-1 downto 0); 48 | 49 | BEGIN 50 | 51 | 52 | mux_0_0: mux_4to1_nbit_base GENERIC MAP(N=>N) PORT MAP(I0=>I0, I1=>I1, I2=>I2, I3=>I3, SEL=>SEL_mux(1 downto 0), O=>output_mux_0_0); 53 | mux_0_1: mux_4to1_nbit_base GENERIC MAP(N=>N) PORT MAP(I0=>I4, I1=>I5, I2=>I6, I3=>I7, SEL=>SEL_mux(1 downto 0), O=>output_mux_0_1); 54 | mux_0_2: mux_4to1_nbit_base GENERIC MAP(N=>N) PORT MAP(I0=>I8, I1=>I9, I2=>I10, I3=>I11, SEL=>SEL_mux(1 downto 0), O=>output_mux_0_2); 55 | mux_0_3: mux_4to1_nbit_base GENERIC MAP(N=>N) PORT MAP(I0=>I12, I1=>I13, I2=>I14, I3=>I15, SEL=>SEL_mux(1 downto 0), O=>output_mux_0_3); 56 | mux_0_4: mux_4to1_nbit_base GENERIC MAP(N=>N) PORT MAP(I0=>I16, I1=>I17, I2=>I18, I3=>I19, SEL=>SEL_mux(1 downto 0), O=>output_mux_0_4); 57 | mux_0_5: mux_4to1_nbit_base GENERIC MAP(N=>N) PORT MAP(I0=>I20, I1=>I21, I2=>I22, I3=>I23, SEL=>SEL_mux(1 downto 0), O=>output_mux_0_5); 58 | mux_0_6: mux_4to1_nbit_base GENERIC MAP(N=>N) PORT MAP(I0=>I24, I1=>I25, I2=>I26, I3=>I27, SEL=>SEL_mux(1 downto 0), O=>output_mux_0_6); 59 | mux_0_7: mux_4to1_nbit_base 
GENERIC MAP(N=>N) PORT MAP(I0=>I28, I1=>I29, I2=>I30, I3=>I31, SEL=>SEL_mux(1 downto 0), O=>output_mux_0_7); 60 | mux_1_0: mux_4to1_nbit_base GENERIC MAP(N=>N) PORT MAP(I0=>output_mux_0_0, I1=>output_mux_0_1, I2=>output_mux_0_2, I3=>output_mux_0_3, SEL=>SEL_mux(3 downto 2), O=>output_mux_1_0); 61 | mux_1_1: mux_4to1_nbit_base GENERIC MAP(N=>N) PORT MAP(I0=>output_mux_0_4, I1=>output_mux_0_5, I2=>output_mux_0_6, I3=>output_mux_0_7, SEL=>SEL_mux(3 downto 2), O=>output_mux_1_1); 62 | mux_2_out: mux_2to1_nbit_base GENERIC MAP(N=>N) PORT MAP(I0=>output_mux_1_0, I1=>output_mux_1_1, SEL=>SEL_mux(4), O=>O); 63 | 64 | END behaviour; 65 | -------------------------------------------------------------------------------- /MatMul/MAC_32x8_8_bias_32/MAC_bias_32x8_8.vhd: -------------------------------------------------------------------------------- 1 | -- MAC_bias_32x8_8: structural wrapper that instantiates MAC_32x8_8 (MAC_base) and bias_sum_32_32 (bias_sum). The 32 MAC outputs and the b_col port (connected directly, with no register inside this entity) feed bias_sum, whose outputs drive output_row_0..output_row_31. 2 | LIBRARY ieee; 3 | USE ieee.std_logic_1164.all; 4 | USE ieee.numeric_std.all; 5 | 6 | ENTITY MAC_bias_32x8_8 IS 7 | PORT (input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7, input_row_8, input_row_9, 8 | input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15, input_row_16, input_row_17, input_row_18, input_row_19, 9 | input_row_20, input_row_21, input_row_22, input_row_23, input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, 10 | input_row_30, input_row_31: IN STD_LOGIC_VECTOR(7 downto 0); 11 | input_col_0, input_col_1, input_col_2, input_col_3, input_col_4, input_col_5, input_col_6, input_col_7: IN STD_LOGIC_VECTOR(7 downto 0); 12 | b_col : IN STD_LOGIC_VECTOR(31 downto 0); -- 32-bit bias, wired straight to bias_sum 13 | SEL_mux: IN STD_LOGIC_VECTOR(2 downto 0); -- forwarded unchanged to MAC_base 14 | CLK, RST_n, ENABLE : IN STD_LOGIC; -- connected to MAC_base only; bias_sum has no clock, reset or enable port 15 | output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7, output_row_8, output_row_9, 16 | output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15, output_row_16, 
output_row_17, output_row_18, output_row_19, 17 | output_row_20, output_row_21, output_row_22, output_row_23, output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, 18 | output_row_30, output_row_31: OUT STD_LOGIC_VECTOR(31 downto 0) 19 | ); 20 | END MAC_bias_32x8_8; 21 | 22 | ARCHITECTURE behaviour OF MAC_bias_32x8_8 IS 23 | 24 | 25 | COMPONENT MAC_32x8_8 IS 26 | PORT (input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7, input_row_8, input_row_9, 27 | input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15, input_row_16, input_row_17, input_row_18, input_row_19, 28 | input_row_20, input_row_21, input_row_22, input_row_23, input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, 29 | input_row_30, input_row_31: IN STD_LOGIC_VECTOR(7 downto 0); 30 | input_col_0, input_col_1, input_col_2, input_col_3, input_col_4, input_col_5, input_col_6, input_col_7: IN STD_LOGIC_VECTOR(7 downto 0); 31 | SEL_mux: IN STD_LOGIC_VECTOR(2 downto 0); 32 | CLK, RST_n, ENABLE : IN STD_LOGIC; 33 | output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7, output_row_8, output_row_9, 34 | output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15, output_row_16, output_row_17, output_row_18, output_row_19, 35 | output_row_20, output_row_21, output_row_22, output_row_23, output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, 36 | output_row_30, output_row_31: OUT STD_LOGIC_VECTOR(31 downto 0) 37 | ); 38 | END COMPONENT; 39 | 40 | 41 | COMPONENT bias_sum_32_32 IS 42 | PORT (input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7, input_row_8, input_row_9, 43 | input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15, input_row_16, input_row_17, input_row_18, 
input_row_19, 44 | input_row_20, input_row_21, input_row_22, input_row_23, input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, 45 | input_row_30, input_row_31: IN STD_LOGIC_VECTOR(31 downto 0); 46 | b_col : IN STD_LOGIC_VECTOR(31 downto 0); 47 | output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7, output_row_8, output_row_9, 48 | output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15, output_row_16, output_row_17, output_row_18, output_row_19, 49 | output_row_20, output_row_21, output_row_22, output_row_23, output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, 50 | output_row_30, output_row_31: OUT STD_LOGIC_VECTOR(31 downto 0) 51 | ); 52 | END COMPONENT; 53 | 54 | 55 | SIGNAL output_row_MAC_base_0: STD_LOGIC_VECTOR(31 downto 0); 56 | SIGNAL output_row_MAC_base_1: STD_LOGIC_VECTOR(31 downto 0); 57 | SIGNAL output_row_MAC_base_2: STD_LOGIC_VECTOR(31 downto 0); 58 | SIGNAL output_row_MAC_base_3: STD_LOGIC_VECTOR(31 downto 0); 59 | SIGNAL output_row_MAC_base_4: STD_LOGIC_VECTOR(31 downto 0); 60 | SIGNAL output_row_MAC_base_5: STD_LOGIC_VECTOR(31 downto 0); 61 | SIGNAL output_row_MAC_base_6: STD_LOGIC_VECTOR(31 downto 0); 62 | SIGNAL output_row_MAC_base_7: STD_LOGIC_VECTOR(31 downto 0); 63 | SIGNAL output_row_MAC_base_8: STD_LOGIC_VECTOR(31 downto 0); 64 | SIGNAL output_row_MAC_base_9: STD_LOGIC_VECTOR(31 downto 0); 65 | SIGNAL output_row_MAC_base_10: STD_LOGIC_VECTOR(31 downto 0); 66 | SIGNAL output_row_MAC_base_11: STD_LOGIC_VECTOR(31 downto 0); 67 | SIGNAL output_row_MAC_base_12: STD_LOGIC_VECTOR(31 downto 0); 68 | SIGNAL output_row_MAC_base_13: STD_LOGIC_VECTOR(31 downto 0); 69 | SIGNAL output_row_MAC_base_14: STD_LOGIC_VECTOR(31 downto 0); 70 | SIGNAL output_row_MAC_base_15: STD_LOGIC_VECTOR(31 downto 0); 71 | SIGNAL output_row_MAC_base_16: STD_LOGIC_VECTOR(31 downto 0); 72 | SIGNAL output_row_MAC_base_17: 
STD_LOGIC_VECTOR(31 downto 0); 73 | SIGNAL output_row_MAC_base_18: STD_LOGIC_VECTOR(31 downto 0); 74 | SIGNAL output_row_MAC_base_19: STD_LOGIC_VECTOR(31 downto 0); 75 | SIGNAL output_row_MAC_base_20: STD_LOGIC_VECTOR(31 downto 0); 76 | SIGNAL output_row_MAC_base_21: STD_LOGIC_VECTOR(31 downto 0); 77 | SIGNAL output_row_MAC_base_22: STD_LOGIC_VECTOR(31 downto 0); 78 | SIGNAL output_row_MAC_base_23: STD_LOGIC_VECTOR(31 downto 0); 79 | SIGNAL output_row_MAC_base_24: STD_LOGIC_VECTOR(31 downto 0); 80 | SIGNAL output_row_MAC_base_25: STD_LOGIC_VECTOR(31 downto 0); 81 | SIGNAL output_row_MAC_base_26: STD_LOGIC_VECTOR(31 downto 0); 82 | SIGNAL output_row_MAC_base_27: STD_LOGIC_VECTOR(31 downto 0); 83 | SIGNAL output_row_MAC_base_28: STD_LOGIC_VECTOR(31 downto 0); 84 | SIGNAL output_row_MAC_base_29: STD_LOGIC_VECTOR(31 downto 0); 85 | SIGNAL output_row_MAC_base_30: STD_LOGIC_VECTOR(31 downto 0); 86 | SIGNAL output_row_MAC_base_31: STD_LOGIC_VECTOR(31 downto 0); 87 | 88 | BEGIN 89 | 90 | MAC_base: MAC_32x8_8 PORT MAP(CLK=>CLK, RST_n=>RST_n, ENABLE=>ENABLE, 91 | input_row_0=>input_row_0, input_row_1=>input_row_1, input_row_2=>input_row_2, input_row_3=>input_row_3, input_row_4=>input_row_4, input_row_5=>input_row_5, input_row_6=>input_row_6, input_row_7=>input_row_7, input_row_8=>input_row_8, input_row_9=>input_row_9, input_row_10=>input_row_10, input_row_11=>input_row_11, input_row_12=>input_row_12, input_row_13=>input_row_13, input_row_14=>input_row_14, input_row_15=>input_row_15, input_row_16=>input_row_16, input_row_17=>input_row_17, input_row_18=>input_row_18, input_row_19=>input_row_19, input_row_20=>input_row_20, input_row_21=>input_row_21, input_row_22=>input_row_22, input_row_23=>input_row_23, input_row_24=>input_row_24, input_row_25=>input_row_25, input_row_26=>input_row_26, input_row_27=>input_row_27, input_row_28=>input_row_28, input_row_29=>input_row_29, input_row_30=>input_row_30, input_row_31=>input_row_31, 92 | input_col_0=>input_col_0, 
input_col_1=>input_col_1, input_col_2=>input_col_2, input_col_3=>input_col_3, input_col_4=>input_col_4, input_col_5=>input_col_5, input_col_6=>input_col_6, input_col_7=>input_col_7, 93 | output_row_0=>output_row_MAC_base_0, output_row_1=>output_row_MAC_base_1, output_row_2=>output_row_MAC_base_2, output_row_3=>output_row_MAC_base_3, output_row_4=>output_row_MAC_base_4, output_row_5=>output_row_MAC_base_5, output_row_6=>output_row_MAC_base_6, output_row_7=>output_row_MAC_base_7, output_row_8=>output_row_MAC_base_8, output_row_9=>output_row_MAC_base_9, output_row_10=>output_row_MAC_base_10, output_row_11=>output_row_MAC_base_11, output_row_12=>output_row_MAC_base_12, output_row_13=>output_row_MAC_base_13, output_row_14=>output_row_MAC_base_14, output_row_15=>output_row_MAC_base_15, output_row_16=>output_row_MAC_base_16, output_row_17=>output_row_MAC_base_17, output_row_18=>output_row_MAC_base_18, output_row_19=>output_row_MAC_base_19, output_row_20=>output_row_MAC_base_20, output_row_21=>output_row_MAC_base_21, output_row_22=>output_row_MAC_base_22, output_row_23=>output_row_MAC_base_23, output_row_24=>output_row_MAC_base_24, output_row_25=>output_row_MAC_base_25, output_row_26=>output_row_MAC_base_26, output_row_27=>output_row_MAC_base_27, output_row_28=>output_row_MAC_base_28, output_row_29=>output_row_MAC_base_29, output_row_30=>output_row_MAC_base_30, output_row_31=>output_row_MAC_base_31, 94 | SEL_mux=>SEL_mux); 95 | bias_sum: bias_sum_32_32 PORT MAP(input_row_0=>output_row_MAC_base_0, input_row_1=>output_row_MAC_base_1, input_row_2=>output_row_MAC_base_2, input_row_3=>output_row_MAC_base_3, input_row_4=>output_row_MAC_base_4, input_row_5=>output_row_MAC_base_5, input_row_6=>output_row_MAC_base_6, input_row_7=>output_row_MAC_base_7, input_row_8=>output_row_MAC_base_8, input_row_9=>output_row_MAC_base_9, input_row_10=>output_row_MAC_base_10, input_row_11=>output_row_MAC_base_11, input_row_12=>output_row_MAC_base_12, input_row_13=>output_row_MAC_base_13, 
input_row_14=>output_row_MAC_base_14, input_row_15=>output_row_MAC_base_15, input_row_16=>output_row_MAC_base_16, input_row_17=>output_row_MAC_base_17, input_row_18=>output_row_MAC_base_18, input_row_19=>output_row_MAC_base_19, input_row_20=>output_row_MAC_base_20, input_row_21=>output_row_MAC_base_21, input_row_22=>output_row_MAC_base_22, input_row_23=>output_row_MAC_base_23, input_row_24=>output_row_MAC_base_24, input_row_25=>output_row_MAC_base_25, input_row_26=>output_row_MAC_base_26, input_row_27=>output_row_MAC_base_27, input_row_28=>output_row_MAC_base_28, input_row_29=>output_row_MAC_base_29, input_row_30=>output_row_MAC_base_30, input_row_31=>output_row_MAC_base_31, 96 | b_col=>b_col, output_row_0=>output_row_0, output_row_1=>output_row_1, output_row_2=>output_row_2, output_row_3=>output_row_3, output_row_4=>output_row_4, output_row_5=>output_row_5, output_row_6=>output_row_6, output_row_7=>output_row_7, output_row_8=>output_row_8, output_row_9=>output_row_9, output_row_10=>output_row_10, output_row_11=>output_row_11, output_row_12=>output_row_12, output_row_13=>output_row_13, output_row_14=>output_row_14, output_row_15=>output_row_15, output_row_16=>output_row_16, output_row_17=>output_row_17, output_row_18=>output_row_18, output_row_19=>output_row_19, output_row_20=>output_row_20, output_row_21=>output_row_21, output_row_22=>output_row_22, output_row_23=>output_row_23, output_row_24=>output_row_24, output_row_25=>output_row_25, output_row_26=>output_row_26, output_row_27=>output_row_27, output_row_28=>output_row_28, output_row_29=>output_row_29, output_row_30=>output_row_30, output_row_31=>output_row_31); 97 | 98 | 99 | END behaviour; 100 | -------------------------------------------------------------------------------- /MatMul/MAC_32x8_8_bias_32/MAC_bias_32x8_8_reg.vhd: -------------------------------------------------------------------------------- 1 | 2 | LIBRARY ieee; 3 | USE ieee.std_logic_1164.all; 4 | USE ieee.numeric_std.all; 5 | 6 | ENTITY 
MAC_bias_32x8_8_reg IS 7 | PORT (input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7, input_row_8, input_row_9, 8 | input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15, input_row_16, input_row_17, input_row_18, input_row_19, 9 | input_row_20, input_row_21, input_row_22, input_row_23, input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, 10 | input_row_30, input_row_31: IN STD_LOGIC_VECTOR(7 downto 0); 11 | input_col_0, input_col_1, input_col_2, input_col_3, input_col_4, input_col_5, input_col_6, input_col_7: IN STD_LOGIC_VECTOR(7 downto 0); 12 | b_col : IN STD_LOGIC_VECTOR(31 downto 0); 13 | SEL_mux: IN STD_LOGIC_VECTOR(2 downto 0); 14 | CLK, RST_n, ENABLE : IN STD_LOGIC; 15 | output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7, output_row_8, output_row_9, 16 | output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15, output_row_16, output_row_17, output_row_18, output_row_19, 17 | output_row_20, output_row_21, output_row_22, output_row_23, output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, 18 | output_row_30, output_row_31: OUT STD_LOGIC_VECTOR(31 downto 0) 19 | ); 20 | END MAC_bias_32x8_8_reg; 21 | 22 | ARCHITECTURE behaviour OF MAC_bias_32x8_8_reg IS 23 | 24 | 25 | COMPONENT MAC_32x8_8_reg IS 26 | PORT (input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7, input_row_8, input_row_9, 27 | input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15, input_row_16, input_row_17, input_row_18, input_row_19, 28 | input_row_20, input_row_21, input_row_22, input_row_23, input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, 29 | input_row_30, input_row_31: IN STD_LOGIC_VECTOR(7 downto 0); 30 | input_col_0, input_col_1, input_col_2, 
input_col_3, input_col_4, input_col_5, input_col_6, input_col_7: IN STD_LOGIC_VECTOR(7 downto 0); 31 | SEL_mux: IN STD_LOGIC_VECTOR(2 downto 0); 32 | CLK, RST_n, ENABLE : IN STD_LOGIC; 33 | output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7, output_row_8, output_row_9, 34 | output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15, output_row_16, output_row_17, output_row_18, output_row_19, 35 | output_row_20, output_row_21, output_row_22, output_row_23, output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, 36 | output_row_30, output_row_31: OUT STD_LOGIC_VECTOR(31 downto 0) 37 | ); 38 | END COMPONENT; 39 | 40 | 41 | COMPONENT bias_sum_32_32 IS 42 | PORT (input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7, input_row_8, input_row_9, 43 | input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15, input_row_16, input_row_17, input_row_18, input_row_19, 44 | input_row_20, input_row_21, input_row_22, input_row_23, input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, 45 | input_row_30, input_row_31: IN STD_LOGIC_VECTOR(31 downto 0); 46 | b_col : IN STD_LOGIC_VECTOR(31 downto 0); 47 | output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7, output_row_8, output_row_9, 48 | output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15, output_row_16, output_row_17, output_row_18, output_row_19, 49 | output_row_20, output_row_21, output_row_22, output_row_23, output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, 50 | output_row_30, output_row_31: OUT STD_LOGIC_VECTOR(31 downto 0) 51 | ); 52 | END COMPONENT; 53 | 54 | COMPONENT regnbit IS 55 | GENERIC ( N : POSITIVE := 2); 56 | PORT( 57 | D : IN STD_LOGIC_VECTOR(N-1 downto 0); 
58 | CLK, RST_n, ENABLE : IN STD_LOGIC; 59 | Q : OUT STD_LOGIC_VECTOR(N-1 downto 0) 60 | ); 61 | END COMPONENT; 62 | 63 | 64 | SIGNAL output_row_MAC_base_0: STD_LOGIC_VECTOR(31 downto 0); 65 | SIGNAL output_row_MAC_base_1: STD_LOGIC_VECTOR(31 downto 0); 66 | SIGNAL output_row_MAC_base_2: STD_LOGIC_VECTOR(31 downto 0); 67 | SIGNAL output_row_MAC_base_3: STD_LOGIC_VECTOR(31 downto 0); 68 | SIGNAL output_row_MAC_base_4: STD_LOGIC_VECTOR(31 downto 0); 69 | SIGNAL output_row_MAC_base_5: STD_LOGIC_VECTOR(31 downto 0); 70 | SIGNAL output_row_MAC_base_6: STD_LOGIC_VECTOR(31 downto 0); 71 | SIGNAL output_row_MAC_base_7: STD_LOGIC_VECTOR(31 downto 0); 72 | SIGNAL output_row_MAC_base_8: STD_LOGIC_VECTOR(31 downto 0); 73 | SIGNAL output_row_MAC_base_9: STD_LOGIC_VECTOR(31 downto 0); 74 | SIGNAL output_row_MAC_base_10: STD_LOGIC_VECTOR(31 downto 0); 75 | SIGNAL output_row_MAC_base_11: STD_LOGIC_VECTOR(31 downto 0); 76 | SIGNAL output_row_MAC_base_12: STD_LOGIC_VECTOR(31 downto 0); 77 | SIGNAL output_row_MAC_base_13: STD_LOGIC_VECTOR(31 downto 0); 78 | SIGNAL output_row_MAC_base_14: STD_LOGIC_VECTOR(31 downto 0); 79 | SIGNAL output_row_MAC_base_15: STD_LOGIC_VECTOR(31 downto 0); 80 | SIGNAL output_row_MAC_base_16: STD_LOGIC_VECTOR(31 downto 0); 81 | SIGNAL output_row_MAC_base_17: STD_LOGIC_VECTOR(31 downto 0); 82 | SIGNAL output_row_MAC_base_18: STD_LOGIC_VECTOR(31 downto 0); 83 | SIGNAL output_row_MAC_base_19: STD_LOGIC_VECTOR(31 downto 0); 84 | SIGNAL output_row_MAC_base_20: STD_LOGIC_VECTOR(31 downto 0); 85 | SIGNAL output_row_MAC_base_21: STD_LOGIC_VECTOR(31 downto 0); 86 | SIGNAL output_row_MAC_base_22: STD_LOGIC_VECTOR(31 downto 0); 87 | SIGNAL output_row_MAC_base_23: STD_LOGIC_VECTOR(31 downto 0); 88 | SIGNAL output_row_MAC_base_24: STD_LOGIC_VECTOR(31 downto 0); 89 | SIGNAL output_row_MAC_base_25: STD_LOGIC_VECTOR(31 downto 0); 90 | SIGNAL output_row_MAC_base_26: STD_LOGIC_VECTOR(31 downto 0); 91 | SIGNAL output_row_MAC_base_27: STD_LOGIC_VECTOR(31 downto 0); 92 | SIGNAL 
output_row_MAC_base_28: STD_LOGIC_VECTOR(31 downto 0); 93 | SIGNAL output_row_MAC_base_29: STD_LOGIC_VECTOR(31 downto 0); 94 | SIGNAL output_row_MAC_base_30: STD_LOGIC_VECTOR(31 downto 0); 95 | SIGNAL output_row_MAC_base_31: STD_LOGIC_VECTOR(31 downto 0); 96 | SIGNAL b_col_reg: STD_LOGIC_VECTOR(31 downto 0); 97 | 98 | 99 | BEGIN 100 | 101 | MAC_base: MAC_32x8_8_reg PORT MAP(CLK=>CLK, RST_n=>RST_n, ENABLE=>ENABLE, 102 | input_row_0=>input_row_0, input_row_1=>input_row_1, input_row_2=>input_row_2, input_row_3=>input_row_3, input_row_4=>input_row_4, input_row_5=>input_row_5, input_row_6=>input_row_6, input_row_7=>input_row_7, input_row_8=>input_row_8, input_row_9=>input_row_9, input_row_10=>input_row_10, input_row_11=>input_row_11, input_row_12=>input_row_12, input_row_13=>input_row_13, input_row_14=>input_row_14, input_row_15=>input_row_15, input_row_16=>input_row_16, input_row_17=>input_row_17, input_row_18=>input_row_18, input_row_19=>input_row_19, input_row_20=>input_row_20, input_row_21=>input_row_21, input_row_22=>input_row_22, input_row_23=>input_row_23, input_row_24=>input_row_24, input_row_25=>input_row_25, input_row_26=>input_row_26, input_row_27=>input_row_27, input_row_28=>input_row_28, input_row_29=>input_row_29, input_row_30=>input_row_30, input_row_31=>input_row_31, 103 | input_col_0=>input_col_0, input_col_1=>input_col_1, input_col_2=>input_col_2, input_col_3=>input_col_3, input_col_4=>input_col_4, input_col_5=>input_col_5, input_col_6=>input_col_6, input_col_7=>input_col_7, 104 | output_row_0=>output_row_MAC_base_0, output_row_1=>output_row_MAC_base_1, output_row_2=>output_row_MAC_base_2, output_row_3=>output_row_MAC_base_3, output_row_4=>output_row_MAC_base_4, output_row_5=>output_row_MAC_base_5, output_row_6=>output_row_MAC_base_6, output_row_7=>output_row_MAC_base_7, output_row_8=>output_row_MAC_base_8, output_row_9=>output_row_MAC_base_9, output_row_10=>output_row_MAC_base_10, output_row_11=>output_row_MAC_base_11, 
output_row_12=>output_row_MAC_base_12, output_row_13=>output_row_MAC_base_13, output_row_14=>output_row_MAC_base_14, output_row_15=>output_row_MAC_base_15, output_row_16=>output_row_MAC_base_16, output_row_17=>output_row_MAC_base_17, output_row_18=>output_row_MAC_base_18, output_row_19=>output_row_MAC_base_19, output_row_20=>output_row_MAC_base_20, output_row_21=>output_row_MAC_base_21, output_row_22=>output_row_MAC_base_22, output_row_23=>output_row_MAC_base_23, output_row_24=>output_row_MAC_base_24, output_row_25=>output_row_MAC_base_25, output_row_26=>output_row_MAC_base_26, output_row_27=>output_row_MAC_base_27, output_row_28=>output_row_MAC_base_28, output_row_29=>output_row_MAC_base_29, output_row_30=>output_row_MAC_base_30, output_row_31=>output_row_MAC_base_31, 105 | SEL_mux=>SEL_mux); 106 | 107 | bias_reg: regnbit GENERIC MAP(N=>32) PORT MAP(CLK=>CLK, RST_n=>RST_n, ENABLE=>'1', D=>b_col, Q=>b_col_reg); 108 | bias_sum: bias_sum_32_32 PORT MAP(input_row_0=>output_row_MAC_base_0, input_row_1=>output_row_MAC_base_1, input_row_2=>output_row_MAC_base_2, input_row_3=>output_row_MAC_base_3, input_row_4=>output_row_MAC_base_4, input_row_5=>output_row_MAC_base_5, input_row_6=>output_row_MAC_base_6, input_row_7=>output_row_MAC_base_7, input_row_8=>output_row_MAC_base_8, input_row_9=>output_row_MAC_base_9, input_row_10=>output_row_MAC_base_10, input_row_11=>output_row_MAC_base_11, input_row_12=>output_row_MAC_base_12, input_row_13=>output_row_MAC_base_13, input_row_14=>output_row_MAC_base_14, input_row_15=>output_row_MAC_base_15, input_row_16=>output_row_MAC_base_16, input_row_17=>output_row_MAC_base_17, input_row_18=>output_row_MAC_base_18, input_row_19=>output_row_MAC_base_19, input_row_20=>output_row_MAC_base_20, input_row_21=>output_row_MAC_base_21, input_row_22=>output_row_MAC_base_22, input_row_23=>output_row_MAC_base_23, input_row_24=>output_row_MAC_base_24, input_row_25=>output_row_MAC_base_25, input_row_26=>output_row_MAC_base_26, 
input_row_27=>output_row_MAC_base_27, input_row_28=>output_row_MAC_base_28, input_row_29=>output_row_MAC_base_29, input_row_30=>output_row_MAC_base_30, input_row_31=>output_row_MAC_base_31, 109 | b_col=>b_col_reg, output_row_0=>output_row_0, output_row_1=>output_row_1, output_row_2=>output_row_2, output_row_3=>output_row_3, output_row_4=>output_row_4, output_row_5=>output_row_5, output_row_6=>output_row_6, output_row_7=>output_row_7, output_row_8=>output_row_8, output_row_9=>output_row_9, output_row_10=>output_row_10, output_row_11=>output_row_11, output_row_12=>output_row_12, output_row_13=>output_row_13, output_row_14=>output_row_14, output_row_15=>output_row_15, output_row_16=>output_row_16, output_row_17=>output_row_17, output_row_18=>output_row_18, output_row_19=>output_row_19, output_row_20=>output_row_20, output_row_21=>output_row_21, output_row_22=>output_row_22, output_row_23=>output_row_23, output_row_24=>output_row_24, output_row_25=>output_row_25, output_row_26=>output_row_26, output_row_27=>output_row_27, output_row_28=>output_row_28, output_row_29=>output_row_29, output_row_30=>output_row_30, output_row_31=>output_row_31); 110 | 111 | 112 | END behaviour; 113 | -------------------------------------------------------------------------------- /MatMul/MAC_32x8_8_bias_32/bias_sum_32_32.vhd: --------------------------------------------------------------------------------

LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- bias_sum_32_32
-- Purely combinational stage: adds the single 32-bit word b_col to each of the
-- 32 incoming 32-bit words and drives the 32 results on output_row_0..31.
-- All operands are interpreted as two's-complement (SIGNED) values.
ENTITY bias_sum_32_32 IS
    PORT (
        input_row_0,  input_row_1,  input_row_2,  input_row_3,  input_row_4,  input_row_5,  input_row_6,  input_row_7,
        input_row_8,  input_row_9,  input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15,
        input_row_16, input_row_17, input_row_18, input_row_19, input_row_20, input_row_21, input_row_22, input_row_23,
        input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, input_row_30, input_row_31
            : IN STD_LOGIC_VECTOR(31 downto 0);
        b_col
            : IN STD_LOGIC_VECTOR(31 downto 0);
        output_row_0,  output_row_1,  output_row_2,  output_row_3,  output_row_4,  output_row_5,  output_row_6,  output_row_7,
        output_row_8,  output_row_9,  output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15,
        output_row_16, output_row_17, output_row_18, output_row_19, output_row_20, output_row_21, output_row_22, output_row_23,
        output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, output_row_30, output_row_31
            : OUT STD_LOGIC_VECTOR(31 downto 0)
    );
END bias_sum_32_32;

ARCHITECTURE behaviour OF bias_sum_32_32 IS

    -- Signed sum of one row word and the bias word. numeric_std "+" on two
    -- equal-length SIGNED operands returns a result of that same length, so
    -- the sum stays 32 bits wide and no extra carry bit is produced.
    FUNCTION add_bias (row_word, bias_word : STD_LOGIC_VECTOR(31 downto 0))
        RETURN STD_LOGIC_VECTOR IS
    BEGIN
        RETURN STD_LOGIC_VECTOR(SIGNED(row_word) + SIGNED(bias_word));
    END FUNCTION add_bias;

BEGIN

    -- One independent adder per row; every row shares the same b_col operand.
    output_row_0  <= add_bias(input_row_0,  b_col);
    output_row_1  <= add_bias(input_row_1,  b_col);
    output_row_2  <= add_bias(input_row_2,  b_col);
    output_row_3  <= add_bias(input_row_3,  b_col);
    output_row_4  <= add_bias(input_row_4,  b_col);
    output_row_5  <= add_bias(input_row_5,  b_col);
    output_row_6  <= add_bias(input_row_6,  b_col);
    output_row_7  <= add_bias(input_row_7,  b_col);
    output_row_8  <= add_bias(input_row_8,  b_col);
    output_row_9  <= add_bias(input_row_9,  b_col);
    output_row_10 <= add_bias(input_row_10, b_col);
    output_row_11 <= add_bias(input_row_11, b_col);
    output_row_12 <= add_bias(input_row_12, b_col);
    output_row_13 <= add_bias(input_row_13, b_col);
    output_row_14 <= add_bias(input_row_14, b_col);
    output_row_15 <= add_bias(input_row_15, b_col);
    output_row_16 <= add_bias(input_row_16, b_col);
    output_row_17 <= add_bias(input_row_17, b_col);
    output_row_18 <= add_bias(input_row_18, b_col);
    output_row_19 <= add_bias(input_row_19, b_col);
    output_row_20 <= add_bias(input_row_20, b_col);
    output_row_21 <= add_bias(input_row_21, b_col);
    output_row_22 <= add_bias(input_row_22, b_col);
    output_row_23 <= add_bias(input_row_23, b_col);
    output_row_24 <= add_bias(input_row_24, b_col);
    output_row_25 <= add_bias(input_row_25, b_col);
    output_row_26 <= add_bias(input_row_26, b_col);
    output_row_27 <= add_bias(input_row_27, b_col);
    output_row_28 <= add_bias(input_row_28, b_col);
    output_row_29 <= add_bias(input_row_29, b_col);
    output_row_30 <= add_bias(input_row_30, b_col);
    output_row_31 <= add_bias(input_row_31, b_col);

END behaviour;
-------------------------------------------------------------------------------- /MatMul/MAC_32x8_8_bias_32/mux_8to1_nbit.vhd: --------------------------------------------------------------------------------

LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- mux_8to1_nbit
-- N-bit wide 8-to-1 multiplexer assembled from two 4-to-1 stages followed by
-- one 2-to-1 stage. SEL_mux(1 downto 0) selects within each group of four
-- inputs (I0..I3 and I4..I7); SEL_mux(2) selects between the two groups.
ENTITY mux_8to1_nbit IS
    GENERIC (
        N : POSITIVE := 2
    );
    PORT (
        I0, I1, I2, I3, I4, I5, I6, I7 : IN  STD_LOGIC_VECTOR(N-1 downto 0);
        SEL_mux                        : IN  STD_LOGIC_VECTOR(2 downto 0);
        O                              : OUT STD_LOGIC_VECTOR(N-1 downto 0)
    );
END mux_8to1_nbit;

ARCHITECTURE behaviour OF mux_8to1_nbit IS

    -- First-level building block (declared in another file of the project).
    COMPONENT mux_4to1_nbit_base IS
        GENERIC ( N : POSITIVE := 1 );
        PORT (
            I0, I1, I2, I3 : IN  STD_LOGIC_VECTOR(N-1 downto 0);
            SEL            : IN  STD_LOGIC_VECTOR(1 downto 0);
            O              : OUT STD_LOGIC_VECTOR(N-1 downto 0)
        );
    END COMPONENT;

    -- Second-level building block (declared in another file of the project).
    COMPONENT mux_2to1_nbit_base IS
        GENERIC ( N : POSITIVE := 1 );
        PORT (
            I0, I1 : IN  STD_LOGIC_VECTOR(N-1 downto 0);
            SEL    : IN  STD_LOGIC;
            O      : OUT STD_LOGIC_VECTOR(N-1 downto 0)
        );
    END COMPONENT;

    -- Outputs of the two first-level multiplexers.
    SIGNAL group_low_sel  : STD_LOGIC_VECTOR(N-1 downto 0);  -- chosen among I0..I3
    SIGNAL group_high_sel : STD_LOGIC_VECTOR(N-1 downto 0);  -- chosen among I4..I7

BEGIN

    -- Level 0: both 4-to-1 stages are steered by the same two low select bits.
    mux_0_0: mux_4to1_nbit_base
        GENERIC MAP ( N => N )
        PORT MAP (
            I0  => I0,
            I1  => I1,
            I2  => I2,
            I3  => I3,
            SEL => SEL_mux(1 downto 0),
            O   => group_low_sel
        );

    mux_0_1: mux_4to1_nbit_base
        GENERIC MAP ( N => N )
        PORT MAP (
            I0  => I4,
            I1  => I5,
            I2  => I6,
            I3  => I7,
            SEL => SEL_mux(1 downto 0),
            O   => group_high_sel
        );

    -- Level 1: the top select bit picks which group reaches the output.
    mux_1_out: mux_2to1_nbit_base
        GENERIC MAP ( N => N )
        PORT MAP (
            I0  => group_low_sel,
            I1  => group_high_sel,
            SEL => SEL_mux(2),
            O   => O
        );

END behaviour;
-------------------------------------------------------------------------------- /MatMul/MAC_64x16_8_bias_32/MAC_bias_64x16_8.vhd: --------------------------------------------------------------------------------

LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- MAC_bias_64x16_8
-- Structural wrapper: instantiates MAC_64x16_8 on the 64 row operands and the
-- 16 column operands (8 bits each), then routes its 64 32-bit results together
-- with b_col through bias_sum_64_32 to produce output_row_0..63.
-- b_col is wired straight from the port to bias_sum_64_32; this entity does
-- not register it. CLK, RST_n and ENABLE are only forwarded to MAC_64x16_8.
ENTITY MAC_bias_64x16_8 IS
    PORT (
        input_row_0,  input_row_1,  input_row_2,  input_row_3,  input_row_4,  input_row_5,  input_row_6,  input_row_7,
        input_row_8,  input_row_9,  input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15,
        input_row_16, input_row_17, input_row_18, input_row_19, input_row_20, input_row_21, input_row_22, input_row_23,
        input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, input_row_30, input_row_31,
        input_row_32, input_row_33, input_row_34, input_row_35, input_row_36, input_row_37, input_row_38, input_row_39,
        input_row_40, input_row_41, input_row_42, input_row_43, input_row_44, input_row_45, input_row_46, input_row_47,
        input_row_48, input_row_49, input_row_50, input_row_51, input_row_52, input_row_53, input_row_54, input_row_55,
        input_row_56, input_row_57, input_row_58, input_row_59, input_row_60, input_row_61, input_row_62, input_row_63
            : IN STD_LOGIC_VECTOR(7 downto 0);
        input_col_0,  input_col_1,  input_col_2,  input_col_3,  input_col_4,  input_col_5,  input_col_6,  input_col_7,
        input_col_8,  input_col_9,  input_col_10, input_col_11, input_col_12, input_col_13, input_col_14, input_col_15
            : IN STD_LOGIC_VECTOR(7 downto 0);
        b_col
            : IN STD_LOGIC_VECTOR(31 downto 0);
        SEL_mux
            : IN STD_LOGIC_VECTOR(3 downto 0);
        CLK, RST_n, ENABLE
            : IN STD_LOGIC;
        output_row_0,  output_row_1,  output_row_2,  output_row_3,  output_row_4,  output_row_5,  output_row_6,  output_row_7,
        output_row_8,  output_row_9,  output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15,
        output_row_16, output_row_17, output_row_18, output_row_19, output_row_20, output_row_21, output_row_22, output_row_23,
        output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, output_row_30, output_row_31,
        output_row_32, output_row_33, output_row_34, output_row_35, output_row_36, output_row_37, output_row_38, output_row_39,
        output_row_40, output_row_41, output_row_42, output_row_43, output_row_44, output_row_45, output_row_46, output_row_47,
        output_row_48, output_row_49, output_row_50, output_row_51, output_row_52, output_row_53, output_row_54, output_row_55,
        output_row_56, output_row_57, output_row_58, output_row_59, output_row_60, output_row_61, output_row_62, output_row_63
            : OUT STD_LOGIC_VECTOR(31 downto 0)
    );
END MAC_bias_64x16_8;

ARCHITECTURE behaviour OF MAC_bias_64x16_8 IS

    -- Multiply-accumulate core (declared in another file of the project).
    COMPONENT MAC_64x16_8 IS
        PORT (
            input_row_0,  input_row_1,  input_row_2,  input_row_3,  input_row_4,  input_row_5,  input_row_6,  input_row_7,
            input_row_8,  input_row_9,  input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15,
            input_row_16, input_row_17, input_row_18, input_row_19, input_row_20, input_row_21, input_row_22, input_row_23,
            input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, input_row_30, input_row_31,
            input_row_32, input_row_33, input_row_34, input_row_35, input_row_36, input_row_37, input_row_38, input_row_39,
            input_row_40, input_row_41, input_row_42, input_row_43, input_row_44, input_row_45, input_row_46, input_row_47,
            input_row_48, input_row_49, input_row_50, input_row_51, input_row_52, input_row_53, input_row_54, input_row_55,
            input_row_56, input_row_57, input_row_58, input_row_59, input_row_60, input_row_61, input_row_62, input_row_63
                : IN STD_LOGIC_VECTOR(7 downto 0);
            input_col_0,  input_col_1,  input_col_2,  input_col_3,  input_col_4,  input_col_5,  input_col_6,  input_col_7,
            input_col_8,  input_col_9,  input_col_10, input_col_11, input_col_12, input_col_13, input_col_14, input_col_15
                : IN STD_LOGIC_VECTOR(7 downto 0);
            SEL_mux
                : IN STD_LOGIC_VECTOR(3 downto 0);
            CLK, RST_n, ENABLE
                : IN STD_LOGIC;
            output_row_0,  output_row_1,  output_row_2,  output_row_3,  output_row_4,  output_row_5,  output_row_6,  output_row_7,
            output_row_8,  output_row_9,  output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15,
            output_row_16, output_row_17, output_row_18, output_row_19, output_row_20, output_row_21, output_row_22, output_row_23,
            output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, output_row_30, output_row_31,
            output_row_32, output_row_33, output_row_34, output_row_35, output_row_36, output_row_37, output_row_38, output_row_39,
            output_row_40, output_row_41, output_row_42, output_row_43, output_row_44, output_row_45, output_row_46, output_row_47,
            output_row_48, output_row_49, output_row_50, output_row_51, output_row_52, output_row_53, output_row_54, output_row_55,
            output_row_56, output_row_57, output_row_58, output_row_59, output_row_60, output_row_61, output_row_62, output_row_63
                : OUT STD_LOGIC_VECTOR(31 downto 0)
        );
    END COMPONENT;

    -- Bias stage (declared in another file of the project); presumably adds
    -- b_col to every row the way bias_sum_32_32 does -- confirm against
    -- bias_sum_64_32.vhd.
    COMPONENT bias_sum_64_32 IS
        PORT (
            input_row_0,  input_row_1,  input_row_2,  input_row_3,  input_row_4,  input_row_5,  input_row_6,  input_row_7,
            input_row_8,  input_row_9,  input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15,
            input_row_16, input_row_17, input_row_18, input_row_19, input_row_20, input_row_21, input_row_22, input_row_23,
            input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, input_row_30, input_row_31,
            input_row_32, input_row_33, input_row_34, input_row_35, input_row_36, input_row_37, input_row_38, input_row_39,
            input_row_40, input_row_41, input_row_42, input_row_43, input_row_44, input_row_45, input_row_46, input_row_47,
            input_row_48, input_row_49, input_row_50, input_row_51, input_row_52, input_row_53, input_row_54, input_row_55,
            input_row_56, input_row_57, input_row_58, input_row_59, input_row_60, input_row_61, input_row_62, input_row_63
                : IN STD_LOGIC_VECTOR(31 downto 0);
            b_col
                : IN STD_LOGIC_VECTOR(31 downto 0);
            output_row_0,  output_row_1,  output_row_2,  output_row_3,  output_row_4,  output_row_5,  output_row_6,  output_row_7,
            output_row_8,  output_row_9,  output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15,
            output_row_16, output_row_17, output_row_18, output_row_19, output_row_20, output_row_21, output_row_22, output_row_23,
            output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, output_row_30, output_row_31,
            output_row_32, output_row_33, output_row_34, output_row_35, output_row_36, output_row_37, output_row_38, output_row_39,
            output_row_40, output_row_41, output_row_42, output_row_43, output_row_44, output_row_45, output_row_46, output_row_47,
            output_row_48, output_row_49, output_row_50, output_row_51, output_row_52, output_row_53, output_row_54, output_row_55,
            output_row_56, output_row_57, output_row_58, output_row_59, output_row_60, output_row_61, output_row_62, output_row_63
                : OUT STD_LOGIC_VECTOR(31 downto 0)
        );
    END COMPONENT;

    -- Nets carrying the 64 results of MAC_base into bias_sum, one per row.
    SIGNAL mac_acc_0,  mac_acc_1,  mac_acc_2,  mac_acc_3,  mac_acc_4,  mac_acc_5,  mac_acc_6,  mac_acc_7,
           mac_acc_8,  mac_acc_9,  mac_acc_10, mac_acc_11, mac_acc_12, mac_acc_13, mac_acc_14, mac_acc_15,
           mac_acc_16, mac_acc_17, mac_acc_18, mac_acc_19, mac_acc_20, mac_acc_21, mac_acc_22, mac_acc_23,
           mac_acc_24, mac_acc_25, mac_acc_26, mac_acc_27, mac_acc_28, mac_acc_29, mac_acc_30, mac_acc_31,
           mac_acc_32, mac_acc_33, mac_acc_34, mac_acc_35, mac_acc_36, mac_acc_37, mac_acc_38, mac_acc_39,
           mac_acc_40, mac_acc_41, mac_acc_42, mac_acc_43, mac_acc_44, mac_acc_45, mac_acc_46, mac_acc_47,
           mac_acc_48, mac_acc_49, mac_acc_50, mac_acc_51, mac_acc_52, mac_acc_53, mac_acc_54, mac_acc_55,
           mac_acc_56, mac_acc_57, mac_acc_58, mac_acc_59, mac_acc_60, mac_acc_61, mac_acc_62, mac_acc_63
               : STD_LOGIC_VECTOR(31 downto 0);

BEGIN

    -- Stage 1: multiply-accumulate core. Control, select and operand ports are
    -- passed through unchanged; results land on the mac_acc_* nets.
    MAC_base: MAC_64x16_8
        PORT MAP (
            CLK     => CLK,
            RST_n   => RST_n,
            ENABLE  => ENABLE,
            SEL_mux => SEL_mux,

            input_row_0  => input_row_0,  input_row_1  => input_row_1,  input_row_2  => input_row_2,  input_row_3  => input_row_3,
            input_row_4  => input_row_4,  input_row_5  => input_row_5,  input_row_6  => input_row_6,  input_row_7  => input_row_7,
            input_row_8  => input_row_8,  input_row_9  => input_row_9,  input_row_10 => input_row_10, input_row_11 => input_row_11,
            input_row_12 => input_row_12, input_row_13 => input_row_13, input_row_14 => input_row_14, input_row_15 => input_row_15,
            input_row_16 => input_row_16, input_row_17 => input_row_17, input_row_18 => input_row_18, input_row_19 => input_row_19,
            input_row_20 => input_row_20, input_row_21 => input_row_21, input_row_22 => input_row_22, input_row_23 => input_row_23,
            input_row_24 => input_row_24, input_row_25 => input_row_25, input_row_26 => input_row_26, input_row_27 => input_row_27,
            input_row_28 => input_row_28, input_row_29 => input_row_29, input_row_30 => input_row_30, input_row_31 => input_row_31,
            input_row_32 => input_row_32, input_row_33 => input_row_33, input_row_34 => input_row_34, input_row_35 => input_row_35,
            input_row_36 => input_row_36, input_row_37 => input_row_37, input_row_38 => input_row_38, input_row_39 => input_row_39,
            input_row_40 => input_row_40, input_row_41 => input_row_41, input_row_42 => input_row_42, input_row_43 => input_row_43,
            input_row_44 => input_row_44, input_row_45 => input_row_45, input_row_46 => input_row_46, input_row_47 => input_row_47,
            input_row_48 => input_row_48, input_row_49 => input_row_49, input_row_50 => input_row_50, input_row_51 => input_row_51,
            input_row_52 => input_row_52, input_row_53 => input_row_53, input_row_54 => input_row_54, input_row_55 => input_row_55,
            input_row_56 => input_row_56, input_row_57 => input_row_57, input_row_58 => input_row_58, input_row_59 => input_row_59,
            input_row_60 => input_row_60, input_row_61 => input_row_61, input_row_62 => input_row_62, input_row_63 => input_row_63,

            input_col_0  => input_col_0,  input_col_1  => input_col_1,  input_col_2  => input_col_2,  input_col_3  => input_col_3,
            input_col_4  => input_col_4,  input_col_5  => input_col_5,  input_col_6  => input_col_6,  input_col_7  => input_col_7,
            input_col_8  => input_col_8,  input_col_9  => input_col_9,  input_col_10 => input_col_10, input_col_11 => input_col_11,
            input_col_12 => input_col_12, input_col_13 => input_col_13, input_col_14 => input_col_14, input_col_15 => input_col_15,

            output_row_0  => mac_acc_0,  output_row_1  => mac_acc_1,  output_row_2  => mac_acc_2,  output_row_3  => mac_acc_3,
            output_row_4  => mac_acc_4,  output_row_5  => mac_acc_5,  output_row_6  => mac_acc_6,  output_row_7  => mac_acc_7,
            output_row_8  => mac_acc_8,  output_row_9  => mac_acc_9,  output_row_10 => mac_acc_10, output_row_11 => mac_acc_11,
            output_row_12 => mac_acc_12, output_row_13 => mac_acc_13, output_row_14 => mac_acc_14, output_row_15 => mac_acc_15,
            output_row_16 => mac_acc_16, output_row_17 => mac_acc_17, output_row_18 => mac_acc_18, output_row_19 => mac_acc_19,
            output_row_20 => mac_acc_20, output_row_21 => mac_acc_21, output_row_22 => mac_acc_22, output_row_23 => mac_acc_23,
            output_row_24 => mac_acc_24, output_row_25 => mac_acc_25, output_row_26 => mac_acc_26, output_row_27 => mac_acc_27,
            output_row_28 => mac_acc_28, output_row_29 => mac_acc_29, output_row_30 => mac_acc_30, output_row_31 => mac_acc_31,
            output_row_32 => mac_acc_32, output_row_33 => mac_acc_33, output_row_34 => mac_acc_34, output_row_35 => mac_acc_35,
            output_row_36 => mac_acc_36, output_row_37 => mac_acc_37, output_row_38 => mac_acc_38, output_row_39 => mac_acc_39,
            output_row_40 => mac_acc_40, output_row_41 => mac_acc_41, output_row_42 => mac_acc_42, output_row_43 => mac_acc_43,
            output_row_44 => mac_acc_44, output_row_45 => mac_acc_45, output_row_46 => mac_acc_46, output_row_47 => mac_acc_47,
            output_row_48 => mac_acc_48, output_row_49 => mac_acc_49, output_row_50 => mac_acc_50, output_row_51 => mac_acc_51,
            output_row_52 => mac_acc_52, output_row_53 => mac_acc_53, output_row_54 => mac_acc_54, output_row_55 => mac_acc_55,
            output_row_56 => mac_acc_56, output_row_57 => mac_acc_57, output_row_58 => mac_acc_58, output_row_59 => mac_acc_59,
            output_row_60 => mac_acc_60, output_row_61 => mac_acc_61, output_row_62 => mac_acc_62, output_row_63 => mac_acc_63
        );

    -- Stage 2: bias stage. Row N of the MAC core feeds input_row_N, and the
    -- stage's output_row_N drives this entity's output_row_N port directly.
    bias_sum: bias_sum_64_32
        PORT MAP (
            b_col => b_col,

            input_row_0  => mac_acc_0,  input_row_1  => mac_acc_1,  input_row_2  => mac_acc_2,  input_row_3  => mac_acc_3,
            input_row_4  => mac_acc_4,  input_row_5  => mac_acc_5,  input_row_6  => mac_acc_6,  input_row_7  => mac_acc_7,
            input_row_8  => mac_acc_8,  input_row_9  => mac_acc_9,  input_row_10 => mac_acc_10, input_row_11 => mac_acc_11,
            input_row_12 => mac_acc_12, input_row_13 => mac_acc_13, input_row_14 => mac_acc_14, input_row_15 => mac_acc_15,
            input_row_16 => mac_acc_16, input_row_17 => mac_acc_17, input_row_18 => mac_acc_18, input_row_19 => mac_acc_19,
            input_row_20 => mac_acc_20, input_row_21 => mac_acc_21, input_row_22 => mac_acc_22, input_row_23 => mac_acc_23,
            input_row_24 => mac_acc_24, input_row_25 => mac_acc_25, input_row_26 => mac_acc_26, input_row_27 => mac_acc_27,
            input_row_28 => mac_acc_28, input_row_29 => mac_acc_29, input_row_30 => mac_acc_30, input_row_31 => mac_acc_31,
            input_row_32 => mac_acc_32, input_row_33 => mac_acc_33, input_row_34 => mac_acc_34, input_row_35 => mac_acc_35,
            input_row_36 => mac_acc_36, input_row_37 => mac_acc_37, input_row_38 => mac_acc_38, input_row_39 => mac_acc_39,
            input_row_40 => mac_acc_40, input_row_41 => mac_acc_41, input_row_42 => mac_acc_42, input_row_43 => mac_acc_43,
            input_row_44 => mac_acc_44, input_row_45 => mac_acc_45, input_row_46 => mac_acc_46, input_row_47 => mac_acc_47,
            input_row_48 => mac_acc_48, input_row_49 => mac_acc_49, input_row_50 => mac_acc_50, input_row_51 => mac_acc_51,
            input_row_52 => mac_acc_52, input_row_53 => mac_acc_53, input_row_54 => mac_acc_54, input_row_55 => mac_acc_55,
            input_row_56 => mac_acc_56, input_row_57 => mac_acc_57, input_row_58 => mac_acc_58, input_row_59 => mac_acc_59,
            input_row_60 => mac_acc_60, input_row_61 => mac_acc_61, input_row_62 => mac_acc_62, input_row_63 => mac_acc_63,

            output_row_0  => output_row_0,  output_row_1  => output_row_1,  output_row_2  => output_row_2,  output_row_3  => output_row_3,
            output_row_4  => output_row_4,  output_row_5  => output_row_5,  output_row_6  => output_row_6,  output_row_7  => output_row_7,
            output_row_8  => output_row_8,  output_row_9  => output_row_9,  output_row_10 => output_row_10, output_row_11 => output_row_11,
            output_row_12 => output_row_12, output_row_13 => output_row_13, output_row_14 => output_row_14, output_row_15 => output_row_15,
            output_row_16 => output_row_16, output_row_17 => output_row_17, output_row_18 => output_row_18, output_row_19 => output_row_19,
            output_row_20 => output_row_20, output_row_21 => output_row_21, output_row_22 => output_row_22, output_row_23 => output_row_23,
            output_row_24 => output_row_24, output_row_25 => output_row_25, output_row_26 => output_row_26, output_row_27 => output_row_27,
            output_row_28 => output_row_28, output_row_29 => output_row_29, output_row_30 => output_row_30, output_row_31 => output_row_31,
            output_row_32 => output_row_32, output_row_33 => output_row_33, output_row_34 => output_row_34, output_row_35 => output_row_35,
            output_row_36 => output_row_36, output_row_37 => output_row_37, output_row_38 => output_row_38, output_row_39 => output_row_39,
            output_row_40 => output_row_40, output_row_41 => output_row_41, output_row_42 => output_row_42, output_row_43 => output_row_43,
            output_row_44 => output_row_44, output_row_45 => output_row_45, output_row_46 => output_row_46, output_row_47 => output_row_47,
            output_row_48 => output_row_48, output_row_49 => output_row_49, output_row_50 => output_row_50, output_row_51 => output_row_51,
            output_row_52 => output_row_52, output_row_53 => output_row_53, output_row_54 => output_row_54, output_row_55 => output_row_55,
            output_row_56 => output_row_56, output_row_57 => output_row_57, output_row_58 => output_row_58, output_row_59 => output_row_59,
            output_row_60 => output_row_60, output_row_61 => output_row_61, output_row_62 => output_row_62, output_row_63 => output_row_63
        );

END behaviour;
--------------------------------------------------------------------------------
/MatMul/MAC_64x16_8_bias_32/MAC_bias_64x16_8_reg.vhd:
--------------------------------------------------------------------------------

LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- MAC_bias_64x16_8_reg
--
-- Structural wrapper made of three instances:
--   * MAC_base : MAC_64x16_8_reg, fed with the 64 row inputs, the 16 column
--                inputs, SEL_mux, CLK, RST_n and ENABLE of this entity.
--   * bias_reg : a 32-bit regnbit that registers b_col into b_col_reg.
--   * bias_sum : bias_sum_64_32, which receives the 64 outputs of MAC_base
--                together with b_col_reg and drives output_row_0..63.
--
-- Ports
--   input_row_0..63  : 8-bit row operands, forwarded to MAC_base.
--   input_col_0..15  : 8-bit column operands, forwarded to MAC_base.
--   b_col            : 32-bit bias, registered before it reaches bias_sum.
--   SEL_mux          : 4-bit select, forwarded unchanged to MAC_base.
--   CLK, RST_n       : forwarded to MAC_base and bias_reg.
--   ENABLE           : forwarded to MAC_base only (see note on bias_reg).
--   output_row_0..63 : 32-bit outputs of bias_sum.
ENTITY MAC_bias_64x16_8_reg IS
    PORT (
        input_row_0,  input_row_1,  input_row_2,  input_row_3,  input_row_4,  input_row_5,  input_row_6,  input_row_7,
        input_row_8,  input_row_9,  input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15,
        input_row_16, input_row_17, input_row_18, input_row_19, input_row_20, input_row_21, input_row_22, input_row_23,
        input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, input_row_30, input_row_31,
        input_row_32, input_row_33, input_row_34, input_row_35, input_row_36, input_row_37, input_row_38, input_row_39,
        input_row_40, input_row_41, input_row_42, input_row_43, input_row_44, input_row_45, input_row_46, input_row_47,
        input_row_48, input_row_49, input_row_50, input_row_51, input_row_52, input_row_53, input_row_54, input_row_55,
        input_row_56, input_row_57, input_row_58, input_row_59, input_row_60, input_row_61, input_row_62, input_row_63 : IN STD_LOGIC_VECTOR(7 downto 0);
        input_col_0,  input_col_1,  input_col_2,  input_col_3,  input_col_4,  input_col_5,  input_col_6,  input_col_7,
        input_col_8,  input_col_9,  input_col_10, input_col_11, input_col_12, input_col_13, input_col_14, input_col_15 : IN STD_LOGIC_VECTOR(7 downto 0);
        b_col   : IN STD_LOGIC_VECTOR(31 downto 0);
        SEL_mux : IN STD_LOGIC_VECTOR(3 downto 0);
        CLK, RST_n, ENABLE : IN STD_LOGIC;
        output_row_0,  output_row_1,  output_row_2,  output_row_3,  output_row_4,  output_row_5,  output_row_6,  output_row_7,
        output_row_8,  output_row_9,  output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15,
        output_row_16, output_row_17, output_row_18, output_row_19, output_row_20, output_row_21, output_row_22, output_row_23,
        output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, output_row_30, output_row_31,
        output_row_32, output_row_33, output_row_34, output_row_35, output_row_36, output_row_37, output_row_38, output_row_39,
        output_row_40, output_row_41, output_row_42, output_row_43, output_row_44, output_row_45, output_row_46, output_row_47,
        output_row_48, output_row_49, output_row_50, output_row_51, output_row_52, output_row_53, output_row_54, output_row_55,
        output_row_56, output_row_57, output_row_58, output_row_59, output_row_60, output_row_61, output_row_62, output_row_63 : OUT STD_LOGIC_VECTOR(31 downto 0)
    );
END MAC_bias_64x16_8_reg;

ARCHITECTURE behaviour OF MAC_bias_64x16_8_reg IS

    -- Core block producing 64 32-bit results from 8-bit rows/columns.
    -- Its implementation lives in another file; only the interface is known here.
    COMPONENT MAC_64x16_8_reg IS
        PORT (
            input_row_0,  input_row_1,  input_row_2,  input_row_3,  input_row_4,  input_row_5,  input_row_6,  input_row_7,
            input_row_8,  input_row_9,  input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15,
            input_row_16, input_row_17, input_row_18, input_row_19, input_row_20, input_row_21, input_row_22, input_row_23,
            input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, input_row_30, input_row_31,
            input_row_32, input_row_33, input_row_34, input_row_35, input_row_36, input_row_37, input_row_38, input_row_39,
            input_row_40, input_row_41, input_row_42, input_row_43, input_row_44, input_row_45, input_row_46, input_row_47,
            input_row_48, input_row_49, input_row_50, input_row_51, input_row_52, input_row_53, input_row_54, input_row_55,
            input_row_56, input_row_57, input_row_58, input_row_59, input_row_60, input_row_61, input_row_62, input_row_63 : IN STD_LOGIC_VECTOR(7 downto 0);
            input_col_0,  input_col_1,  input_col_2,  input_col_3,  input_col_4,  input_col_5,  input_col_6,  input_col_7,
            input_col_8,  input_col_9,  input_col_10, input_col_11, input_col_12, input_col_13, input_col_14, input_col_15 : IN STD_LOGIC_VECTOR(7 downto 0);
            SEL_mux : IN STD_LOGIC_VECTOR(3 downto 0);
            CLK, RST_n, ENABLE : IN STD_LOGIC;
            output_row_0,  output_row_1,  output_row_2,  output_row_3,  output_row_4,  output_row_5,  output_row_6,  output_row_7,
            output_row_8,  output_row_9,  output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15,
            output_row_16, output_row_17, output_row_18, output_row_19, output_row_20, output_row_21, output_row_22, output_row_23,
            output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, output_row_30, output_row_31,
            output_row_32, output_row_33, output_row_34, output_row_35, output_row_36, output_row_37, output_row_38, output_row_39,
            output_row_40, output_row_41, output_row_42, output_row_43, output_row_44, output_row_45, output_row_46, output_row_47,
            output_row_48, output_row_49, output_row_50, output_row_51, output_row_52, output_row_53, output_row_54, output_row_55,
            output_row_56, output_row_57, output_row_58, output_row_59, output_row_60, output_row_61, output_row_62, output_row_63 : OUT STD_LOGIC_VECTOR(31 downto 0)
        );
    END COMPONENT;

    -- Adds b_col to each of 64 32-bit inputs; it has no clock port.
    COMPONENT bias_sum_64_32 IS
        PORT (
            input_row_0,  input_row_1,  input_row_2,  input_row_3,  input_row_4,  input_row_5,  input_row_6,  input_row_7,
            input_row_8,  input_row_9,  input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15,
            input_row_16, input_row_17, input_row_18, input_row_19, input_row_20, input_row_21, input_row_22, input_row_23,
            input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, input_row_30, input_row_31,
            input_row_32, input_row_33, input_row_34, input_row_35, input_row_36, input_row_37, input_row_38, input_row_39,
            input_row_40, input_row_41, input_row_42, input_row_43, input_row_44, input_row_45, input_row_46, input_row_47,
            input_row_48, input_row_49, input_row_50, input_row_51, input_row_52, input_row_53, input_row_54, input_row_55,
            input_row_56, input_row_57, input_row_58, input_row_59, input_row_60, input_row_61, input_row_62, input_row_63 : IN STD_LOGIC_VECTOR(31 downto 0);
            b_col : IN STD_LOGIC_VECTOR(31 downto 0);
            output_row_0,  output_row_1,  output_row_2,  output_row_3,  output_row_4,  output_row_5,  output_row_6,  output_row_7,
            output_row_8,  output_row_9,  output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15,
            output_row_16, output_row_17, output_row_18, output_row_19, output_row_20, output_row_21, output_row_22, output_row_23,
            output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, output_row_30, output_row_31,
            output_row_32, output_row_33, output_row_34, output_row_35, output_row_36, output_row_37, output_row_38, output_row_39,
            output_row_40, output_row_41, output_row_42, output_row_43, output_row_44, output_row_45, output_row_46, output_row_47,
            output_row_48, output_row_49, output_row_50, output_row_51, output_row_52, output_row_53, output_row_54, output_row_55,
            output_row_56, output_row_57, output_row_58, output_row_59, output_row_60, output_row_61, output_row_62, output_row_63 : OUT STD_LOGIC_VECTOR(31 downto 0)
        );
    END COMPONENT;

    -- Generic N-bit register with enable; defined in another file.
    COMPONENT regnbit IS
        GENERIC ( N : POSITIVE := 2 );
        PORT (
            D                  : IN  STD_LOGIC_VECTOR(N-1 downto 0);
            CLK, RST_n, ENABLE : IN  STD_LOGIC;
            Q                  : OUT STD_LOGIC_VECTOR(N-1 downto 0)
        );
    END COMPONENT;

    -- mac_acc(k) carries output_row_k of MAC_base into input_row_k of bias_sum.
    TYPE acc_bus_t IS ARRAY (0 TO 63) OF STD_LOGIC_VECTOR(31 downto 0);
    SIGNAL mac_acc   : acc_bus_t;
    -- Registered copy of b_col.
    SIGNAL b_col_reg : STD_LOGIC_VECTOR(31 downto 0);

BEGIN

    MAC_base: MAC_64x16_8_reg
        PORT MAP (
            CLK => CLK, RST_n => RST_n, ENABLE => ENABLE, SEL_mux => SEL_mux,
            input_row_0  => input_row_0,  input_row_1  => input_row_1,  input_row_2  => input_row_2,  input_row_3  => input_row_3,
            input_row_4  => input_row_4,  input_row_5  => input_row_5,  input_row_6  => input_row_6,  input_row_7  => input_row_7,
            input_row_8  => input_row_8,  input_row_9  => input_row_9,  input_row_10 => input_row_10, input_row_11 => input_row_11,
            input_row_12 => input_row_12, input_row_13 => input_row_13, input_row_14 => input_row_14, input_row_15 => input_row_15,
            input_row_16 => input_row_16, input_row_17 => input_row_17, input_row_18 => input_row_18, input_row_19 => input_row_19,
            input_row_20 => input_row_20, input_row_21 => input_row_21, input_row_22 => input_row_22, input_row_23 => input_row_23,
            input_row_24 => input_row_24, input_row_25 => input_row_25, input_row_26 => input_row_26, input_row_27 => input_row_27,
            input_row_28 => input_row_28, input_row_29 => input_row_29, input_row_30 => input_row_30, input_row_31 => input_row_31,
            input_row_32 => input_row_32, input_row_33 => input_row_33, input_row_34 => input_row_34, input_row_35 => input_row_35,
            input_row_36 => input_row_36, input_row_37 => input_row_37, input_row_38 => input_row_38, input_row_39 => input_row_39,
            input_row_40 => input_row_40, input_row_41 => input_row_41, input_row_42 => input_row_42, input_row_43 => input_row_43,
            input_row_44 => input_row_44, input_row_45 => input_row_45, input_row_46 => input_row_46, input_row_47 => input_row_47,
            input_row_48 => input_row_48, input_row_49 => input_row_49, input_row_50 => input_row_50, input_row_51 => input_row_51,
            input_row_52 => input_row_52, input_row_53 => input_row_53, input_row_54 => input_row_54, input_row_55 => input_row_55,
            input_row_56 => input_row_56, input_row_57 => input_row_57, input_row_58 => input_row_58, input_row_59 => input_row_59,
            input_row_60 => input_row_60, input_row_61 => input_row_61, input_row_62 => input_row_62, input_row_63 => input_row_63,
            input_col_0  => input_col_0,  input_col_1  => input_col_1,  input_col_2  => input_col_2,  input_col_3  => input_col_3,
            input_col_4  => input_col_4,  input_col_5  => input_col_5,  input_col_6  => input_col_6,  input_col_7  => input_col_7,
            input_col_8  => input_col_8,  input_col_9  => input_col_9,  input_col_10 => input_col_10, input_col_11 => input_col_11,
            input_col_12 => input_col_12, input_col_13 => input_col_13, input_col_14 => input_col_14, input_col_15 => input_col_15,
            output_row_0  => mac_acc(0),  output_row_1  => mac_acc(1),  output_row_2  => mac_acc(2),  output_row_3  => mac_acc(3),
            output_row_4  => mac_acc(4),  output_row_5  => mac_acc(5),  output_row_6  => mac_acc(6),  output_row_7  => mac_acc(7),
            output_row_8  => mac_acc(8),  output_row_9  => mac_acc(9),  output_row_10 => mac_acc(10), output_row_11 => mac_acc(11),
            output_row_12 => mac_acc(12), output_row_13 => mac_acc(13), output_row_14 => mac_acc(14), output_row_15 => mac_acc(15),
            output_row_16 => mac_acc(16), output_row_17 => mac_acc(17), output_row_18 => mac_acc(18), output_row_19 => mac_acc(19),
            output_row_20 => mac_acc(20), output_row_21 => mac_acc(21), output_row_22 => mac_acc(22), output_row_23 => mac_acc(23),
            output_row_24 => mac_acc(24), output_row_25 => mac_acc(25), output_row_26 => mac_acc(26), output_row_27 => mac_acc(27),
            output_row_28 => mac_acc(28), output_row_29 => mac_acc(29), output_row_30 => mac_acc(30), output_row_31 => mac_acc(31),
            output_row_32 => mac_acc(32), output_row_33 => mac_acc(33), output_row_34 => mac_acc(34), output_row_35 => mac_acc(35),
            output_row_36 => mac_acc(36), output_row_37 => mac_acc(37), output_row_38 => mac_acc(38), output_row_39 => mac_acc(39),
            output_row_40 => mac_acc(40), output_row_41 => mac_acc(41), output_row_42 => mac_acc(42), output_row_43 => mac_acc(43),
            output_row_44 => mac_acc(44), output_row_45 => mac_acc(45), output_row_46 => mac_acc(46), output_row_47 => mac_acc(47),
            output_row_48 => mac_acc(48), output_row_49 => mac_acc(49), output_row_50 => mac_acc(50), output_row_51 => mac_acc(51),
            output_row_52 => mac_acc(52), output_row_53 => mac_acc(53), output_row_54 => mac_acc(54), output_row_55 => mac_acc(55),
            output_row_56 => mac_acc(56), output_row_57 => mac_acc(57), output_row_58 => mac_acc(58), output_row_59 => mac_acc(59),
            output_row_60 => mac_acc(60), output_row_61 => mac_acc(61), output_row_62 => mac_acc(62), output_row_63 => mac_acc(63)
        );

    -- NOTE(review): the bias register is permanently enabled ('1') whereas
    -- MAC_base is gated by ENABLE -- confirm this asymmetry is intentional.
    bias_reg: regnbit
        GENERIC MAP (N => 32)
        PORT MAP (D => b_col, CLK => CLK, RST_n => RST_n, ENABLE => '1', Q => b_col_reg);

    bias_sum: bias_sum_64_32
        PORT MAP (
            b_col => b_col_reg,
            input_row_0  => mac_acc(0),  input_row_1  => mac_acc(1),  input_row_2  => mac_acc(2),  input_row_3  => mac_acc(3),
            input_row_4  => mac_acc(4),  input_row_5  => mac_acc(5),  input_row_6  => mac_acc(6),  input_row_7  => mac_acc(7),
            input_row_8  => mac_acc(8),  input_row_9  => mac_acc(9),  input_row_10 => mac_acc(10), input_row_11 => mac_acc(11),
            input_row_12 => mac_acc(12), input_row_13 => mac_acc(13), input_row_14 => mac_acc(14), input_row_15 => mac_acc(15),
            input_row_16 => mac_acc(16), input_row_17 => mac_acc(17), input_row_18 => mac_acc(18), input_row_19 => mac_acc(19),
            input_row_20 => mac_acc(20), input_row_21 => mac_acc(21), input_row_22 => mac_acc(22), input_row_23 => mac_acc(23),
            input_row_24 => mac_acc(24), input_row_25 => mac_acc(25), input_row_26 => mac_acc(26), input_row_27 => mac_acc(27),
            input_row_28 => mac_acc(28), input_row_29 => mac_acc(29), input_row_30 => mac_acc(30), input_row_31 => mac_acc(31),
            input_row_32 => mac_acc(32), input_row_33 => mac_acc(33), input_row_34 => mac_acc(34), input_row_35 => mac_acc(35),
            input_row_36 => mac_acc(36), input_row_37 => mac_acc(37), input_row_38 => mac_acc(38), input_row_39 => mac_acc(39),
            input_row_40 => mac_acc(40), input_row_41 => mac_acc(41), input_row_42 => mac_acc(42), input_row_43 => mac_acc(43),
            input_row_44 => mac_acc(44), input_row_45 => mac_acc(45), input_row_46 => mac_acc(46), input_row_47 => mac_acc(47),
            input_row_48 => mac_acc(48), input_row_49 => mac_acc(49), input_row_50 => mac_acc(50), input_row_51 => mac_acc(51),
            input_row_52 => mac_acc(52), input_row_53 => mac_acc(53), input_row_54 => mac_acc(54), input_row_55 => mac_acc(55),
            input_row_56 => mac_acc(56), input_row_57 => mac_acc(57), input_row_58 => mac_acc(58), input_row_59 => mac_acc(59),
            input_row_60 => mac_acc(60), input_row_61 => mac_acc(61), input_row_62 => mac_acc(62), input_row_63 => mac_acc(63),
            output_row_0  => output_row_0,  output_row_1  => output_row_1,  output_row_2  => output_row_2,  output_row_3  => output_row_3,
            output_row_4  => output_row_4,  output_row_5  => output_row_5,  output_row_6  => output_row_6,  output_row_7  => output_row_7,
            output_row_8  => output_row_8,  output_row_9  => output_row_9,  output_row_10 => output_row_10, output_row_11 => output_row_11,
            output_row_12 => output_row_12, output_row_13 => output_row_13, output_row_14 => output_row_14, output_row_15 => output_row_15,
            output_row_16 => output_row_16, output_row_17 => output_row_17, output_row_18 => output_row_18, output_row_19 => output_row_19,
            output_row_20 => output_row_20, output_row_21 => output_row_21, output_row_22 => output_row_22, output_row_23 => output_row_23,
            output_row_24 => output_row_24, output_row_25 => output_row_25, output_row_26 => output_row_26, output_row_27 => output_row_27,
            output_row_28 => output_row_28, output_row_29 => output_row_29, output_row_30 => output_row_30, output_row_31 => output_row_31,
            output_row_32 => output_row_32, output_row_33 => output_row_33, output_row_34 => output_row_34, output_row_35 => output_row_35,
            output_row_36 => output_row_36, output_row_37 => output_row_37, output_row_38 => output_row_38, output_row_39 => output_row_39,
            output_row_40 => output_row_40, output_row_41 => output_row_41, output_row_42 => output_row_42, output_row_43 => output_row_43,
            output_row_44 => output_row_44, output_row_45 => output_row_45, output_row_46 => output_row_46, output_row_47 => output_row_47,
            output_row_48 => output_row_48, output_row_49 => output_row_49, output_row_50 => output_row_50, output_row_51 => output_row_51,
            output_row_52 => output_row_52, output_row_53 => output_row_53, output_row_54 => output_row_54, output_row_55 => output_row_55,
            output_row_56 => output_row_56, output_row_57 => output_row_57, output_row_58 => output_row_58, output_row_59 => output_row_59,
            output_row_60 => output_row_60, output_row_61 => output_row_61, output_row_62 => output_row_62, output_row_63 => output_row_63
        );

END behaviour;
--------------------------------------------------------------------------------
/MatMul/MAC_64x16_8_bias_32/bias_sum_64_32.vhd:
--------------------------------------------------------------------------------

LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- bias_sum_64_32
--
-- Clockless block that drives output_row_k with input_row_k + b_col for
-- k = 0..63. Operands are interpreted as SIGNED 32-bit values; the sum is
-- 32 bits wide, so any carry out of bit 31 is discarded.
--
-- Ports
--   input_row_0..63  : 32-bit addends.
--   b_col            : 32-bit bias shared by all 64 additions.
--   output_row_0..63 : 32-bit sums.
ENTITY bias_sum_64_32 IS
    PORT (
        input_row_0,  input_row_1,  input_row_2,  input_row_3,  input_row_4,  input_row_5,  input_row_6,  input_row_7,
        input_row_8,  input_row_9,  input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15,
        input_row_16, input_row_17, input_row_18, input_row_19, input_row_20, input_row_21, input_row_22, input_row_23,
        input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, input_row_30, input_row_31,
        input_row_32, input_row_33, input_row_34, input_row_35, input_row_36, input_row_37, input_row_38, input_row_39,
        input_row_40, input_row_41, input_row_42, input_row_43, input_row_44, input_row_45, input_row_46, input_row_47,
        input_row_48, input_row_49, input_row_50, input_row_51, input_row_52, input_row_53, input_row_54, input_row_55,
        input_row_56, input_row_57, input_row_58, input_row_59, input_row_60, input_row_61, input_row_62, input_row_63 : IN STD_LOGIC_VECTOR(31 downto 0);
        b_col : IN STD_LOGIC_VECTOR(31 downto 0);
        output_row_0,  output_row_1,  output_row_2,  output_row_3,  output_row_4,  output_row_5,  output_row_6,  output_row_7,
        output_row_8,  output_row_9,  output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15,
        output_row_16, output_row_17, output_row_18, output_row_19, output_row_20, output_row_21, output_row_22, output_row_23,
        output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, output_row_30, output_row_31,
        output_row_32, output_row_33, output_row_34, output_row_35, output_row_36, output_row_37, output_row_38, output_row_39,
        output_row_40, output_row_41, output_row_42, output_row_43, output_row_44, output_row_45, output_row_46, output_row_47,
        output_row_48, output_row_49, output_row_50, output_row_51, output_row_52, output_row_53, output_row_54, output_row_55,
        output_row_56, output_row_57, output_row_58, output_row_59, output_row_60, output_row_61, output_row_62, output_row_63 : OUT STD_LOGIC_VECTOR(31 downto 0)
    );
END bias_sum_64_32;

ARCHITECTURE behaviour OF bias_sum_64_32 IS

    -- Signed 32-bit addition of one row value and the bias.
    FUNCTION with_bias (row, bias : STD_LOGIC_VECTOR(31 downto 0)) RETURN STD_LOGIC_VECTOR IS
    BEGIN
        RETURN STD_LOGIC_VECTOR(SIGNED(row) + SIGNED(bias));
    END FUNCTION with_bias;

BEGIN

    -- One concurrent assignment per row; each reacts to its own input and to b_col.
    output_row_0  <= with_bias(input_row_0,  b_col);
    output_row_1  <= with_bias(input_row_1,  b_col);
    output_row_2  <= with_bias(input_row_2,  b_col);
    output_row_3  <= with_bias(input_row_3,  b_col);
    output_row_4  <= with_bias(input_row_4,  b_col);
    output_row_5  <= with_bias(input_row_5,  b_col);
    output_row_6  <= with_bias(input_row_6,  b_col);
    output_row_7  <= with_bias(input_row_7,  b_col);
    output_row_8  <= with_bias(input_row_8,  b_col);
    output_row_9  <= with_bias(input_row_9,  b_col);
    output_row_10 <= with_bias(input_row_10, b_col);
    output_row_11 <= with_bias(input_row_11, b_col);
    output_row_12 <= with_bias(input_row_12, b_col);
    output_row_13 <= with_bias(input_row_13, b_col);
    output_row_14 <= with_bias(input_row_14, b_col);
    output_row_15 <= with_bias(input_row_15, b_col);
    output_row_16 <= with_bias(input_row_16, b_col);
    output_row_17 <= with_bias(input_row_17, b_col);
    output_row_18 <= with_bias(input_row_18, b_col);
    output_row_19 <= with_bias(input_row_19, b_col);
    output_row_20 <= with_bias(input_row_20, b_col);
    output_row_21 <= with_bias(input_row_21, b_col);
    output_row_22 <= with_bias(input_row_22, b_col);
    output_row_23 <= with_bias(input_row_23, b_col);
    output_row_24 <= with_bias(input_row_24, b_col);
    output_row_25 <= with_bias(input_row_25, b_col);
    output_row_26 <= with_bias(input_row_26, b_col);
    output_row_27 <= with_bias(input_row_27, b_col);
    output_row_28 <= with_bias(input_row_28, b_col);
    output_row_29 <= with_bias(input_row_29, b_col);
    output_row_30 <= with_bias(input_row_30, b_col);
    output_row_31 <= with_bias(input_row_31, b_col);
    output_row_32 <= with_bias(input_row_32, b_col);
    output_row_33 <= with_bias(input_row_33, b_col);
    output_row_34 <= with_bias(input_row_34, b_col);
    output_row_35 <= with_bias(input_row_35, b_col);
    output_row_36 <= with_bias(input_row_36, b_col);
    output_row_37 <= with_bias(input_row_37, b_col);
    output_row_38 <= with_bias(input_row_38, b_col);
    output_row_39 <= with_bias(input_row_39, b_col);
    output_row_40 <= with_bias(input_row_40, b_col);
    output_row_41 <= with_bias(input_row_41, b_col);
    output_row_42 <= with_bias(input_row_42, b_col);
    output_row_43 <= with_bias(input_row_43, b_col);
    output_row_44 <= with_bias(input_row_44, b_col);
    output_row_45 <= with_bias(input_row_45, b_col);
    output_row_46 <= with_bias(input_row_46, b_col);
    output_row_47 <= with_bias(input_row_47, b_col);
    output_row_48 <= with_bias(input_row_48, b_col);
    output_row_49 <= with_bias(input_row_49, b_col);
    output_row_50 <= with_bias(input_row_50, b_col);
    output_row_51 <= with_bias(input_row_51, b_col);
    output_row_52 <= with_bias(input_row_52, b_col);
    output_row_53 <= with_bias(input_row_53, b_col);
    output_row_54 <= with_bias(input_row_54, b_col);
    output_row_55 <= with_bias(input_row_55, b_col);
    output_row_56 <= with_bias(input_row_56, b_col);
    output_row_57 <= with_bias(input_row_57, b_col);
    output_row_58 <= with_bias(input_row_58, b_col);
    output_row_59 <= with_bias(input_row_59, b_col);
    output_row_60 <= with_bias(input_row_60, b_col);
    output_row_61 <= with_bias(input_row_61, b_col);
    output_row_62 <= with_bias(input_row_62, b_col);
    output_row_63 <= with_bias(input_row_63, b_col);

END behaviour;
--------------------------------------------------------------------------------
/MatMul/MAC_64x16_8_bias_32/mux_16to1_nbit.vhd:
--------------------------------------------------------------------------------

LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- mux_16to1_nbit
--
-- 16-input, N-bit-wide multiplexer assembled from five mux_4to1_nbit_base
-- instances arranged in two levels:
--   * level 0 (mux_0_0 .. mux_0_3): each picks one of four consecutive
--     inputs (I0..I3, I4..I7, I8..I11, I12..I15) using SEL_mux(1 downto 0);
--   * level 1 (mux_1_out): picks one of the four level-0 results using
--     SEL_mux(3 downto 2) and drives O.
-- NOTE(review): assuming mux_4to1_nbit_base routes I<k> to O when SEL = k,
-- O equals the input whose index is the unsigned value of SEL_mux -- confirm
-- against mux_4to1_nbit_base, which is defined in another file.
--
-- Generic
--   N : data width in bits (default 2).
ENTITY mux_16to1_nbit IS
    GENERIC (
        N : POSITIVE := 2
    );
    PORT (
        I0, I1, I2,  I3,  I4,  I5,  I6,  I7,
        I8, I9, I10, I11, I12, I13, I14, I15 : IN STD_LOGIC_VECTOR(N-1 downto 0);
        SEL_mux : IN  STD_LOGIC_VECTOR(3 downto 0);
        O       : OUT STD_LOGIC_VECTOR(N-1 downto 0)
    );
END mux_16to1_nbit;

ARCHITECTURE behaviour OF mux_16to1_nbit IS

    COMPONENT mux_4to1_nbit_base IS
        GENERIC ( N : POSITIVE := 1 );
        PORT (
            I0, I1, I2, I3 : IN  STD_LOGIC_VECTOR(N-1 downto 0);
            SEL            : IN  STD_LOGIC_VECTOR(1 downto 0);
            O              : OUT STD_LOGIC_VECTOR(N-1 downto 0)
        );
    END COMPONENT;

    -- group_out(g) is the result of level-0 mux g (inputs I<4g> .. I<4g+3>).
    TYPE group_bus_t IS ARRAY (0 TO 3) OF STD_LOGIC_VECTOR(N-1 downto 0);
    SIGNAL group_out : group_bus_t;

BEGIN

    -- Level 0: all four muxes share the two low select bits.
    mux_0_0: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (SEL => SEL_mux(1 downto 0), I0 => I0,  I1 => I1,  I2 => I2,  I3 => I3,  O => group_out(0));

    mux_0_1: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (SEL => SEL_mux(1 downto 0), I0 => I4,  I1 => I5,  I2 => I6,  I3 => I7,  O => group_out(1));

    mux_0_2: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (SEL => SEL_mux(1 downto 0), I0 => I8,  I1 => I9,  I2 => I10, I3 => I11, O => group_out(2));

    mux_0_3: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (SEL => SEL_mux(1 downto 0), I0 => I12, I1 => I13, I2 => I14, I3 => I15, O => group_out(3));

    -- Level 1: the two high select bits choose among the level-0 results.
    mux_1_out: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (SEL => SEL_mux(3 downto 2),
                  I0 => group_out(0), I1 => group_out(1), I2 => group_out(2), I3 => group_out(3),
                  O => O);

END behaviour;
-------------------------------------------------------------------------------- /MatMul/MAC_64x32_8_bias_32/MAC_bias_64x32_8.vhd: -------------------------------------------------------------------------------- 1 | 2 | LIBRARY ieee; 3 | USE ieee.std_logic_1164.all; 4 | USE ieee.numeric_std.all; 5 | 6 | ENTITY MAC_bias_64x32_8 IS 7 | PORT (input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7, input_row_8, input_row_9, 8 | input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15, input_row_16, input_row_17, input_row_18, input_row_19, 9 | input_row_20, input_row_21, input_row_22, input_row_23, input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, 10 | input_row_30, input_row_31, input_row_32, input_row_33, input_row_34, input_row_35, input_row_36, input_row_37, input_row_38, input_row_39, 11 | input_row_40, input_row_41, input_row_42, input_row_43, input_row_44, input_row_45, input_row_46, input_row_47, input_row_48, input_row_49, 12 | input_row_50, input_row_51, input_row_52, input_row_53, input_row_54, input_row_55, input_row_56, input_row_57, input_row_58, input_row_59, 13 | input_row_60, input_row_61, input_row_62,
input_row_63: IN STD_LOGIC_VECTOR(7 downto 0); 14 | input_col_0, input_col_1, input_col_2, input_col_3, input_col_4, input_col_5, input_col_6, input_col_7, input_col_8, input_col_9, 15 | input_col_10, input_col_11, input_col_12, input_col_13, input_col_14, input_col_15, input_col_16, input_col_17, input_col_18, input_col_19, 16 | input_col_20, input_col_21, input_col_22, input_col_23, input_col_24, input_col_25, input_col_26, input_col_27, input_col_28, input_col_29, 17 | input_col_30, input_col_31: IN STD_LOGIC_VECTOR(7 downto 0); 18 | b_col : IN STD_LOGIC_VECTOR(31 downto 0); 19 | SEL_mux: IN STD_LOGIC_VECTOR(4 downto 0); 20 | CLK, RST_n, ENABLE : IN STD_LOGIC; 21 | output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7, output_row_8, output_row_9, 22 | output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15, output_row_16, output_row_17, output_row_18, output_row_19, 23 | output_row_20, output_row_21, output_row_22, output_row_23, output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, 24 | output_row_30, output_row_31, output_row_32, output_row_33, output_row_34, output_row_35, output_row_36, output_row_37, output_row_38, output_row_39, 25 | output_row_40, output_row_41, output_row_42, output_row_43, output_row_44, output_row_45, output_row_46, output_row_47, output_row_48, output_row_49, 26 | output_row_50, output_row_51, output_row_52, output_row_53, output_row_54, output_row_55, output_row_56, output_row_57, output_row_58, output_row_59, 27 | output_row_60, output_row_61, output_row_62, output_row_63: OUT STD_LOGIC_VECTOR(31 downto 0) 28 | ); 29 | END MAC_bias_64x32_8; 30 | 31 | ARCHITECTURE behaviour OF MAC_bias_64x32_8 IS 32 | 33 | 34 | COMPONENT MAC_64x32_8 IS 35 | PORT (input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7, input_row_8, input_row_9, 36 | input_row_10, input_row_11, 
input_row_12, input_row_13, input_row_14, input_row_15, input_row_16, input_row_17, input_row_18, input_row_19, 37 | input_row_20, input_row_21, input_row_22, input_row_23, input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, 38 | input_row_30, input_row_31, input_row_32, input_row_33, input_row_34, input_row_35, input_row_36, input_row_37, input_row_38, input_row_39, 39 | input_row_40, input_row_41, input_row_42, input_row_43, input_row_44, input_row_45, input_row_46, input_row_47, input_row_48, input_row_49, 40 | input_row_50, input_row_51, input_row_52, input_row_53, input_row_54, input_row_55, input_row_56, input_row_57, input_row_58, input_row_59, 41 | input_row_60, input_row_61, input_row_62, input_row_63: IN STD_LOGIC_VECTOR(7 downto 0); 42 | input_col_0, input_col_1, input_col_2, input_col_3, input_col_4, input_col_5, input_col_6, input_col_7, input_col_8, input_col_9, 43 | input_col_10, input_col_11, input_col_12, input_col_13, input_col_14, input_col_15, input_col_16, input_col_17, input_col_18, input_col_19, 44 | input_col_20, input_col_21, input_col_22, input_col_23, input_col_24, input_col_25, input_col_26, input_col_27, input_col_28, input_col_29, 45 | input_col_30, input_col_31: IN STD_LOGIC_VECTOR(7 downto 0); 46 | SEL_mux: IN STD_LOGIC_VECTOR(4 downto 0); 47 | CLK, RST_n, ENABLE : IN STD_LOGIC; 48 | output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7, output_row_8, output_row_9, 49 | output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15, output_row_16, output_row_17, output_row_18, output_row_19, 50 | output_row_20, output_row_21, output_row_22, output_row_23, output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, 51 | output_row_30, output_row_31, output_row_32, output_row_33, output_row_34, output_row_35, output_row_36, output_row_37, output_row_38, output_row_39, 52 | output_row_40, 
output_row_41, output_row_42, output_row_43, output_row_44, output_row_45, output_row_46, output_row_47, output_row_48, output_row_49, 53 | output_row_50, output_row_51, output_row_52, output_row_53, output_row_54, output_row_55, output_row_56, output_row_57, output_row_58, output_row_59, 54 | output_row_60, output_row_61, output_row_62, output_row_63: OUT STD_LOGIC_VECTOR(31 downto 0) 55 | ); 56 | END COMPONENT; 57 | 58 | 59 | COMPONENT bias_sum_64_32 IS 60 | PORT (input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7, input_row_8, input_row_9, 61 | input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15, input_row_16, input_row_17, input_row_18, input_row_19, 62 | input_row_20, input_row_21, input_row_22, input_row_23, input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, 63 | input_row_30, input_row_31, input_row_32, input_row_33, input_row_34, input_row_35, input_row_36, input_row_37, input_row_38, input_row_39, 64 | input_row_40, input_row_41, input_row_42, input_row_43, input_row_44, input_row_45, input_row_46, input_row_47, input_row_48, input_row_49, 65 | input_row_50, input_row_51, input_row_52, input_row_53, input_row_54, input_row_55, input_row_56, input_row_57, input_row_58, input_row_59, 66 | input_row_60, input_row_61, input_row_62, input_row_63: IN STD_LOGIC_VECTOR(31 downto 0); 67 | b_col : IN STD_LOGIC_VECTOR(31 downto 0); 68 | output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7, output_row_8, output_row_9, 69 | output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15, output_row_16, output_row_17, output_row_18, output_row_19, 70 | output_row_20, output_row_21, output_row_22, output_row_23, output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, 71 | output_row_30, output_row_31, output_row_32, output_row_33, 
output_row_34, output_row_35, output_row_36, output_row_37, output_row_38, output_row_39, 72 | output_row_40, output_row_41, output_row_42, output_row_43, output_row_44, output_row_45, output_row_46, output_row_47, output_row_48, output_row_49, 73 | output_row_50, output_row_51, output_row_52, output_row_53, output_row_54, output_row_55, output_row_56, output_row_57, output_row_58, output_row_59, 74 | output_row_60, output_row_61, output_row_62, output_row_63: OUT STD_LOGIC_VECTOR(31 downto 0) 75 | ); 76 | END COMPONENT; 77 | 78 | 79 | SIGNAL output_row_MAC_base_0: STD_LOGIC_VECTOR(31 downto 0); 80 | SIGNAL output_row_MAC_base_1: STD_LOGIC_VECTOR(31 downto 0); 81 | SIGNAL output_row_MAC_base_2: STD_LOGIC_VECTOR(31 downto 0); 82 | SIGNAL output_row_MAC_base_3: STD_LOGIC_VECTOR(31 downto 0); 83 | SIGNAL output_row_MAC_base_4: STD_LOGIC_VECTOR(31 downto 0); 84 | SIGNAL output_row_MAC_base_5: STD_LOGIC_VECTOR(31 downto 0); 85 | SIGNAL output_row_MAC_base_6: STD_LOGIC_VECTOR(31 downto 0); 86 | SIGNAL output_row_MAC_base_7: STD_LOGIC_VECTOR(31 downto 0); 87 | SIGNAL output_row_MAC_base_8: STD_LOGIC_VECTOR(31 downto 0); 88 | SIGNAL output_row_MAC_base_9: STD_LOGIC_VECTOR(31 downto 0); 89 | SIGNAL output_row_MAC_base_10: STD_LOGIC_VECTOR(31 downto 0); 90 | SIGNAL output_row_MAC_base_11: STD_LOGIC_VECTOR(31 downto 0); 91 | SIGNAL output_row_MAC_base_12: STD_LOGIC_VECTOR(31 downto 0); 92 | SIGNAL output_row_MAC_base_13: STD_LOGIC_VECTOR(31 downto 0); 93 | SIGNAL output_row_MAC_base_14: STD_LOGIC_VECTOR(31 downto 0); 94 | SIGNAL output_row_MAC_base_15: STD_LOGIC_VECTOR(31 downto 0); 95 | SIGNAL output_row_MAC_base_16: STD_LOGIC_VECTOR(31 downto 0); 96 | SIGNAL output_row_MAC_base_17: STD_LOGIC_VECTOR(31 downto 0); 97 | SIGNAL output_row_MAC_base_18: STD_LOGIC_VECTOR(31 downto 0); 98 | SIGNAL output_row_MAC_base_19: STD_LOGIC_VECTOR(31 downto 0); 99 | SIGNAL output_row_MAC_base_20: STD_LOGIC_VECTOR(31 downto 0); 100 | SIGNAL output_row_MAC_base_21: STD_LOGIC_VECTOR(31 downto 
0); 101 | SIGNAL output_row_MAC_base_22: STD_LOGIC_VECTOR(31 downto 0); 102 | SIGNAL output_row_MAC_base_23: STD_LOGIC_VECTOR(31 downto 0); 103 | SIGNAL output_row_MAC_base_24: STD_LOGIC_VECTOR(31 downto 0); 104 | SIGNAL output_row_MAC_base_25: STD_LOGIC_VECTOR(31 downto 0); 105 | SIGNAL output_row_MAC_base_26: STD_LOGIC_VECTOR(31 downto 0); 106 | SIGNAL output_row_MAC_base_27: STD_LOGIC_VECTOR(31 downto 0); 107 | SIGNAL output_row_MAC_base_28: STD_LOGIC_VECTOR(31 downto 0); 108 | SIGNAL output_row_MAC_base_29: STD_LOGIC_VECTOR(31 downto 0); 109 | SIGNAL output_row_MAC_base_30: STD_LOGIC_VECTOR(31 downto 0); 110 | SIGNAL output_row_MAC_base_31: STD_LOGIC_VECTOR(31 downto 0); 111 | SIGNAL output_row_MAC_base_32: STD_LOGIC_VECTOR(31 downto 0); 112 | SIGNAL output_row_MAC_base_33: STD_LOGIC_VECTOR(31 downto 0); 113 | SIGNAL output_row_MAC_base_34: STD_LOGIC_VECTOR(31 downto 0); 114 | SIGNAL output_row_MAC_base_35: STD_LOGIC_VECTOR(31 downto 0); 115 | SIGNAL output_row_MAC_base_36: STD_LOGIC_VECTOR(31 downto 0); 116 | SIGNAL output_row_MAC_base_37: STD_LOGIC_VECTOR(31 downto 0); 117 | SIGNAL output_row_MAC_base_38: STD_LOGIC_VECTOR(31 downto 0); 118 | SIGNAL output_row_MAC_base_39: STD_LOGIC_VECTOR(31 downto 0); 119 | SIGNAL output_row_MAC_base_40: STD_LOGIC_VECTOR(31 downto 0); 120 | SIGNAL output_row_MAC_base_41: STD_LOGIC_VECTOR(31 downto 0); 121 | SIGNAL output_row_MAC_base_42: STD_LOGIC_VECTOR(31 downto 0); 122 | SIGNAL output_row_MAC_base_43: STD_LOGIC_VECTOR(31 downto 0); 123 | SIGNAL output_row_MAC_base_44: STD_LOGIC_VECTOR(31 downto 0); 124 | SIGNAL output_row_MAC_base_45: STD_LOGIC_VECTOR(31 downto 0); 125 | SIGNAL output_row_MAC_base_46: STD_LOGIC_VECTOR(31 downto 0); 126 | SIGNAL output_row_MAC_base_47: STD_LOGIC_VECTOR(31 downto 0); 127 | SIGNAL output_row_MAC_base_48: STD_LOGIC_VECTOR(31 downto 0); 128 | SIGNAL output_row_MAC_base_49: STD_LOGIC_VECTOR(31 downto 0); 129 | SIGNAL output_row_MAC_base_50: STD_LOGIC_VECTOR(31 downto 0); 130 | SIGNAL 
output_row_MAC_base_51: STD_LOGIC_VECTOR(31 downto 0); 131 | SIGNAL output_row_MAC_base_52: STD_LOGIC_VECTOR(31 downto 0); 132 | SIGNAL output_row_MAC_base_53: STD_LOGIC_VECTOR(31 downto 0); 133 | SIGNAL output_row_MAC_base_54: STD_LOGIC_VECTOR(31 downto 0); 134 | SIGNAL output_row_MAC_base_55: STD_LOGIC_VECTOR(31 downto 0); 135 | SIGNAL output_row_MAC_base_56: STD_LOGIC_VECTOR(31 downto 0); 136 | SIGNAL output_row_MAC_base_57: STD_LOGIC_VECTOR(31 downto 0); 137 | SIGNAL output_row_MAC_base_58: STD_LOGIC_VECTOR(31 downto 0); 138 | SIGNAL output_row_MAC_base_59: STD_LOGIC_VECTOR(31 downto 0); 139 | SIGNAL output_row_MAC_base_60: STD_LOGIC_VECTOR(31 downto 0); 140 | SIGNAL output_row_MAC_base_61: STD_LOGIC_VECTOR(31 downto 0); 141 | SIGNAL output_row_MAC_base_62: STD_LOGIC_VECTOR(31 downto 0); 142 | SIGNAL output_row_MAC_base_63: STD_LOGIC_VECTOR(31 downto 0); 143 | 144 | BEGIN 145 | 146 | MAC_base: MAC_64x32_8 PORT MAP(CLK=>CLK, RST_n=>RST_n, ENABLE=>ENABLE, 147 | input_row_0=>input_row_0, input_row_1=>input_row_1, input_row_2=>input_row_2, input_row_3=>input_row_3, input_row_4=>input_row_4, input_row_5=>input_row_5, input_row_6=>input_row_6, input_row_7=>input_row_7, input_row_8=>input_row_8, input_row_9=>input_row_9, input_row_10=>input_row_10, input_row_11=>input_row_11, input_row_12=>input_row_12, input_row_13=>input_row_13, input_row_14=>input_row_14, input_row_15=>input_row_15, input_row_16=>input_row_16, input_row_17=>input_row_17, input_row_18=>input_row_18, input_row_19=>input_row_19, input_row_20=>input_row_20, input_row_21=>input_row_21, input_row_22=>input_row_22, input_row_23=>input_row_23, input_row_24=>input_row_24, input_row_25=>input_row_25, input_row_26=>input_row_26, input_row_27=>input_row_27, input_row_28=>input_row_28, input_row_29=>input_row_29, input_row_30=>input_row_30, input_row_31=>input_row_31, input_row_32=>input_row_32, input_row_33=>input_row_33, input_row_34=>input_row_34, input_row_35=>input_row_35, input_row_36=>input_row_36, 
input_row_37=>input_row_37, input_row_38=>input_row_38, input_row_39=>input_row_39, input_row_40=>input_row_40, input_row_41=>input_row_41, input_row_42=>input_row_42, input_row_43=>input_row_43, input_row_44=>input_row_44, input_row_45=>input_row_45, input_row_46=>input_row_46, input_row_47=>input_row_47, input_row_48=>input_row_48, input_row_49=>input_row_49, input_row_50=>input_row_50, input_row_51=>input_row_51, input_row_52=>input_row_52, input_row_53=>input_row_53, input_row_54=>input_row_54, input_row_55=>input_row_55, input_row_56=>input_row_56, input_row_57=>input_row_57, input_row_58=>input_row_58, input_row_59=>input_row_59, input_row_60=>input_row_60, input_row_61=>input_row_61, input_row_62=>input_row_62, input_row_63=>input_row_63, 148 | input_col_0=>input_col_0, input_col_1=>input_col_1, input_col_2=>input_col_2, input_col_3=>input_col_3, input_col_4=>input_col_4, input_col_5=>input_col_5, input_col_6=>input_col_6, input_col_7=>input_col_7, input_col_8=>input_col_8, input_col_9=>input_col_9, input_col_10=>input_col_10, input_col_11=>input_col_11, input_col_12=>input_col_12, input_col_13=>input_col_13, input_col_14=>input_col_14, input_col_15=>input_col_15, input_col_16=>input_col_16, input_col_17=>input_col_17, input_col_18=>input_col_18, input_col_19=>input_col_19, input_col_20=>input_col_20, input_col_21=>input_col_21, input_col_22=>input_col_22, input_col_23=>input_col_23, input_col_24=>input_col_24, input_col_25=>input_col_25, input_col_26=>input_col_26, input_col_27=>input_col_27, input_col_28=>input_col_28, input_col_29=>input_col_29, input_col_30=>input_col_30, input_col_31=>input_col_31, 149 | output_row_0=>output_row_MAC_base_0, output_row_1=>output_row_MAC_base_1, output_row_2=>output_row_MAC_base_2, output_row_3=>output_row_MAC_base_3, output_row_4=>output_row_MAC_base_4, output_row_5=>output_row_MAC_base_5, output_row_6=>output_row_MAC_base_6, output_row_7=>output_row_MAC_base_7, output_row_8=>output_row_MAC_base_8, 
output_row_9=>output_row_MAC_base_9, output_row_10=>output_row_MAC_base_10, output_row_11=>output_row_MAC_base_11, output_row_12=>output_row_MAC_base_12, output_row_13=>output_row_MAC_base_13, output_row_14=>output_row_MAC_base_14, output_row_15=>output_row_MAC_base_15, output_row_16=>output_row_MAC_base_16, output_row_17=>output_row_MAC_base_17, output_row_18=>output_row_MAC_base_18, output_row_19=>output_row_MAC_base_19, output_row_20=>output_row_MAC_base_20, output_row_21=>output_row_MAC_base_21, output_row_22=>output_row_MAC_base_22, output_row_23=>output_row_MAC_base_23, output_row_24=>output_row_MAC_base_24, output_row_25=>output_row_MAC_base_25, output_row_26=>output_row_MAC_base_26, output_row_27=>output_row_MAC_base_27, output_row_28=>output_row_MAC_base_28, output_row_29=>output_row_MAC_base_29, output_row_30=>output_row_MAC_base_30, output_row_31=>output_row_MAC_base_31, output_row_32=>output_row_MAC_base_32, output_row_33=>output_row_MAC_base_33, output_row_34=>output_row_MAC_base_34, output_row_35=>output_row_MAC_base_35, output_row_36=>output_row_MAC_base_36, output_row_37=>output_row_MAC_base_37, output_row_38=>output_row_MAC_base_38, output_row_39=>output_row_MAC_base_39, output_row_40=>output_row_MAC_base_40, output_row_41=>output_row_MAC_base_41, output_row_42=>output_row_MAC_base_42, output_row_43=>output_row_MAC_base_43, output_row_44=>output_row_MAC_base_44, output_row_45=>output_row_MAC_base_45, output_row_46=>output_row_MAC_base_46, output_row_47=>output_row_MAC_base_47, output_row_48=>output_row_MAC_base_48, output_row_49=>output_row_MAC_base_49, output_row_50=>output_row_MAC_base_50, output_row_51=>output_row_MAC_base_51, output_row_52=>output_row_MAC_base_52, output_row_53=>output_row_MAC_base_53, output_row_54=>output_row_MAC_base_54, output_row_55=>output_row_MAC_base_55, output_row_56=>output_row_MAC_base_56, output_row_57=>output_row_MAC_base_57, output_row_58=>output_row_MAC_base_58, output_row_59=>output_row_MAC_base_59, 
output_row_60=>output_row_MAC_base_60, output_row_61=>output_row_MAC_base_61, output_row_62=>output_row_MAC_base_62, output_row_63=>output_row_MAC_base_63, 150 | SEL_mux=>SEL_mux); 151 | bias_sum: bias_sum_64_32 PORT MAP(input_row_0=>output_row_MAC_base_0, input_row_1=>output_row_MAC_base_1, input_row_2=>output_row_MAC_base_2, input_row_3=>output_row_MAC_base_3, input_row_4=>output_row_MAC_base_4, input_row_5=>output_row_MAC_base_5, input_row_6=>output_row_MAC_base_6, input_row_7=>output_row_MAC_base_7, input_row_8=>output_row_MAC_base_8, input_row_9=>output_row_MAC_base_9, input_row_10=>output_row_MAC_base_10, input_row_11=>output_row_MAC_base_11, input_row_12=>output_row_MAC_base_12, input_row_13=>output_row_MAC_base_13, input_row_14=>output_row_MAC_base_14, input_row_15=>output_row_MAC_base_15, input_row_16=>output_row_MAC_base_16, input_row_17=>output_row_MAC_base_17, input_row_18=>output_row_MAC_base_18, input_row_19=>output_row_MAC_base_19, input_row_20=>output_row_MAC_base_20, input_row_21=>output_row_MAC_base_21, input_row_22=>output_row_MAC_base_22, input_row_23=>output_row_MAC_base_23, input_row_24=>output_row_MAC_base_24, input_row_25=>output_row_MAC_base_25, input_row_26=>output_row_MAC_base_26, input_row_27=>output_row_MAC_base_27, input_row_28=>output_row_MAC_base_28, input_row_29=>output_row_MAC_base_29, input_row_30=>output_row_MAC_base_30, input_row_31=>output_row_MAC_base_31, input_row_32=>output_row_MAC_base_32, input_row_33=>output_row_MAC_base_33, input_row_34=>output_row_MAC_base_34, input_row_35=>output_row_MAC_base_35, input_row_36=>output_row_MAC_base_36, input_row_37=>output_row_MAC_base_37, input_row_38=>output_row_MAC_base_38, input_row_39=>output_row_MAC_base_39, input_row_40=>output_row_MAC_base_40, input_row_41=>output_row_MAC_base_41, input_row_42=>output_row_MAC_base_42, input_row_43=>output_row_MAC_base_43, input_row_44=>output_row_MAC_base_44, input_row_45=>output_row_MAC_base_45, input_row_46=>output_row_MAC_base_46, 
input_row_47=>output_row_MAC_base_47, input_row_48=>output_row_MAC_base_48, input_row_49=>output_row_MAC_base_49, input_row_50=>output_row_MAC_base_50, input_row_51=>output_row_MAC_base_51, input_row_52=>output_row_MAC_base_52, input_row_53=>output_row_MAC_base_53, input_row_54=>output_row_MAC_base_54, input_row_55=>output_row_MAC_base_55, input_row_56=>output_row_MAC_base_56, input_row_57=>output_row_MAC_base_57, input_row_58=>output_row_MAC_base_58, input_row_59=>output_row_MAC_base_59, input_row_60=>output_row_MAC_base_60, input_row_61=>output_row_MAC_base_61, input_row_62=>output_row_MAC_base_62, input_row_63=>output_row_MAC_base_63, 152 | b_col=>b_col, output_row_0=>output_row_0, output_row_1=>output_row_1, output_row_2=>output_row_2, output_row_3=>output_row_3, output_row_4=>output_row_4, output_row_5=>output_row_5, output_row_6=>output_row_6, output_row_7=>output_row_7, output_row_8=>output_row_8, output_row_9=>output_row_9, output_row_10=>output_row_10, output_row_11=>output_row_11, output_row_12=>output_row_12, output_row_13=>output_row_13, output_row_14=>output_row_14, output_row_15=>output_row_15, output_row_16=>output_row_16, output_row_17=>output_row_17, output_row_18=>output_row_18, output_row_19=>output_row_19, output_row_20=>output_row_20, output_row_21=>output_row_21, output_row_22=>output_row_22, output_row_23=>output_row_23, output_row_24=>output_row_24, output_row_25=>output_row_25, output_row_26=>output_row_26, output_row_27=>output_row_27, output_row_28=>output_row_28, output_row_29=>output_row_29, output_row_30=>output_row_30, output_row_31=>output_row_31, output_row_32=>output_row_32, output_row_33=>output_row_33, output_row_34=>output_row_34, output_row_35=>output_row_35, output_row_36=>output_row_36, output_row_37=>output_row_37, output_row_38=>output_row_38, output_row_39=>output_row_39, output_row_40=>output_row_40, output_row_41=>output_row_41, output_row_42=>output_row_42, output_row_43=>output_row_43, output_row_44=>output_row_44, 
output_row_45=>output_row_45, output_row_46=>output_row_46, output_row_47=>output_row_47, output_row_48=>output_row_48, output_row_49=>output_row_49, output_row_50=>output_row_50, output_row_51=>output_row_51, output_row_52=>output_row_52, output_row_53=>output_row_53, output_row_54=>output_row_54, output_row_55=>output_row_55, output_row_56=>output_row_56, output_row_57=>output_row_57, output_row_58=>output_row_58, output_row_59=>output_row_59, output_row_60=>output_row_60, output_row_61=>output_row_61, output_row_62=>output_row_62, output_row_63=>output_row_63); 153 | 154 | 155 | END behaviour; 156 | -------------------------------------------------------------------------------- /MatMul/MAC_64x32_8_bias_32/bias_sum_64_32.vhd: -------------------------------------------------------------------------------- 1 | 2 | LIBRARY ieee; 3 | USE ieee.std_logic_1164.all; 4 | USE ieee.numeric_std.all; 5 | 6 | ENTITY bias_sum_64_32 IS 7 | PORT (input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7, input_row_8, input_row_9, 8 | input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15, input_row_16, input_row_17, input_row_18, input_row_19, 9 | input_row_20, input_row_21, input_row_22, input_row_23, input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, 10 | input_row_30, input_row_31, input_row_32, input_row_33, input_row_34, input_row_35, input_row_36, input_row_37, input_row_38, input_row_39, 11 | input_row_40, input_row_41, input_row_42, input_row_43, input_row_44, input_row_45, input_row_46, input_row_47, input_row_48, input_row_49, 12 | input_row_50, input_row_51, input_row_52, input_row_53, input_row_54, input_row_55, input_row_56, input_row_57, input_row_58, input_row_59, 13 | input_row_60, input_row_61, input_row_62, input_row_63: IN STD_LOGIC_VECTOR(31 downto 0); 14 | b_col : IN STD_LOGIC_VECTOR(31 downto 0); 15 | output_row_0, output_row_1, output_row_2, 
output_row_3, output_row_4, output_row_5, output_row_6, output_row_7, output_row_8, output_row_9, 16 | output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15, output_row_16, output_row_17, output_row_18, output_row_19, 17 | output_row_20, output_row_21, output_row_22, output_row_23, output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, 18 | output_row_30, output_row_31, output_row_32, output_row_33, output_row_34, output_row_35, output_row_36, output_row_37, output_row_38, output_row_39, 19 | output_row_40, output_row_41, output_row_42, output_row_43, output_row_44, output_row_45, output_row_46, output_row_47, output_row_48, output_row_49, 20 | output_row_50, output_row_51, output_row_52, output_row_53, output_row_54, output_row_55, output_row_56, output_row_57, output_row_58, output_row_59, 21 | output_row_60, output_row_61, output_row_62, output_row_63: OUT STD_LOGIC_VECTOR(31 downto 0) 22 | ); 23 | END bias_sum_64_32; 24 | 25 | ARCHITECTURE behaviour OF bias_sum_64_32 IS 26 | 27 | 28 | 29 | BEGIN 30 | 31 | output_row_0 <= STD_LOGIC_VECTOR(SIGNED(input_row_0) + SIGNED(b_col)); 32 | output_row_1 <= STD_LOGIC_VECTOR(SIGNED(input_row_1) + SIGNED(b_col)); 33 | output_row_2 <= STD_LOGIC_VECTOR(SIGNED(input_row_2) + SIGNED(b_col)); 34 | output_row_3 <= STD_LOGIC_VECTOR(SIGNED(input_row_3) + SIGNED(b_col)); 35 | output_row_4 <= STD_LOGIC_VECTOR(SIGNED(input_row_4) + SIGNED(b_col)); 36 | output_row_5 <= STD_LOGIC_VECTOR(SIGNED(input_row_5) + SIGNED(b_col)); 37 | output_row_6 <= STD_LOGIC_VECTOR(SIGNED(input_row_6) + SIGNED(b_col)); 38 | output_row_7 <= STD_LOGIC_VECTOR(SIGNED(input_row_7) + SIGNED(b_col)); 39 | output_row_8 <= STD_LOGIC_VECTOR(SIGNED(input_row_8) + SIGNED(b_col)); 40 | output_row_9 <= STD_LOGIC_VECTOR(SIGNED(input_row_9) + SIGNED(b_col)); 41 | output_row_10 <= STD_LOGIC_VECTOR(SIGNED(input_row_10) + SIGNED(b_col)); 42 | output_row_11 <= STD_LOGIC_VECTOR(SIGNED(input_row_11) + 
SIGNED(b_col)); 43 | output_row_12 <= STD_LOGIC_VECTOR(SIGNED(input_row_12) + SIGNED(b_col)); 44 | output_row_13 <= STD_LOGIC_VECTOR(SIGNED(input_row_13) + SIGNED(b_col)); 45 | output_row_14 <= STD_LOGIC_VECTOR(SIGNED(input_row_14) + SIGNED(b_col)); 46 | output_row_15 <= STD_LOGIC_VECTOR(SIGNED(input_row_15) + SIGNED(b_col)); 47 | output_row_16 <= STD_LOGIC_VECTOR(SIGNED(input_row_16) + SIGNED(b_col)); 48 | output_row_17 <= STD_LOGIC_VECTOR(SIGNED(input_row_17) + SIGNED(b_col)); 49 | output_row_18 <= STD_LOGIC_VECTOR(SIGNED(input_row_18) + SIGNED(b_col)); 50 | output_row_19 <= STD_LOGIC_VECTOR(SIGNED(input_row_19) + SIGNED(b_col)); 51 | output_row_20 <= STD_LOGIC_VECTOR(SIGNED(input_row_20) + SIGNED(b_col)); 52 | output_row_21 <= STD_LOGIC_VECTOR(SIGNED(input_row_21) + SIGNED(b_col)); 53 | output_row_22 <= STD_LOGIC_VECTOR(SIGNED(input_row_22) + SIGNED(b_col)); 54 | output_row_23 <= STD_LOGIC_VECTOR(SIGNED(input_row_23) + SIGNED(b_col)); 55 | output_row_24 <= STD_LOGIC_VECTOR(SIGNED(input_row_24) + SIGNED(b_col)); 56 | output_row_25 <= STD_LOGIC_VECTOR(SIGNED(input_row_25) + SIGNED(b_col)); 57 | output_row_26 <= STD_LOGIC_VECTOR(SIGNED(input_row_26) + SIGNED(b_col)); 58 | output_row_27 <= STD_LOGIC_VECTOR(SIGNED(input_row_27) + SIGNED(b_col)); 59 | output_row_28 <= STD_LOGIC_VECTOR(SIGNED(input_row_28) + SIGNED(b_col)); 60 | output_row_29 <= STD_LOGIC_VECTOR(SIGNED(input_row_29) + SIGNED(b_col)); 61 | output_row_30 <= STD_LOGIC_VECTOR(SIGNED(input_row_30) + SIGNED(b_col)); 62 | output_row_31 <= STD_LOGIC_VECTOR(SIGNED(input_row_31) + SIGNED(b_col)); 63 | output_row_32 <= STD_LOGIC_VECTOR(SIGNED(input_row_32) + SIGNED(b_col)); 64 | output_row_33 <= STD_LOGIC_VECTOR(SIGNED(input_row_33) + SIGNED(b_col)); 65 | output_row_34 <= STD_LOGIC_VECTOR(SIGNED(input_row_34) + SIGNED(b_col)); 66 | output_row_35 <= STD_LOGIC_VECTOR(SIGNED(input_row_35) + SIGNED(b_col)); 67 | output_row_36 <= STD_LOGIC_VECTOR(SIGNED(input_row_36) + SIGNED(b_col)); 68 | output_row_37 <= 
STD_LOGIC_VECTOR(SIGNED(input_row_37) + SIGNED(b_col)); 69 | output_row_38 <= STD_LOGIC_VECTOR(SIGNED(input_row_38) + SIGNED(b_col)); 70 | output_row_39 <= STD_LOGIC_VECTOR(SIGNED(input_row_39) + SIGNED(b_col)); 71 | output_row_40 <= STD_LOGIC_VECTOR(SIGNED(input_row_40) + SIGNED(b_col)); 72 | output_row_41 <= STD_LOGIC_VECTOR(SIGNED(input_row_41) + SIGNED(b_col)); 73 | output_row_42 <= STD_LOGIC_VECTOR(SIGNED(input_row_42) + SIGNED(b_col)); 74 | output_row_43 <= STD_LOGIC_VECTOR(SIGNED(input_row_43) + SIGNED(b_col)); 75 | output_row_44 <= STD_LOGIC_VECTOR(SIGNED(input_row_44) + SIGNED(b_col)); 76 | output_row_45 <= STD_LOGIC_VECTOR(SIGNED(input_row_45) + SIGNED(b_col)); 77 | output_row_46 <= STD_LOGIC_VECTOR(SIGNED(input_row_46) + SIGNED(b_col)); 78 | output_row_47 <= STD_LOGIC_VECTOR(SIGNED(input_row_47) + SIGNED(b_col)); 79 | output_row_48 <= STD_LOGIC_VECTOR(SIGNED(input_row_48) + SIGNED(b_col)); 80 | output_row_49 <= STD_LOGIC_VECTOR(SIGNED(input_row_49) + SIGNED(b_col)); 81 | output_row_50 <= STD_LOGIC_VECTOR(SIGNED(input_row_50) + SIGNED(b_col)); 82 | output_row_51 <= STD_LOGIC_VECTOR(SIGNED(input_row_51) + SIGNED(b_col)); 83 | output_row_52 <= STD_LOGIC_VECTOR(SIGNED(input_row_52) + SIGNED(b_col)); 84 | output_row_53 <= STD_LOGIC_VECTOR(SIGNED(input_row_53) + SIGNED(b_col)); 85 | output_row_54 <= STD_LOGIC_VECTOR(SIGNED(input_row_54) + SIGNED(b_col)); 86 | output_row_55 <= STD_LOGIC_VECTOR(SIGNED(input_row_55) + SIGNED(b_col)); 87 | output_row_56 <= STD_LOGIC_VECTOR(SIGNED(input_row_56) + SIGNED(b_col)); 88 | output_row_57 <= STD_LOGIC_VECTOR(SIGNED(input_row_57) + SIGNED(b_col)); 89 | output_row_58 <= STD_LOGIC_VECTOR(SIGNED(input_row_58) + SIGNED(b_col)); 90 | output_row_59 <= STD_LOGIC_VECTOR(SIGNED(input_row_59) + SIGNED(b_col)); 91 | output_row_60 <= STD_LOGIC_VECTOR(SIGNED(input_row_60) + SIGNED(b_col)); 92 | output_row_61 <= STD_LOGIC_VECTOR(SIGNED(input_row_61) + SIGNED(b_col)); 93 | output_row_62 <= STD_LOGIC_VECTOR(SIGNED(input_row_62) + 
SIGNED(b_col)); 94 | output_row_63 <= STD_LOGIC_VECTOR(SIGNED(input_row_63) + SIGNED(b_col)); 95 | 96 | 97 | END behaviour; 98 | -------------------------------------------------------------------------------- /MatMul/MAC_64x32_8_bias_32/mux_32to1_nbit.vhd: --------------------------------------------------------------------------------

-- mux_32to1_nbit
--
-- 32-input, N-bit-wide multiplexer assembled structurally from smaller
-- multiplexer components in three levels:
--   level 0 : eight 4:1 muxes, all driven by SEL_mux(1 downto 0)
--   level 1 : two   4:1 muxes, both driven by SEL_mux(3 downto 2)
--   level 2 : one   2:1 mux,   driven by SEL_mux(4)
--
-- Generic:
--   N        width in bits of every data input and of the output (default 2)
-- Ports:
--   I0..I31  data inputs, N bits each
--   SEL_mux  5-bit select
--   O        selected data word, N bits
--
-- NOTE(review): the base muxes are declared elsewhere; this unit presumably
-- yields O = I<k> with k the unsigned value of SEL_mux, assuming each base mux
-- forwards the input whose index equals its SEL value -- confirm against
-- mux_4to1_nbit_base / mux_2to1_nbit_base.

LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

ENTITY mux_32to1_nbit IS
    GENERIC (
        N : POSITIVE := 2
    );
    PORT (
        I0,  I1,  I2,  I3,  I4,  I5,  I6,  I7,
        I8,  I9,  I10, I11, I12, I13, I14, I15,
        I16, I17, I18, I19, I20, I21, I22, I23,
        I24, I25, I26, I27, I28, I29, I30, I31 : IN  STD_LOGIC_VECTOR(N-1 downto 0);
        SEL_mux                                : IN  STD_LOGIC_VECTOR(4 downto 0);
        O                                      : OUT STD_LOGIC_VECTOR(N-1 downto 0)
    );
END mux_32to1_nbit;

ARCHITECTURE behaviour OF mux_32to1_nbit IS

    -- 2:1 building block, used once for the final level.
    COMPONENT mux_2to1_nbit_base IS
        GENERIC (N : POSITIVE := 1);
        PORT (
            I0, I1 : IN  STD_LOGIC_VECTOR(N-1 downto 0);
            SEL    : IN  STD_LOGIC;
            O      : OUT STD_LOGIC_VECTOR(N-1 downto 0)
        );
    END COMPONENT;

    -- 4:1 building block, used for levels 0 and 1.
    COMPONENT mux_4to1_nbit_base IS
        GENERIC (N : POSITIVE := 1);
        PORT (
            I0, I1, I2, I3 : IN  STD_LOGIC_VECTOR(N-1 downto 0);
            SEL            : IN  STD_LOGIC_VECTOR(1 downto 0);
            O              : OUT STD_LOGIC_VECTOR(N-1 downto 0)
        );
    END COMPONENT;

    -- Level-0 results: one word per group of four consecutive inputs.
    SIGNAL lvl0_y0, lvl0_y1, lvl0_y2, lvl0_y3,
           lvl0_y4, lvl0_y5, lvl0_y6, lvl0_y7 : STD_LOGIC_VECTOR(N-1 downto 0);

    -- Level-1 results: one word for inputs 0..15, one for inputs 16..31.
    SIGNAL lvl1_y0, lvl1_y1 : STD_LOGIC_VECTOR(N-1 downto 0);

BEGIN

    ---------------------------------------------------------------------------
    -- Level 0: eight 4:1 muxes sharing the two least-significant select bits.
    ---------------------------------------------------------------------------
    mux_0_0: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0 => I0,  I1 => I1,  I2 => I2,  I3 => I3,
                  SEL => SEL_mux(1 downto 0), O => lvl0_y0);

    mux_0_1: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0 => I4,  I1 => I5,  I2 => I6,  I3 => I7,
                  SEL => SEL_mux(1 downto 0), O => lvl0_y1);

    mux_0_2: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0 => I8,  I1 => I9,  I2 => I10, I3 => I11,
                  SEL => SEL_mux(1 downto 0), O => lvl0_y2);

    mux_0_3: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0 => I12, I1 => I13, I2 => I14, I3 => I15,
                  SEL => SEL_mux(1 downto 0), O => lvl0_y3);

    mux_0_4: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0 => I16, I1 => I17, I2 => I18, I3 => I19,
                  SEL => SEL_mux(1 downto 0), O => lvl0_y4);

    mux_0_5: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0 => I20, I1 => I21, I2 => I22, I3 => I23,
                  SEL => SEL_mux(1 downto 0), O => lvl0_y5);

    mux_0_6: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0 => I24, I1 => I25, I2 => I26, I3 => I27,
                  SEL => SEL_mux(1 downto 0), O => lvl0_y6);

    mux_0_7: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0 => I28, I1 => I29, I2 => I30, I3 => I31,
                  SEL => SEL_mux(1 downto 0), O => lvl0_y7);

    ---------------------------------------------------------------------------
    -- Level 1: two 4:1 muxes over the level-0 results, on SEL_mux(3 downto 2).
    ---------------------------------------------------------------------------
    mux_1_0: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0 => lvl0_y0, I1 => lvl0_y1, I2 => lvl0_y2, I3 => lvl0_y3,
                  SEL => SEL_mux(3 downto 2), O => lvl1_y0);

    mux_1_1: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0 => lvl0_y4, I1 => lvl0_y5, I2 => lvl0_y6, I3 => lvl0_y7,
                  SEL => SEL_mux(3 downto 2), O => lvl1_y1);

    ---------------------------------------------------------------------------
    -- Level 2: final 2:1 mux on the most-significant select bit drives O.
    ---------------------------------------------------------------------------
    mux_2_out: mux_2to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0 => lvl1_y0, I1 => lvl1_y1,
                  SEL => SEL_mux(4), O => O);

END behaviour;

-------------------------------------------------------------------------------- /MatMul/MAC_64x64_8_bias_32/bias_sum_64_32.vhd: -------------------------------------------------------------------------------- 1 | 2 | LIBRARY ieee; 3 | USE ieee.std_logic_1164.all; 4 | USE ieee.numeric_std.all; 5 | 6 | ENTITY bias_sum_64_32 IS 7 | PORT
(input_row_0, input_row_1, input_row_2, input_row_3, input_row_4, input_row_5, input_row_6, input_row_7, input_row_8, input_row_9, 8 | input_row_10, input_row_11, input_row_12, input_row_13, input_row_14, input_row_15, input_row_16, input_row_17, input_row_18, input_row_19, 9 | input_row_20, input_row_21, input_row_22, input_row_23, input_row_24, input_row_25, input_row_26, input_row_27, input_row_28, input_row_29, 10 | input_row_30, input_row_31, input_row_32, input_row_33, input_row_34, input_row_35, input_row_36, input_row_37, input_row_38, input_row_39, 11 | input_row_40, input_row_41, input_row_42, input_row_43, input_row_44, input_row_45, input_row_46, input_row_47, input_row_48, input_row_49, 12 | input_row_50, input_row_51, input_row_52, input_row_53, input_row_54, input_row_55, input_row_56, input_row_57, input_row_58, input_row_59, 13 | input_row_60, input_row_61, input_row_62, input_row_63: IN STD_LOGIC_VECTOR(31 downto 0); 14 | b_col : IN STD_LOGIC_VECTOR(31 downto 0); 15 | output_row_0, output_row_1, output_row_2, output_row_3, output_row_4, output_row_5, output_row_6, output_row_7, output_row_8, output_row_9, 16 | output_row_10, output_row_11, output_row_12, output_row_13, output_row_14, output_row_15, output_row_16, output_row_17, output_row_18, output_row_19, 17 | output_row_20, output_row_21, output_row_22, output_row_23, output_row_24, output_row_25, output_row_26, output_row_27, output_row_28, output_row_29, 18 | output_row_30, output_row_31, output_row_32, output_row_33, output_row_34, output_row_35, output_row_36, output_row_37, output_row_38, output_row_39, 19 | output_row_40, output_row_41, output_row_42, output_row_43, output_row_44, output_row_45, output_row_46, output_row_47, output_row_48, output_row_49, 20 | output_row_50, output_row_51, output_row_52, output_row_53, output_row_54, output_row_55, output_row_56, output_row_57, output_row_58, output_row_59, 21 | output_row_60, output_row_61, output_row_62, output_row_63: OUT 
STD_LOGIC_VECTOR(31 downto 0) 22 | ); 23 | END bias_sum_64_32; 24 | 25 | ARCHITECTURE behaviour OF bias_sum_64_32 IS 26 | 27 | 28 | 29 | BEGIN 30 | 31 | output_row_0 <= STD_LOGIC_VECTOR(SIGNED(input_row_0) + SIGNED(b_col)); 32 | output_row_1 <= STD_LOGIC_VECTOR(SIGNED(input_row_1) + SIGNED(b_col)); 33 | output_row_2 <= STD_LOGIC_VECTOR(SIGNED(input_row_2) + SIGNED(b_col)); 34 | output_row_3 <= STD_LOGIC_VECTOR(SIGNED(input_row_3) + SIGNED(b_col)); 35 | output_row_4 <= STD_LOGIC_VECTOR(SIGNED(input_row_4) + SIGNED(b_col)); 36 | output_row_5 <= STD_LOGIC_VECTOR(SIGNED(input_row_5) + SIGNED(b_col)); 37 | output_row_6 <= STD_LOGIC_VECTOR(SIGNED(input_row_6) + SIGNED(b_col)); 38 | output_row_7 <= STD_LOGIC_VECTOR(SIGNED(input_row_7) + SIGNED(b_col)); 39 | output_row_8 <= STD_LOGIC_VECTOR(SIGNED(input_row_8) + SIGNED(b_col)); 40 | output_row_9 <= STD_LOGIC_VECTOR(SIGNED(input_row_9) + SIGNED(b_col)); 41 | output_row_10 <= STD_LOGIC_VECTOR(SIGNED(input_row_10) + SIGNED(b_col)); 42 | output_row_11 <= STD_LOGIC_VECTOR(SIGNED(input_row_11) + SIGNED(b_col)); 43 | output_row_12 <= STD_LOGIC_VECTOR(SIGNED(input_row_12) + SIGNED(b_col)); 44 | output_row_13 <= STD_LOGIC_VECTOR(SIGNED(input_row_13) + SIGNED(b_col)); 45 | output_row_14 <= STD_LOGIC_VECTOR(SIGNED(input_row_14) + SIGNED(b_col)); 46 | output_row_15 <= STD_LOGIC_VECTOR(SIGNED(input_row_15) + SIGNED(b_col)); 47 | output_row_16 <= STD_LOGIC_VECTOR(SIGNED(input_row_16) + SIGNED(b_col)); 48 | output_row_17 <= STD_LOGIC_VECTOR(SIGNED(input_row_17) + SIGNED(b_col)); 49 | output_row_18 <= STD_LOGIC_VECTOR(SIGNED(input_row_18) + SIGNED(b_col)); 50 | output_row_19 <= STD_LOGIC_VECTOR(SIGNED(input_row_19) + SIGNED(b_col)); 51 | output_row_20 <= STD_LOGIC_VECTOR(SIGNED(input_row_20) + SIGNED(b_col)); 52 | output_row_21 <= STD_LOGIC_VECTOR(SIGNED(input_row_21) + SIGNED(b_col)); 53 | output_row_22 <= STD_LOGIC_VECTOR(SIGNED(input_row_22) + SIGNED(b_col)); 54 | output_row_23 <= STD_LOGIC_VECTOR(SIGNED(input_row_23) + SIGNED(b_col)); 
55 | output_row_24 <= STD_LOGIC_VECTOR(SIGNED(input_row_24) + SIGNED(b_col)); 56 | output_row_25 <= STD_LOGIC_VECTOR(SIGNED(input_row_25) + SIGNED(b_col)); 57 | output_row_26 <= STD_LOGIC_VECTOR(SIGNED(input_row_26) + SIGNED(b_col)); 58 | output_row_27 <= STD_LOGIC_VECTOR(SIGNED(input_row_27) + SIGNED(b_col)); 59 | output_row_28 <= STD_LOGIC_VECTOR(SIGNED(input_row_28) + SIGNED(b_col)); 60 | output_row_29 <= STD_LOGIC_VECTOR(SIGNED(input_row_29) + SIGNED(b_col)); 61 | output_row_30 <= STD_LOGIC_VECTOR(SIGNED(input_row_30) + SIGNED(b_col)); 62 | output_row_31 <= STD_LOGIC_VECTOR(SIGNED(input_row_31) + SIGNED(b_col)); 63 | output_row_32 <= STD_LOGIC_VECTOR(SIGNED(input_row_32) + SIGNED(b_col)); 64 | output_row_33 <= STD_LOGIC_VECTOR(SIGNED(input_row_33) + SIGNED(b_col)); 65 | output_row_34 <= STD_LOGIC_VECTOR(SIGNED(input_row_34) + SIGNED(b_col)); 66 | output_row_35 <= STD_LOGIC_VECTOR(SIGNED(input_row_35) + SIGNED(b_col)); 67 | output_row_36 <= STD_LOGIC_VECTOR(SIGNED(input_row_36) + SIGNED(b_col)); 68 | output_row_37 <= STD_LOGIC_VECTOR(SIGNED(input_row_37) + SIGNED(b_col)); 69 | output_row_38 <= STD_LOGIC_VECTOR(SIGNED(input_row_38) + SIGNED(b_col)); 70 | output_row_39 <= STD_LOGIC_VECTOR(SIGNED(input_row_39) + SIGNED(b_col)); 71 | output_row_40 <= STD_LOGIC_VECTOR(SIGNED(input_row_40) + SIGNED(b_col)); 72 | output_row_41 <= STD_LOGIC_VECTOR(SIGNED(input_row_41) + SIGNED(b_col)); 73 | output_row_42 <= STD_LOGIC_VECTOR(SIGNED(input_row_42) + SIGNED(b_col)); 74 | output_row_43 <= STD_LOGIC_VECTOR(SIGNED(input_row_43) + SIGNED(b_col)); 75 | output_row_44 <= STD_LOGIC_VECTOR(SIGNED(input_row_44) + SIGNED(b_col)); 76 | output_row_45 <= STD_LOGIC_VECTOR(SIGNED(input_row_45) + SIGNED(b_col)); 77 | output_row_46 <= STD_LOGIC_VECTOR(SIGNED(input_row_46) + SIGNED(b_col)); 78 | output_row_47 <= STD_LOGIC_VECTOR(SIGNED(input_row_47) + SIGNED(b_col)); 79 | output_row_48 <= STD_LOGIC_VECTOR(SIGNED(input_row_48) + SIGNED(b_col)); 80 | output_row_49 <= 
LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- 64-to-1 multiplexer for N-bit words, built as a three-level tree of
-- mux_4to1_nbit_base cells:
--   level 0: SEL_mux(1 downto 0) picks one word inside each group of four inputs
--   level 1: SEL_mux(3 downto 2) picks one of four level-0 results per group
--   level 2: SEL_mux(5 downto 4) picks the final output
ENTITY mux_64to1_nbit IS
    GENERIC(
        N : POSITIVE := 2
    );
    PORT (
        I0,  I1,  I2,  I3,  I4,  I5,  I6,  I7,
        I8,  I9,  I10, I11, I12, I13, I14, I15,
        I16, I17, I18, I19, I20, I21, I22, I23,
        I24, I25, I26, I27, I28, I29, I30, I31,
        I32, I33, I34, I35, I36, I37, I38, I39,
        I40, I41, I42, I43, I44, I45, I46, I47,
        I48, I49, I50, I51, I52, I53, I54, I55,
        I56, I57, I58, I59, I60, I61, I62, I63 : IN  STD_LOGIC_VECTOR(N-1 downto 0);
        SEL_mux                                : IN  STD_LOGIC_VECTOR(5 downto 0);
        O                                      : OUT STD_LOGIC_VECTOR(N-1 downto 0)
    );
END mux_64to1_nbit;

ARCHITECTURE behaviour OF mux_64to1_nbit IS

    COMPONENT mux_4to1_nbit_base IS
        GENERIC ( N : POSITIVE := 1 );
        PORT (
            I0, I1, I2, I3 : IN  STD_LOGIC_VECTOR(N-1 downto 0);
            SEL            : IN  STD_LOGIC_VECTOR(1 downto 0);
            O              : OUT STD_LOGIC_VECTOR(N-1 downto 0)
        );
    END COMPONENT;

    -- Indexed view of the word buses so the tree can be described with loops.
    TYPE word_array IS ARRAY (NATURAL RANGE <>) OF STD_LOGIC_VECTOR(N-1 downto 0);

    SIGNAL leaf_in    : word_array(0 to 63);  -- I0..I63 in index order
    SIGNAL level0_out : word_array(0 to 15);  -- outputs of the 16 first-level muxes
    SIGNAL level1_out : word_array(0 to 3);   -- outputs of the 4 second-level muxes

BEGIN

    -- Collect the individual input ports into one indexable array.
    leaf_in <= (I0,  I1,  I2,  I3,  I4,  I5,  I6,  I7,
                I8,  I9,  I10, I11, I12, I13, I14, I15,
                I16, I17, I18, I19, I20, I21, I22, I23,
                I24, I25, I26, I27, I28, I29, I30, I31,
                I32, I33, I34, I35, I36, I37, I38, I39,
                I40, I41, I42, I43, I44, I45, I46, I47,
                I48, I49, I50, I51, I52, I53, I54, I55,
                I56, I57, I58, I59, I60, I61, I62, I63);

    -- Level 0: sixteen 4-to-1 muxes, all steered by the two low select bits.
    level0: FOR g IN 0 TO 15 GENERATE
        mux_0: mux_4to1_nbit_base
            GENERIC MAP (N => N)
            PORT MAP (I0  => leaf_in(4*g),
                      I1  => leaf_in(4*g + 1),
                      I2  => leaf_in(4*g + 2),
                      I3  => leaf_in(4*g + 3),
                      SEL => SEL_mux(1 downto 0),
                      O   => level0_out(g));
    END GENERATE level0;

    -- Level 1: four 4-to-1 muxes steered by the two middle select bits.
    level1: FOR g IN 0 TO 3 GENERATE
        mux_1: mux_4to1_nbit_base
            GENERIC MAP (N => N)
            PORT MAP (I0  => level0_out(4*g),
                      I1  => level0_out(4*g + 1),
                      I2  => level0_out(4*g + 2),
                      I3  => level0_out(4*g + 3),
                      SEL => SEL_mux(3 downto 2),
                      O   => level1_out(g));
    END GENERATE level1;

    -- Level 2: final 4-to-1 mux steered by the two high select bits.
    mux_2_out: mux_4to1_nbit_base
        GENERIC MAP (N => N)
        PORT MAP (I0  => level1_out(0),
                  I1  => level1_out(1),
                  I2  => level1_out(2),
                  I3  => level1_out(3),
                  SEL => SEL_mux(5 downto 4),
                  O   => O);

END behaviour;
LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- Adds the same 32-bit value b_col to each of eight 32-bit row inputs.
-- All additions are signed, purely combinational, and wrap at 32 bits.
ENTITY bias_sum_8_32 IS
    PORT (
        input_row_0, input_row_1, input_row_2, input_row_3,
        input_row_4, input_row_5, input_row_6, input_row_7     : IN  STD_LOGIC_VECTOR(31 downto 0);
        b_col                                                  : IN  STD_LOGIC_VECTOR(31 downto 0);
        output_row_0, output_row_1, output_row_2, output_row_3,
        output_row_4, output_row_5, output_row_6, output_row_7 : OUT STD_LOGIC_VECTOR(31 downto 0)
    );
END bias_sum_8_32;

ARCHITECTURE behaviour OF bias_sum_8_32 IS

    -- Signed sum of one row value and the bias, returned as a plain vector.
    FUNCTION with_bias(row, bias : STD_LOGIC_VECTOR) RETURN STD_LOGIC_VECTOR IS
    BEGIN
        RETURN STD_LOGIC_VECTOR(SIGNED(row) + SIGNED(bias));
    END FUNCTION with_bias;

BEGIN

    output_row_0 <= with_bias(input_row_0, b_col);
    output_row_1 <= with_bias(input_row_1, b_col);
    output_row_2 <= with_bias(input_row_2, b_col);
    output_row_3 <= with_bias(input_row_3, b_col);
    output_row_4 <= with_bias(input_row_4, b_col);
    output_row_5 <= with_bias(input_row_5, b_col);
    output_row_6 <= with_bias(input_row_6, b_col);
    output_row_7 <= with_bias(input_row_7, b_col);

END behaviour;
LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.std_logic_arith.all;

-- Wraps the MAC_8x8_8 component and passes its eight 32-bit row outputs
-- through the bias_sum_8_32 component, which receives the biases b_0..b_7.
ENTITY MAC_bias_base_8x8_8 IS
    PORT (
        input_row_0, input_row_1, input_row_2, input_row_3,
        input_row_4, input_row_5, input_row_6, input_row_7     : IN  STD_LOGIC_VECTOR(7 downto 0);
        input_col_0, input_col_1, input_col_2, input_col_3,
        input_col_4, input_col_5, input_col_6, input_col_7     : IN  STD_LOGIC_VECTOR(7 downto 0);
        b_0, b_1, b_2, b_3, b_4, b_5, b_6, b_7                 : IN  STD_LOGIC_VECTOR(31 downto 0);
        SEL_mux                                                : IN  STD_LOGIC_VECTOR(2 downto 0);
        CLK, RST_n, ENABLE                                     : IN  STD_LOGIC;
        output_row_0, output_row_1, output_row_2, output_row_3,
        output_row_4, output_row_5, output_row_6, output_row_7 : OUT STD_LOGIC_VECTOR(31 downto 0)
    );
END MAC_bias_base_8x8_8;

ARCHITECTURE behaviour OF MAC_bias_base_8x8_8 IS

    COMPONENT MAC_8x8_8 IS
        PORT (
            input_row_0, input_row_1, input_row_2, input_row_3,
            input_row_4, input_row_5, input_row_6, input_row_7     : IN  STD_LOGIC_VECTOR(7 downto 0);
            input_col_0, input_col_1, input_col_2, input_col_3,
            input_col_4, input_col_5, input_col_6, input_col_7     : IN  STD_LOGIC_VECTOR(7 downto 0);
            SEL_mux                                                : IN  STD_LOGIC_VECTOR(2 downto 0);
            CLK, RST_n, ENABLE                                     : IN  STD_LOGIC;
            output_row_0, output_row_1, output_row_2, output_row_3,
            output_row_4, output_row_5, output_row_6, output_row_7 : OUT STD_LOGIC_VECTOR(31 downto 0)
        );
    END COMPONENT;

    -- NOTE(review): this declaration expects one bias per row (b_0..b_7).
    -- Confirm that the bias_sum_8_32 entity compiled into the work library
    -- exposes these ports rather than a single shared bias input.
    COMPONENT bias_sum_8_32 IS
        PORT (
            input_row_0, input_row_1, input_row_2, input_row_3,
            input_row_4, input_row_5, input_row_6, input_row_7     : IN  STD_LOGIC_VECTOR(31 downto 0);
            b_0, b_1, b_2, b_3, b_4, b_5, b_6, b_7                 : IN  STD_LOGIC_VECTOR(31 downto 0);
            output_row_0, output_row_1, output_row_2, output_row_3,
            output_row_4, output_row_5, output_row_6, output_row_7 : OUT STD_LOGIC_VECTOR(31 downto 0)
        );
    END COMPONENT;

    -- Row results of the MAC stage before the bias is added.
    SIGNAL output_row_MAC_base_0 : STD_LOGIC_VECTOR(31 downto 0);
    SIGNAL output_row_MAC_base_1 : STD_LOGIC_VECTOR(31 downto 0);
    SIGNAL output_row_MAC_base_2 : STD_LOGIC_VECTOR(31 downto 0);
    SIGNAL output_row_MAC_base_3 : STD_LOGIC_VECTOR(31 downto 0);
    SIGNAL output_row_MAC_base_4 : STD_LOGIC_VECTOR(31 downto 0);
    SIGNAL output_row_MAC_base_5 : STD_LOGIC_VECTOR(31 downto 0);
    SIGNAL output_row_MAC_base_6 : STD_LOGIC_VECTOR(31 downto 0);
    SIGNAL output_row_MAC_base_7 : STD_LOGIC_VECTOR(31 downto 0);

BEGIN

    MAC_base: MAC_8x8_8
        PORT MAP (
            CLK => CLK, RST_n => RST_n, ENABLE => ENABLE,
            input_row_0 => input_row_0, input_row_1 => input_row_1,
            input_row_2 => input_row_2, input_row_3 => input_row_3,
            input_row_4 => input_row_4, input_row_5 => input_row_5,
            input_row_6 => input_row_6, input_row_7 => input_row_7,
            input_col_0 => input_col_0, input_col_1 => input_col_1,
            input_col_2 => input_col_2, input_col_3 => input_col_3,
            input_col_4 => input_col_4, input_col_5 => input_col_5,
            input_col_6 => input_col_6, input_col_7 => input_col_7,
            output_row_0 => output_row_MAC_base_0, output_row_1 => output_row_MAC_base_1,
            output_row_2 => output_row_MAC_base_2, output_row_3 => output_row_MAC_base_3,
            output_row_4 => output_row_MAC_base_4, output_row_5 => output_row_MAC_base_5,
            output_row_6 => output_row_MAC_base_6, output_row_7 => output_row_MAC_base_7,
            SEL_mux => SEL_mux
        );

    -- The bias adder has no clock/reset/enable ports in its component
    -- declaration; it takes the MAC row results plus the per-row biases.
    bias_sum: bias_sum_8_32
        PORT MAP (
            input_row_0 => output_row_MAC_base_0, input_row_1 => output_row_MAC_base_1,
            input_row_2 => output_row_MAC_base_2, input_row_3 => output_row_MAC_base_3,
            input_row_4 => output_row_MAC_base_4, input_row_5 => output_row_MAC_base_5,
            input_row_6 => output_row_MAC_base_6, input_row_7 => output_row_MAC_base_7,
            b_0 => b_0, b_1 => b_1, b_2 => b_2, b_3 => b_3,
            b_4 => b_4, b_5 => b_5, b_6 => b_6, b_7 => b_7,
            output_row_0 => output_row_0, output_row_1 => output_row_1,
            output_row_2 => output_row_2, output_row_3 => output_row_3,
            output_row_4 => output_row_4, output_row_5 => output_row_5,
            output_row_6 => output_row_6, output_row_7 => output_row_7
        );

END behaviour;
LIBRARY ieee;
USE ieee.std_logic_1164.all;
-- numeric_std (as used by the other bias_sum_* units) gives a single signed "+";
-- with std_logic_arith the operand of the STD_LOGIC_VECTOR(...) conversion can
-- resolve to more than one "+" overload and be rejected as ambiguous.
USE ieee.numeric_std.all;

-- Adds a separate 32-bit bias b_i to each of sixteen 32-bit row inputs.
-- All additions are signed, purely combinational, and wrap at 32 bits.
ENTITY bias_sum_16_32 IS
    PORT (
        input_row_0,  input_row_1,  input_row_2,  input_row_3,
        input_row_4,  input_row_5,  input_row_6,  input_row_7,
        input_row_8,  input_row_9,  input_row_10, input_row_11,
        input_row_12, input_row_13, input_row_14, input_row_15     : IN  STD_LOGIC_VECTOR(31 downto 0);
        b_0,  b_1,  b_2,  b_3,  b_4,  b_5,  b_6,  b_7,
        b_8,  b_9,  b_10, b_11, b_12, b_13, b_14, b_15             : IN  STD_LOGIC_VECTOR(31 downto 0);
        output_row_0,  output_row_1,  output_row_2,  output_row_3,
        output_row_4,  output_row_5,  output_row_6,  output_row_7,
        output_row_8,  output_row_9,  output_row_10, output_row_11,
        output_row_12, output_row_13, output_row_14, output_row_15 : OUT STD_LOGIC_VECTOR(31 downto 0)
    );
END bias_sum_16_32;

ARCHITECTURE behaviour OF bias_sum_16_32 IS
BEGIN

    output_row_0  <= STD_LOGIC_VECTOR(SIGNED(input_row_0)  + SIGNED(b_0));
    output_row_1  <= STD_LOGIC_VECTOR(SIGNED(input_row_1)  + SIGNED(b_1));
    output_row_2  <= STD_LOGIC_VECTOR(SIGNED(input_row_2)  + SIGNED(b_2));
    output_row_3  <= STD_LOGIC_VECTOR(SIGNED(input_row_3)  + SIGNED(b_3));
    output_row_4  <= STD_LOGIC_VECTOR(SIGNED(input_row_4)  + SIGNED(b_4));
    output_row_5  <= STD_LOGIC_VECTOR(SIGNED(input_row_5)  + SIGNED(b_5));
    output_row_6  <= STD_LOGIC_VECTOR(SIGNED(input_row_6)  + SIGNED(b_6));
    output_row_7  <= STD_LOGIC_VECTOR(SIGNED(input_row_7)  + SIGNED(b_7));
    output_row_8  <= STD_LOGIC_VECTOR(SIGNED(input_row_8)  + SIGNED(b_8));
    output_row_9  <= STD_LOGIC_VECTOR(SIGNED(input_row_9)  + SIGNED(b_9));
    output_row_10 <= STD_LOGIC_VECTOR(SIGNED(input_row_10) + SIGNED(b_10));
    output_row_11 <= STD_LOGIC_VECTOR(SIGNED(input_row_11) + SIGNED(b_11));
    output_row_12 <= STD_LOGIC_VECTOR(SIGNED(input_row_12) + SIGNED(b_12));
    output_row_13 <= STD_LOGIC_VECTOR(SIGNED(input_row_13) + SIGNED(b_13));
    output_row_14 <= STD_LOGIC_VECTOR(SIGNED(input_row_14) + SIGNED(b_14));
    output_row_15 <= STD_LOGIC_VECTOR(SIGNED(input_row_15) + SIGNED(b_15));

END behaviour;
LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- Multiply-accumulate cell: the signed product of data_in_A and data_in_B,
-- resized to acc_size bits, is added to the accumulator register. The
-- register is a regnbit instance controlled by CLK, RST_n and ENABLE, and
-- its content is driven on data_out.
ENTITY MAC IS
    GENERIC (
        data_size : POSITIVE := 2;
        acc_size  : POSITIVE := 16
    );
    PORT (
        data_in_A, data_in_B : IN  STD_LOGIC_VECTOR(data_size-1 downto 0);
        CLK, RST_n, ENABLE   : IN  STD_LOGIC;
        data_out             : OUT STD_LOGIC_VECTOR(acc_size-1 downto 0)
    );
END MAC;

ARCHITECTURE behaviour OF MAC IS

    COMPONENT regnbit IS
        GENERIC ( N : POSITIVE := 2 );
        PORT (
            D                  : IN  STD_LOGIC_VECTOR(N-1 downto 0);
            CLK, RST_n, ENABLE : IN  STD_LOGIC;
            Q                  : OUT STD_LOGIC_VECTOR(N-1 downto 0)
        );
    END COMPONENT;

    SIGNAL acc_value : STD_LOGIC_VECTOR(acc_size-1 downto 0);  -- registered accumulator
    SIGNAL acc_next  : STD_LOGIC_VECTOR(acc_size-1 downto 0);  -- value loaded on the next enabled edge

BEGIN

    -- Next accumulator value = resized signed product + current accumulator.
    acc_next <= STD_LOGIC_VECTOR(
                    resize(SIGNED(data_in_A) * SIGNED(data_in_B), acc_size)
                    + SIGNED(acc_value));

    acc_reg: regnbit
        GENERIC MAP (N => acc_size)
        PORT MAP (D => acc_next, CLK => CLK, RST_n => RST_n, ENABLE => ENABLE, Q => acc_value);

    data_out <= acc_value;

END behaviour;
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;
use ieee.std_logic_unsigned.all;

-- Simulation-only stimulus source: a free-running clock with period Ts and
-- an active-low reset that is released 3*Ts/2 after the start of simulation.
entity clk_gen is
  port (
    CLK   : out std_logic;
    RST_n : out std_logic);
end clk_gen;

architecture beh of clk_gen is

  constant Ts : time := 10 ns;   -- clock period

  signal CLK_i : std_logic;      -- internal clock, starts uninitialised ('U')

begin  -- beh

  -- The first pass replaces the initial 'U' with '0'; every later pass
  -- toggles the clock. One pass runs every half period.
  clock_driver : process
  begin
    if CLK_i /= 'U' then
      CLK_i <= not CLK_i;
    else
      CLK_i <= '0';
    end if;
    wait for Ts/2;
  end process clock_driver;

  CLK <= CLK_i;

  -- Reset is low from time zero and goes high once, after 3*Ts/2.
  RST_n <= '0', '1' after 3*Ts/2;

end beh;


LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- Single-bit register: asynchronous active-low reset to '0', loads D on a
-- CLK event with CLK = '1' while ENABLE = '1', and holds its value otherwise.
ENTITY ff IS
    PORT (
        D                  : IN  STD_LOGIC;
        CLK, RST_n, ENABLE : IN  STD_LOGIC;
        Q                  : OUT STD_LOGIC
    );
END ff;

ARCHITECTURE behaviour OF ff IS
BEGIN

    bit_reg: PROCESS(CLK, RST_n)
    BEGIN
        IF RST_n = '0' THEN
            -- reset has priority over the clock
            Q <= '0';
        ELSIF CLK'EVENT AND CLK = '1' THEN
            IF ENABLE = '1' THEN
                Q <= D;
            END IF;
        END IF;
    END PROCESS bit_reg;

END behaviour;
LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- 2-to-1 multiplexer for N-bit words. SEL = '0' selects I0, SEL = '1'
-- selects I1; any other SEL value drives all zeros.
ENTITY mux_2to1_nbit_base IS
    GENERIC ( N : POSITIVE := 1 );
    PORT (
        I0, I1 : IN  STD_LOGIC_VECTOR(N-1 downto 0);
        SEL    : IN  STD_LOGIC;
        O      : OUT STD_LOGIC_VECTOR(N-1 downto 0)
    );
END mux_2to1_nbit_base;

ARCHITECTURE behaviour OF mux_2to1_nbit_base IS
BEGIN

    WITH SEL SELECT
        O <= I0              WHEN '0',
             I1              WHEN '1',
             (OTHERS => '0') WHEN OTHERS;

END behaviour;


LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- 4-to-1 multiplexer for N-bit words. SEL "00".."11" selects I0..I3; any
-- other SEL value drives all zeros.
ENTITY mux_4to1_nbit_base IS
    GENERIC ( N : POSITIVE := 1 );
    PORT (
        I0, I1, I2, I3 : IN  STD_LOGIC_VECTOR(N-1 downto 0);
        SEL            : IN  STD_LOGIC_VECTOR(1 downto 0);
        O              : OUT STD_LOGIC_VECTOR(N-1 downto 0)
    );
END mux_4to1_nbit_base;

ARCHITECTURE behaviour OF mux_4to1_nbit_base IS
BEGIN

    WITH SEL SELECT
        O <= I0              WHEN "00",
             I1              WHEN "01",
             I2              WHEN "10",
             I3              WHEN "11",
             (OTHERS => '0') WHEN OTHERS;

END behaviour;


LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- N-bit register: asynchronous active-low reset to all zeros, loads D on a
-- CLK event with CLK = '1' while ENABLE = '1', and holds its value otherwise.
ENTITY regnbit IS
    GENERIC ( N : POSITIVE := 2 );
    PORT (
        D                  : IN  STD_LOGIC_VECTOR(N-1 downto 0);
        CLK, RST_n, ENABLE : IN  STD_LOGIC;
        Q                  : OUT STD_LOGIC_VECTOR(N-1 downto 0)
    );
END regnbit;

ARCHITECTURE behaviour OF regnbit IS
BEGIN

    word_reg: PROCESS(CLK, RST_n)
    BEGIN
        IF RST_n = '0' THEN
            -- reset has priority over the clock
            Q <= (OTHERS => '0');
        ELSIF CLK'EVENT AND CLK = '1' THEN
            IF ENABLE = '1' THEN
                Q <= D;
            END IF;
        END IF;
    END PROCESS word_reg;

END behaviour;
LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- Takes the absolute value of q_in_erf, clips it to q_max, feeds the result
-- through I_POLY (with q_b and q_c), then negates the polynomial output
-- when q_in_erf was negative.
ENTITY I_ERF IS
    PORT (
        q_in_erf        : IN  STD_LOGIC_VECTOR(31 downto 0);
        q_b, q_c, q_max : IN  STD_LOGIC_VECTOR(31 downto 0);
        q_out_erf       : OUT STD_LOGIC_VECTOR(31 downto 0)
    );
END I_ERF;

ARCHITECTURE behaviour OF I_ERF IS

    COMPONENT I_POLY IS
        GENERIC ( bits_choice : NATURAL := 0 );
        PORT (
            q, q_b, q_c : IN  STD_LOGIC_VECTOR(31 downto 0);
            q_out       : OUT STD_LOGIC_VECTOR(31 downto 0)
        );
    END COMPONENT;

    SIGNAL q_abs, q_clip : STD_LOGIC_VECTOR(31 downto 0);
    SIGNAL q_l           : STD_LOGIC_VECTOR(31 downto 0);
    SIGNAL q_sgn         : STD_LOGIC;

BEGIN

    -- Sign bit of the input, used to restore the sign after the polynomial.
    q_sgn <= q_in_erf(31);

    q_abs <= STD_LOGIC_VECTOR(abs(SIGNED(q_in_erf)));

    -- Magnitude comparison written as an explicit UNSIGNED compare instead of
    -- the implicit ordering on STD_LOGIC_VECTOR; for 0/1 operands of equal
    -- length both give the same result.
    q_clip <= q_max WHEN UNSIGNED(q_abs) > UNSIGNED(q_max) ELSE q_abs;

    poly_unit: I_POLY
        GENERIC MAP (bits_choice => 0)
        PORT MAP (q => q_clip, q_b => q_b, q_c => q_c, q_out => q_l);

    q_out_erf <= q_l WHEN q_sgn = '0' ELSE STD_LOGIC_VECTOR(-SIGNED(q_l));

END behaviour;
LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- Data path, in order:
--   z      = sign-restored quotient |q_in_exp| / |q_ln2_neg_inv|
--   q_p    = q_in_exp + (z * q_ln2)(31+bits_choice downto bits_choice)
--   q_l    = I_POLY_exp(q_p, q_b, q_c)
--   output = q_l shifted right by z when z > 0, otherwise shifted left by |z|
ENTITY I_EXP IS
    GENERIC ( bits_choice : NATURAL := 0 );
    PORT (
        q_in_exp                       : IN  STD_LOGIC_VECTOR(31 downto 0);
        q_b, q_c, q_ln2, q_ln2_neg_inv : IN  STD_LOGIC_VECTOR(31 downto 0);
        q_out_exp                      : OUT STD_LOGIC_VECTOR(31 downto 0)
    );
END I_EXP;

ARCHITECTURE behaviour OF I_EXP IS

    COMPONENT I_POLY_exp IS
        GENERIC ( bits_choice : NATURAL := 0 );
        PORT (
            q, q_b, q_c : IN  STD_LOGIC_VECTOR(31 downto 0);
            q_out       : OUT STD_LOGIC_VECTOR(31 downto 0)
        );
    END COMPONENT;

    SIGNAL z_is_neg   : STD_LOGIC;                                               -- sign of the quotient
    SIGNAL z_mag      : STD_LOGIC_VECTOR(31 downto 0) := (OTHERS => '0');        -- unsigned quotient
    SIGNAL z          : STD_LOGIC_VECTOR(31 downto 0) := (OTHERS => '0');        -- signed quotient
    SIGNAL z_ln2_full : STD_LOGIC_VECTOR(63 downto 0) := (OTHERS => '0');        -- z * q_ln2, full width
    SIGNAL z_ln2      : STD_LOGIC_VECTOR(31 downto 0) := (OTHERS => '0');        -- selected 32-bit window
    SIGNAL q_p        : STD_LOGIC_VECTOR(31 downto 0) := (OTHERS => '0');        -- polynomial argument
    SIGNAL q_l        : STD_LOGIC_VECTOR(31 downto 0) := (OTHERS => '0');        -- polynomial result

BEGIN

    -- Sign-magnitude division: divide the absolute values, then restore the sign.
    z_is_neg <= q_in_exp(31) XOR q_ln2_neg_inv(31);
    z_mag    <= STD_LOGIC_VECTOR(UNSIGNED(abs(SIGNED(q_in_exp))) / UNSIGNED(abs(SIGNED(q_ln2_neg_inv))));
    z        <= z_mag WHEN z_is_neg = '0' ELSE STD_LOGIC_VECTOR(-SIGNED(z_mag));

    -- bits_choice selects which 32-bit window of the 64-bit product is kept.
    z_ln2_full <= STD_LOGIC_VECTOR(SIGNED(z) * SIGNED(q_ln2));
    z_ln2      <= z_ln2_full(31 + bits_choice downto bits_choice);

    q_p <= STD_LOGIC_VECTOR(SIGNED(q_in_exp) + SIGNED(z_ln2));

    poly: I_POLY_exp
        GENERIC MAP (bits_choice => bits_choice)
        PORT MAP (q => q_p, q_b => q_b, q_c => q_c, q_out => q_l);

    -- Arithmetic shift of the polynomial result by the signed amount z.
    q_out_exp <= STD_LOGIC_VECTOR(shift_right(SIGNED(q_l), TO_INTEGER(UNSIGNED(z))))
                     WHEN to_integer(signed(z)) > 0
                 ELSE STD_LOGIC_VECTOR(shift_left(SIGNED(q_l), TO_INTEGER(UNSIGNED(abs(SIGNED(z))))));

END behaviour;
LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- Computes (I_ERF(q_in_gelu) + q_1) * q_in_gelu with signed arithmetic and
-- outputs the low 32 bits of the 64-bit product.
ENTITY I_GELU IS
    PORT (
        q_in_gelu            : IN  STD_LOGIC_VECTOR(31 downto 0);
        q_b, q_c, q_max, q_1 : IN  STD_LOGIC_VECTOR(31 downto 0);
        q_out_gelu           : OUT STD_LOGIC_VECTOR(31 downto 0)
    );
END I_GELU;

ARCHITECTURE behaviour OF I_GELU IS

    COMPONENT I_ERF IS
        PORT (
            q_in_erf        : IN  STD_LOGIC_VECTOR(31 downto 0);
            q_b, q_c, q_max : IN  STD_LOGIC_VECTOR(31 downto 0);
            q_out_erf       : OUT STD_LOGIC_VECTOR(31 downto 0)
        );
    END COMPONENT;

    SIGNAL erf_result   : STD_LOGIC_VECTOR(31 downto 0);  -- output of the I_ERF unit
    SIGNAL erf_plus_one : SIGNED(31 downto 0);            -- erf_result + q_1
    SIGNAL full_product : SIGNED(63 downto 0);            -- erf_plus_one * q_in_gelu

BEGIN

    erf_unit: I_ERF
        PORT MAP (q_in_erf => q_in_gelu, q_b => q_b, q_c => q_c, q_max => q_max,
                  q_out_erf => erf_result);

    erf_plus_one <= SIGNED(erf_result) + SIGNED(q_1);

    full_product <= erf_plus_one * SIGNED(q_in_gelu);

    -- Plain truncation: only bits 31..0 of the product are kept.
    q_out_gelu <= STD_LOGIC_VECTOR(full_product(31 downto 0));

END behaviour;
LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- Combinational polynomial: q_out = ((q + q_b) * (q + q_b))[window] + q_c,
-- where the window is bits (31+bits_choice downto bits_choice) of the
-- 64-bit signed product.
ENTITY I_POLY IS
    GENERIC ( bits_choice : NATURAL := 0 );
    PORT (
        q, q_b, q_c : IN  STD_LOGIC_VECTOR(31 downto 0);
        q_out       : OUT STD_LOGIC_VECTOR(31 downto 0)
    );
END I_POLY;

ARCHITECTURE behaviour OF I_POLY IS
BEGIN

    poly_eval: PROCESS(q, q_b, q_c)
        VARIABLE shifted_q : SIGNED(31 downto 0);  -- q + q_b
        VARIABLE squared   : SIGNED(63 downto 0);  -- shifted_q * shifted_q
    BEGIN
        shifted_q := SIGNED(q) + SIGNED(q_b);
        squared   := shifted_q * shifted_q;
        q_out     <= STD_LOGIC_VECTOR(squared(31 + bits_choice downto bits_choice) + SIGNED(q_c));
    END PROCESS poly_eval;

END behaviour;


LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- Combinational polynomial: q_out = ((q + q_b) * q)[window] + q_c, where
-- the window is bits (31+bits_choice downto bits_choice) of the 64-bit
-- signed product.
ENTITY I_POLY_exp IS
    GENERIC ( bits_choice : NATURAL := 0 );
    PORT (
        q, q_b, q_c : IN  STD_LOGIC_VECTOR(31 downto 0);
        q_out       : OUT STD_LOGIC_VECTOR(31 downto 0)
    );
END I_POLY_exp;

ARCHITECTURE behaviour OF I_POLY_exp IS
BEGIN

    poly_eval: PROCESS(q, q_b, q_c)
        VARIABLE shifted_q : SIGNED(31 downto 0);  -- q + q_b
        VARIABLE product   : SIGNED(63 downto 0);  -- shifted_q * q
    BEGIN
        shifted_q := SIGNED(q) + SIGNED(q_b);
        product   := shifted_q * SIGNED(q);
        q_out     <= STD_LOGIC_VECTOR(product(31 + bits_choice downto bits_choice) + SIGNED(q_c));
    END PROCESS poly_eval;

END behaviour;
LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- Keeps a running signed maximum of q_in_soft (while EN_max = '1') and a
-- running sum of the I_EXP outputs (while EN_acc = '1'). The output is the
-- sign-restored quotient of the current I_EXP output and the running sum,
-- where I_EXP is fed with q_in_soft - q_max.
ENTITY I_SOFTMAX IS
    PORT (
        q_in_soft                      : IN  STD_LOGIC_VECTOR(31 downto 0);
        q_b, q_c, q_ln2, q_ln2_neg_inv : IN  STD_LOGIC_VECTOR(31 downto 0);
        CLK, RST_n, EN_max, EN_acc     : IN  STD_LOGIC;
        q_out_soft                     : OUT STD_LOGIC_VECTOR(31 downto 0)
    );
END I_SOFTMAX;

ARCHITECTURE behaviour OF I_SOFTMAX IS

    COMPONENT I_EXP IS
        GENERIC ( bits_choice : NATURAL := 0 );
        PORT (
            q_in_exp                       : IN  STD_LOGIC_VECTOR(31 downto 0);
            q_b, q_c, q_ln2, q_ln2_neg_inv : IN  STD_LOGIC_VECTOR(31 downto 0);
            q_out_exp                      : OUT STD_LOGIC_VECTOR(31 downto 0)
        );
    END COMPONENT;

    SIGNAL q_max        : STD_LOGIC_VECTOR(31 downto 0);  -- running maximum register
    SIGNAL acc          : STD_LOGIC_VECTOR(31 downto 0);  -- running sum register
    SIGNAL q_diff       : STD_LOGIC_VECTOR(31 downto 0);  -- q_in_soft - q_max
    SIGNAL q_exp        : STD_LOGIC_VECTOR(31 downto 0);  -- I_EXP output
    SIGNAL quotient_mag : STD_LOGIC_VECTOR(31 downto 0);  -- |q_exp| / |acc|
    SIGNAL quotient_neg : STD_LOGIC;                      -- '1' when the quotient is negative

BEGIN

    -- Running maximum: replaced only by a strictly larger signed input.
    max_reg: PROCESS(CLK, RST_n)
    BEGIN
        IF RST_n = '0' THEN
            q_max <= (OTHERS => '0');
        ELSIF CLK'EVENT AND CLK = '1' THEN
            IF EN_max = '1' AND SIGNED(q_in_soft) > SIGNED(q_max) THEN
                q_max <= q_in_soft;
            END IF;
        END IF;
    END PROCESS max_reg;

    -- Running sum of the exponent unit outputs.
    acc_reg: PROCESS(CLK, RST_n)
    BEGIN
        IF RST_n = '0' THEN
            acc <= (OTHERS => '0');
        ELSIF CLK'EVENT AND CLK = '1' THEN
            IF EN_acc = '1' THEN
                acc <= STD_LOGIC_VECTOR(SIGNED(acc) + SIGNED(q_exp));
            END IF;
        END IF;
    END PROCESS acc_reg;

    q_diff <= STD_LOGIC_VECTOR(SIGNED(q_in_soft) - SIGNED(q_max));

    exp_unit: I_EXP
        GENERIC MAP (bits_choice => 0)
        PORT MAP (q_in_exp => q_diff, q_b => q_b, q_c => q_c, q_ln2 => q_ln2,
                  q_ln2_neg_inv => q_ln2_neg_inv, q_out_exp => q_exp);

    -- Sign-magnitude division: divide the absolute values, then restore the sign.
    -- NOTE(review): acc is zero after reset, so the divisor is zero until
    -- EN_acc has accumulated a non-zero value.
    quotient_neg <= q_exp(31) XOR acc(31);
    quotient_mag <= STD_LOGIC_VECTOR(UNSIGNED(abs(SIGNED(q_exp))) / UNSIGNED(abs(SIGNED(acc))));

    q_out_soft <= quotient_mag WHEN quotient_neg = '0'
                  ELSE STD_LOGIC_VECTOR(-SIGNED(quotient_mag));

END behaviour;
LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- Iterative square-root data path.
--   x_i    : current estimate register, loaded every clock with x_0 (SEL = '0')
--            or with x_next (SEL = '1')
--   x_next : (x_i + n / x_i) / 2, the division done in sign-magnitude form
-- While EN = '1', on each clock:
--   n = 0            -> out_sqrt = 0,   VAL = '0', Z = '1'
--   x_next >= x_i    -> out_sqrt = x_i, VAL = '1', Z = '0'
--   otherwise        -> out_sqrt held,  VAL = '0', Z = '0'
ENTITY I_SQRT IS
    PORT (
        n                   : IN  STD_LOGIC_VECTOR(31 downto 0);
        CLK, RST_n, EN, SEL : IN  STD_LOGIC;
        VAL, Z              : OUT STD_LOGIC;
        out_sqrt            : OUT STD_LOGIC_VECTOR(31 downto 0)
    );
END I_SQRT;

ARCHITECTURE behaviour OF I_SQRT IS

    SIGNAL x_0, x_i_mux, x_i, x_next : STD_LOGIC_VECTOR(31 downto 0);
    SIGNAL sum                       : STD_LOGIC_VECTOR(32 downto 0);
    SIGNAL div_n_xi, div_n_xi_tmp    : STD_LOGIC_VECTOR(31 downto 0);
    SIGNAL sgn_out                   : STD_LOGIC;

BEGIN

    -- x0 = 2**(Bits(n)/2) = 2**(32/2) = 2**16 = 65536
    x_0 <= x"00010000";

    x_i_mux <= x_0 WHEN SEL = '0' ELSE x_next;

    -- Sign-magnitude division n / x_i.
    -- NOTE(review): x_i is zero after reset, so the divisor is zero until
    -- the first clock edge loads x_i_mux.
    sgn_out      <= n(31) XOR x_i(31);
    div_n_xi_tmp <= STD_LOGIC_VECTOR(UNSIGNED(abs(SIGNED(n))) / UNSIGNED(abs(SIGNED(x_i))));
    div_n_xi     <= div_n_xi_tmp WHEN sgn_out = '0' ELSE STD_LOGIC_VECTOR(-SIGNED(div_n_xi_tmp));

    -- 33-bit sign-extended sum; dropping bit 0 halves it.
    sum    <= STD_LOGIC_VECTOR(resize(SIGNED(x_i), 33) + resize(SIGNED(div_n_xi), 33));
    x_next <= sum(32 downto 1);

    PROCESS(CLK, RST_n)
    BEGIN
        IF RST_n = '0' THEN
            out_sqrt <= (OTHERS => '0');
            x_i      <= (OTHERS => '0');
            VAL      <= '0';
            Z        <= '0';
        ELSIF CLK'EVENT AND CLK = '1' THEN
            IF EN = '1' THEN
                IF n = x"00000000" THEN
                    out_sqrt <= (OTHERS => '0');
                    VAL      <= '0';
                    Z        <= '1';
                -- Explicit UNSIGNED compare instead of the implicit ordering on
                -- STD_LOGIC_VECTOR; same result for 0/1 operands of equal length.
                ELSIF UNSIGNED(x_next) >= UNSIGNED(x_i) THEN
                    out_sqrt <= x_i;
                    VAL      <= '1';
                    Z        <= '0';
                ELSE
                    VAL <= '0';
                    Z   <= '0';
                END IF;
            END IF;
            -- The estimate register is updated on every clock, independent of EN.
            x_i <= x_i_mux;
        END IF;
    END PROCESS;

END behaviour;
LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- Iterative square-root data path built from regnbit/ff register components.
--   x_i    : current estimate register (always enabled), loaded with x_0
--            (SEL = '0') or with x_next (SEL = '1')
--   x_next : (x_i + n / x_i) / 2, using a signed division
-- A combinational process selects the values registered (when EN = '1')
-- into out_sqrt, VAL and Z:
--   n = 0            -> sqrt = 0,   val = '0', z = '1'
--   x_next >= x_i    -> sqrt = x_i, val = '1', z = '0'
--   otherwise        -> sqrt = 0,   val = '0', z = '0'
ENTITY I_SQRT_2 IS
    PORT (
        n                   : IN  STD_LOGIC_VECTOR(31 downto 0);
        CLK, RST_n, EN, SEL : IN  STD_LOGIC;
        VAL, Z              : OUT STD_LOGIC;
        out_sqrt            : OUT STD_LOGIC_VECTOR(31 downto 0)
    );
END I_SQRT_2;

ARCHITECTURE behaviour OF I_SQRT_2 IS

    COMPONENT regnbit IS
        GENERIC ( N : POSITIVE := 2 );
        PORT (
            D                  : IN  STD_LOGIC_VECTOR(N-1 downto 0);
            CLK, RST_n, ENABLE : IN  STD_LOGIC;
            Q                  : OUT STD_LOGIC_VECTOR(N-1 downto 0)
        );
    END COMPONENT;

    COMPONENT ff IS
        PORT (
            D                  : IN  STD_LOGIC;
            CLK, RST_n, ENABLE : IN  STD_LOGIC;
            Q                  : OUT STD_LOGIC
        );
    END COMPONENT;

    SIGNAL x_0, x_i_mux, x_i, x_next, sqrt : STD_LOGIC_VECTOR(31 downto 0);
    SIGNAL val_comb, z_comb                : STD_LOGIC;
    SIGNAL sum                             : STD_LOGIC_VECTOR(32 downto 0);
    SIGNAL div_n_xi                        : STD_LOGIC_VECTOR(31 downto 0);

BEGIN

    -- x0 = 2**(Bits(n)/2) = 2**(32/2) = 2**16 = 65536
    x_0 <= x"00010000";

    x_i_mux <= x_0 WHEN SEL = '0' ELSE x_next;

    -- NOTE(review): x_i comes from a regnbit, whose reset value elsewhere in
    -- this project is zero; if so the divisor is zero until the first load.
    div_n_xi <= STD_LOGIC_VECTOR(SIGNED(n) / SIGNED(x_i));

    -- 33-bit sign-extended sum; dropping bit 0 halves it.
    sum    <= STD_LOGIC_VECTOR(resize(SIGNED(x_i), 33) + resize(SIGNED(div_n_xi), 33));
    x_next <= sum(32 downto 1);

    out_sqrt_reg: regnbit GENERIC MAP (N => 32)
        PORT MAP (CLK => CLK, RST_n => RST_n, ENABLE => EN, D => sqrt, Q => out_sqrt);
    x_i_reg: regnbit GENERIC MAP (N => 32)
        PORT MAP (CLK => CLK, RST_n => RST_n, ENABLE => '1', D => x_i_mux, Q => x_i);
    val_ff: ff
        PORT MAP (CLK => CLK, RST_n => RST_n, ENABLE => EN, D => val_comb, Q => VAL);
    z_ff: ff
        PORT MAP (CLK => CLK, RST_n => RST_n, ENABLE => EN, D => z_comb, Q => Z);

    PROCESS(n, x_next, x_i)
    BEGIN
        IF n = x"00000000" THEN
            sqrt     <= (OTHERS => '0');
            val_comb <= '0';
            z_comb   <= '1';
        -- Explicit UNSIGNED compare instead of the implicit ordering on
        -- STD_LOGIC_VECTOR; same result for 0/1 operands of equal length.
        ELSIF UNSIGNED(x_next) >= UNSIGNED(x_i) THEN
            sqrt     <= x_i;
            val_comb <= '1';
            z_comb   <= '0';
        ELSE
            sqrt     <= (OTHERS => '0');
            val_comb <= '0';
            z_comb   <= '0';
        END IF;
    END PROCESS;

END behaviour;
RST_n=>RST_n, ENABLE=>EN, D=>z_comb, Q=>Z); 54 | 55 | 56 | PROCESS(n, x_next, x_i) 57 | BEGIN 58 | IF(n=x"00000000") THEN 59 | sqrt <= (OTHERS=>'0'); 60 | val_comb <= '0'; 61 | z_comb <= '1'; 62 | ELSIF(x_next >= x_i) THEN 63 | sqrt <= x_i; 64 | val_comb <= '1'; 65 | z_comb <= '0'; 66 | ELSE 67 | val_comb <= '0'; 68 | z_comb <= '0'; 69 | sqrt <= (OTHERS=>'0'); 70 | END IF; 71 | END PROCESS; 72 | 73 | 74 | END behaviour; 75 | -------------------------------------------------------------------------------- /NonLinear/I_SQRT_test.vhd: -------------------------------------------------------------------------------- 1 | --LIBRARY ieee, DWARE; 2 | LIBRARY ieee; 3 | USE ieee.std_logic_1164.all; 4 | USE ieee.numeric_std.all; 5 | --USE DWARE.DW_Foundation_arith.all; 6 | 7 | ENTITY I_SQRT IS 8 | PORT( n : IN STD_LOGIC_VECTOR (31 downto 0); 9 | CLK, RST_n, EN, SEL: IN STD_LOGIC; 10 | VAL, Z: OUT STD_LOGIC; 11 | out_sqrt : OUT STD_LOGIC_VECTOR (31 downto 0) 12 | ); 13 | END I_SQRT; 14 | 15 | ARCHITECTURE behaviour OF I_SQRT IS 16 | 17 | SIGNAL n_bits_div: STD_LOGIC_VECTOR(4 downto 0); 18 | SIGNAL x_0, x_i_mux, x_i, x_next: STD_LOGIC_VECTOR(31 downto 0); 19 | SIGNAL sum: STD_LOGIC_VECTOR(32 downto 0); 20 | SIGNAL div_n_xi, div_n_xi_tmp: STD_LOGIC_VECTOR(31 downto 0); 21 | SIGNAL sgn_out : STD_LOGIC; 22 | 23 | component DW_div_func is 24 | generic (width : positive :=8 ); 25 | port ( a : in std_logic_vector(width-1 downto 0); 26 | b : in std_logic_vector(width-1 downto 0); 27 | quotient_tc: out std_logic_vector(width-1 downto 0)); 28 | end component; 29 | 30 | BEGIN 31 | 32 | -- x0 = 2**(Bits(n)/2)=2**(32/2)=2**16=65536 33 | x_0 <= x"00010000"; 34 | 35 | x_i_mux <= x_0 when (SEL='0') else x_next; 36 | 37 | sgn_out <= n(31) XOR x_i(31); 38 | div_n_xi_tmp <= STD_LOGIC_VECTOR(UNSIGNED(abs(SIGNED(n))) / UNSIGNED(abs(SIGNED(x_i)))); 39 | --div: DW_div_func generic map(width=>32) port map(a=>n, b=>x_i, quotient_tc=>div_n_xi); 40 | 41 | div_n_xi <= div_n_xi_tmp when (sgn_out='0') else 
STD_LOGIC_VECTOR(SIGNED(-SIGNED(div_n_xi_tmp)));

    sum <= STD_LOGIC_VECTOR(SIGNED(x_i(31) & x_i) + SIGNED(div_n_xi(31) & div_n_xi));
    --sum <= STD_LOGIC_VECTOR(SIGNED(x_i) + SIGNED(x_i));

    x_next <= sum(32 downto 1);

    --x_next <= sum;

    PROCESS(CLK, RST_n)
    BEGIN
        IF(RST_n='0') THEN
            out_sqrt <= (OTHERS=>'0');
            x_i <= (OTHERS=>'0');
            VAL <= '0';
            Z <= '0';
        ELSIF(CLK'EVENT AND CLK='1') THEN
            IF(EN = '1') THEN
                IF(n=x"00000000") THEN
                    out_sqrt <= (OTHERS=>'0');
                    VAL <= '0';
                    Z <= '1';
                ELSIF(x_next >= x_i) THEN
                    out_sqrt <= x_i;
                    VAL <= '1';
                    Z <= '0';
                ELSE
                    VAL <= '0';
                    Z <= '0';
                END IF;
            END IF;
            x_i <= x_i_mux;

        END IF;
    END PROCESS;





END behaviour;

--------------------------------------------------------------------------------
/NonLinear/LayerNorm.vhd:
--------------------------------------------------------------------------------
LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- LayerNorm: integer layer-normalisation datapath.
--
-- A single accumulator (enabled by EN_acc) sums either the raw input
-- (SEL='0') or the squared deviation from the stored average (SEL='1').
-- The accumulator divided by c is captured as the average (EN_avg) and is
-- also the operand of the square-root unit (EN_sqrt / SEL_sqrt), whose
-- result is captured as sigma (EN_sigma). The output is
-- (q_in_norm - avg) / sigma, computed combinationally.
--
-- bits_choice selects which 32-bit window of the 64-bit square is kept
-- (bits 31+bits_choice downto bits_choice), so it must not exceed 32.
-- VAL and Z are the flags of the square-root unit.
ENTITY LayerNorm IS
    GENERIC( bits_choice : NATURAL := 0);
    PORT( q_in_norm : IN STD_LOGIC_VECTOR (31 downto 0);
          c: IN STD_LOGIC_VECTOR(31 downto 0);
          CLK, RST_n, EN_acc, EN_avg, EN_sqrt, EN_sigma, SEL, SEL_sqrt: IN STD_LOGIC;
          VAL, Z: OUT STD_LOGIC;
          q_out_norm : OUT STD_LOGIC_VECTOR (31 downto 0)
    );
END LayerNorm;

ARCHITECTURE beh OF LayerNorm IS

    COMPONENT regnbit IS
        GENERIC ( N : POSITIVE := 2
        );
        PORT( D : IN STD_LOGIC_VECTOR(N-1 downto 0);
              CLK, RST_n, ENABLE : IN STD_LOGIC;
              Q : OUT STD_LOGIC_VECTOR(N-1 downto 0)
        );
    END COMPONENT;

    COMPONENT I_SQRT IS
        PORT( n : IN STD_LOGIC_VECTOR (31 downto 0);
              CLK, RST_n, EN, SEL: IN STD_LOGIC;
              VAL, Z: OUT STD_LOGIC;
              out_sqrt : OUT STD_LOGIC_VECTOR (31 downto 0)
        );
    END COMPONENT;

    -- Stand-in divisor used when the real divisor is zero.
    CONSTANT ONE_32 : STD_LOGIC_VECTOR(31 downto 0) := x"00000001";

    SIGNAL in_mux, sum, acc, acc_scaled, acc_scaled_tmp, out_sqrt, avg, sigma, square, diff, q_out_norm_tmp: STD_LOGIC_VECTOR(31 downto 0);
    SIGNAL c_div, sigma_div : STD_LOGIC_VECTOR(31 downto 0);
    SIGNAL square_tmp : STD_LOGIC_VECTOR(63 downto 0);
    SIGNAL sgn_out_acc, sgn_out_out: STD_LOGIC;

BEGIN

    -- The slice of square_tmp below needs 31+bits_choice <= 63.
    ASSERT bits_choice <= 32
        REPORT "LayerNorm: bits_choice must be in the range 0 to 32"
        SEVERITY FAILURE;

    in_mux <= q_in_norm when (SEL='0') else square;

    sum <= STD_LOGIC_VECTOR(SIGNED(in_mux) + SIGNED(acc));

    acc_reg: regnbit GENERIC MAP(N=>32) PORT MAP(CLK=>CLK, RST_n=>RST_n, ENABLE=>EN_acc, D=>sum, Q=>acc);

    -- A zero on c would make the divider below divide by zero (numeric_std
    -- reports an error for that). Divide by one instead.
    -- NOTE(review): the quotient for c = 0 is a placeholder, not a
    -- meaningful average; confirm the control unit never relies on it.
    c_div <= ONE_32 when (c = x"00000000") else c;

    --acc_scaled <= STD_LOGIC_VECTOR(SIGNED(acc) / SIGNED(c));
    -- Signed division built from an unsigned divider on the magnitudes.
    sgn_out_acc <= acc(31) XOR c(31);
    acc_scaled_tmp <= STD_LOGIC_VECTOR(UNSIGNED(abs(SIGNED(acc))) / UNSIGNED(abs(SIGNED(c_div))));

    acc_scaled <= acc_scaled_tmp when (sgn_out_acc='0') else STD_LOGIC_VECTOR(SIGNED(-SIGNED(acc_scaled_tmp)));

    avg_reg: regnbit GENERIC MAP(N=>32) PORT MAP(CLK=>CLK, RST_n=>RST_n, ENABLE=>EN_avg, D=>acc_scaled, Q=>avg);

    -- if sqrt has its CU change this
    sqrt_unit: I_SQRT PORT MAP(n=>acc_scaled, CLK=>CLK, RST_n=>RST_n, EN=>EN_sqrt, SEL=>SEL_sqrt, VAL=>VAL, Z=>Z, out_sqrt=>out_sqrt);

    sigma_reg: regnbit GENERIC MAP(N=>32) PORT MAP(CLK=>CLK, RST_n=>RST_n, ENABLE=>EN_sigma, D=>out_sqrt, Q=>sigma);

    diff <= STD_LOGIC_VECTOR(SIGNED(q_in_norm) - SIGNED(avg));

    square_tmp <= STD_LOGIC_VECTOR(SIGNED(diff) * SIGNED(diff));
    square <= square_tmp(31+bits_choice downto bits_choice);

    -- sigma is zero after reset (regnbit clears to zero) and whenever the
    -- square-root unit returns zero; divide by one in that case.
    -- NOTE(review): the output for sigma = 0 is a placeholder; confirm the
    -- intended behaviour for a zero standard deviation.
    sigma_div <= ONE_32 when (sigma = x"00000000") else sigma;

    --q_out_norm <= STD_LOGIC_VECTOR(SIGNED(diff) / SIGNED(sigma));
    sgn_out_out <= diff(31) XOR sigma(31);
    q_out_norm_tmp <= STD_LOGIC_VECTOR(UNSIGNED(abs(SIGNED(diff))) / UNSIGNED(abs(SIGNED(sigma_div))));

    q_out_norm <= q_out_norm_tmp when (sgn_out_out='0') else STD_LOGIC_VECTOR(SIGNED(-SIGNED(q_out_norm_tmp)));

END beh;





--------------------------------------------------------------------------------
/NonLinear/REQUANTIZATION.vhd:
--------------------------------------------------------------------------------
LIBRARY
ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- REQUANTIZATION: rescales a 32-bit value to 8 bits.
--
-- The signed product q_in_req * m (64 bits) is arithmetically shifted right
-- by the unsigned amount c, and the eight least-significant bits of the
-- shifted value are driven on q_out_req. Purely combinational.
-- NOTE(review): the upper bits are simply discarded, no saturation is
-- applied; confirm callers keep the scaled value within 8 bits.
ENTITY REQUANTIZATION IS
    PORT( q_in_req : IN STD_LOGIC_VECTOR (31 downto 0);
          m : IN STD_LOGIC_VECTOR(31 downto 0);
          c : IN STD_LOGIC_VECTOR(5 downto 0);
          q_out_req : OUT STD_LOGIC_VECTOR (7 downto 0)
    );
END REQUANTIZATION;

ARCHITECTURE beh of REQUANTIZATION is

    -- Full-width signed product and its right-shifted version.
    SIGNAL scaled, shifted : SIGNED(63 downto 0);

BEGIN

    scaled <= SIGNED(q_in_req) * SIGNED(m);

    shifted <= shift_right(scaled, TO_INTEGER(UNSIGNED(c)));

    q_out_req <= STD_LOGIC_VECTOR(shifted(7 downto 0));

END beh;




--------------------------------------------------------------------------------
/NonLinear/ff.vhd:
--------------------------------------------------------------------------------
LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

-- ff: single-bit register.
-- Q is cleared asynchronously while RST_n='0'; otherwise Q takes D on a
-- rising CLK edge when ENABLE='1' and holds its value when ENABLE is not '1'.
ENTITY ff IS
    PORT( D : IN STD_LOGIC;
          CLK, RST_n, ENABLE : IN STD_LOGIC;
          Q : OUT STD_LOGIC
    );
END ff;

ARCHITECTURE behaviour OF ff IS
BEGIN

    ff_reg: PROCESS(CLK, RST_n)
    BEGIN
        IF (RST_n = '0') THEN
            -- asynchronous clear
            Q <= '0';
        ELSIF (CLK'EVENT AND CLK = '1') THEN
            -- load only when enabled; otherwise keep the stored bit
            IF (ENABLE = '1') THEN
                Q <= D;
            END IF;
        END IF;
    END PROCESS ff_reg;

END behaviour;

--------------------------------------------------------------------------------
/NonLinear/regnbit.vhd:
--------------------------------------------------------------------------------
LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;

ENTITY regnbit IS
    GENERIC ( N : POSITIVE := 2
    );
    PORT( D : IN STD_LOGIC_VECTOR(N-1 downto 0);
          CLK, RST_n, ENABLE : IN STD_LOGIC;
          Q : OUT STD_LOGIC_VECTOR(N-1 downto 0)
    );
END regnbit;

ARCHITECTURE behaviour OF regnbit IS
BEGIN

    PROCESS(CLK, RST_n)
    BEGIN
        IF(RST_n='0') THEN
            Q <=
(OTHERS=>'0'); 21 | ELSIF(CLK'EVENT AND CLK='1') THEN 22 | IF(ENABLE='1') THEN 23 | Q <= D; 24 | END IF; 25 | END IF; 26 | END PROCESS; 27 | 28 | END behaviour; 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SwiftTron: An Efficient Hardware Accelerator for Quantized Transformers 2 | This repository provides the source code of the SwiftTron hardware design for Quantized Transformers. If you use this work in your research, please cite the paper: 3 | ``` 4 | A. Marchisio, D. Dura, M. Capra, M. Martina, G. Masera and M. Shafique, "SwiftTron: An Efficient Hardware Accelerator for Quantized Transformers," 2023 International Joint Conference on Neural Networks (IJCNN), Queensland, Australia, June 2023. 5 | ``` 6 | ``` 7 | @INPROCEEDINGS{Marchisio2023SwiftTron, 8 | author={A. {Marchisio} and D. {Dura} and M. {Capra} and M. {Martina} and G. {Masera} and M. {Shafique}}, 9 | booktitle={2023 International Joint Conference on Neural Networks (IJCNN)}, 10 | title={SwiftTron: An Efficient Hardware Accelerator for Quantized Transformers}, 11 | year={2023}, 12 | volume={}, 13 | number={}, 14 | pages={}} 15 | ``` 16 | --------------------------------------------------------------------------------