├── .gitignore
├── Functions
    ├── act.m
    ├── activation_bp.m
    ├── activation_ff.m
    ├── activation_init.m
    ├── dact.m
    └── loss_handle.m
├── LICENSE
├── Layer
    ├── activation
    │   ├── activation_bp.m
    │   ├── activation_ff.m
    │   ├── activation_init.m
    │   ├── activation_init_cpu.m
    │   └── activation_init_gpu.m
    ├── dense
    │   ├── dense_bp.m
    │   ├── dense_bp_gpu.m
    │   ├── dense_ff.m
    │   ├── dense_ff_gpu.m
    │   ├── dense_init_cpu.m
    │   └── dense_init_gpu.m
    ├── dropout
    │   ├── drop.m
    │   ├── dropout_bp.m
    │   ├── dropout_bp_gpu.m
    │   ├── dropout_ff.m
    │   ├── dropout_ff_gpu.m
    │   ├── dropout_init_cpu.m
    │   └── dropout_init_gpu.m
    ├── lstm
    │   ├── lstm_bp.m
    │   ├── lstm_bp_gpu.m
    │   ├── lstm_ff.m
    │   ├── lstm_ff_gpu.m
    │   ├── lstm_init_cpu.m
    │   └── lstm_init_gpu.m
    ├── tensor
    │   ├── tensor_init.m
    │   ├── tensor_init_cpu.m
    │   └── tensor_init_gpu.m
    └── tensor_init_gpu.m
├── Model
    ├── eval_loss.m
    ├── layer_optimize.m
    ├── model_evaluate.m
    ├── model_init.m
    ├── model_load.m
    ├── model_predict.m
    ├── model_save.m
    └── model_train.m
├── README.md
├── cudnn_LSTM_FF.m
├── cumexhelp.h
├── example
    ├── char_rnn
    │   ├── Christ2FSM.m
    │   ├── char_rnn.m
    │   ├── textgenerate.m
    │   └── txt2seq.m
    ├── keras_test_lstm.py
    ├── test_lstm.asv
    ├── test_lstm.m
    └── test_mlp.m
├── matDL_128X128.ico
├── mat_cudnn.cu
├── mat_cudnn.h
├── mat_cudnn_test.cu
├── mat_cudnn_test.h
└── mat_cudnn_test.mexw64


/.gitignore:
--------------------------------------------------------------------------------
1 | *.mat
2 | *.txt
3 | 


--------------------------------------------------------------------------------
/Functions/act.m:
--------------------------------------------------------------------------------
 1 | function y=act(x,fun)
 2 | switch fun
 3 |     case 'sigmoid'
 4 |         y = 1./(1+exp(-x));
 5 |         return
 6 |     case 'tanh'
 7 |         y=tanh(x);
 8 |         return
 9 |     case 'softmax'
10 |         E=exp(x- max(x,[],2));
11 |         y =  E./ sum(E,2) ;
12 |         return
13 |     case 'Relu'
14 |         y=x.*(x>0);
15 |         return
16 |     case 'linear'
17 |         y=x;
18 |         return
19 | end
20 | end


--------------------------------------------------------------------------------
/Functions/activation_bp.m:
--------------------------------------------------------------------------------
 1 | function layer=activation_bp(layer,next_layer)
 2 | if isequal(class(next_layer),'struct')
 3 |     if ~isequal(size(next_layer.dx),layer.output_shape)
 4 |         error('Shape unmatched!')
 5 |     end
 6 |     layer.e=next_layer.dx;
 7 | end
 8 | layer.dx=layer.e.*layer.dact(layer.output);
 9 | end
10 | 


--------------------------------------------------------------------------------
/Functions/activation_ff.m:
--------------------------------------------------------------------------------
function layer=activation_ff(layer,prelayer)
%ACTIVATION_FF Forward step of an activation layer.
%   layer = ACTIVATION_FF(layer,prelayer) checks that the size of
%   prelayer.output equals layer.input_shape, then stores
%   layer.act(prelayer.output) in layer.output.
incoming = prelayer.output;
if ~isequal(size(incoming),layer.input_shape)
    error('Shape unmatched!')
end
layer.output = layer.act(incoming);
end


--------------------------------------------------------------------------------
/Functions/activation_init.m:
--------------------------------------------------------------------------------
 1 | function layer= activation_init( prelayer,act_fun,flag,loss )
 2 | %% Basic layer attributes
 3 | layer.type='activation';
 4 | layer.trainable=0;
 5 | layer.flag=flag;
 6 | layer.prelayer_type=prelayer.type;
 7 | 
 8 | layer.batch=1;
 9 | layer.epoch=1;
10 | 
11 | layer.input_shape=prelayer.output_shape;
12 | layer.output_shape=prelayer.output_shape;
13 | 
14 | % layer.input=prelayer.output;
15 | layer.output=prelayer.output;
16 | 
17 | if ~strcmpi(layer.prelayer_type,'input')&&flag
18 |     layer.dx=layer.output;
19 | end
20 | layer.e=layer.output;
21 | 
22 | if nargin>2
23 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
24 |     layer.loss=[];
25 | end
26 | layer.act=@(x)act(x,act_fun); 
27 | layer.dact=@(x)dact(x,act_fun); 
28 | layer.ff=@(layer,prelayer)activation_ff(layer,prelayer);
29 | layer.bp=@(layer,next_layer)activation_bp(layer,next_layer);
30 | layer.configs.type=layer.type;
31 | layer.configs.input_shape=layer.input_shape;
32 | layer.configs.output_shape=layer.output_shape;
33 | layer.configs.act_fun=act_fun;
34 | end
35 | 
36 | 


--------------------------------------------------------------------------------
/Functions/dact.m:
--------------------------------------------------------------------------------
 1 | function dx=dact(y,fun)
 2 | switch fun
 3 |     case 'sigmoid'
 4 |         dx = y .* (1 - y);
 5 |         return
 6 |     case 'tanh'
 7 |         dx=1-y.^2;
 8 |         return
 9 |     case 'Relu'
10 |         dx=(y>single(0));
11 |         return
12 |     case 'linear'
13 |         dx = y;
14 |         return
15 |     case  'softmax'
16 |         dx=y;
17 |         return
18 | end      
19 | end


--------------------------------------------------------------------------------
/Functions/loss_handle.m:
--------------------------------------------------------------------------------
 1 | function [f,df]=loss_handle(type)
 2 | syms y_true y_pred num
 3 | switch type
 4 |     case 'mse'
 5 |         symsf(y_true,y_pred)=(y_true-y_pred).^2;
 6 |         f=matlabFunction(symsf);
 7 |         df=matlabFunction(diff(symsf,y_pred));
 8 |         return
 9 |     case 'cross_entropy'
10 |         symsf(y_true,y_pred)=-1.*sum(y_true.*(y_pred)+(1-y_true).*log(1-y_pred));
11 |         f=matlabFunction(symsf);
12 |         df=matlabFunction(diff(symsf,y_pred));
13 |         return   
14 |     case 'categorical_cross_entropy'
15 |         symsf(y_true,y_pred)=-1.*y_true.*log(y_pred);
16 |         f=matlabFunction(symsf);
17 |         df=@(y_true,y_pre)y_pre-y_true;
18 | end
19 | end
20 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 |                             Preamble
  9 | 
 10 |   The GNU General Public License is a free, copyleft license for
 11 | software and other kinds of works.
 12 | 
 13 |   The licenses for most software and other practical works are designed
 14 | to take away your freedom to share and change the works.  By contrast,
 15 | the GNU General Public License is intended to guarantee your freedom to
 16 | share and change all versions of a program--to make sure it remains free
 17 | software for all its users.  We, the Free Software Foundation, use the
 18 | GNU General Public License for most of our software; it applies also to
 19 | any other work released this way by its authors.  You can apply it to
 20 | your programs, too.
 21 | 
 22 |   When we speak of free software, we are referring to freedom, not
 23 | price.  Our General Public Licenses are designed to make sure that you
 24 | have the freedom to distribute copies of free software (and charge for
 25 | them if you wish), that you receive source code or can get it if you
 26 | want it, that you can change the software or use pieces of it in new
 27 | free programs, and that you know you can do these things.
 28 | 
 29 |   To protect your rights, we need to prevent others from denying you
 30 | these rights or asking you to surrender the rights.  Therefore, you have
 31 | certain responsibilities if you distribute copies of the software, or if
 32 | you modify it: responsibilities to respect the freedom of others.
 33 | 
 34 |   For example, if you distribute copies of such a program, whether
 35 | gratis or for a fee, you must pass on to the recipients the same
 36 | freedoms that you received.  You must make sure that they, too, receive
 37 | or can get the source code.  And you must show them these terms so they
 38 | know their rights.
 39 | 
 40 |   Developers that use the GNU GPL protect your rights with two steps:
 41 | (1) assert copyright on the software, and (2) offer you this License
 42 | giving you legal permission to copy, distribute and/or modify it.
 43 | 
 44 |   For the developers' and authors' protection, the GPL clearly explains
 45 | that there is no warranty for this free software.  For both users' and
 46 | authors' sake, the GPL requires that modified versions be marked as
 47 | changed, so that their problems will not be attributed erroneously to
 48 | authors of previous versions.
 49 | 
 50 |   Some devices are designed to deny users access to install or run
 51 | modified versions of the software inside them, although the manufacturer
 52 | can do so.  This is fundamentally incompatible with the aim of
 53 | protecting users' freedom to change the software.  The systematic
 54 | pattern of such abuse occurs in the area of products for individuals to
 55 | use, which is precisely where it is most unacceptable.  Therefore, we
 56 | have designed this version of the GPL to prohibit the practice for those
 57 | products.  If such problems arise substantially in other domains, we
 58 | stand ready to extend this provision to those domains in future versions
 59 | of the GPL, as needed to protect the freedom of users.
 60 | 
 61 |   Finally, every program is threatened constantly by software patents.
 62 | States should not allow patents to restrict development and use of
 63 | software on general-purpose computers, but in those that do, we wish to
 64 | avoid the special danger that patents applied to a free program could
 65 | make it effectively proprietary.  To prevent this, the GPL assures that
 66 | patents cannot be used to render the program non-free.
 67 | 
 68 |   The precise terms and conditions for copying, distribution and
 69 | modification follow.
 70 | 
 71 |                        TERMS AND CONDITIONS
 72 | 
 73 |   0. Definitions.
 74 | 
 75 |   "This License" refers to version 3 of the GNU General Public License.
 76 | 
 77 |   "Copyright" also means copyright-like laws that apply to other kinds of
 78 | works, such as semiconductor masks.
 79 | 
 80 |   "The Program" refers to any copyrightable work licensed under this
 81 | License.  Each licensee is addressed as "you".  "Licensees" and
 82 | "recipients" may be individuals or organizations.
 83 | 
 84 |   To "modify" a work means to copy from or adapt all or part of the work
 85 | in a fashion requiring copyright permission, other than the making of an
 86 | exact copy.  The resulting work is called a "modified version" of the
 87 | earlier work or a work "based on" the earlier work.
 88 | 
 89 |   A "covered work" means either the unmodified Program or a work based
 90 | on the Program.
 91 | 
 92 |   To "propagate" a work means to do anything with it that, without
 93 | permission, would make you directly or secondarily liable for
 94 | infringement under applicable copyright law, except executing it on a
 95 | computer or modifying a private copy.  Propagation includes copying,
 96 | distribution (with or without modification), making available to the
 97 | public, and in some countries other activities as well.
 98 | 
 99 |   To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies.  Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 | 
103 |   An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License.  If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 | 
112 |   1. Source Code.
113 | 
114 |   The "source code" for a work means the preferred form of the work
115 | for making modifications to it.  "Object code" means any non-source
116 | form of a work.
117 | 
118 |   A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 | 
123 |   The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form.  A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 | 
134 |   The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities.  However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work.  For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 | 
147 |   The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 | 
151 |   The Corresponding Source for a work in source code form is that
152 | same work.
153 | 
154 |   2. Basic Permissions.
155 | 
156 |   All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met.  This License explicitly affirms your unlimited
159 | permission to run the unmodified Program.  The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work.  This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 | 
164 |   You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force.  You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright.  Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 | 
175 |   Conveying under any other circumstances is permitted solely under
176 | the conditions stated below.  Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 | 
179 |   3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 | 
181 |   No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 | 
187 |   When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 | 
195 |   4. Conveying Verbatim Copies.
196 | 
197 |   You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 | 
205 |   You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 | 
208 |   5. Conveying Modified Source Versions.
209 | 
210 |   You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 | 
214 |     a) The work must carry prominent notices stating that you modified
215 |     it, and giving a relevant date.
216 | 
217 |     b) The work must carry prominent notices stating that it is
218 |     released under this License and any conditions added under section
219 |     7.  This requirement modifies the requirement in section 4 to
220 |     "keep intact all notices".
221 | 
222 |     c) You must license the entire work, as a whole, under this
223 |     License to anyone who comes into possession of a copy.  This
224 |     License will therefore apply, along with any applicable section 7
225 |     additional terms, to the whole of the work, and all its parts,
226 |     regardless of how they are packaged.  This License gives no
227 |     permission to license the work in any other way, but it does not
228 |     invalidate such permission if you have separately received it.
229 | 
230 |     d) If the work has interactive user interfaces, each must display
231 |     Appropriate Legal Notices; however, if the Program has interactive
232 |     interfaces that do not display Appropriate Legal Notices, your
233 |     work need not make them do so.
234 | 
235 |   A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit.  Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 | 
245 |   6. Conveying Non-Source Forms.
246 | 
247 |   You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 | 
252 |     a) Convey the object code in, or embodied in, a physical product
253 |     (including a physical distribution medium), accompanied by the
254 |     Corresponding Source fixed on a durable physical medium
255 |     customarily used for software interchange.
256 | 
257 |     b) Convey the object code in, or embodied in, a physical product
258 |     (including a physical distribution medium), accompanied by a
259 |     written offer, valid for at least three years and valid for as
260 |     long as you offer spare parts or customer support for that product
261 |     model, to give anyone who possesses the object code either (1) a
262 |     copy of the Corresponding Source for all the software in the
263 |     product that is covered by this License, on a durable physical
264 |     medium customarily used for software interchange, for a price no
265 |     more than your reasonable cost of physically performing this
266 |     conveying of source, or (2) access to copy the
267 |     Corresponding Source from a network server at no charge.
268 | 
269 |     c) Convey individual copies of the object code with a copy of the
270 |     written offer to provide the Corresponding Source.  This
271 |     alternative is allowed only occasionally and noncommercially, and
272 |     only if you received the object code with such an offer, in accord
273 |     with subsection 6b.
274 | 
275 |     d) Convey the object code by offering access from a designated
276 |     place (gratis or for a charge), and offer equivalent access to the
277 |     Corresponding Source in the same way through the same place at no
278 |     further charge.  You need not require recipients to copy the
279 |     Corresponding Source along with the object code.  If the place to
280 |     copy the object code is a network server, the Corresponding Source
281 |     may be on a different server (operated by you or a third party)
282 |     that supports equivalent copying facilities, provided you maintain
283 |     clear directions next to the object code saying where to find the
284 |     Corresponding Source.  Regardless of what server hosts the
285 |     Corresponding Source, you remain obligated to ensure that it is
286 |     available for as long as needed to satisfy these requirements.
287 | 
288 |     e) Convey the object code using peer-to-peer transmission, provided
289 |     you inform other peers where the object code and Corresponding
290 |     Source of the work are being offered to the general public at no
291 |     charge under subsection 6d.
292 | 
293 |   A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 | 
297 |   A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling.  In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage.  For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product.  A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 | 
310 |   "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source.  The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 | 
318 |   If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information.  But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 | 
329 |   The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed.  Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 | 
337 |   Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 | 
343 |   7. Additional Terms.
344 | 
345 |   "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law.  If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 | 
354 |   When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it.  (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.)  You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 | 
361 |   Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 | 
365 |     a) Disclaiming warranty or limiting liability differently from the
366 |     terms of sections 15 and 16 of this License; or
367 | 
368 |     b) Requiring preservation of specified reasonable legal notices or
369 |     author attributions in that material or in the Appropriate Legal
370 |     Notices displayed by works containing it; or
371 | 
372 |     c) Prohibiting misrepresentation of the origin of that material, or
373 |     requiring that modified versions of such material be marked in
374 |     reasonable ways as different from the original version; or
375 | 
376 |     d) Limiting the use for publicity purposes of names of licensors or
377 |     authors of the material; or
378 | 
379 |     e) Declining to grant rights under trademark law for use of some
380 |     trade names, trademarks, or service marks; or
381 | 
382 |     f) Requiring indemnification of licensors and authors of that
383 |     material by anyone who conveys the material (or modified versions of
384 |     it) with contractual assumptions of liability to the recipient, for
385 |     any liability that these contractual assumptions directly impose on
386 |     those licensors and authors.
387 | 
388 |   All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10.  If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term.  If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 | 
398 |   If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 | 
403 |   Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 | 
407 |   8. Termination.
408 | 
409 |   You may not propagate or modify a covered work except as expressly
410 | provided under this License.  Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 | 
415 |   However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 | 
422 |   Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 | 
429 |   Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License.  If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 | 
435 |   9. Acceptance Not Required for Having Copies.
436 | 
437 |   You are not required to accept this License in order to receive or
438 | run a copy of the Program.  Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance.  However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work.  These actions infringe copyright if you do
443 | not accept this License.  Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 | 
446 |   10. Automatic Licensing of Downstream Recipients.
447 | 
448 |   Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License.  You are not responsible
451 | for enforcing compliance by third parties with this License.
452 | 
453 |   An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations.  If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 | 
463 |   You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License.  For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 | 
471 |   11. Patents.
472 | 
473 |   A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based.  The
475 | work thus licensed is called the contributor's "contributor version".
476 | 
477 |   A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version.  For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 | 
487 |   Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 | 
492 |   In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement).  To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 | 
499 |   If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients.  "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 | 
513 |   If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 | 
521 |   A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License.  You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 | 
536 |   Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 | 
540 |   12. No Surrender of Others' Freedom.
541 | 
542 |   If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License.  If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all.  For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 | 
552 |   13. Use with the GNU Affero General Public License.
553 | 
554 |   Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work.  The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 | 
563 |   14. Revised Versions of this License.
564 | 
565 |   The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time.  Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 | 
570 |   Each version is given a distinguishing version number.  If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation.  If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 | 
579 |   If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 | 
584 |   Later license versions may give you additional or different
585 | permissions.  However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 | 
589 |   15. Disclaimer of Warranty.
590 | 
591 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 | 
600 |   16. Limitation of Liability.
601 | 
602 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 | 
612 |   17. Interpretation of Sections 15 and 16.
613 | 
614 |   If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 | 
621 |                      END OF TERMS AND CONDITIONS
622 | 
623 |             How to Apply These Terms to Your New Programs
624 | 
625 |   If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 | 
629 |   To do so, attach the following notices to the program.  It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 | 
634 |     {one line to give the program's name and a brief idea of what it does.}
635 |     Copyright (C) {year}  {name of author}
636 | 
637 |     This program is free software: you can redistribute it and/or modify
638 |     it under the terms of the GNU General Public License as published by
639 |     the Free Software Foundation, either version 3 of the License, or
640 |     (at your option) any later version.
641 | 
642 |     This program is distributed in the hope that it will be useful,
643 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
644 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
645 |     GNU General Public License for more details.
646 | 
647 |     You should have received a copy of the GNU General Public License
648 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
649 | 
650 | Also add information on how to contact you by electronic and paper mail.
651 | 
652 |   If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 | 
655 |     {project}  Copyright (C) {year}  {fullname}
656 |     This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 |     This is free software, and you are welcome to redistribute it
658 |     under certain conditions; type `show c' for details.
659 | 
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License.  Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 | 
664 |   You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <http://www.gnu.org/licenses/>.
668 | 
669 |   The GNU General Public License does not permit incorporating your program
670 | into proprietary programs.  If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library.  If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License.  But first, please read
674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>.
675 | 


--------------------------------------------------------------------------------
/Layer/activation/activation_bp.m:
--------------------------------------------------------------------------------
 1 | function layer=activation_bp(layer,next_layer)
 2 | %ACTIVATION_BP Backward pass of an activation layer.
 3 | %   layer = ACTIVATION_BP(layer,next_layer) writes the gradient with
 4 | %   respect to the layer input into layer.dx, computed as
 5 | %   layer.e .* layer.dact(layer.output).
 6 | %
 7 | %   If next_layer is a struct, next_layer.dx must have size
 8 | %   layer.output_shape (otherwise an error is raised) and is copied into
 9 | %   layer.e first. For any other class, layer.e is used as it stands.
10 | if isstruct(next_layer)
11 |     upstream=next_layer.dx;
12 |     if ~isequal(size(upstream),layer.output_shape)
13 |         error('Shape unmatched!')
14 |     end
15 |     layer.e=upstream;
16 | end
17 | % Chain rule: error times the activation derivative taken at layer.output.
18 | slope=layer.dact(layer.output);
19 | layer.dx=layer.e.*slope;
20 | end
21 | 


--------------------------------------------------------------------------------
/Layer/activation/activation_ff.m:
--------------------------------------------------------------------------------
1 | function layer=activation_ff(layer,prelayer)
2 | if ~isequal(size(prelayer.output),layer.input_shape)
3 |     error('Shape unmatched!')
4 | end
5 | layer.output=layer.act(prelayer.output);
6 | end


--------------------------------------------------------------------------------
/Layer/activation/activation_init.m:
--------------------------------------------------------------------------------
 1 | function layer= activation_init( prelayer,act_fun,flag,loss )
 2 | %ACTIVATION_INIT Build the struct that describes an activation layer.
 3 | %   prelayer - preceding layer struct; its type, output_shape and output
 4 | %              fields are read.
 5 | %   act_fun  - activation selector forwarded to act() and dact().
 6 | %   flag     - when true, and prelayer is not of type 'input', a dx
 7 | %              buffer is created.
 8 | %   loss     - optional; when supplied, loss_handle(loss) provides
 9 | %              layer.loss_f and layer.loss_df.
10 | %% Basic layer attributes
11 | layer.type='activation';
12 | layer.trainable=0;
13 | layer.flag=flag;
14 | layer.prelayer_type=prelayer.type;
15 | 
16 | layer.batch=1;
17 | layer.epoch=1;
18 | 
19 | % Input and output shapes are both taken from prelayer.output_shape.
20 | shape=prelayer.output_shape;
21 | layer.input_shape=shape;
22 | layer.output_shape=shape;
23 | 
24 | %% Buffers, each starting as a copy of the previous layer's output
25 | layer.output=prelayer.output;
26 | fed_by_input=strcmpi(layer.prelayer_type,'input');
27 | if ~fed_by_input&&flag
28 |     layer.dx=layer.output;
29 | end
30 | layer.e=layer.output;
31 | 
32 | %% Optional loss handles
33 | if nargin>=4
34 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
35 |     layer.loss=[];
36 | end
37 | 
38 | %% Function handles
39 | layer.act=@(x)act(x,act_fun);
40 | layer.dact=@(x)dact(x,act_fun);
41 | layer.ff=@(layer,prelayer)activation_ff(layer,prelayer);
42 | layer.bp=@(layer,next_layer)activation_bp(layer,next_layer);
43 | 
44 | %% Configuration summary
45 | cfg.type=layer.type;
46 | cfg.input_shape=layer.input_shape;
47 | cfg.output_shape=layer.output_shape;
48 | cfg.act_fun=act_fun;
49 | layer.configs=cfg;
50 | end
51 | 
52 | 


--------------------------------------------------------------------------------
/Layer/activation/activation_init_cpu.m:
--------------------------------------------------------------------------------
 1 | function layer= activation_init_cpu( prelayer,act_fun,flag,loss )
 2 | %ACTIVATION_INIT_CPU Build the struct that describes an activation layer.
 3 | %   prelayer - preceding layer struct; its type, output_shape and output
 4 | %              fields are read.
 5 | %   act_fun  - activation selector forwarded to act() and dact().
 6 | %   flag     - when true, and prelayer is not of type 'input', a dx
 7 | %              buffer is created.
 8 | %   loss     - optional; when supplied, loss_handle(loss) provides
 9 | %              layer.loss_f and layer.loss_df.
10 | %% Basic layer attributes
11 | layer.type='activation';
12 | layer.trainable=0;
13 | layer.flag=flag;
14 | layer.prelayer_type=prelayer.type;
15 | 
16 | layer.batch=1;
17 | layer.epoch=1;
18 | 
19 | % Input and output shapes are both taken from prelayer.output_shape.
20 | shape=prelayer.output_shape;
21 | layer.input_shape=shape;
22 | layer.output_shape=shape;
23 | 
24 | %% Buffers, each starting as a copy of the previous layer's output
25 | layer.output=prelayer.output;
26 | fed_by_input=strcmpi(layer.prelayer_type,'input');
27 | if ~fed_by_input&&flag
28 |     layer.dx=layer.output;
29 | end
30 | layer.e=layer.output;
31 | 
32 | %% Optional loss handles
33 | if nargin>=4
34 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
35 |     layer.loss=[];
36 | end
37 | 
38 | %% Function handles
39 | layer.act=@(x)act(x,act_fun);
40 | layer.dact=@(x)dact(x,act_fun);
41 | layer.ff=@(layer,prelayer)activation_ff(layer,prelayer);
42 | layer.bp=@(layer,next_layer)activation_bp(layer,next_layer);
43 | 
44 | %% Configuration summary
45 | cfg.type=layer.type;
46 | cfg.input_shape=layer.input_shape;
47 | cfg.output_shape=layer.output_shape;
48 | cfg.act_fun=act_fun;
49 | layer.configs=cfg;
50 | end
51 | 
52 | 


--------------------------------------------------------------------------------
/Layer/activation/activation_init_gpu.m:
--------------------------------------------------------------------------------
 1 | function layer= activation_init_gpu( prelayer,act_fun,flag,loss )
 2 | %ACTIVATION_INIT_GPU Build the struct that describes an activation layer.
 3 | %   prelayer - preceding layer struct; its type, output_shape and output
 4 | %              fields are read. The buffers created here are copies of
 5 | %              prelayer.output, so they inherit its array class.
 6 | %   act_fun  - activation selector forwarded to act() and dact().
 7 | %   flag     - when true, and prelayer is not of type 'input', a dx
 8 | %              buffer is created.
 9 | %   loss     - optional; when supplied, loss_handle(loss) provides
10 | %              layer.loss_f and layer.loss_df.
11 | %% Basic layer attributes
12 | layer.type='activation';
13 | layer.trainable=0;
14 | layer.flag=flag;
15 | layer.prelayer_type=prelayer.type;
16 | 
17 | layer.batch=1;
18 | layer.epoch=1;
19 | 
20 | % Input and output shapes are both taken from prelayer.output_shape.
21 | shape=prelayer.output_shape;
22 | layer.input_shape=shape;
23 | layer.output_shape=shape;
24 | 
25 | %% Buffers, each starting as a copy of the previous layer's output
26 | layer.output=prelayer.output;
27 | fed_by_input=strcmpi(layer.prelayer_type,'input');
28 | if ~fed_by_input&&flag
29 |     layer.dx=layer.output;
30 | end
31 | layer.e=layer.output;
32 | 
33 | %% Optional loss handles
34 | if nargin>=4
35 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
36 |     layer.loss=[];
37 | end
38 | 
39 | %% Function handles
40 | layer.act=@(x)act(x,act_fun);
41 | layer.dact=@(x)dact(x,act_fun);
42 | layer.ff=@(layer,prelayer)activation_ff(layer,prelayer);
43 | layer.bp=@(layer,next_layer)activation_bp(layer,next_layer);
44 | 
45 | %% Configuration summary
46 | cfg.type=layer.type;
47 | cfg.input_shape=layer.input_shape;
48 | cfg.output_shape=layer.output_shape;
49 | cfg.act_fun=act_fun;
50 | layer.configs=cfg;
51 | end
52 | 
53 | 


--------------------------------------------------------------------------------
/Layer/dense/dense_bp.m:
--------------------------------------------------------------------------------
 1 | function layer =dense_bp(layer,next_layer)
 2 | %DENSE_BP Backward pass of a dense layer.
 3 | %   layer = DENSE_BP(layer,next_layer) computes the weight gradient
 4 | %   layer.dW and, unless layer.prelayer_type is 'input', the gradient with
 5 | %   respect to the layer input, layer.dx.
 6 | %
 7 | %   If next_layer is a struct, next_layer.dx must have size
 8 | %   layer.output_shape (otherwise an error is raised) and is copied into
 9 | %   layer.e; for any other class the error already in layer.e is used.
10 | %
11 | %   Array layout as used by the code below: layer.input is
12 | %   [batch, dim+1] (or [batch, dim+1, timestep] when
13 | %   layer.timedistributed is set) and layer.W is [dim+1, hiddensize].
14 | %   The last row of W is left out when propagating to dx because it
15 | %   multiplies the extra (bias) input column.
16 | if isstruct(next_layer)
17 |     if ~isequal(size(next_layer.dx),layer.output_shape)
18 |         error('Shape unmatched!')
19 |     end
20 |     layer.e=next_layer.dx;
21 | end
22 | if layer.timedistributed
23 |     % Fold batch and time into one axis so that dW = X'*E accumulates
24 |     % over every (sample, timestep) pair. The row counts come from the
25 |     % arrays themselves (dim+1 for the input, hiddensize for the error).
26 |     x2d=reshape(permute(layer.input,[2,1,3]),size(layer.input,2),[]);
27 |     e2d=reshape(permute(layer.e,[2,1,3]),size(layer.e,2),[]);
28 |     layer.dW=x2d*e2d';
29 |     if ~isequal(layer.prelayer_type,'input')
30 |         layer.dx(:)=mult_3d(layer.e,layer.W(1:end-1,:)');
31 |     end
32 | else
33 |     layer.dW=layer.input'*layer.e;
34 |     if ~isequal(layer.prelayer_type,'input')
35 |         layer.dx=layer.e*layer.W(1:end-1,:)';
36 |     end
37 | end
38 | end
39 | function c=mult_3d(a,b)
40 | %MULT_3D Right-multiply every time slice of a by the matrix b.
41 | %   a is [batch, dim, timestep] and b is [dim, out]; the result c is
42 | %   [batch, out, timestep] with c(:,:,t) = a(:,:,t)*b.
43 | % Three-output size() yields timestep=1 when the trailing dimension is
44 | % singleton, which size(a) followed by shape(end) would misread.
45 | [batchsize,dim,timestep]=size(a);
46 | flat=reshape(permute(a,[2,1,3]),dim,[])'*b;   % [batch*timestep, out]
47 | % The product has size(b,2) features, not dim, so reshape with that.
48 | c=permute(reshape(flat',[size(b,2),batchsize,timestep]),[2,1,3]);
49 | end


--------------------------------------------------------------------------------
/Layer/dense/dense_bp_gpu.m:
--------------------------------------------------------------------------------
 1 | function layer =dense_bp_gpu(layer,next_layer)
 2 | %DENSE_BP_GPU Backward pass of a dense layer, feature-first layout.
 3 | %   layer = DENSE_BP_GPU(layer,next_layer) computes layer.dW and, unless
 4 | %   layer.prelayer_type is 'input', layer.dx.
 5 | %
 6 | %   If next_layer is a struct, next_layer.dx must have size
 7 | %   layer.output_shape (otherwise an error is raised) and is copied into
 8 | %   layer.e; for any other class the error already in layer.e is used.
 9 | %
10 | %   NOTE(review): the products below treat the first dimension of
11 | %   layer.input and layer.e as the feature axis and layer.W as
12 | %   [hiddensize, dim+1] - confirm against the code that allocates them.
13 | if isstruct(next_layer)
14 |     incoming=next_layer.dx;
15 |     if ~isequal(size(incoming),layer.output_shape)
16 |         error('Shape unmatched!')
17 |     end
18 |     layer.e=incoming;
19 | end
20 | % Collapse every dimension after the first before the matrix products.
21 | err2d=sq(layer.e);
22 | in2d=sq(layer.input);
23 | layer.dW=err2d*in2d';
24 | if isequal(layer.prelayer_type,'input')
25 |     return
26 | end
27 | % The last column of W is excluded when propagating the error.
28 | Wt=layer.W(:,1:end-1)';
29 | if layer.timedistributed
30 |     layer.dx(:)=Wt*err2d;
31 | else
32 |     layer.dx=Wt*layer.e;
33 | end
34 | end
35 | function a=sq(a)
36 | %SQ Flatten a to a matrix, keeping its first dimension as the row count.
37 | rows=size(a,1);
38 | a=reshape(a,rows,[]);
39 | end
40 | 


--------------------------------------------------------------------------------
/Layer/dense/dense_ff.m:
--------------------------------------------------------------------------------
 1 | function layer=dense_ff(layer,prelayer)
 2 | %DENSE_FF Forward pass of a dense layer.
 3 | %   layer = DENSE_FF(layer,prelayer) copies the incoming activations into
 4 | %   every column of layer.input except the last one and stores
 5 | %   layer.input*layer.W (applied per time slice when
 6 | %   layer.timedistributed is set) in layer.output.
 7 | %
 8 | %   prelayer is either a layer struct, in which case prelayer.output must
 9 | %   have size layer.input_shape (otherwise an error is raised), or a raw
10 | %   array that is used directly without a shape check.
11 | if isstruct(prelayer)
12 |     if ~isequal(size(prelayer.output),layer.input_shape)
13 |         error('Shape unmatched!')
14 |     end
15 |     x=prelayer.output;
16 | else
17 |     x=prelayer;
18 | end
19 | % The last input column is never written here, so it keeps whatever the
20 | % buffer was created with (it pairs with the last row of W).
21 | if layer.timedistributed
22 |     layer.input(:,1:end-1,:)=x;
23 |     layer.output(:)=mult_3d(layer.input,layer.W);
24 | else
25 |     layer.input(:,1:end-1)=x;
26 |     layer.output=layer.input*layer.W;
27 | end
28 | end
29 | function c=mult_3d(a,b)
30 | %MULT_3D Right-multiply every time slice of a by the matrix b.
31 | %   a is [batch, dim, timestep] and b is [dim, out]; the result c is
32 | %   [batch, out, timestep] with c(:,:,t) = a(:,:,t)*b.
33 | % Three-output size() yields timestep=1 when the trailing dimension is
34 | % singleton, which size(a) followed by shape(end) would misread.
35 | [batchsize,dim,timestep]=size(a);
36 | flat=reshape(permute(a,[2,1,3]),dim,[])'*b;   % [batch*timestep, out]
37 | % The product has size(b,2) features, not dim, so reshape with that.
38 | c=permute(reshape(flat',[size(b,2),batchsize,timestep]),[2,1,3]);
39 | end


--------------------------------------------------------------------------------
/Layer/dense/dense_ff_gpu.m:
--------------------------------------------------------------------------------
 1 | function layer=dense_ff_gpu(layer,prelayer)
 2 | %DENSE_FF_GPU Forward pass of a dense layer, feature-first layout.
 3 | %   layer = DENSE_FF_GPU(layer,prelayer) copies the incoming activations
 4 | %   into every row of layer.input except the last one and stores
 5 | %   layer.W*layer.input in layer.output (with trailing dimensions
 6 | %   flattened first when layer.timedistributed is set).
 7 | %
 8 | %   prelayer is either a layer struct, in which case prelayer.output must
 9 | %   have size layer.input_shape (otherwise an error is raised), or a raw
10 | %   array that is used directly without a shape check.
11 | if isstruct(prelayer)
12 |     data=prelayer.output;
13 |     if ~isequal(size(data),layer.input_shape)
14 |         error('Shape unmatched!')
15 |     end
16 | else
17 |     data=prelayer;
18 | end
19 | % The last input row is never written here, so it keeps its prior value.
20 | if layer.timedistributed
21 |     layer.input(1:end-1,:,:)=data;
22 |     layer.output(:)=layer.W*sq(layer.input);
23 | else
24 |     layer.input(1:end-1,:)=data;
25 |     layer.output=layer.W*layer.input;
26 | end
27 | end
28 | function a=sq(a)
29 | %SQ Flatten a to a matrix, keeping its first dimension as the row count.
30 | rows=size(a,1);
31 | a=reshape(a,rows,[]);
32 | end
33 | 


--------------------------------------------------------------------------------
/Layer/dense/dense_init_cpu.m:
--------------------------------------------------------------------------------
 1 | function layer=dense_init_cpu(prelayer,hiddensize ,flag,loss)
 2 | %DENSE_INIT_CPU Build the struct that describes a dense layer.
 3 | %   prelayer   - preceding layer struct; its output_shape and type are
 4 | %                read.
 5 | %   hiddensize - number of output units.
 6 | %   flag       - when true, a dx buffer (unless prelayer is of type
 7 | %                'input') and, if loss is given, loss handles are created.
 8 | %   loss       - optional; forwarded to loss_handle when flag is true.
 9 | %
10 | %   All numeric buffers are created in single precision.
11 | %% Basic layer attributes
12 | % Input tensor shape
13 | in_shape=prelayer.output_shape;
14 | layer.trainable=1;
15 | layer.flag=flag;
16 | layer.input_shape=in_shape;
17 | % A shape with more than two entries marks the layer as time-distributed.
18 | if numel(in_shape)>2
19 |     layer.timedistributed=1;
20 |     layer.output_shape=[in_shape(1),hiddensize,in_shape(end)];
21 | else
22 |     layer.timedistributed=0;
23 |     layer.output_shape=[in_shape(1),hiddensize];
24 | end
25 | layer.type='dense';
26 | layer.prelayer_type=prelayer.type;
27 | layer.hiddensize=hiddensize;
28 | layer.batchsize=in_shape(1);
29 | layer.batch=1;
30 | layer.epoch=1;
31 | %% Dense layer attributes
32 | % W holds the weights plus one extra row for the bias
33 | layer.weights_dim=in_shape(2)+1;
34 | layer.W=(rand([layer.weights_dim,hiddensize],'single')-0.5)./100;
35 | % The input buffer gets one extra column and starts out as all ones.
36 | padded=[in_shape(1),in_shape(2)+1];
37 | if layer.timedistributed
38 |     padded=[padded,in_shape(3)];
39 | end
40 | layer.input=ones(padded,'single');
41 | layer.output=zeros(layer.output_shape,'single');
42 | fed_by_input=strcmpi(layer.prelayer_type,'input');
43 | if ~fed_by_input&&flag
44 |     layer.dx=zeros(in_shape,'single');
45 | end
46 | layer.e=layer.output;
47 | if nargin>=4&&flag
48 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
49 |     layer.loss=[];
50 | end
51 | %% Function handles
52 | layer.ff=@(layer,prelayer)dense_ff(layer,prelayer);
53 | layer.bp=@(layer,next_layer)dense_bp(layer,next_layer);
54 | 
55 | %% Configuration summary
56 | cfg.type=layer.type;
57 | cfg.input_shape=layer.input_shape;
58 | cfg.output_shape=layer.output_shape;
59 | cfg.hiddensize=layer.hiddensize;
60 | cfg.W=size(layer.W);
61 | layer.configs=cfg;
62 | end
63 | 


--------------------------------------------------------------------------------
/Layer/dense/dense_init_gpu.m:
--------------------------------------------------------------------------------
 1 | function layer=dense_init_gpu(prelayer,hiddensize ,flag,loss)
 2 | %DENSE_INIT_GPU Build the struct that describes a dense layer (gpuArray).
 3 | %   prelayer   - preceding layer struct; its output_shape and type are
 4 | %                read.
 5 | %   hiddensize - number of output units.
 6 | %   flag       - when true, a dx buffer (unless prelayer is of type
 7 | %                'input') and, if loss is given, loss handles are created.
 8 | %   loss       - optional; forwarded to loss_handle when flag is true.
 9 | %
10 | %   All numeric buffers are created as single-precision gpuArrays.
11 | %% Basic layer attributes
12 | % Input tensor shape
13 | in_shape=prelayer.output_shape;
14 | layer.trainable=1;
15 | layer.flag=flag;
16 | layer.input_shape=in_shape;
17 | % A shape with more than two entries marks the layer as time-distributed.
18 | if numel(in_shape)>2
19 |     layer.timedistributed=1;
20 |     layer.output_shape=[in_shape(1),hiddensize,in_shape(end)];
21 | else
22 |     layer.timedistributed=0;
23 |     layer.output_shape=[in_shape(1),hiddensize];
24 | end
25 | layer.type='dense';
26 | layer.prelayer_type=prelayer.type;
27 | layer.hiddensize=hiddensize;
28 | layer.batchsize=in_shape(1);
29 | layer.batch=1;
30 | layer.epoch=1;
31 | %% Dense layer attributes
32 | % W holds the weights plus one extra row for the bias
33 | layer.weights_dim=in_shape(2)+1;
34 | layer.W=(rand([layer.weights_dim,hiddensize],'single','gpuArray')-0.5)./100;
35 | % The input buffer gets one extra column and starts out as all ones.
36 | padded=[in_shape(1),in_shape(2)+1];
37 | if layer.timedistributed
38 |     padded=[padded,in_shape(3)];
39 | end
40 | layer.input=ones(padded,'single','gpuArray');
41 | layer.output=zeros(layer.output_shape,'single','gpuArray');
42 | fed_by_input=strcmpi(layer.prelayer_type,'input');
43 | if ~fed_by_input&&flag
44 |     layer.dx=zeros(in_shape,'single','gpuArray');
45 | end
46 | layer.e=layer.output;
47 | if nargin>=4&&flag
48 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
49 |     layer.loss=[];
50 | end
51 | %% Function handles
52 | % dense_ff/dense_bp (not the *_gpu variants) are bound here.
53 | layer.ff=@(layer,prelayer)dense_ff(layer,prelayer);
54 | layer.bp=@(layer,next_layer)dense_bp(layer,next_layer);
55 | 
56 | %% Configuration summary
57 | cfg.type=layer.type;
58 | cfg.input_shape=layer.input_shape;
59 | cfg.output_shape=layer.output_shape;
60 | cfg.hiddensize=layer.hiddensize;
61 | cfg.W=size(layer.W);
62 | layer.configs=cfg;
63 | end
64 | 


--------------------------------------------------------------------------------
/Layer/dropout/drop.m:
--------------------------------------------------------------------------------
1 | function [mask,mask_index]=drop(mask,drop_rate)
2 | mask_index=randperm(numel(mask),floor(numel(mask)*drop_rate));
3 | mask(mask_index)=0;
4 | end


--------------------------------------------------------------------------------
/Layer/dropout/dropout_bp.m:
--------------------------------------------------------------------------------
1 | function layer=dropout_bp(layer,next_layer)
2 | %DROPOUT_BP Backward pass of a dropout layer.
3 | %   Stores next_layer.dx multiplied element-wise by layer.mask in
4 | %   layer.dx, so entries whose mask value is zero receive no gradient.
5 | upstream=next_layer.dx;
6 | layer.dx=upstream.*layer.mask;
7 | end


--------------------------------------------------------------------------------
/Layer/dropout/dropout_bp_gpu.m:
--------------------------------------------------------------------------------
1 | function layer=dropout_bp_gpu(layer,next_layer)
2 | %DROPOUT_BP_GPU Backward pass of a dropout layer.
3 | %   Stores next_layer.dx multiplied element-wise by layer.mask in
4 | %   layer.dx, so entries whose mask value is zero receive no gradient.
5 | upstream=next_layer.dx;
6 | layer.dx=upstream.*layer.mask;
7 | end


--------------------------------------------------------------------------------
/Layer/dropout/dropout_ff.m:
--------------------------------------------------------------------------------
1 | function layer=dropout_ff(layer,prelayer)
2 | if layer.flag
3 |     [layer.mask,layer.mask_index]=layer.drop(layer.mask,layer.drop_rate);
4 |     layer.output=prelayer.output.*layer.mask;
5 | else
6 |     layer.output=prelayer.output*layer.drop_rate;
7 | end
8 | end


--------------------------------------------------------------------------------
/Layer/dropout/dropout_ff_gpu.m:
--------------------------------------------------------------------------------
1 | function layer=dropout_ff_gpu(layer,prelayer)
2 | if layer.flag
3 |     [layer.mask,layer.mask_index]=layer.drop(layer.mask,layer.drop_rate);
4 |     layer.output=prelayer.output.*layer.mask;
5 | else
6 |     layer.output=prelayer.output*layer.drop_rate;
7 | end
8 | end


--------------------------------------------------------------------------------
/Layer/dropout/dropout_init_cpu.m:
--------------------------------------------------------------------------------
 1 | function layer=dropout_init_cpu(prelayer,drop_rate ,flag,loss)
 2 | %DROPOUT_INIT_CPU Build the struct that describes a dropout layer.
 3 | %   prelayer  - preceding layer struct; its output_shape and type are
 4 | %               read.
 5 | %   drop_rate - stored as layer.drop_rate and later handed to layer.drop.
 6 | %   flag      - when true, the mask, a dx buffer (unless prelayer is of
 7 | %               type 'input') and, if loss is given, loss handles are
 8 | %               created.
 9 | %   loss      - optional; forwarded to loss_handle when flag is true.
10 | %
11 | %   All numeric buffers are created in single precision.
12 | %% Basic layer attributes
13 | shape=prelayer.output_shape;
14 | layer.trainable=0;
15 | layer.flag=flag;
16 | layer.input_shape=shape;
17 | layer.type='dropout';
18 | layer.prelayer_type=prelayer.type;
19 | layer.output_shape=shape;
20 | layer.batchsize=shape(1);
21 | layer.batch=1;
22 | layer.epoch=1;
23 | %% Dropout layer attributes
24 | layer.drop_rate=drop_rate;
25 | % The mask is only allocated when the layer is built with flag set.
26 | if flag
27 |     layer.mask=ones(shape,'single');
28 | end
29 | layer.output=zeros(shape,'single');
30 | fed_by_input=strcmpi(layer.prelayer_type,'input');
31 | if ~fed_by_input&&flag
32 |     layer.dx=zeros(shape,'single');
33 | end
34 | if nargin>=4&&flag
35 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
36 |     layer.loss=[];
37 | end
38 | %% Function handles
39 | layer.drop=@(mask,drop_rate)drop(mask,drop_rate);
40 | layer.ff=@(layer,prelayer)dropout_ff(layer,prelayer);
41 | layer.bp=@(layer,next_layer)dropout_bp(layer,next_layer);
42 | 
43 | %% Configuration summary
44 | cfg.type=layer.type;
45 | cfg.input_shape=layer.input_shape;
46 | cfg.output_shape=layer.output_shape;
47 | cfg.drop_rate=layer.drop_rate;
48 | layer.configs=cfg;
49 | end
50 | 


--------------------------------------------------------------------------------
/Layer/dropout/dropout_init_gpu.m:
--------------------------------------------------------------------------------
 1 | function layer=dropout_init_gpu(prelayer,drop_rate ,flag,loss)
 2 | %DROPOUT_INIT_GPU Build the struct that describes a dropout layer (gpuArray).
 3 | %   prelayer  - preceding layer struct; its output_shape and type are
 4 | %               read.
 5 | %   drop_rate - stored as layer.drop_rate and later handed to layer.drop.
 6 | %   flag      - when true, the mask, a dx buffer (unless prelayer is of
 7 | %               type 'input') and, if loss is given, loss handles are
 8 | %               created.
 9 | %   loss      - optional; forwarded to loss_handle when flag is true.
10 | %
11 | %   All numeric buffers are created as single-precision gpuArrays.
12 | %% Basic layer attributes
13 | shape=prelayer.output_shape;
14 | layer.trainable=0;
15 | layer.flag=flag;
16 | layer.input_shape=shape;
17 | layer.type='dropout';
18 | layer.prelayer_type=prelayer.type;
19 | layer.output_shape=shape;
20 | layer.batchsize=shape(1);
21 | layer.batch=1;
22 | layer.epoch=1;
23 | %% Dropout layer attributes
24 | layer.drop_rate=drop_rate;
25 | % The mask is only allocated when the layer is built with flag set.
26 | if flag
27 |     layer.mask=ones(shape,'single','gpuArray');
28 | end
29 | layer.output=zeros(shape,'single','gpuArray');
30 | fed_by_input=strcmpi(layer.prelayer_type,'input');
31 | if ~fed_by_input&&flag
32 |     layer.dx=zeros(shape,'single','gpuArray');
33 | end
34 | if nargin>=4&&flag
35 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
36 |     layer.loss=[];
37 | end
38 | %% Function handles
39 | % dropout_ff/dropout_bp (not the *_gpu variants) are bound here.
40 | layer.drop=@(mask,drop_rate)drop(mask,drop_rate);
41 | layer.ff=@(layer,prelayer)dropout_ff(layer,prelayer);
42 | layer.bp=@(layer,next_layer)dropout_bp(layer,next_layer);
43 | 
44 | %% Configuration summary
45 | cfg.type=layer.type;
46 | cfg.input_shape=layer.input_shape;
47 | cfg.output_shape=layer.output_shape;
48 | cfg.drop_rate=layer.drop_rate;
49 | layer.configs=cfg;
50 | end
51 | 


--------------------------------------------------------------------------------
/Layer/lstm/lstm_bp.m:
--------------------------------------------------------------------------------
 1 | function layer=lstm_bp(layer,next_layer)
 2 | % LSTM_BP  Backpropagation through time for the batch-first LSTM layer.
 3 | %   layer      : LSTM layer struct. Cached forward values (xh, mb, sc, bc) are read;
 4 | %                dh, dsc, dmb, dma, dW and, when not fed by the input layer, dx are written.
 5 | %   next_layer : following layer struct whose dx supplies the output error. If it is
 6 | %                not a struct, the error already stored in layer.e is used.
 7 | %   Arrays are indexed as (batch, feature, time).
 8 | if isequal(class(next_layer),'struct')
 9 |     if ~isequal(size(next_layer.dx),layer.output_shape)
10 |         error('Shape unmatched!')
11 |     end
12 |     if layer.return_sequence
13 |         layer.e=next_layer.dx;
14 |     else
15 |         % Only the last timestep feeds the next layer, so its error goes into the
16 |         % last page of the time (3rd) dimension; earlier pages keep their values.
17 |         layer.e(:,:,end)=next_layer.dx;
18 |     end
19 | end
20 | timestep=layer.timestep;
21 | hiddensize=layer.hiddensize;
22 | dim=layer.input_shape(2);
23 | r_in=1:dim;                      % rows of W applied to x (bias row dim+1 excluded)
24 | r_h=dim+1+(1:hiddensize);        % rows of W applied to h
25 | r_ifo=1:3*hiddensize;            % columns of the forget, input and output gates
26 | r_f=1:hiddensize;
27 | r_i=hiddensize+1:2*hiddensize;
28 | r_o=2*hiddensize+1:3*hiddensize;
29 | r_tc=3*hiddensize+1:4*hiddensize;
30 | %% Backpropagation through time
31 | % NOTE(review): d_c(t) below has no f(t+1).*d_c(t+1) carry term - confirm intended.
32 | for t=timestep:-1:1
33 |     % d_h(t) = e(t) + d_a(t+1)*W_h'   (page timestep+1 of dma is never written here)
34 |     layer.dh(:,:,t)=layer.e(:,:,t)+layer.dma(:,:,t+1)*layer.W(r_h,:)';
35 |     % d_c(t) = d_h(t) .* o(t) .* dact_h(c(t))
36 |     layer.dsc(:,:,t)=layer.dh(:,:,t).*layer.mb(:,r_o,t).*layer.dact_h(layer.sc(:,:,t));
37 |     % db_o(t) = d_h(t) .* bc(t)
38 |     layer.dmb(:,r_o,t)=layer.dh(:,:,t).*layer.bc(:,:,t);
39 |     % db_i(t) = d_c(t) .* tc(t)
40 |     layer.dmb(:,r_i,t)=layer.dsc(:,:,t).*layer.mb(:,r_tc,t);
41 |     % db_tc(t) = d_c(t) .* i(t)
42 |     layer.dmb(:,r_tc,t)=layer.dsc(:,:,t).*layer.mb(:,r_i,t);
43 |     if t>1
44 |         % db_f(t) = d_c(t) .* c(t-1); at t=1 there is no previous cell state
45 |         layer.dmb(:,r_f,t)=layer.dsc(:,:,t).*layer.sc(:,:,t-1);
46 |     end
47 |     % d_a = dact(b) .* d_b
48 |     layer.dma(:,r_ifo,t)=layer.dact_f(layer.mb(:,r_ifo,t)).*layer.dmb(:,r_ifo,t);
49 |     layer.dma(:,r_tc,t)=layer.dact_tc(layer.mb(:,r_tc,t)).*layer.dmb(:,r_tc,t);
50 | end
51 | if ~isequal(layer.prelayer_type,'input')
52 |     % d_x(t) = d_a(t)*W_x': take the x ROWS of W, which is weights_dim x 4*hiddensize
53 |     layer.dx(:)=mult_3d(layer.dma(:,:,1:end-1),layer.W(r_in,:)');
54 | end
55 | layer.dW=reshape(permute(layer.xh,[2,1,3]),layer.weights_dim,[])*reshape(permute(layer.dma,[2,1,3]),4*hiddensize,[])';
56 | end
57 | function c=mult_3d(a,b)
58 | % MULT_3D  Multiply every time slice of a 3-D array by the same matrix.
59 | %   a : (batch, input_dim, time) array;  b : (input_dim, output_dim) matrix.
60 | %   c : (batch, output_dim, time) array with c(:,:,t) = a(:,:,t)*b.
61 | % Each dimension is queried with size(a,k): size(a) alone drops a trailing
62 | % singleton, so a single timestep would otherwise be read as input_dim.
63 | batchsize=size(a,1);
64 | input_dim=size(a,2);
65 | timestep=size(a,3);
66 | output_dim=size(b,2);
67 | % Flatten (batch,time) into rows, do one matrix product, then restore the layout
68 | c=permute(reshape((reshape(permute(a,[2,1,3]),input_dim,[])'*b)',[output_dim,batchsize,timestep]),[2,1,3]);
69 | end


--------------------------------------------------------------------------------
/Layer/lstm/lstm_bp_gpu.m:
--------------------------------------------------------------------------------
 1 | function layer=lstm_bp_gpu(layer,next_layer) % backward pass through time; arrays here are indexed (feature,time,:)
 2 | if isequal(class(next_layer),'struct') % a struct next layer supplies the output error through its dx
 3 |     if ~isequal(size(next_layer.dx),layer.output_shape)
 4 |         error('Shape unmatched!')
 5 |     end
 6 |     if layer.return_sequence
 7 |         layer.e=next_layer.dx;
 8 |     else
 9 |         layer.e(:,end,:)=next_layer.dx; % error only at the last index of dimension 2 (time in this layout)
10 |     end
11 | end
12 | timestep=layer.timestep;
13 | hiddensize=layer.hiddensize;
14 | batchsize=layer.batchsize;
15 | dim=layer.input_shape(1); % NOTE(review): lstm_init_gpu stores input_shape batch-first and wires lstm_bp - confirm what feeds this function
16 | r_x=1:dim+1; % columns of W for x plus one more column (presumably the bias - confirm)
17 | r_h=dim+1+(1:hiddensize); % columns of W for h
18 | r_ifo=1:3*hiddensize; % rows of the three gates that share dact_f
19 | r_f=1:hiddensize; % forget gate rows
20 | r_i=hiddensize+1:2*hiddensize; % input gate rows
21 | r_o=2*hiddensize+1:3*hiddensize; % output gate rows
22 | r_tc=3*hiddensize+1:4*hiddensize; % candidate cell (tilde c) rows
23 | %% Backpropagation through time (steps timestep..2; step 1 is handled after the loop)
24 | for t=timestep:-1:2
25 |     % d_h(t) = e(t) + W_h' * d_a(t+1); sq flattens the (feature,1,:) slices to matrices
26 |     layer.dh(:,t,:)=sq(layer.e(:,t,:))+layer.W(:,r_h)'*sq(layer.dma(:,t+1,:));
27 |     % d_c(t) = d_h(t) .* o(t) .* dact_h(c(t))
28 |     layer.dsc(:,t,:)=layer.dh(:,t,:).*layer.mb(r_o,t,:).*layer.dact_h(layer.sc(:,t,:));
29 |     % db_o(t) = d_h(t) .* bc(t)
30 |     layer.dmb(r_o,t,:)=layer.dh(:,t,:).*layer.bc(:,t,:);
31 |     % db_i(t) = d_c(t) .* tc(t)
32 |     layer.dmb(r_i,t,:)=layer.dsc(:,t,:).*layer.mb(r_tc,t,:);
33 |     % db_tc(t) = d_c(t) .* i(t)
34 |     layer.dmb(r_tc,t,:)=layer.dsc(:,t,:).*layer.mb(r_i,t,:);
35 |     % db_f(t) = d_c(t) .* c(t-1)
36 |     layer.dmb(r_f,t,:)=layer.dsc(:,t,:).*layer.sc(:,t-1,:);
37 |     % d_a = dact(b) .* d_b, with dact_f for the three gates and dact_tc for the candidate
38 |     layer.dma(r_ifo,t,:)=layer.dact_f(layer.mb(r_ifo,t,:)).*layer.dmb(r_ifo,t,:);
39 |     layer.dma(r_tc,t,:)=layer.dact_tc(layer.mb(r_tc,t,:)).*layer.dmb(r_tc,t,:);
40 | end
41 | t=1; % same updates as in the loop, except that dmb(r_f,1,:) is not written (no c(0) term)
42 | layer.dh(:,t,:)=sq(layer.e(:,t,:))+layer.W(:,r_h)'*sq(layer.dma(:,t+1,:));
43 | layer.dsc(:,t,:)=layer.dh(:,t,:).*layer.mb(r_o,t,:).*layer.dact_h(layer.sc(:,t,:));
44 | layer.dmb(r_o,t,:)=layer.dh(:,t,:).*layer.bc(:,t,:);
45 | layer.dmb(r_i,t,:)=layer.dsc(:,t,:).*layer.mb(r_tc,t,:);
46 | layer.dmb(r_tc,t,:)=layer.dsc(:,t,:).*layer.mb(r_i,t,:);
47 | layer.dma(r_ifo,t,:)=layer.dact_f(layer.mb(r_ifo,t,:)).*layer.dmb(r_ifo,t,:);
48 | layer.dma(r_tc,t,:)=layer.dact_tc(layer.mb(r_tc,t,:)).*layer.dmb(r_tc,t,:);
49 | 
50 | layer.dma(r_f,2:end,:)=layer.dma(r_f,2:end,:)./(timestep-1); % forget-gate rows, time indices 2..end, scaled by 1/(timestep-1)
51 | layer.dma(hiddensize+1:end,:)=layer.dma(hiddensize+1:end,:)./timestep; % remaining rows, all trailing indices flattened, scaled by 1/timestep
52 | layer.dW=layer.dma(:,:)*layer.xh(:,:)'./batchsize; % (:,:) flattens trailing dims; one product sums over them, then divides by batchsize
53 | if ~isequal(layer.prelayer_type,'input') % dx is skipped when the previous layer is the input layer
54 |     layer.dx(:)=layer.W(:,r_x(1:end-1))'*sq(layer.dma(:,1:end-1,:))./batchsize; % x columns of W only; last time index of dma excluded
55 | end
56 | end
57 | function flat=sq(arr)
58 | % SQ  Collapse every dimension after the first into one, giving a matrix
59 | %     with size(arr,1) rows.
60 | n_rows=size(arr,1);
61 | flat=reshape(arr,n_rows,[]);
62 | end


--------------------------------------------------------------------------------
/Layer/lstm/lstm_ff.m:
--------------------------------------------------------------------------------
 1 | function layer=lstm_ff(layer,prelayer)
 2 | % LSTM_FF  Forward pass of the batch-first LSTM layer.
 3 | %   layer    : LSTM layer struct; xh, maX, ma, mb, sc, bc and output are updated.
 4 | %   prelayer : previous layer struct (its output is the input) or the raw input array.
 5 | %   Arrays are indexed as (batch, feature, time); xh(:,:,t) holds [x(t), bias, h(t-1)].
 6 | timestep=layer.timestep;
 7 | hiddensize=layer.hiddensize;
 8 | dim=layer.input_shape(2);
 9 | r_x=1:dim+1;%range of x and bias
10 | r_h=dim+1+(1:hiddensize);%range of h
11 | r_ifo=1:3*hiddensize;%range of forget,input and output gates
12 | r_f=1:hiddensize;%range of forget gate
13 | r_i=hiddensize+1:2*hiddensize;%range of input gate
14 | r_o=2*hiddensize+1:3*hiddensize;%range of output gate
15 | r_tc=3*hiddensize+1:4*hiddensize;%range of candidate cell (tilde c)
16 | %copy the input into the x columns of xh for timesteps 1..timestep
17 | if isequal(class(prelayer),'struct')
18 |     if ~isequal(size(prelayer.output),layer.input_shape)
19 |         error('Shape unmatched!')
20 |     end
21 |     layer.xh(:,r_x(1:end-1),1:end-1)=prelayer.output;
22 | else
23 |     layer.xh(:,r_x(1:end-1),1:end-1)=prelayer;
24 | end
25 | %compute x(t)*W_x+bias for all timesteps at once
26 | layer.maX(:)=mult_3d(layer.xh(:,r_x,1:end-1),layer.W(r_x,:));
27 | %% Feed forward
28 | for t=1:timestep
29 |     if t==1
30 |         %first step: no recurrent input is added
31 |         layer.ma(:,:,t)=layer.maX(:,:,t);
32 |     else
33 |         % a(t) = W_x * x(t) + W_h * h(t-1)
34 |         layer.ma(:,:,t)=layer.maX(:,:,t)+layer.xh(:,r_h,t)*layer.W(r_h,:);
35 |     end
36 |     %b(t)=act(a(t)): act_f for the i,f,o gates, act_tc for the candidate
37 |     layer.mb(:,r_ifo,t)=layer.act_f(layer.ma(:,r_ifo,t));
38 |     layer.mb(:,r_tc,t)=layer.act_tc(layer.ma(:,r_tc,t));
39 |     % c(t) = f(t) .* c(t-1) + i(t) .* tc(t); the f term is absent at t=1
40 |     layer.sc(:,:,t)=layer.mb(:,r_i,t).*layer.mb(:,r_tc,t);
41 |     if t>1
42 |         layer.sc(:,:,t)=layer.sc(:,:,t-1).*layer.mb(:,r_f,t)+layer.sc(:,:,t);
43 |     end
44 |     layer.bc(:,:,t)=layer.act_h(layer.sc(:,:,t));
45 |     % h(t) = o(t) .* act_h(c(t)), stored in the h columns of slot t+1
46 |     layer.xh(:,r_h,t+1)=layer.bc(:,:,t).*layer.mb(:,r_o,t);
47 | end
48 | %publish the output once, after the whole sequence (also when timestep is 1)
49 | if layer.return_sequence
50 |     layer.output=layer.xh(:,r_h,2:end);
51 | else
52 |     layer.output=layer.xh(:,r_h,end);%last hidden state h(timestep), not slot 1
53 | end
54 | end
55 | function c=mult_3d(a,b)
56 | % MULT_3D  Apply the same matrix to every time slice of a 3-D array.
57 | %   a : (batch, input_dim, time) array;  b : (input_dim, output_dim) matrix.
58 | %   c : (batch, output_dim, time) array with c(:,:,t) = a(:,:,t)*b.
59 | % Dimensions are read with size(a,k) because size(a) omits a trailing
60 | % singleton, which made a one-timestep input report input_dim as its length.
61 | batchsize=size(a,1);
62 | input_dim=size(a,2);
63 | timestep=size(a,3);
64 | output_dim=size(b,2);
65 | % Flatten (batch,time) into rows, do one matrix product, then restore the layout
66 | c=permute(reshape((reshape(permute(a,[2,1,3]),input_dim,[])'*b)',[output_dim,batchsize,timestep]),[2,1,3]);
67 | end


--------------------------------------------------------------------------------
/Layer/lstm/lstm_ff_gpu.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QuantumLiu/matDL/ce75b21dd97e9d58c074d50915d5fc000ee46afb/Layer/lstm/lstm_ff_gpu.m


--------------------------------------------------------------------------------
/Layer/lstm/lstm_init_cpu.m:
--------------------------------------------------------------------------------
 1 | function layer=lstm_init_cpu(prelayer,hiddensize,return_sequence,flag,loss)
 2 | % LSTM_INIT_CPU  Create an LSTM layer struct backed by single-precision CPU arrays.
 3 | %   prelayer        : preceding layer struct; output_shape is read as
 4 | %                     [batchsize, dim, timestep] and type is recorded.
 5 | %   hiddensize      : number of hidden units.
 6 | %   return_sequence : nonzero -> output shape [batchsize, hiddensize, timestep],
 7 | %                     otherwise [batchsize, hiddensize] (default 1).
 8 | %   flag            : nonzero also allocates the gradient buffers (default 0).
 9 | %   loss            : optional; passed to loss_handle to obtain loss_f/loss_df.
10 | if nargin<3
11 |     return_sequence=1;
12 | end
13 | if nargin<4
14 |     % flag is read below, so the short call forms need a default for it
15 |     flag=0;
16 | end
17 | %% Basic layer attributes
18 | %Input tensor shape
19 | layer.input_shape=prelayer.output_shape;
20 | layer.trainable=1;
21 | layer.flag=flag;
22 | 
23 | dim=prelayer.output_shape(2);
24 | timestep=prelayer.output_shape(3);
25 | batchsize=prelayer.output_shape(1);
26 | layer.return_sequence=return_sequence;
27 | if return_sequence
28 |     %Output tensor shape
29 |     layer.output_shape=[batchsize,hiddensize,timestep];
30 | else
31 |     layer.output_shape=[batchsize,hiddensize];
32 | end
33 | %The type of the layer
34 | layer.type='lstm';
35 | %Connected (preceding) layer type
36 | layer.prelayer_type=prelayer.type;
37 | %The hidden size of the layer
38 | layer.hiddensize=hiddensize;
39 | 
40 | layer.batch=1;
41 | layer.epoch=1;
42 | %% lstm layer attributes
43 | layer.timestep=timestep;
44 | layer.batchsize=batchsize;
45 | %n is the number of unrolled timesteps in one batch
46 | layer.n=batchsize*timestep;
47 | %x(t), a constant-one bias input and h(t-1) share one array: slot t is [x(t),1,h(t-1)]
48 | layer.xh=ones([batchsize,dim+1+hiddensize,timestep+1],'single');
49 | %h(0)=0: lstm_ff adds no recurrent term at t=1, so the stored h(0) must be zero
50 | %as well, otherwise lstm_bp accumulates a W_h gradient from the first step
51 | layer.xh(:,dim+2:end,1)=0;
52 | %W holds the weights of all four gates, including the bias row
53 | layer.weights_dim=dim+1+hiddensize;
54 | layer.W=(rand([layer.weights_dim,4*hiddensize],'single')-0.5)./100;
55 | %x(t)*W_x+bias for every timestep, computed in one product by lstm_ff
56 | layer.maX=zeros([batchsize,4*hiddensize,timestep],'single');
57 | %gate values before activation
58 | layer.ma=layer.maX;
59 | %gate values after activation
60 | layer.mb=layer.maX;
61 | %sc: cell state; bc: activated cell state
62 | layer.sc=zeros([batchsize,hiddensize,timestep],'single');
63 | layer.bc=layer.sc;
64 | %The output tensor and the output error
65 | layer.output=zeros(layer.output_shape,'single');
66 | layer.e=layer.sc;
67 | if layer.flag
68 |     %gradient buffers; dma/dmb carry one extra (zero) page for t=timestep+1
69 |     layer.dW=zeros(size(layer.W),'single');
70 |     layer.dma=zeros([batchsize,4*hiddensize,timestep+1],'single');
71 |     layer.dmb=layer.dma;
72 |     layer.dsc=layer.sc;
73 |     layer.dh=layer.dsc;
74 | end
75 | if ~strcmpi(layer.prelayer_type,'input')&&layer.flag
76 |     layer.dx=zeros(layer.input_shape,'single');
77 | end
78 | if nargin>4
79 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
80 |     layer.loss=[];
81 | end
82 | %% methods
83 | layer.act_f =@(x)act(x,'sigmoid'); % activation of the i,f,o gates
84 | layer.act_tc =@(x)act(x, 'tanh'); % activation of the candidate cell
85 | layer.act_h = @(x)act(x, 'tanh'); % activation applied to the cell state
86 | 
87 | layer.dact_f= @(x)dact(x,'sigmoid');
88 | layer.dact_tc =@(x)dact(x, 'tanh');
89 | layer.dact_h = @(x)dact(x, 'tanh');
90 | layer.ff=@(layer,prelayer)lstm_ff(layer,prelayer);
91 | layer.bp=@(layer,next_layer)lstm_bp(layer,next_layer);
92 | 
93 | layer.configs.type=layer.type;
94 | layer.configs.input_shape=layer.input_shape;
95 | layer.configs.output_shape=layer.output_shape;
96 | layer.configs.hiddensize=layer.hiddensize;
97 | layer.configs.W=size(layer.W);
98 | end


--------------------------------------------------------------------------------
/Layer/lstm/lstm_init_gpu.m:
--------------------------------------------------------------------------------
 1 | function layer=lstm_init_gpu(prelayer,hiddensize,return_sequence,flag,loss)
 2 | % LSTM_INIT_GPU  Create an LSTM layer struct backed by single-precision gpuArrays.
 3 | %   prelayer        : preceding layer struct; output_shape is read as
 4 | %                     [batchsize, dim, timestep] and type is recorded.
 5 | %   hiddensize      : number of hidden units.
 6 | %   return_sequence : nonzero -> output shape [batchsize, hiddensize, timestep],
 7 | %                     otherwise [batchsize, hiddensize] (default 1).
 8 | %   flag            : nonzero also allocates the gradient buffers (default 0).
 9 | %   loss            : optional; passed to loss_handle to obtain loss_f/loss_df.
10 | if nargin<3
11 |     return_sequence=1;
12 | end
13 | if nargin<4
14 |     % flag is read below, so the short call forms need a default for it
15 |     flag=0;
16 | end
17 | %% Basic layer attributes
18 | %Input tensor shape
19 | layer.input_shape=prelayer.output_shape;
20 | layer.trainable=1;
21 | layer.flag=flag;
22 | 
23 | dim=prelayer.output_shape(2);
24 | timestep=prelayer.output_shape(3);
25 | batchsize=prelayer.output_shape(1);
26 | layer.return_sequence=return_sequence;
27 | if return_sequence
28 |     %Output tensor shape
29 |     layer.output_shape=[batchsize,hiddensize,timestep];
30 | else
31 |     layer.output_shape=[batchsize,hiddensize];
32 | end
33 | %The type of the layer
34 | layer.type='lstm';
35 | %Connected (preceding) layer type
36 | layer.prelayer_type=prelayer.type;
37 | %The hidden size of the layer
38 | layer.hiddensize=hiddensize;
39 | 
40 | layer.batch=1;
41 | layer.epoch=1;
42 | %% lstm layer attributes
43 | layer.timestep=timestep;
44 | layer.batchsize=batchsize;
45 | %n is the number of unrolled timesteps in one batch
46 | layer.n=batchsize*timestep;
47 | %x(t), a constant-one bias input and h(t-1) share one array: slot t is [x(t),1,h(t-1)]
48 | layer.xh=ones([batchsize,dim+1+hiddensize,timestep+1],'single','gpuArray');
49 | %h(0)=0: lstm_ff adds no recurrent term at t=1, so the stored h(0) must be zero
50 | %as well, otherwise lstm_bp accumulates a W_h gradient from the first step
51 | layer.xh(:,dim+2:end,1)=0;
52 | %W holds the weights of all four gates, including the bias row
53 | layer.weights_dim=dim+1+hiddensize;
54 | layer.W=(rand([layer.weights_dim,4*hiddensize],'single','gpuArray')-0.5)./100;
55 | %x(t)*W_x+bias for every timestep, computed in one product by lstm_ff
56 | layer.maX=zeros([batchsize,4*hiddensize,timestep],'single','gpuArray');
57 | %gate values before activation
58 | layer.ma=layer.maX;
59 | %gate values after activation
60 | layer.mb=layer.maX;
61 | %sc: cell state; bc: activated cell state
62 | layer.sc=zeros([batchsize,hiddensize,timestep],'single','gpuArray');
63 | layer.bc=layer.sc;
64 | %The output tensor and the output error
65 | layer.output=zeros(layer.output_shape,'single','gpuArray');
66 | layer.e=layer.sc;
67 | if layer.flag
68 |     %gradient buffers; dma/dmb carry one extra (zero) page for t=timestep+1
69 |     layer.dW=zeros(size(layer.W),'single','gpuArray');
70 |     layer.dma=zeros([batchsize,4*hiddensize,timestep+1],'single','gpuArray');
71 |     layer.dmb=layer.dma;
72 |     layer.dsc=layer.sc;
73 |     layer.dh=layer.dsc;
74 | end
75 | if ~strcmpi(layer.prelayer_type,'input')&&layer.flag
76 |     layer.dx=zeros(layer.input_shape,'single','gpuArray');
77 | end
78 | if nargin>4
79 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
80 |     layer.loss=[];
81 | end
82 | %% methods
83 | layer.act_f =@(x)act(x,'sigmoid'); % activation of the i,f,o gates
84 | layer.act_tc =@(x)act(x, 'tanh'); % activation of the candidate cell
85 | layer.act_h = @(x)act(x, 'tanh'); % activation applied to the cell state
86 | 
87 | layer.dact_f= @(x)dact(x,'sigmoid');
88 | layer.dact_tc =@(x)dact(x, 'tanh');
89 | layer.dact_h = @(x)dact(x, 'tanh');
90 | % The batch-first lstm_ff/lstm_bp are used here; they match the array layout above
91 | layer.ff=@(layer,prelayer)lstm_ff(layer,prelayer);
92 | layer.bp=@(layer,next_layer)lstm_bp(layer,next_layer);
93 | 
94 | layer.configs.type=layer.type;
95 | layer.configs.input_shape=layer.input_shape;
96 | layer.configs.output_shape=layer.output_shape;
97 | layer.configs.hiddensize=layer.hiddensize;
98 | layer.configs.W=size(layer.W);
99 | end


--------------------------------------------------------------------------------
/Layer/tensor/tensor_init.m:
--------------------------------------------------------------------------------
 1 | function layer=tensor_init(input_shape,type,loss)
 2 | % TENSOR_INIT  Create a pass-through tensor layer (usable as an input or output layer).
 3 | %   input_shape : shape vector, stored as both the input and the output shape.
 4 | %   type        : layer type tag, stored in layer.type.
 5 | %   loss        : optional; when supplied, loss_handle(loss) provides loss_f/loss_df.
 6 | layer.input_shape=input_shape;
 7 | layer.output_shape=layer.input_shape;
 8 | layer.type=type;
 9 | has_loss=nargin>=3;
10 | if has_loss
11 |     [f,df]=loss_handle(loss);
12 |     layer.loss_f=f;
13 |     layer.loss_df=df;
14 |     layer.loss=[];
15 | end
16 | % Serializable summary of the layer
17 | cfg.type=type;
18 | cfg.input_shape=input_shape;
19 | cfg.output_shape=input_shape;
20 | layer.configs=cfg;
21 | end


--------------------------------------------------------------------------------
/Layer/tensor/tensor_init_cpu.m:
--------------------------------------------------------------------------------
 1 | function layer=tensor_init_cpu(input_shape,type,loss)
 2 | % TENSOR_INIT_CPU  Create a tensor layer; it can serve as an input or an output layer.
 3 | %   The declared name now matches the file name tensor_init_cpu.m (it used to be
 4 | %   declared as tensor_init_gpu, which MATLAB ignores in favour of the file name).
 5 | %   input_shape : shape vector, stored as both the input and the output shape.
 6 | %   type        : layer type tag, stored in layer.type.
 7 | %   loss        : optional; when supplied, loss_handle(loss) provides loss_f/loss_df.
 8 | %% Basic layer attributes
 9 | %Input tensor shape
10 | layer.input_shape=input_shape;
11 | %Output tensor shape
12 | layer.output_shape=input_shape;
13 | %The type of the layer
14 | layer.type=type;
15 | if nargin>2
16 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
17 |     layer.loss=[];
18 | end
19 | layer.configs.type=layer.type;
20 | layer.configs.input_shape=layer.input_shape;
21 | layer.configs.output_shape=layer.output_shape;
22 | end


--------------------------------------------------------------------------------
/Layer/tensor/tensor_init_gpu.m:
--------------------------------------------------------------------------------
 1 | function layer=tensor_init_gpu(input_shape,type,loss)
 2 | % TENSOR_INIT_GPU  Create a tensor layer; it can serve as an input or an output layer.
 3 | %   input_shape : shape vector, stored as both the input and the output shape.
 4 | %   type        : layer type tag, stored in layer.type.
 5 | %   loss        : optional; when supplied, loss_handle(loss) provides loss_f/loss_df.
 6 | %   No arrays are allocated here, so nothing in this function touches the GPU.
 7 | layer.input_shape=input_shape;
 8 | layer.output_shape=layer.input_shape;
 9 | layer.type=type;
10 | loss_given=nargin>=3;
11 | if loss_given
12 |     [loss_fun,loss_grad]=loss_handle(loss);
13 |     layer.loss_f=loss_fun;
14 |     layer.loss_df=loss_grad;
15 |     layer.loss=[];
16 | end
17 | summary.type=type;
18 | summary.input_shape=input_shape;
19 | summary.output_shape=input_shape;
20 | layer.configs=summary;
21 | end


--------------------------------------------------------------------------------
/Layer/tensor_init_gpu.m:
--------------------------------------------------------------------------------
 1 | function layer=tensor_init_gpu(input_shape,type,loss)
 2 | % TENSOR_INIT_GPU  Build a shape-only tensor layer (input or output layer).
 3 | %   input_shape : shape vector; the output shape is set equal to it.
 4 | %   type        : layer type tag, stored in layer.type.
 5 | %   loss        : optional loss spec; loss_handle(loss) supplies loss_f and loss_df.
 6 | shape=input_shape;
 7 | layer.input_shape=shape;
 8 | layer.output_shape=shape;
 9 | layer.type=type;
10 | if nargin<3
11 |     % no loss requested: the loss fields are simply not created
12 | else
13 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
14 |     layer.loss=[];
15 | end
16 | % configs mirrors the three basic attributes
17 | info.type=layer.type;
18 | info.input_shape=shape;
19 | info.output_shape=shape;
20 | layer.configs=info;
21 | end


--------------------------------------------------------------------------------
/Model/eval_loss.m:
--------------------------------------------------------------------------------
 1 | function [outputlayer,loss]=eval_loss(outputlayer,y_true,flag)
 2 | % EVAL_LOSS  Evaluate the loss of the output layer and, when training, store its error.
 3 | %   outputlayer : layer struct with loss_f, loss_df and output (plus loss and e when
 4 | %                 flag is set; type and return_sequence are read in that case).
 5 | %   y_true      : targets; converted to single before use.
 6 | %   flag        : nonzero appends the loss to outputlayer.loss and writes outputlayer.e.
 7 | %   loss        : size(y_true,2) times the mean of loss_f over all elements.
 8 | y_true=single(y_true); % cast once so loss_f and loss_df always receive the same type
 9 | dim=size(y_true,2);
10 | per_element=outputlayer.loss_f(y_true,outputlayer.output);
11 | loss=dim*mean(per_element(:));
12 | if flag
13 |     outputlayer.loss=[outputlayer.loss,loss];
14 |     err=outputlayer.loss_df(y_true,outputlayer.output);
15 |     if isequal(outputlayer.type,'lstm')&& ~outputlayer.return_sequence
16 |         % The error is written only to the last page of the time (3rd) dimension
17 |         outputlayer.e(:,:,end)=err;
18 |     else
19 |         outputlayer.e=err;
20 |     end
21 | end
22 | end
23 | 


--------------------------------------------------------------------------------
/Model/layer_optimize.m:
--------------------------------------------------------------------------------
 1 | function layer=layer_optimize(layer,pars,batch,epoch)
 2 | % LAYER_OPTIMIZE  Apply one optimizer step to layer.W using layer.dW.
 3 | %   layer : layer struct with W and dW (vW is created/updated when momentum>0).
 4 | %   pars  : optimizer struct. Fields: opt (optimizer name; the field name 'type'
 5 | %           is accepted as an alias because model_init/model_load use it for their
 6 | %           default), momentum (default 0), learningrate (default 0.01).
 7 | %   batch, epoch : optional counters; when given they are stored on the layer.
 8 | %           With momentum, batch==1 restarts the velocity from the current gradient.
 9 | if nargin<2||isempty(pars)
10 |     pars=struct('opt','sgd');
11 | end
12 | if ~isfield(pars,'opt')
13 |     if isfield(pars,'type')
14 |         pars.opt=pars.type;
15 |     else
16 |         pars.opt='sgd';
17 |     end
18 | end
19 | if ~isfield(pars,'momentum')
20 |     pars.momentum=0;
21 | end
22 | if ~isfield(pars,'learningrate')
23 |     pars.learningrate=0.01;
24 | end
25 | switch pars.opt
26 |     case 'sgd'
27 |         if pars.momentum >0
28 |             % (Re)start the velocity on the first batch or when none exists yet
29 |             if nargin<3||batch==1||~isfield(layer,'vW')
30 |                 layer.vW=pars.learningrate*layer.dW;
31 |             else
32 |                 layer.vW=pars.momentum*layer.vW+pars.learningrate*layer.dW;
33 |             end
34 |             layer.W=layer.W-layer.vW;
35 |         else
36 |             layer.W=layer.W-pars.learningrate*layer.dW;
37 |         end
38 |     otherwise
39 |         % An unknown optimizer used to be skipped silently, leaving W untrained
40 |         error('Unknown optimizer ''%s''.',char(pars.opt));
41 | end
42 | if nargin>=3
43 |     layer.batch=batch;
44 | end
45 | if nargin>=4
46 |     layer.epoch=epoch;
47 | end
48 | end


--------------------------------------------------------------------------------
/Model/model_evaluate.m:
--------------------------------------------------------------------------------
1 | function mean_loss=model_evaluate(model,x,y_true)
2 | % MODEL_EVALUATE  Loss of the model's predictions on x, averaged over all elements
3 | % and scaled by size(y_true,2). Uses model.predict and the loss_f of layers{end-1}.
4 | predicted=model.predict(model,x);
5 | out_layer=model.layers{end-1};
6 | per_element=out_layer.loss_f(single(y_true),predicted);
7 | n_outputs=size(y_true,2);
8 | mean_loss=n_outputs*mean(per_element(:));
9 | end


--------------------------------------------------------------------------------
/Model/model_init.m:
--------------------------------------------------------------------------------
  1 | function model=model_init(input_shape,configs ,flag,optimizer,device)
  2 | % MODEL_INIT  Build a model struct from an input shape and a cell array of layer configs.
  3 | %   input_shape : shape of one input batch; input_shape(1) is stored as model.batchsize.
  4 | %   configs     : cell array of layer config structs, dispatched on config.type.
  5 | %   flag        : nonzero builds a trainable model (default 0).
  6 | %   optimizer   : struct read by layer_optimize (opt, momentum, learningrate); when flag
  7 | %                 is set and it is missing or empty, plain SGD with rate 0.01 is used.
  8 | %   device      : 'cpu' (default) or 'gpu'.
  9 | if nargin<3
 10 |     flag=0;
 11 | end
 12 | if nargin<5
 13 |     device='cpu';
 14 | end
 15 | if flag&&(nargin<4||isempty(optimizer))
 16 |     % layer_optimize switches on the 'opt' field; 'type' is kept for older code.
 17 |     optimizer=struct('opt','sgd','type','sgd','momentum',0,'learningrate',0.01);
 18 | end
 19 | % Pick the per-layer constructor dispatcher for the requested device
 20 | switch device
 21 |     case 'cpu'
 22 |         init_layer=@layer_init_cpu;
 23 |     case 'gpu'
 24 |         init_layer=@layer_init_gpu;
 25 |     otherwise
 26 |         error('Unknown device ''%s'', expected ''cpu'' or ''gpu''.',device);
 27 | end
 28 | model.flag=flag;
 29 | model.layers=cell(1,length(configs)+1);
 30 | model.layers{1}=tensor_init(input_shape,'input');
 31 | for l=2:length(model.layers)
 32 |     model.layers{l}=init_layer(model.layers{l-1},configs{l-1},flag);
 33 | end
 34 | % The trailing 0 is a non-struct placeholder passed to the last layer's bp as "next layer"
 35 | model.layers=[model.layers,0];
 36 | for l=1:length(model.layers)-1
 37 |     disp(['layer ' ,num2str(l),' :']);
 38 |     disp(model.layers{l}.configs);
 39 | end
 40 | 
 41 | % Model-level shapes are stored without the leading batch dimension
 42 | model.input_shape=model.layers{1}.input_shape(2:end);
 43 | model.output_shape=model.layers{end-1}.output_shape(2:end);
 44 | model.batchsize=input_shape(1);
 45 | model.loss=[];
 46 | model.configs=configs;
 47 | if flag
 48 |     model.optimizer=optimizer;
 49 |     model.optimize=@(layer,optimizer,batch,epoch)layer_optimize(layer,optimizer,batch,epoch);
 50 | end
 51 | % Methods are stored as function handles on the struct
 52 | model.eval_loss=@(outputlayer,y_true,flag)eval_loss(outputlayer,y_true,flag);
 53 | model.predict=@(model,x)model_predict(model,x);
 54 | model.save=@(model,filename)model_save(model,filename);
 55 | model.evaluate=@(model,x,y_true)model_evaluate(model,x,y_true);
 56 | if flag
 57 |     model.train=@(model,x,y,nb_epoch,verbose,filename)model_train(model,x,y,nb_epoch,verbose,filename);
 58 | end
 59 | end
 60 | function layer=layer_init_gpu(prelayer,config,flag)
 61 | % LAYER_INIT_GPU  Build one GPU layer by dispatching on config.type.
 62 | %   prelayer : already-built preceding layer struct.
 63 | %   config   : layer config struct; config.loss, when present, is forwarded as the
 64 | %              constructor's final argument.
 65 | %   flag     : forwarded to the constructor unchanged.
 66 | switch config.type
 67 |     case 'lstm'
 68 |         ctor=@lstm_init_gpu;
 69 |         args={prelayer,config.hiddensize,config.return_sequence,flag};
 70 |     case 'dense'
 71 |         ctor=@dense_init_gpu;
 72 |         args={prelayer,config.hiddensize,flag};
 73 |     case 'activation'
 74 |         ctor=@activation_init_gpu;
 75 |         args={prelayer,config.act_fun,flag};
 76 |     case 'dropout'
 77 |         ctor=@dropout_init_gpu;
 78 |         args={prelayer,config.drop_rate,flag};
 79 |     otherwise
 80 |         % Previously an unknown type fell through and left layer unassigned
 81 |         error('Unknown layer type ''%s''.',config.type);
 82 | end
 83 | if isfield(config,'loss')
 84 |     args{end+1}=config.loss; % appended so the constructor's nargin check sees it
 85 | end
 86 | layer=ctor(args{:});
 87 | end
 88 | function layer=layer_init_cpu(prelayer,config,flag)
 89 | % LAYER_INIT_CPU  Build one CPU layer by dispatching on config.type.
 90 | %   prelayer : already-built preceding layer struct.
 91 | %   config   : layer config struct; config.loss, when present, is forwarded as the
 92 | %              constructor's final argument.
 93 | %   flag     : forwarded to the constructor unchanged.
 94 | switch config.type
 95 |     case 'lstm'
 96 |         ctor=@lstm_init_cpu;
 97 |         args={prelayer,config.hiddensize,config.return_sequence,flag};
 98 |     case 'dense'
 99 |         ctor=@dense_init_cpu;
100 |         args={prelayer,config.hiddensize,flag};
101 |     case 'activation'
102 |         ctor=@activation_init_cpu;
103 |         args={prelayer,config.act_fun,flag};
104 |     case 'dropout'
105 |         ctor=@dropout_init_cpu;
106 |         args={prelayer,config.drop_rate,flag};
107 |     otherwise
108 |         % Previously an unknown type fell through and left layer unassigned
109 |         error('Unknown layer type ''%s''.',config.type);
110 | end
111 | if isfield(config,'loss')
112 |     args{end+1}=config.loss; % appended so the constructor's nargin check sees it
113 | end
114 | layer=ctor(args{:});
115 | end


--------------------------------------------------------------------------------
/Model/model_load.m:
--------------------------------------------------------------------------------
 1 | function model=model_load(minimodel,batch_size,flag,optimizer,device)
 2 | % MODEL_LOAD  Rebuild a model from a minimodel struct or from a MAT-file holding one.
 3 | %   minimodel  : struct with input_shape and configs (and optionally Ws), or the name
 4 | %                of a MAT-file containing a variable called minimodel.
 5 | %   batch_size : batch size of the rebuilt model (default 32).
 6 | %   flag       : nonzero builds a trainable model (default 0).
 7 | %   optimizer  : optimizer struct, used only when flag is set (default: plain SGD,
 8 | %                learning rate 0.01).
 9 | %   device     : 'cpu' (default) or 'gpu'.
10 | %   Saved weights in minimodel.Ws{l} are copied into layer l of the new model.
11 | if nargin<2
12 |     batch_size=32;
13 | end
14 | if nargin<3
15 |     flag=0;
16 | end
17 | if ~flag
18 |     optimizer=[];
19 | elseif nargin<4
20 |     % 'opt' is the field layer_optimize reads; 'type' is kept for older code
21 |     optimizer=struct('opt','sgd','type','sgd','momentum',0,'learningrate',0.01);
22 | end
23 | if nargin<5
24 |     device='cpu';
25 | end
26 | if ischar(minimodel)
27 |     % Load into a struct instead of straight into this workspace
28 |     loaded=load(minimodel);
29 |     minimodel=loaded.minimodel;
30 | end
31 | model=model_init([batch_size,minimodel.input_shape],minimodel.configs,flag,optimizer,device);
32 | % Restore the saved weights; without this the model keeps its random initial W
33 | if isfield(minimodel,'Ws')
34 |     for l=1:min(numel(minimodel.Ws),numel(model.layers))
35 |         saved_W=minimodel.Ws{l};
36 |         if isempty(saved_W)||~isstruct(model.layers{l})||~isfield(model.layers{l},'W')
37 |             continue
38 |         end
39 |         if ~isequal(size(saved_W),size(model.layers{l}.W))
40 |             error('Saved weights of layer %d do not match the layer configuration.',l);
41 |         end
42 |         % Indexed assignment keeps the class (and device) of the existing W
43 |         model.layers{l}.W(:)=saved_W;
44 |     end
45 | end
46 | end


--------------------------------------------------------------------------------
/Model/model_predict.m:
--------------------------------------------------------------------------------
 1 | function y_pred=model_predict(model,x)
 2 | % MODEL_PREDICT  Run the model forward over x in batches of model.batchsize.
 3 | %   model  : model struct (layers, batchsize, output_shape).
 4 | %   x      : 2-D or 3-D input with samples along dimension 1.
 5 | %   y_pred : single array of size [size(x,1), model.output_shape].
 6 | %   When size(x,1) is not a multiple of the batch size, the last batch is taken
 7 | %   from the end of x and overlaps the previous one. When there are fewer samples
 8 | %   than one batch, x is zero-padded to a full batch and the padding is discarded.
 9 | batchsize=model.batchsize;
10 | shape_x=size(x);
11 | if numel(shape_x)>3
12 |     error('The number of dims of input data must be 2/3');
13 | end
14 | n_samples=shape_x(1);
15 | y_pred=zeros([n_samples,model.output_shape],'single');
16 | if n_samples==0
17 |     return
18 | end
19 | if n_samples<batchsize
20 |     % Layers are allocated for exactly batchsize rows, so pad up to one batch
21 |     padded=zeros([batchsize,shape_x(2:end)],'like',x);
22 |     padded(1:n_samples,:,:)=x;
23 |     out=forward_batch(model,padded);
24 |     y_pred(:,:,:)=out(1:n_samples,:,:);
25 |     return
26 | end
27 | starts=1:batchsize:n_samples-batchsize+1;
28 | if mod(n_samples,batchsize)
29 |     % Remainder: one extra batch aligned to the end of the data
30 |     starts(end+1)=n_samples-batchsize+1;
31 | end
32 | for first=starts
33 |     rows=first:first+batchsize-1;
34 |     % A trailing ':' is harmless for 2-D data, so one form serves 2-D and 3-D
35 |     y_pred(rows,:,:)=forward_batch(model,x(rows,:,:));
36 | end
37 | end
38 | function out=forward_batch(model,x_batch)
39 | % FORWARD_BATCH  Feed one batch through every layer and return the gathered output.
40 | model.layers{1}=x_batch;
41 | for l=2:length(model.layers)-1
42 |     model.layers{l}=model.layers{l}.ff(model.layers{l},model.layers{l-1});
43 | end
44 | out=gather(model.layers{end-1}.output);
45 | end


--------------------------------------------------------------------------------
/Model/model_save.m:
--------------------------------------------------------------------------------
 1 | function model_save(model,filename)
 2 | % MODEL_SAVE  Write a compact copy of the model ("minimodel") to a v7.3 MAT-file.
 3 | %   Stored: input_shape, output_shape, configs, and Ws{l}, the gathered weight
 4 | %   matrix W of every trainable layer l (the first and last cells are skipped).
 5 | minimodel.input_shape=model.input_shape;
 6 | minimodel.output_shape=model.output_shape;
 7 | minimodel.configs=model.configs;
 8 | last_layer=length(model.layers)-1;
 9 | for idx=2:last_layer
10 |     current=model.layers{idx};
11 |     if current.trainable
12 |         minimodel.Ws{idx}=gather(current.W);
13 |     end
14 | end
15 | save(filename,'minimodel','-v7.3');
16 | end


--------------------------------------------------------------------------------
/Model/model_train.m:
--------------------------------------------------------------------------------
 1 | function model=model_train(model,x,y,nb_epoch,verbose,filename)
 2 | % MODEL_TRAIN  Train the model with mini-batches for nb_epoch epochs.
 3 | %   model    : trainable model struct (layers, batchsize, flag, eval_loss; optimize and
 4 | %              optimizer are needed when a layer is trainable).
 5 | %   x, y     : inputs and targets, 2-D or 3-D, with samples along dimension 1.
 6 | %   nb_epoch : number of passes over the data.
 7 | %   verbose  : 0 no progress display (default), >=1 waitbar, >=2 loss figures after
 8 | %              each epoch, >=3 batch-loss plot after every batch.
 9 | %   filename : when given (not 0 or empty) the model is saved there after training.
10 | %   Samples beyond the last full batch are not used. The per-epoch and per-batch
11 | %   losses are returned in model.epoch_loss and model.batch_loss.
12 | if nargin<5
13 |     verbose=0;
14 | end
15 | if nargin<6
16 |     filename=0;
17 | end
18 | batchsize=model.batchsize;
19 | shape_x=size(x);
20 | shape_y=size(y);
21 | if numel(shape_x)>3
22 |     error('The number of dims of input data must be 2/3');
23 | end
24 | if numel(shape_y)>3
25 |     error('The number of dims of output data must be 2/3');
26 | end
27 | batches_per_epoch=floor(shape_x(1)/batchsize);
28 | if batches_per_epoch<1
29 |     % Without a full batch no loss is ever computed and the epoch loss is undefined
30 |     error('model_train needs at least one full batch (%d samples), got %d.',batchsize,shape_x(1));
31 | end
32 | g_batch=1;
33 | nb_batch=batches_per_epoch*nb_epoch;
34 | h=[]; % waitbar handle; stays empty when verbose is 0
35 | if verbose
36 |     h = waitbar(g_batch/nb_batch,'Training model');
37 | end
38 | model.epoch_loss=[];
39 | model.batch_loss=[];
40 | if verbose>=2
41 |     f_epoch=figure('Name',' epochs loss');
42 |     f_batch=figure('Name',' batches loss');
43 | end
44 | for epoch=1:nb_epoch
45 |     tic;
46 |     epoch_batch_loss=[];
47 |     for batch=1:batches_per_epoch
48 |         rows=(batch-1)*batchsize+1:batch*batchsize;
49 |         %% ff
50 |         % A trailing ':' is harmless for 2-D data, so one form serves 2-D and 3-D
51 |         model.layers{1}=x(rows,:,:);
52 |         for l=2:length(model.layers)-1
53 |             model.layers{l}=model.layers{l}.ff(model.layers{l},model.layers{l-1});
54 |         end
55 |         %% eval
56 |         model.layers{end-1}=model.eval_loss(model.layers{end-1},y(rows,:,:),model.flag);
57 |         epoch_batch_loss=[epoch_batch_loss,model.layers{end-1}.loss(end)];
58 |         cu_epoch_loss=mean(epoch_batch_loss(:));
59 |         model.batch_loss=model.layers{end-1}.loss;
60 |         if verbose>=3
61 |             set(0,'CurrentFigure',f_batch);
62 |             plot(model.batch_loss,'r-');hold off;
63 |         end
64 |         if verbose&&ishandle(h)
65 |             pro=num2str(100*g_batch/nb_batch);
66 |             message=['Training model ','Epoch: ',num2str(epoch),'/',num2str(nb_epoch), ' Progress: ',pro,'%',' loss: ',num2str(cu_epoch_loss)];
67 |             waitbar(g_batch/nb_batch,h,message);
68 |         end
69 |         %% bp
70 |         % Walk the layers backwards; trainable layers are updated right after their bp
71 |         for l=length(model.layers)-1:-1:2
72 |             if model.layers{l}.trainable
73 |                 model.layers{l}=model.optimize(model.layers{l}.bp(model.layers{l},model.layers{l+1}),model.optimizer,batch,epoch);
74 |             else
75 |                 model.layers{l}=model.layers{l}.bp(model.layers{l},model.layers{l+1});
76 |             end
77 |         end
78 |         g_batch=g_batch+1;
79 |     end
80 |     toc
81 |     model.epoch_loss=[model.epoch_loss,cu_epoch_loss];
82 |     if verbose>=2
83 |         set(0,'CurrentFigure',f_epoch);
84 |         plot(model.epoch_loss,'r-');
85 |         set(0,'CurrentFigure',f_batch);
86 |         plot(model.batch_loss,'r-');
87 |     end
88 | end
89 | if ~isequal(filename,0)&&~isempty(filename)
90 |     model.save(model,filename);
91 | end
92 | % The waitbar only exists when verbose is set (and may have been closed by the user)
93 | if ~isempty(h)&&ishandle(h)
94 |     delete(h);
95 | end
96 | end


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # matDL  
  2 | ![matDL icon](./matDL_128X128.ico)  
  3 | v0.42 BETA  
  4 | A lightweight MATLAB deeplearning toolbox,based on gpuArray.  
  5 | One of the fastest matlab's RNN libs.
  6 | ## Performance
  7 | Model: an LSTM model with hidden sizes [1024,1024,1024], 10 
  8 | timesteps and a 256-dimensional input.  
  9 | Device: i7-4710hq,GTX940m  
 10 | matDL: 60sec/epoch Keras(1.2.2,Tensorflow backend,cudnn5.1): 29sec/epoch 
 11 | ## Features
 12 | Highly parallel implementation.
 13 | 
 14 | 
 15 | * Concatenate the weights of the 4 gates into **W**, and the values of **x** and **h** for every timestep in a batch into a 3D tensor **xh**. Compute **x*W** for every timestep of every sample in a batch at once.
 16 | * Compute the activated values of the **input, forget and output gates** at once.
 17 | 
 18 | OOP style
 19 | * Use `struct` type to define a **layer** class and a **model** class.Define **ff**, **bp**, **optimize** methods by using a `FunctionHandle`.  
 20 | 
 21 | ## APIs
 22 | ### Model
 23 | * A `model` is a set of `layers`,`data` and `optimizer`.
 24 | * build
 25 |     * `model=model_init(input_shape,configs ,flag,optimizer)`
 26 |     * arguments:  
 27 |         * `input_shape` : a `vector`,`[input_dim,batchsize]` or `[input_dim,timestep,batchsize]`
 28 |         * `configs` : `cell` ,configures of each layers  
 29 |         * `flag` : `bool`, 0 builds a prediction model, 1 builds a training model
 30 |         * `optimizer` : `struct` ,keywords: `opt`(type of optimizer) ,`learningrate` 
 31 | * attributes :
 32 |     * `model.input_shape`  
 33 |     * `model.output_shape`
 34 |     * `model.batchsize`
 35 |     * `model.configs`
 36 |     * `model.flag`
 37 |     * `model.layers`
 38 |     * `model.optimizer` (if `flag`)
 39 |     * `model.loss`
 40 | * methods:
 41 |     * private:
 42 |         * `model.eval_loss=@(outputlayer,y_true,flag)eval_loss(outputlayer,y_true,flag)`
 43 |         * `model.optimize=@(layer,optimizer,batch,epoch)layer_optimize(layer,optimizer,batch,epoch)`
 44 |     * public:
 45 |         * `model.train=@(model,x,y,nb_epoch,verbose,filename)model_train(model,x,y,nb_epoch,verbose,filename)`
 46 |             * `model=model.train(model,x,y,nb_epoch,verbose,filename)`  
 47 |                 * arguments:
 48 |                     * `model` : self  
 49 |                     * `x`:input,shape:[dim,timestep,nb_samples],or [dim,nb_samples]  
 50 |                     * `y`:targets  
 51 |                     * `nb_epoch`: how many epochs you want to train
 52 |                     * `verbose` : 0, 1, 2 or 3. 0 means no waitbar and no figures, 1 means showing the waitbar only, 2 means showing the waitbar and plotting figures every epoch, 3 means showing the waitbar and plotting figures every epoch and every batch.   
 53 |         * `model.predict=@(model,x)model_predict(model,x)`
 54 |             * `y=model.predict(model,x)`  
 55 |         * `model.evaluate=@(model,x,y_true)model_evaluate(model,x,y_true)`
 56 |             * `mean_loss=model.evaluate(model,x,y_true)`
 57 |         * `model.save=@(model,filename)model_save(model,filename)`  
 58 |             *  `model.save(model,filename)`    
 59 |             * Save layer weights and configs to a `.mat` file.
 60 | * reload:  
 61 |     * `model=model_load(minimodel,batch_size,flag,optimizer)`   
 62 |         * `minimodel` is the minimodel saved by `model.save()`,can be a `struct` variable or a `string` of filename.  
 63 | * **example**: 
 64 | x=rand(100,10,3200,'single','gpuArray');   
 65 | y=(zeros(512,10,3200,'single','gpuArray'));  
 66 | y(1,:,:)=1;  
 67 | %% Define a model which has 3 lstm layers with 512 hidden units each, followed by a softmax activation layer  
 68 | input_shape=[100,10,64];%input dim is 100,timestep is 10,batchsize is 64  
 69 | hiddensize=[512,512,512];  
 70 | for l=1:length(hiddensize)  
 71 |     configs{l}.type='lstm';  
 72 |     configs{l}.hiddensize=hiddensize(l);  
 73 |     configs{l}.return_sequence=1;  
 74 | end  
 75 | configs{l+1}.type='activation';  
 76 | configs{l+1}.act_fun='softmax';  
 77 | configs{l+1}.loss='categorical_cross_entropy';  
 78 | optimizer.learningrate=0.1;  
 79 | optimizer.momentum=0.2;  
 80 | optimizer.opt='sgd';
 81 | model=model_init(input_shape,configs,1,optimizer);  
 82 | %% Train the model  
 83 | model=model.train(model,x,y,nb_epoch,3,'example/minimodel_f.mat');  
 84 | or  
 85 | `test_lstm(50,[512,512,512],256,10,64,5);`
 86 |     
 87 | 
 88 | ### Layers
 89 | #### Layer class: 
 90 | * attributes:  
 91 |     * `type` : `string`,type of the layer,available types:`input`,`dense`,`lstm`,`activation`  
 92 |     * `prelayer_type` : `string`,type of the previous layer,available types:`input`,`dense`,`lstm`,`activation`
 93 |     * `trainable` : `bool`,is the layer trainable
 94 |     * `flag` : train model or predict model  
 95 |     * `configs` :configures of the layer  
 96 |     * `input_shape` : `vector`,`[input_dim,batchsize]` or `[input_dim,timestep,batchsize]`
 97 |     * `output_shape` : `vector`,`[hiddensize,batchsize]`or`[hiddensize,timestep,batchsize]`
 98 |     * `batch` : `int`,how many batches have been passed
 99 |     * `epoch` : same to `batch`
100 | * methods:  
101 |     * `layer=**layer_init(prelayer,loss,kwgrs)`
102 |         * Build and initialize a layer. If the layer is an `input` layer, the `prelayer` argument should be `input_shape`.
103 |     * `layer=layer.ff(layer,prelayer)`
104 |     * `layer=layer.bp(layer,nextlayer)`  
105 |     ##### LSTM layer(layer)  
106 |         * `layer=lstm_init_gpu(prelayer,hiddensize,return_sequence,flag,loss)`
107 |         * An LSTM (**Long-Short Term Memory unit - Hochreiter 1997**) layer, see [here](http://deeplearning.net/tutorial/lstm.html) for a step-by-step description of the algorithm.
108 |             * available configurations:
109 |                 * `config.hiddensize` : `int`(`double`),number of hidden units(output dim)
110 |                 * `config.return_sequence` :`bool`(`double`),return sequences or not.if `return_sequences`,output will be a 3D tensor with shape (hiddensize,timestep,batchsize). Else ,a 2D tensor with shape (hiddensize,batchsize). 
111 |                 * `config.loss` : `string`, type of loss function. Optional; only used if the layer is an output layer.
112 |                 * **example**
113 |                 
114 | 
115 | 	 
116 | 


--------------------------------------------------------------------------------
/cudnn_LSTM_FF.m:
--------------------------------------------------------------------------------
1 | [e,n]=loadlibrary('C:\projects\mexcuda\matcudnn\mat_cudnn_test','mat_cudnn_test.h','includepath','C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\include','addheader','cudnn.h','addheader','cuda_runtime.h'); % load the mat_cudnn_test shared library; all paths are hard-coded to one machine, and e/n are not used afterwards
2 | ax=ones([128,256,20],'single','gpuArray'); % all-ones single-precision gpuArray of size 128x256x20 used as input
3 | reserve=libpointer('voidPtr'); % void pointer handed to the library call as its second argument
4 | tic; % time 100 consecutive calls
5 | for i=1:100
6 | calllib('mat_cudnn_test','MAT_CUDNN_RNN_LSTM_FF',ax,reserve); % NOTE(review): the library is never unloaded in this script (no unloadlibrary call)
7 | end
8 | toc;
9 | 


--------------------------------------------------------------------------------
/cumexhelp.h:
--------------------------------------------------------------------------------
 1 | #ifndef CUMEXHELP
 2 |     #define CUMEXHELP
 3 |     #include <cudnn.h>
 4 |     #include <cuda_runtime.h>
 5 |     #include <cuda.h>
 6 |     #include <stdio.h>
 7 |     #include <device_launch_parameters.h>
 8 |     #include <mex.h>
 9 |     #include "gpu/mxGPUArray.h"
10 | #endif


--------------------------------------------------------------------------------
/example/char_rnn/Christ2FSM.m:
--------------------------------------------------------------------------------
 1 | function fsmgospel=Christ2FSM(bibel)
 2 | %CHRIST2FSM Replace religious keywords in a text by their "FSM" counterparts.
 3 | %   fsmgospel=Christ2FSM(bibel) runs strrep on BIBEL once per keyword pair,
 4 | %   in list order, replacing dict{1}{i} by dict{2}{i}, and returns the
 5 | %   rewritten text. Matching is case-sensitive.
 6 | % NOTE(review): 'angle'/'Angle' may be a typo for 'angel'/'Angel' - confirm.
 7 | dict{1}={'god','God','LORD','Lord','lord','holy','Holy',' Amen','heaven','Heaven','hell','Hell','angle','Angle'...
 8 |     ,'demon','Demon','christ','Christ','water','Water'};
 9 | dict{2}={'monster','Monster','FSM','FSM','FSM','yummy','Yummy',' RAmen','plate','Plate','sewer','Sewer','ball','Ball'...
10 |     ,'fork','Fork','pasta','Pasta','soup','Soup'};
11 | % Compare the lengths with ~=. The former "~length(a)==length(b)" negated
12 | % length(a) first (unary ~ binds tighter than ==), so it could never fire.
13 | if numel(dict{1})~=numel(dict{2})
14 |     error('Keywords dict length unmatched');
15 | end
16 | fsmgospel=bibel;
17 | for i=1:numel(dict{1})
18 |     fsmgospel=strrep(fsmgospel,dict{1}{i},dict{2}{i});
19 | end


--------------------------------------------------------------------------------
/example/char_rnn/char_rnn.m:
--------------------------------------------------------------------------------
 1 | function char_rnn(data_filename,hiddensize,timestep,batch_size,nb_epoch)
 2 | %CHAR_RNN Build and train a stacked-LSTM character model.
 3 | %   Layer stack: numel(hiddensize)-2 sequence-returning LSTM layers, a
 4 | %   dropout layer (rate 0.5), one LSTM layer that returns only its last
 5 | %   step, a dense layer and a softmax activation with categorical cross
 6 | %   entropy loss. The trained model is saved to 'example/minimodel_f.mat'.
 7 | %
 8 | %   data_filename : .mat file holding the variables x and y
 9 | %                   (presumably produced by txt2seq - confirm)
10 | %   hiddensize    : vector with at least 2 entries; the last entry is the
11 | %                   size of the dense layer
12 | %   timestep      : number of columns of x/y grouped into one sequence
13 | %   batch_size    : batch size placed in the model input shape
14 | %   nb_epoch      : number of training epochs
15 | if numel(hiddensize)<2
16 |     error('hiddensize must contain at least 2 entries');
17 | end
18 | data=load(data_filename,'x','y');
19 | x=data.x;
20 | y=data.y;
21 | % Keep only as many columns as fill whole batches of whole sequences.
22 | % size(.,2) is used instead of length(.), which returns the largest
23 | % dimension and is only the column count when there are more columns than rows.
24 | span=timestep*batch_size;
25 | x=reshape(x(:,1:span*floor(size(x,2)/span)),size(x,1),timestep,[]);
26 | y=reshape(y(:,1:span*floor(size(y,2)/span)),size(y,1),timestep,[]);
27 | y=squeeze(y(:,end,:)); % target is the last step of every sequence
28 | input_shape=[size(x,1),timestep,batch_size];
29 | % The layer index is derived from the layer count instead of the loop
30 | % variable, which is left empty when the loop range is empty.
31 | nb_seq_lstm=numel(hiddensize)-2;
32 | configs=cell(1,nb_seq_lstm+4);
33 | for l=1:nb_seq_lstm
34 |     configs{l}.type='lstm';configs{l}.hiddensize=hiddensize(l);configs{l}.return_sequence=1;
35 | end
36 | l=nb_seq_lstm+1;
37 | configs{l}.type='dropout';configs{l}.drop_rate=0.5;
38 | l=l+1;
39 | configs{l}.type='lstm';configs{l}.hiddensize=hiddensize(l-1);configs{l}.return_sequence=0;
40 | configs{l+1}.type='dense';configs{l+1}.hiddensize=hiddensize(l);
41 | configs{l+2}.type='activation';configs{l+2}.act_fun='softmax';configs{l+2}.loss='categorical_cross_entropy';
42 | optimizer.learningrate=0.001;
43 | optimizer.momentum=0;
44 | optimizer.opt='sgd';
45 | model=model_init(input_shape,configs,1,optimizer);
46 | profile on;
47 | model=model.train(model,x,y,nb_epoch,3,'example/minimodel_f.mat');
48 | profile report;
49 | end
50 | 


--------------------------------------------------------------------------------
/example/char_rnn/textgenerate.m:
--------------------------------------------------------------------------------
1 | function text=textgenerate(model,dic,term) % NOTE(review): unfinished stub - the output `text` is never assigned and model, dic and term are not used
2 | seed=double('In the beginning Monster created the plate and the earth.Ge1:2 And the earth was without form, and void; and darkness was upon the face of the deep.'); % character codes of a fixed seed sentence
3 | seed=seed(1:50) % keep the first 50 codes; there is no trailing semicolon, so the value is echoed to the console
4 | end
5 | function index=sample(pred,temp) % Return, for each column of pred, the row index of its largest temperature-scaled value.
6 | pred=exp(log(double(pred))./temp); % for positive pred this equals pred.^(1/temp), computed in double precision
7 | [~,index]=max(pred,[],1); % NOTE(review): this is an argmax, not a random draw; for temp>0 the scaling is monotonic, so temp does not change the result
8 | end


--------------------------------------------------------------------------------
/example/char_rnn/txt2seq.m:
--------------------------------------------------------------------------------
 1 | function [x,y,dic]=txt2seq(text,threshold)
 2 | %TXT2SEQ One-hot encode a text into input/target character sequences.
 3 | %   text      : a char array, or the name of an existing file whose
 4 | %               content is read with importdata
 5 | %   threshold : characters occurring at most length(text)/threshold times
 6 | %               are replaced by ',' (default 50000)
 7 | %   x, y      : int8 one-hot matrices with one row per distinct character
 8 | %               and one column per position; y is x shifted by one position
 9 | %   dic       : character codes returned by unique, one per row of x/y
10 | if nargin<2
11 |     threshold=50000;
12 | end
13 | if exist(text,'file')
14 |     text=cell2mat(importdata(text)');
15 | end
16 | % Replace rare characters by ','.
17 | [codes,~,code_idx]=unique(double(text));
18 | min_count=length(text)/threshold;
19 | for k=1:length(codes)
20 |     is_k=(code_idx==k);
21 |     if nnz(is_k)<=min_count
22 |         text(is_k)=',';
23 |     end
24 | end
25 | % Rebuild the dictionary on the filtered text and fill the one-hot matrix
26 | % in a single indexed assignment.
27 | [dic,~,char_idx]=unique(double(text));
28 | nb_chars=length(char_idx);
29 | seq=zeros(length(dic),nb_chars,'int8');
30 | seq(sub2ind(size(seq),char_idx(:)',1:nb_chars))=1;
31 | x=seq(:,1:end-1);
32 | y=seq(:,2:end);
33 | end
34 |     
35 |     


--------------------------------------------------------------------------------
/example/keras_test_lstm.py:
--------------------------------------------------------------------------------
 1 | from keras.models import Sequential
 2 | from keras.layers import LSTM
 3 | import numpy as np
 4 | import time
 5 | def main(nb_batch=100,hiddensize=512,input_dim=100,timestep=10,batch_size=32,nb_epoch=1):
 6 |     x=np.ones((nb_batch*batch_size,timestep,input_dim)).astype('float32')
 7 |     y=np.ones((nb_batch*batch_size,timestep,hiddensize))
 8 |     model = Sequential()
 9 |     model.add(LSTM(implementation=2,output_dim=hiddensize, input_shape=(timestep,input_dim),return_sequences=True))
10 | #==============================================================================
11 | #     model.add(LSTM(output_dim=hiddensize,return_sequences=True))
12 | #     model.add(LSTM(output_dim=hiddensize,return_sequences=True))
13 | #==============================================================================
14 |     model.compile(loss='mse',optimizer='sgd')
15 |     start=time.time()
16 |     model.predict(x=x,batch_size=batch_size)
17 |     duration=time.time()-start
18 |     print('Duration: ',duration,' sec')
19 | if __name__ == "__main__":
20 |     main(nb_batch=100,hiddensize=512,input_dim=256,timestep=20,batch_size=128,nb_epoch=1)
21 | 


--------------------------------------------------------------------------------
/example/test_lstm.asv:
--------------------------------------------------------------------------------
 1 | function test_lstm(nb_batch,hiddensizes,input_dim,timestep,batch_size,nb_epoch) % NOTE(review): .asv is a MATLAB editor autosave; this appears to be a stale copy of example/test_lstm.m
 2 | optimizer.learningrate=0.01;
 3 | optimizer.momentum=0;
 4 | optimizer.opt='sgd';
 5 | x=rand(input_dim,timestep,batch_size*nb_batch,'single','gpuArray'); % samples are indexed along the last dimension here (test_lstm.m puts them first)
 6 | y=(zeros(hiddensizes(end),timestep,batch_size*nb_batch,'single','gpuArray'));
 7 | y(1,:,:)=1; % one-hot targets: class 1 everywhere
 8 | input_shape=[input_dim,timestep,batch_size];
 9 | for l=1:length(hiddensizes)
10 | configs{l}.type='lstm';configs{l}.hiddensize=hiddensizes(l);configs{l}.return_sequence=1;
11 | 
12 | end
13 | configs{l+1}.type='activation';configs{l+1}.act_fun='softmax';configs{l+1}.loss='categorical_cross_entropy';
14 | model=model_init(input_shape,configs,1,optimizer);
15 | profile on;
16 | model=model.train(x,y,nb_epoch,2,0); % NOTE(review): test_lstm.m passes the model itself as first argument (model.train(model,x,...)) - confirm this call is outdated
17 | profile report;
18 | end


--------------------------------------------------------------------------------
/example/test_lstm.m:
--------------------------------------------------------------------------------
 1 | function test_lstm(nb_batch,hiddensizes,input_dim,timestep,batch_size,nb_epoch) % Train a stacked-LSTM model on synthetic gpuArray data while the MATLAB profiler is running.
 2 | optimizer.learningrate=0.01;
 3 | optimizer.momentum=0.2;
 4 | optimizer.opt='sgd';
 5 | x=sin(ones(batch_size*nb_batch,input_dim,timestep,'single','gpuArray')+5); % every element equals sin(6); samples are indexed along the first dimension
 6 | y=(zeros(batch_size*nb_batch,hiddensizes(end),timestep,'single','gpuArray'));
 7 | y(:,1,:)=1; % one-hot targets: class 1 for every sample and timestep
 8 | input_shape=[batch_size,input_dim,timestep];
 9 | for l=1:length(hiddensizes)
10 | configs{l}.type='lstm';configs{l}.hiddensize=hiddensizes(l);configs{l}.return_sequence=1;
11 | end
12 | configs{l+1}.type='dropout';configs{l+1}.drop_rate=0.5;
13 | configs{l+2}.type='activation';configs{l+2}.act_fun='softmax';configs{l+2}.loss='categorical_cross_entropy';
14 | model=model_init(input_shape,configs,1,optimizer);
15 | profile on;
16 | model=model.train(model,x,y,nb_epoch,3,0); % verbose=3; filename=0, so model_train skips saving
17 | %loss=model.evaluate(model,x,y);
18 | %disp(loss);
19 | profile report;
20 | end


--------------------------------------------------------------------------------
/example/test_mlp.m:
--------------------------------------------------------------------------------
 1 | function test_mlp(nb_batch,hiddensize,input_dim,batch_size,nb_epoch)
 2 | %TEST_MLP Train, evaluate and run a small MLP on random data.
 3 | %   Builds dense -> Relu -> dropout(0.5) for every entry of hiddensize,
 4 | %   followed by a softmax activation with categorical cross entropy loss.
 5 | %   Samples are rows: x is [batch_size*nb_batch, input_dim] and y is
 6 | %   [batch_size*nb_batch, hiddensize(end)] with class 1 set for every sample.
 7 | input_shape=[batch_size,input_dim];
 8 | % Layers are appended with end+1. The previous version advanced its index
 9 | % only twice per three layers, so every dropout entry except the last one
10 | % was overwritten by the next dense layer.
11 | configs={};
12 | for i=1:length(hiddensize)
13 |     configs{end+1}=struct('type','dense','hiddensize',hiddensize(i)); %#ok<AGROW>
14 |     configs{end+1}=struct('type','activation','act_fun','Relu'); %#ok<AGROW>
15 |     configs{end+1}=struct('type','dropout','drop_rate',0.5); %#ok<AGROW>
16 | end
17 | configs{end+1}=struct('type','activation','act_fun','softmax','loss','categorical_cross_entropy');
18 | optimizer.learningrate=0.01;
19 | optimizer.momentum=0.5;
20 | optimizer.opt='sgd';
21 | model=model_init(input_shape,configs,1,optimizer);
22 | x=rand(batch_size*nb_batch,input_dim);
23 | y=(zeros(batch_size*nb_batch,hiddensize(end)));
24 | y(:,1)=1; % y is 2-D, so two subscripts are enough
25 | model=model.train(model,x,y,nb_epoch,3,0);%not save
26 | loss=model.evaluate(model,x,y);
27 | disp(loss);
28 | y_pred=model.predict(model,x); %#ok<NASGU> only exercises predict
29 | end


--------------------------------------------------------------------------------
/matDL_128X128.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QuantumLiu/matDL/ce75b21dd97e9d58c074d50915d5fc000ee46afb/matDL_128X128.ico


--------------------------------------------------------------------------------
/mat_cudnn.cu:
--------------------------------------------------------------------------------
  1 | #include"mat_cudnn.h"
  2 | void GET_GPU_CONST_PTR(mxArray const *arrayPtr,float const *&dataPtr) // Store in dataPtr the read-only device data pointer of the GPU array wrapped by arrayPtr (data is assumed to be single precision - confirm with callers).
  3 | {   // dataPtr is taken by reference: with the former by-value pointer the assignment below never reached the caller. Existing calls that pass a pointer variable compile unchanged.
  4 |     dataPtr=(float const *)(mxGPUGetDataReadOnly(mxGPUCreateFromMxArray(arrayPtr))); // mxGPUCreateFromMxArray yields a const mxGPUArray, so the read-only accessor is used. NOTE(review): the mxGPUArray is never passed to mxGPUDestroyGPUArray - confirm ownership.
  5 | }
  6 | void GET_GPU_PTR(mxArray const *arrayPtr,float *dataPtr) // Looks up the device data pointer of the GPU array wrapped by arrayPtr.
  7 | {   // NOTE(review): dataPtr is passed by value, so the assignment below is not visible to the caller.
  8 |     dataPtr=(float *)(mxGPUGetData(mxGPUCreateFromMxArray(arrayPtr))); // NOTE(review): the mxGPUArray created here is never destroyed in this function
  9 | }
 10 | void MAT_CUDNN_LSTM_FF(mxArray const *x_array,mxArray const *w_array,void **reserveSpace,int* minibatch,int* hiddenSize,int* inputSize,int* seqLength ) // Sets up cuDNN descriptors for an LSTM and calls cudnnRNNForwardTraining. NOTE(review): unfinished - several names used below are not declared in this function.
 11 | {   // -------------------------   
 12 |    // Create cudnn context
 13 |    // -------------------------  
 14 |    mxInitGPU();
 15 |    cudnnHandle_t cudnnHandle;   
 16 |    cudnnErrCheck(cudnnCreate(&cudnnHandle));
 17 | 
 18 |    cudnnTensorDescriptor_t *xDesc, *yDesc, *dxDesc, *dyDesc;
 19 |    cudnnTensorDescriptor_t hxDesc, cxDesc;
 20 |    cudnnTensorDescriptor_t hyDesc, cyDesc;
 21 |    cudnnTensorDescriptor_t dhxDesc, dcxDesc;
 22 |    cudnnTensorDescriptor_t dhyDesc, dcyDesc;
 23 |    
 24 |    xDesc = (cudnnTensorDescriptor_t*)malloc(*seqLength * sizeof(cudnnTensorDescriptor_t));
 25 |    yDesc = (cudnnTensorDescriptor_t*)malloc(*seqLength * sizeof(cudnnTensorDescriptor_t));
 26 |    dxDesc = (cudnnTensorDescriptor_t*)malloc(*seqLength * sizeof(cudnnTensorDescriptor_t));
 27 |    dyDesc = (cudnnTensorDescriptor_t*)malloc(*seqLength * sizeof(cudnnTensorDescriptor_t));
 28 |    
 29 |    int dimA[3];
 30 |    int strideA[3];
 31 |    // In this example dimA[1] is constant across the whole sequence
 32 |    // This isn't required, all that is required is that it does not increase.
 33 |    for (int i = 0; i < *seqLength; i++) {
 34 |       cudnnErrCheck(cudnnCreateTensorDescriptor(&xDesc[i]));
 35 |       cudnnErrCheck(cudnnCreateTensorDescriptor(&yDesc[i]));
 36 |       cudnnErrCheck(cudnnCreateTensorDescriptor(&dxDesc[i]));
 37 |       cudnnErrCheck(cudnnCreateTensorDescriptor(&dyDesc[i]));
 38 |    
 39 |       dimA[0] = *miniBatch; // NOTE(review): the parameter is spelled `minibatch`; `miniBatch` is not declared in this function (also used twice more below)
 40 |       dimA[1] = *inputSize;
 41 |       dimA[2] = 1;
 42 |      
 43 |       strideA[0] = dimA[2] * dimA[1];
 44 |       strideA[1] = dimA[2];
 45 |       strideA[2] = 1;
 46 | 
 47 |       cudnnErrCheck(cudnnSetTensorNdDescriptor(xDesc[i], CUDNN_DATA_FLOAT, 3, dimA, strideA));
 48 |       cudnnErrCheck(cudnnSetTensorNdDescriptor(dxDesc[i], CUDNN_DATA_FLOAT, 3, dimA, strideA));
 49 |       
 50 |       dimA[0] = *miniBatch;
 51 |       dimA[1] = *hiddenSize;
 52 |       dimA[2] = 1;
 53 | 
 54 |       strideA[0] = dimA[2] * dimA[1];
 55 |       strideA[1] = dimA[2];
 56 |       strideA[2] = 1;
 57 |       
 58 |       cudnnErrCheck(cudnnSetTensorNdDescriptor(yDesc[i], CUDNN_DATA_FLOAT, 3, dimA, strideA));
 59 |       cudnnErrCheck(cudnnSetTensorNdDescriptor(dyDesc[i], CUDNN_DATA_FLOAT, 3, dimA, strideA));
 60 |    }
 61 |    dimA[0] = 1;
 62 |    dimA[1] = *miniBatch;
 63 |    dimA[2] = *hiddenSize;
 64 |    
 65 |    strideA[0] = dimA[2] * dimA[1];
 66 |    strideA[1] = dimA[2];
 67 |    strideA[2] = 1;
 68 |    
 69 |    cudnnErrCheck(cudnnCreateTensorDescriptor(&hxDesc));
 70 |    cudnnErrCheck(cudnnCreateTensorDescriptor(&cxDesc));
 71 |    cudnnErrCheck(cudnnCreateTensorDescriptor(&hyDesc));
 72 |    cudnnErrCheck(cudnnCreateTensorDescriptor(&cyDesc));
 73 |    cudnnErrCheck(cudnnCreateTensorDescriptor(&dhxDesc));
 74 |    cudnnErrCheck(cudnnCreateTensorDescriptor(&dcxDesc));
 75 |    cudnnErrCheck(cudnnCreateTensorDescriptor(&dhyDesc));
 76 |    cudnnErrCheck(cudnnCreateTensorDescriptor(&dcyDesc));
 77 |    
 78 |    cudnnErrCheck(cudnnSetTensorNdDescriptor(hxDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
 79 |    cudnnErrCheck(cudnnSetTensorNdDescriptor(cxDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
 80 |    cudnnErrCheck(cudnnSetTensorNdDescriptor(hyDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
 81 |    cudnnErrCheck(cudnnSetTensorNdDescriptor(cyDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
 82 |    cudnnErrCheck(cudnnSetTensorNdDescriptor(dhxDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
 83 |    cudnnErrCheck(cudnnSetTensorNdDescriptor(dcxDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
 84 |    cudnnErrCheck(cudnnSetTensorNdDescriptor(dhyDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
 85 |    cudnnErrCheck(cudnnSetTensorNdDescriptor(dcyDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
 86 |    // -------------------------
 87 |    // Set up the dropout descriptor (needed for the RNN descriptor)
 88 |    // -------------------------
 89 |    unsigned long long seed = 1337ull; // Pick a seed.
 90 |    
 91 |    cudnnDropoutDescriptor_t dropoutDesc;
 92 |    cudnnErrCheck(cudnnCreateDropoutDescriptor(&dropoutDesc));
 93 |    
 94 |    // How much memory does dropout need for states?
 95 |    // These states are used to generate random numbers internally
 96 |    // and should not be freed until the RNN descriptor is no longer used
 97 |    size_t stateSize;
 98 |    void *states;
 99 |    cudnnErrCheck(cudnnDropoutGetStatesSize(cudnnHandle, &stateSize));
100 |    
101 |    cudaErrCheck(cudaMalloc(&states, stateSize)); // NOTE(review): no matching cudaFree appears in this function
102 |    float dropout=0;
103 |    cudnnErrCheck(cudnnSetDropoutDescriptor(dropoutDesc, 
104 |                              cudnnHandle,
105 |                              dropout, 
106 |                              states, 
107 |                              stateSize, 
108 |                              seed));
109 |                              
110 |    // -------------------------   
111 |    // Set up the RNN descriptor
112 |    // -------------------------
113 |    cudnnRNNDescriptor_t rnnDesc;
114 |    cudnnRNNMode_t RNNMode;
115 |    
116 |    cudnnErrCheck(cudnnCreateRNNDescriptor(&rnnDesc));
117 |    
118 |    RNNMode = CUDNN_LSTM;
119 |       
120 |    cudnnErrCheck(cudnnSetRNNDescriptor(rnnDesc,
121 |                                        hiddenSize, // NOTE(review): int* parameter passed without dereference (it is used as *hiddenSize above)
122 |                                        numLayers, // NOTE(review): not declared in this function
123 |                                        dropoutDesc,
124 |                                        CUDNN_LINEAR_INPUT, // We can also skip the input matrix transformation
125 |                                        CUDNN_UNIDIRECTIONAL, 
126 |                                        RNNMode, 
127 |                                        CUDNN_DATA_FLOAT));
128 |    // -------------------------
129 |    // Set up parameters
130 |    // -------------------------
131 |    // This needs to be done after the rnn descriptor is set as otherwise
132 |    // we don't know how many parameters we have to allocate
133 |    void *w;   
134 |    void *dw;   
135 | 
136 |    cudnnFilterDescriptor_t wDesc, dwDesc;
137 |    
138 |    cudnnErrCheck(cudnnCreateFilterDescriptor(&wDesc));
139 |    cudnnErrCheck(cudnnCreateFilterDescriptor(&dwDesc));
140 |    
141 |    size_t weightsSize;
142 |    cudnnErrCheck(cudnnGetRNNParamsSize(cudnnHandle, rnnDesc, xDesc[0], &weightsSize, CUDNN_DATA_FLOAT));
143 |    
144 |    int dimW[3];   
145 |    dimW[0] =  weightsSize / sizeof(float);
146 |    dimW[1] = 1;
147 |    dimW[2] = 1;
148 |       
149 |    cudnnErrCheck(cudnnSetFilterNdDescriptor(wDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 3, dimW));   
150 |    cudnnErrCheck(cudnnSetFilterNdDescriptor(dwDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 3, dimW));   
151 |    
152 |    cudaErrCheck(cudaMalloc((void**)&w,  weightsSize));
153 |    cudaErrCheck(cudaMalloc((void**)&dw, weightsSize));
154 |    
155 |    
156 |    // -------------------------
157 |    // Set up work space and reserved memory
158 |    // -------------------------   
159 |    void *workspace;
160 |    
161 |    size_t workSize;
162 |    size_t reserveSize;
163 | 
164 |    // Need for every pass
165 |    cudnnErrCheck(cudnnGetRNNWorkspaceSize(cudnnHandle, rnnDesc, seqLength, xDesc, &workSize)); // NOTE(review): seqLength is an int* here; it is dereferenced (*seqLength) in the code above
166 |    // Only needed in training, shouldn't be touched between passes.
167 |    cudnnErrCheck(cudnnGetRNNTrainingReserveSize(cudnnHandle, rnnDesc, seqLength, xDesc, &reserveSize));
168 |     
169 |    cudaErrCheck(cudaMalloc((void**)&workspace, workSize));
170 |    cudaErrCheck(cudaMalloc((void**)&reserveSpace, reserveSize)); // NOTE(review): reserveSpace is already a void** parameter; taking its address stores the allocation in the local parameter, not in *reserveSpace
171 |    // Weights
172 |    int numLinearLayers = 0; // NOTE(review): with 0 the inner initialisation loop below never runs
173 |    for (int layer = 0; layer < numLayers * (bidirectional ? 2 : 1); layer++) { // NOTE(review): numLayers and bidirectional are not declared in this function
174 |       for (int linLayerID = 0; linLayerID < numLinearLayers; linLayerID++) {
175 |          cudnnFilterDescriptor_t linLayerMatDesc;
176 |          cudnnErrCheck(cudnnCreateFilterDescriptor(&linLayerMatDesc));
177 |          float *linLayerMat;
178 |          
179 |          cudnnErrCheck(cudnnGetRNNLinLayerMatrixParams( cudnnHandle,
180 |                                                         rnnDesc,  
181 |                                                         layer,
182 |                                                         xDesc[0], 
183 |                                                         wDesc, 
184 |                                                         w,
185 |                                                         linLayerID,  
186 |                                                         linLayerMatDesc, 
187 |                                                         (void**)&linLayerMat));
188 |          
189 |          cudnnDataType_t dataType;
190 |          cudnnTensorFormat_t format;
191 |          int nbDims;
192 |          int filterDimA[3];
193 |          cudnnErrCheck(cudnnGetFilterNdDescriptor(linLayerMatDesc,
194 |                                                   3,
195 |                                                   &dataType,
196 |                                                   &format,
197 |                                                   &nbDims,
198 |                                                   filterDimA));
199 |                                                   
200 |          initGPUData(linLayerMat, filterDimA[0] * filterDimA[1] * filterDimA[2], 1.f / (float)(filterDimA[0] * filterDimA[1] * filterDimA[2]));                                                 
201 | 
202 |          cudnnErrCheck(cudnnDestroyFilterDescriptor(linLayerMatDesc));         
203 |          
204 |          cudnnFilterDescriptor_t linLayerBiasDesc;
205 |          cudnnErrCheck(cudnnCreateFilterDescriptor(&linLayerBiasDesc));
206 |          float *linLayerBias;
207 |          
208 |          cudnnErrCheck(cudnnGetRNNLinLayerBiasParams( cudnnHandle,
209 |                                                         rnnDesc,  
210 |                                                         layer,
211 |                                                         xDesc[0], 
212 |                                                         wDesc, 
213 |                                                         w,
214 |                                                         linLayerID,  
215 |                                                         linLayerBiasDesc, 
216 |                                                         (void**)&linLayerBias));
217 |          
218 |          cudnnErrCheck(cudnnGetFilterNdDescriptor(linLayerBiasDesc,
219 |                                                   3,
220 |                                                   &dataType,
221 |                                                   &format,
222 |                                                   &nbDims,
223 |                                                   filterDimA));
224 |                                                   
225 |          initGPUData(linLayerBias, filterDimA[0] * filterDimA[1] * filterDimA[2], 1.f);
226 |                                                   
227 |          cudnnErrCheck(cudnnDestroyFilterDescriptor(linLayerBiasDesc));
228 |       }
229 |    }
230 |    // *********************************************************************************************************
231 |    // At this point all of the setup is done. We now need to pass through the RNN.
232 |    // *********************************************************************************************************
233 |    
234 |   
235 |    
236 |    cudaErrCheck(cudaDeviceSynchronize());
237 |    
238 |    cudaEvent_t start, stop;
239 |    float timeForward, timeBackward1, timeBackward2; // not used anywhere in this function
240 |    cudaErrCheck(cudaEventCreate(&start));
241 |    cudaErrCheck(cudaEventCreate(&stop));
242 |    
243 |    cudaErrCheck(cudaEventRecord(start));   // NOTE(review): `stop` is created but never recorded, so no elapsed time is measured
244 | 
245 |    // If we're not training we use this instead
246 |    // cudnnErrCheck(cudnnRNNForwardInference(cudnnHandle, 
247 |                                          // rnnDesc, 
248 |                                          // xDesc, 
249 |                                          // x, 
250 |                                          // hxDesc,
251 |                                          // hx, 
252 |                                          // cxDesc, 
253 |                                          // cx, 
254 |                                          // wDesc, 
255 |                                          // w, 
256 |                                          // yDesc,  
257 |                                          // y, 
258 |                                          // hyDesc, 
259 |                                          // hy, 
260 |                                          // cyDesc, 
261 |                                          // cy, 
262 |                                          // workspace, 
263 |                                          // workSize));
264 | 
265 |    cudnnErrCheck(cudnnRNNForwardTraining(cudnnHandle, 
266 |                                          rnnDesc, 
267 |                                          seqLength,                                          
268 |                                          xDesc, 
269 |                                          x, // NOTE(review): x, hx, cx, y, hy and cy are not declared in this function; the x_array and w_array parameters are never read
270 |                                          hxDesc,
271 |                                          hx, 
272 |                                          cxDesc, 
273 |                                          cx, 
274 |                                          wDesc, 
275 |                                          w, 
276 |                                          yDesc,  
277 |                                          y, 
278 |                                          hyDesc, 
279 |                                          hy, 
280 |                                          cyDesc, 
281 |                                          cy, 
282 |                                          workspace, 
283 |                                          workSize,
284 |                                          reserveSpace, // NOTE(review): this passes the void** parameter itself (see the cudaMalloc of reserveSpace above)
285 |                                          reserveSize));
286 |    
287 | 
288 | } // NOTE(review): apart from the per-matrix filter descriptors inside the loop, nothing created above (handle, descriptors, malloc/cudaMalloc buffers, events) is released before returning


--------------------------------------------------------------------------------
/mat_cudnn.h:
--------------------------------------------------------------------------------
 1 | #define EXPORT_FCNS   // defined before shrhelp.h is pulled in; presumably selects the export form of EXPORTED_FUNCTION -- confirm
 2 | #ifndef SHRHELP
 3 |     #include "shrhelp.h"
 4 | #endif
 5 | // shrhelp.h is included only when SHRHELP is not already defined.
 6 | #include "cumexhelp.h"
 7 | // NOTE: the block below only defines the MATCUDNN marker; it does not guard this header against repeated inclusion.
 8 | #ifndef MATCUDNN
 9 |     #define MATCUDNN
10 | #endif
11 | // When compiled as C++, the declarations are wrapped in extern "C".
12 | #ifdef __cplusplus
13 |     #include <stddef.h>
14 |     extern "C"
15 |     {
16 | #endif
17 | // Exported entry point; takes one untyped pointer and returns nothing.
18 | 	EXPORTED_FUNCTION void MAT_CUDNN_test(void* x);
19 | 
20 | #ifdef __cplusplus
21 |     }
22 | #endif
23 | 


--------------------------------------------------------------------------------
/mat_cudnn_test.cu:
--------------------------------------------------------------------------------
  1 | #include "cumexhelp.h"
  2 | #define EXPORT_FCNS
  3 | #ifndef SHRHELP
  4 |     #include "shrhelp.h"
  5 | #endif
  6 | #include "mat_cudnn_test.h"
  7 | 
  8 | // Print the CUDA error text and raise a MATLAB error when a runtime call did not return cudaSuccess.
  9 | void cudaErrCheck_(cudaError_t stat) {
 10 |     if (stat == cudaSuccess) return;
 11 |     mexPrintf("CUDA Error: %s\n", cudaGetErrorString(stat));
 12 |     mexErrMsgTxt("CUDA Error");
 13 | }
 14 | #define cudaErrCheck(stat) do { cudaErrCheck_((stat)); } while (0)   // one statement: safe inside if/else
 15 | // Print the cuDNN error text and raise a MATLAB error when a cuDNN call did not return CUDNN_STATUS_SUCCESS.
 16 | void cudnnErrCheck_(cudnnStatus_t stat) {
 17 |     if (stat == CUDNN_STATUS_SUCCESS) return;
 18 |     mexPrintf( "cuDNN Error: %s\n", cudnnGetErrorString(stat));
 19 |     mexErrMsgTxt("cuDNN Error");
 20 | }
 21 | #define cudnnErrCheck(stat) do { cudnnErrCheck_((stat)); } while (0)   // one statement: safe inside if/else
 22 | __global__ void initGPUData_ker(float *data, int numElements, float value) {
 23 |    // One element per thread, addressed by the flat 1-D global thread index.
 24 |    const int idx = threadIdx.x + blockDim.x * blockIdx.x;
 25 |    if (idx >= numElements) return;   // threads past the end of the array do nothing
 26 |    data[idx] = value;
 27 | }
 28 | void initGPUData(float *data, int numElements, float value) {
 29 |    // Launches initGPUData_ker so that the first numElements floats at `data` are set to `value`.
 30 |    if (numElements <= 0) return;   // nothing to fill; a zero-block grid would be an invalid launch
 31 |    const unsigned int threads = 1024;
 32 |    const unsigned int blocks = ((unsigned int)numElements + threads - 1) / threads;   // ceil-div
 33 |    
 34 |    initGPUData_ker <<< blocks, threads >>> (data, numElements, value);
 35 |    cudaErrCheck(cudaGetLastError());   // a launch does not report configuration errors by itself
 36 | }
 37 | // void GET_GPU_CONST_PTR(mxArray *arrayPtr,float const *dataPtr)
 38 | // {
 39 | //     dataPtr=(float const *)(mxGPUGetDataReadOnly (mxGPUCreateFromMxArray(arrayPtr)));
 40 | // }
 41 | // void GET_GPU_PTR(mxArray *arrayPtr,float *dataPtr)
 42 | // {
 43 | //     dataPtr=(float *)(mxGPUGetData(mxGPUCreateFromMxArray(arrayPtr)));
 44 | // }
 45 | 
 46 | EXPORTED_FUNCTION void MAT_CUDNN_test(void* x)
 47 | {
 48 |     // Smoke test: create a cuDNN handle, allocate one device buffer, then release both. The argument x is not used.
 49 |     const size_t seqLength = 10, inputSize = 128, miniBatch = 64;   // plain values; they used to be int pointers set from literals
 50 |     void *buffer = NULL;   // local allocation: writing into the by-value parameter could never reach the caller
 51 |     cudnnHandle_t cudnnHandle;
 52 |     cudnnErrCheck(cudnnCreate(&cudnnHandle));
 53 |     cudaErrCheck(cudaMalloc(&buffer, seqLength * inputSize * miniBatch * sizeof(float)));
 54 |     cudaErrCheck(cudaFree(buffer));
 55 |     cudnnErrCheck(cudnnDestroy(cudnnHandle));
 56 | }
 57 | EXPORTED_FUNCTION void MAT_CUDNN_RNN_LSTM_FF(mxArray const *ax,mxArray const *aw,mxArray *ah,mxArray *ac,int *hiddenSize,int *miniBatch,int *inputSize,int *seqLength,void *reserveSpace)
 58 | {
 59 |    // Runs one cudnnRNNForwardTraining pass of a single-layer, unidirectional LSTM over the GPU array ax.
 60 |    // Sizes are read through the int pointers; everything created here is released before a normal return.
 61 |    // NOTE(review): aw, ah and ac are not used yet -- the weights are filled with constants below and the
 62 |    // outputs y/hy/cy are discarded. reserveSpace is received by value, so a buffer allocated here can never
 63 |    // reach the caller; a local buffer is used instead. Confirm the intended interface before wiring them up.
 64 |    int numLayers=1;
 65 |    float dropout=0.0f;
 66 |    bool bidirectional=false;
 67 |    int mode=2;   // 0 = ReLU RNN, 1 = tanh RNN, 2 = LSTM, 3 = GRU (see the selection further down)
 68 |    if (*seqLength < 1 || *miniBatch < 1 || *inputSize < 1 || *hiddenSize < 1) mexErrMsgTxt("MAT_CUDNN_RNN_LSTM_FF: sizes must be positive");
 69 |    cudnnHandle_t cudnnHandle;
 70 |    cudnnErrCheck(cudnnCreate(&cudnnHandle));
 71 |    mxGPUArray const *gx = mxGPUCreateFromMxArray(ax);   // kept so the wrapper can be destroyed at the end
 72 |    float const *x=(float const *)mxGPUGetDataReadOnly(gx);
 73 |    void *hx = NULL, *cx = NULL, *y = NULL, *hy = NULL, *cy = NULL;
 74 |    const size_t dirs = bidirectional ? 2 : 1;
 75 |    const size_t stateBytes = (size_t)numLayers * (*hiddenSize) * (*miniBatch) * dirs * sizeof(float);
 76 |    cudaErrCheck(cudaMalloc((void**)&hx, stateBytes));
 77 |    cudaErrCheck(cudaMalloc((void**)&cx, stateBytes));
 78 |    cudaErrCheck(cudaMalloc((void**)&y, (size_t)(*seqLength) * (*hiddenSize) * (*miniBatch) * dirs * sizeof(float)));
 79 |    cudaErrCheck(cudaMalloc((void**)&hy, stateBytes));
 80 |    cudaErrCheck(cudaMalloc((void**)&cy, stateBytes));
 81 |    // Freshly allocated device memory is not cleared; start from an all-zero hidden and cell state.
 82 |    cudaErrCheck(cudaMemset(hx, 0, stateBytes));
 83 |    cudaErrCheck(cudaMemset(cx, 0, stateBytes));
 84 |    cudnnTensorDescriptor_t *xDesc, *yDesc;
 85 |    cudnnTensorDescriptor_t hxDesc, cxDesc;
 86 |    cudnnTensorDescriptor_t hyDesc, cyDesc;
 87 |    xDesc = (cudnnTensorDescriptor_t*)malloc((*seqLength) * sizeof(cudnnTensorDescriptor_t));
 88 |    yDesc = (cudnnTensorDescriptor_t*)malloc((*seqLength) * sizeof(cudnnTensorDescriptor_t));
 89 |    if (xDesc == NULL || yDesc == NULL) mexErrMsgTxt("MAT_CUDNN_RNN_LSTM_FF: out of host memory");
 90 |    int dimA[3];
 91 |    int strideA[3];
 92 |    // One descriptor per time step: x is miniBatch x inputSize x 1, y is miniBatch x hiddenSize (x2 if bidirectional) x 1.
 93 |    for (int i = 0; i <(*seqLength); i++) {
 94 |       cudnnErrCheck(cudnnCreateTensorDescriptor(&xDesc[i]));
 95 |       cudnnErrCheck(cudnnCreateTensorDescriptor(&yDesc[i]));
 96 |    
 97 |       dimA[0] = (*miniBatch);
 98 |       dimA[1] = (*inputSize);
 99 |       dimA[2] = 1;
100 |      
101 |       strideA[0] = dimA[2] * dimA[1];
102 |       strideA[1] = dimA[2];
103 |       strideA[2] = 1;
104 | 
105 |       cudnnErrCheck(cudnnSetTensorNdDescriptor(xDesc[i], CUDNN_DATA_FLOAT, 3, dimA, strideA));
106 |       
107 |       dimA[0] = (*miniBatch);
108 |       dimA[1] = bidirectional ? (*hiddenSize) * 2 : (*hiddenSize);
109 |       dimA[2] = 1;
110 | 
111 |       strideA[0] = dimA[2] * dimA[1];
112 |       strideA[1] = dimA[2];
113 |       strideA[2] = 1;
114 |       
115 |       cudnnErrCheck(cudnnSetTensorNdDescriptor(yDesc[i], CUDNN_DATA_FLOAT, 3, dimA, strideA));
116 |    }
117 |    
118 |    // Hidden/cell state descriptors: (numLayers * directions) x miniBatch x hiddenSize with packed strides.
119 |    dimA[0] = numLayers * (bidirectional ? 2 : 1);
120 |    dimA[1] = (*miniBatch);
121 |    dimA[2] = (*hiddenSize);
122 |    
123 |    strideA[0] = dimA[2] * dimA[1];
124 |    strideA[1] = dimA[2];
125 |    strideA[2] = 1;
126 |    
127 |    cudnnErrCheck(cudnnCreateTensorDescriptor(&hxDesc));
128 |    cudnnErrCheck(cudnnCreateTensorDescriptor(&cxDesc));
129 |    cudnnErrCheck(cudnnCreateTensorDescriptor(&hyDesc));
130 |    cudnnErrCheck(cudnnCreateTensorDescriptor(&cyDesc));
131 |    cudnnErrCheck(cudnnSetTensorNdDescriptor(hxDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
132 |    cudnnErrCheck(cudnnSetTensorNdDescriptor(cxDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
133 |    cudnnErrCheck(cudnnSetTensorNdDescriptor(hyDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
134 |    cudnnErrCheck(cudnnSetTensorNdDescriptor(cyDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
135 |    unsigned long long seed = 1337ull; // Pick a seed.
136 |    
137 |    cudnnDropoutDescriptor_t dropoutDesc;
138 |    cudnnErrCheck(cudnnCreateDropoutDescriptor(&dropoutDesc));
139 |    
140 |    // How much memory does dropout need for states?
141 |    // These states are used to generate random numbers internally
142 |    // and should not be freed until the RNN descriptor is no longer used
143 |    size_t stateSize;
144 |    void *states = NULL;
145 |    cudnnErrCheck(cudnnDropoutGetStatesSize(cudnnHandle, &stateSize));
146 |    
147 |    cudaErrCheck(cudaMalloc(&states, stateSize));
148 |    
149 |    cudnnErrCheck(cudnnSetDropoutDescriptor(dropoutDesc, 
150 |                                            cudnnHandle,
151 |                                            dropout, 
152 |                                            states, 
153 |                                            stateSize, 
154 |                                            seed));
155 |                              
156 |    // -------------------------   
157 |    // Set up the RNN descriptor
158 |    // -------------------------
159 |    cudnnRNNDescriptor_t rnnDesc;
160 |    cudnnRNNMode_t RNNMode = CUDNN_LSTM;   // default keeps RNNMode defined even for an unknown mode value
161 |    
162 |    cudnnErrCheck(cudnnCreateRNNDescriptor(&rnnDesc));
163 |    
164 |    if      (mode == 0) RNNMode = CUDNN_RNN_RELU;
165 |    else if (mode == 1) RNNMode = CUDNN_RNN_TANH;
166 |    else if (mode == 2) RNNMode = CUDNN_LSTM;
167 |    else if (mode == 3) RNNMode = CUDNN_GRU;
168 |       
169 |    cudnnErrCheck(cudnnSetRNNDescriptor(rnnDesc,
170 |                                        (*hiddenSize), 
171 |                                        numLayers, 
172 |                                        dropoutDesc,
173 |                                        CUDNN_LINEAR_INPUT, // We can also skip the input matrix transformation
174 |                                        bidirectional ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, 
175 |                                        RNNMode, 
176 |                                        CUDNN_DATA_FLOAT));
177 |    void *w = NULL;
178 |    cudnnFilterDescriptor_t wDesc;
179 |    cudnnErrCheck(cudnnCreateFilterDescriptor(&wDesc));   
180 |    size_t weightsSize;
181 |    cudnnErrCheck(cudnnGetRNNParamsSize(cudnnHandle, rnnDesc, xDesc[0], &weightsSize, CUDNN_DATA_FLOAT));
182 |    
183 |    int dimW[3];   
184 |    dimW[0] =  weightsSize / sizeof(float);
185 |    dimW[1] = 1;
186 |    dimW[2] = 1;
187 |       
188 |    cudnnErrCheck(cudnnSetFilterNdDescriptor(wDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 3, dimW));   
189 |    cudaErrCheck(cudaMalloc((void**)&w,  weightsSize));
190 |    void *workspace = NULL;
191 |    size_t workSize;
192 |    size_t reserveSize;
193 |    void *reserve = NULL;   // stands in for the by-value reserveSpace parameter (see the note at the top)
194 |    cudnnErrCheck(cudnnGetRNNWorkspaceSize(cudnnHandle, rnnDesc,(*seqLength), xDesc, &workSize));
195 |    // Only needed in training, shouldn't be touched between passes.
196 |    cudnnErrCheck(cudnnGetRNNTrainingReserveSize(cudnnHandle, rnnDesc,(*seqLength), xDesc, &reserveSize));
197 |    cudaErrCheck(cudaMalloc((void**)&workspace, workSize));
198 |    cudaErrCheck(cudaMalloc((void**)&reserve, reserveSize));
199 |    int numLinearLayers = 0;
200 |    if (RNNMode == CUDNN_RNN_RELU || RNNMode == CUDNN_RNN_TANH) numLinearLayers = 2;
201 |    else if (RNNMode == CUDNN_LSTM) numLinearLayers = 8;
202 |    else if (RNNMode == CUDNN_GRU) numLinearLayers = 6;
203 |    for (int layer = 0; layer < numLayers * (bidirectional ? 2 : 1); layer++) {
204 |       for (int linLayerID = 0; linLayerID < numLinearLayers; linLayerID++) {
205 |          cudnnFilterDescriptor_t linLayerMatDesc;
206 |          cudnnErrCheck(cudnnCreateFilterDescriptor(&linLayerMatDesc));
207 |          float *linLayerMat;
208 |          // Weight matrix of this linear layer: every element is set to 1 / (number of elements).
209 |          cudnnErrCheck(cudnnGetRNNLinLayerMatrixParams( cudnnHandle,
210 |                                                         rnnDesc,  
211 |                                                         layer,
212 |                                                         xDesc[0], 
213 |                                                         wDesc, 
214 |                                                         w,
215 |                                                         linLayerID,  
216 |                                                         linLayerMatDesc, 
217 |                                                         (void**)&linLayerMat));
218 |          
219 |          cudnnDataType_t dataType;
220 |          cudnnTensorFormat_t format;
221 |          int nbDims;
222 |          int filterDimA[3];
223 |          cudnnErrCheck(cudnnGetFilterNdDescriptor(linLayerMatDesc,
224 |                                                   3,
225 |                                                   &dataType,
226 |                                                   &format,
227 |                                                   &nbDims,
228 |                                                   filterDimA));
229 |                                                   
230 |          initGPUData(linLayerMat, filterDimA[0] * filterDimA[1] * filterDimA[2], 1.f / (float)(filterDimA[0] * filterDimA[1] * filterDimA[2]));                                                 
231 | 
232 |          cudnnErrCheck(cudnnDestroyFilterDescriptor(linLayerMatDesc));         
233 |          // Bias of this linear layer: every element is set to 1.
234 |          cudnnFilterDescriptor_t linLayerBiasDesc;
235 |          cudnnErrCheck(cudnnCreateFilterDescriptor(&linLayerBiasDesc));
236 |          float *linLayerBias;
237 |          
238 |          cudnnErrCheck(cudnnGetRNNLinLayerBiasParams( cudnnHandle,
239 |                                                         rnnDesc,  
240 |                                                         layer,
241 |                                                         xDesc[0], 
242 |                                                         wDesc, 
243 |                                                         w,
244 |                                                         linLayerID,  
245 |                                                         linLayerBiasDesc, 
246 |                                                         (void**)&linLayerBias));
247 |          
248 |          cudnnErrCheck(cudnnGetFilterNdDescriptor(linLayerBiasDesc,
249 |                                                   3,
250 |                                                   &dataType,
251 |                                                   &format,
252 |                                                   &nbDims,
253 |                                                   filterDimA));
254 |                                                   
255 |          initGPUData(linLayerBias, filterDimA[0] * filterDimA[1] * filterDimA[2], 1.f);
256 |                                                   
257 |          cudnnErrCheck(cudnnDestroyFilterDescriptor(linLayerBiasDesc));
258 |       }
259 |    }
260 |    cudaErrCheck(cudaDeviceSynchronize());
261 |    cudnnErrCheck(cudnnRNNForwardTraining(cudnnHandle, 
262 |                                          rnnDesc, 
263 |                                          (*seqLength),
264 |                                          xDesc, 
265 |                                          x, 
266 |                                          hxDesc,
267 |                                          hx, 
268 |                                          cxDesc, 
269 |                                          cx, 
270 |                                          wDesc, 
271 |                                          w, 
272 |                                          yDesc,  
273 |                                          y, 
274 |                                          hyDesc, 
275 |                                          hy, 
276 |                                          cyDesc, 
277 |                                          cy, 
278 |                                          workspace, 
279 |                                          workSize,
280 |                                          reserve, 
281 |                                          reserveSize));
282 | 
283 |    // Tear down: descriptors first (the dropout states must outlive rnnDesc), then device memory.
284 |    for (int i = 0; i <(*seqLength); i++) {
285 |       cudnnErrCheck(cudnnDestroyTensorDescriptor(xDesc[i]));
286 |       cudnnErrCheck(cudnnDestroyTensorDescriptor(yDesc[i]));
287 |    }
288 |    free(xDesc);
289 |    free(yDesc);
290 |    cudnnTensorDescriptor_t stateDescs[4] = { hxDesc, cxDesc, hyDesc, cyDesc };
291 |    for (int i = 0; i < 4; i++) cudnnErrCheck(cudnnDestroyTensorDescriptor(stateDescs[i]));
292 |    cudnnErrCheck(cudnnDestroyRNNDescriptor(rnnDesc));
293 |    cudnnErrCheck(cudnnDestroyDropoutDescriptor(dropoutDesc));
294 |    cudnnErrCheck(cudnnDestroyFilterDescriptor(wDesc));
295 |    void *deviceBuffers[9] = { hx, cx, y, hy, cy, workspace, reserve, w, states };
296 |    for (int i = 0; i < 9; i++) cudaErrCheck(cudaFree(deviceBuffers[i]));
297 |    mxGPUDestroyGPUArray(gx);
298 |    cudnnErrCheck(cudnnDestroy(cudnnHandle));
299 | }
300 | void mexFunction( int nlhs, mxArray *plhs[],
301 |         int nrhs, const mxArray*prhs[] )
302 | {   // Empty body: no input is read and no output is assigned.
303 | }   // NOTE(review): presumably present only so the file builds as a MEX binary -- confirm.
304 | 


--------------------------------------------------------------------------------
/mat_cudnn_test.h:
--------------------------------------------------------------------------------
 1 | #include "shrhelp.h"   // presumably where EXPORTED_FUNCTION comes from -- confirm
 2 | #ifndef CUMEXHELP      // mex.h is included here only when CUMEXHELP is not already defined
 3 | #include <mex.h>
 4 | #endif
 5 | #ifdef __cplusplus     // C linkage for the exported entry points when compiled as C++
 6 |     #include <stddef.h>
 7 |     extern "C"
 8 |     {
 9 | #endif
10 |     EXPORTED_FUNCTION void MAT_CUDNN_test(void* x);
11 |     EXPORTED_FUNCTION void MAT_CUDNN_RNN_LSTM_FF(mxArray const *ax,mxArray const *aw,mxArray *ah,mxArray *ac,int *hiddenSize,int *miniBatch,int *inputSize,int *seqLength,void *reserveSpace);   // parameter list must match the definition in mat_cudnn_test.cu
12 | #ifdef __cplusplus
13 |     }
14 | #endif


--------------------------------------------------------------------------------
/mat_cudnn_test.mexw64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QuantumLiu/matDL/ce75b21dd97e9d58c074d50915d5fc000ee46afb/mat_cudnn_test.mexw64


--------------------------------------------------------------------------------