├── .gitignore
├── Functions
├── act.m
├── activation_bp.m
├── activation_ff.m
├── activation_init.m
├── dact.m
└── loss_handle.m
├── LICENSE
├── Layer
├── activation
│ ├── activation_bp.m
│ ├── activation_ff.m
│ ├── activation_init.m
│ ├── activation_init_cpu.m
│ └── activation_init_gpu.m
├── dense
│ ├── dense_bp.m
│ ├── dense_bp_gpu.m
│ ├── dense_ff.m
│ ├── dense_ff_gpu.m
│ ├── dense_init_cpu.m
│ └── dense_init_gpu.m
├── dropout
│ ├── drop.m
│ ├── dropout_bp.m
│ ├── dropout_bp_gpu.m
│ ├── dropout_ff.m
│ ├── dropout_ff_gpu.m
│ ├── dropout_init_cpu.m
│ └── dropout_init_gpu.m
├── lstm
│ ├── lstm_bp.m
│ ├── lstm_bp_gpu.m
│ ├── lstm_ff.m
│ ├── lstm_ff_gpu.m
│ ├── lstm_init_cpu.m
│ └── lstm_init_gpu.m
├── tensor
│ ├── tensor_init.m
│ ├── tensor_init_cpu.m
│ └── tensor_init_gpu.m
└── tensor_init_gpu.m
├── Model
├── eval_loss.m
├── layer_optimize.m
├── model_evaluate.m
├── model_init.m
├── model_load.m
├── model_predict.m
├── model_save.m
└── model_train.m
├── README.md
├── cudnn_LSTM_FF.m
├── cumexhelp.h
├── example
├── char_rnn
│ ├── Christ2FSM.m
│ ├── char_rnn.m
│ ├── textgenerate.m
│ └── txt2seq.m
├── keras_test_lstm.py
├── test_lstm.asv
├── test_lstm.m
└── test_mlp.m
├── matDL_128X128.ico
├── mat_cudnn.cu
├── mat_cudnn.h
├── mat_cudnn_test.cu
├── mat_cudnn_test.h
└── mat_cudnn_test.mexw64
/.gitignore:
--------------------------------------------------------------------------------
1 | *.mat
2 | *.txt
3 |
--------------------------------------------------------------------------------
/Functions/act.m:
--------------------------------------------------------------------------------
1 | function y=act(x,fun)
2 | %ACT Apply the activation named FUN element-wise to X.
3 | %   y = ACT(x,'sigmoid'|'tanh'|'softmax'|'Relu'|'linear')
4 | %   'softmax' normalizes along dim 2 (rows) and subtracts the row max
5 | %   first for numerical stability. Errors on an unknown FUN instead of
6 | %   silently returning an undefined y.
7 | switch fun
8 |     case 'sigmoid'
9 |         y = 1./(1+exp(-x));
10 |     case 'tanh'
11 |         y = tanh(x);
12 |     case 'softmax'
13 |         % shift by the row max so exp() cannot overflow
14 |         E = exp(x - max(x,[],2));
15 |         y = E./sum(E,2);
16 |     case 'Relu'
17 |         y = x.*(x>0);
18 |     case 'linear'
19 |         y = x;
20 |     otherwise
21 |         error('act:unknownActivation','Unknown activation function: %s',fun);
22 | end
23 | end
--------------------------------------------------------------------------------
/Functions/activation_bp.m:
--------------------------------------------------------------------------------
1 | function layer=activation_bp(layer,next_layer)
2 | %ACTIVATION_BP Back-propagate through an activation layer.
3 | %   When NEXT_LAYER is a layer struct, its input gradient dx (validated
4 | %   against this layer's output_shape) becomes this layer's incoming
5 | %   error e; otherwise layer.e is assumed to have been set externally
6 | %   (e.g. by the loss at the output layer).
7 | %   Gradient: dx = e .* dact(output).
8 | if isstruct(next_layer)
9 |     if ~isequal(size(next_layer.dx),layer.output_shape)
10 |         error('Shape unmatched!')
11 |     end
12 |     layer.e=next_layer.dx;
13 | end
14 | layer.dx=layer.e.*layer.dact(layer.output);
15 | end
--------------------------------------------------------------------------------
/Functions/activation_ff.m:
--------------------------------------------------------------------------------
1 | function layer=activation_ff(layer,prelayer)
2 | %ACTIVATION_FF Forward pass of an activation layer.
3 | %   Applies this layer's activation handle to PRELAYER.output after
4 | %   checking that its size matches layer.input_shape.
5 | x = prelayer.output;
6 | if isequal(size(x),layer.input_shape)
7 |     layer.output = layer.act(x);
8 | else
9 |     error('Shape unmatched!')
10 | end
11 | end
--------------------------------------------------------------------------------
/Functions/activation_init.m:
--------------------------------------------------------------------------------
1 | function layer= activation_init( prelayer,act_fun,flag,loss )
2 | %ACTIVATION_INIT Build an activation layer on top of PRELAYER.
3 | %   layer = ACTIVATION_INIT(prelayer,act_fun,flag) for a hidden layer;
4 | %   layer = ACTIVATION_INIT(prelayer,act_fun,flag,loss) additionally
5 | %   attaches loss/loss-derivative handles (output layer).
6 | %   act_fun is one of the names accepted by act/dact. flag controls
7 | %   whether the backward buffer dx is pre-allocated.
8 | %% Basic layer attributes
9 | layer.type='activation';
10 | layer.trainable=0;
11 | layer.flag=flag;
12 | layer.prelayer_type=prelayer.type;
13 |
14 | layer.batch=1;
15 | layer.epoch=1;
16 |
17 | % an activation layer never changes the tensor shape
18 | layer.input_shape=prelayer.output_shape;
19 | layer.output_shape=prelayer.output_shape;
20 |
21 | % layer.input=prelayer.output;
22 | layer.output=prelayer.output;
23 |
24 | % pre-allocate the backward buffer only when gradients will flow here
25 | if ~strcmpi(layer.prelayer_type,'input')&&flag
26 |     layer.dx=layer.output;
27 | end
28 | layer.e=layer.output;
29 |
30 | % BUG FIX: loss is the 4th argument, so it only exists when nargin>3
31 | % (the old nargin>2 crashed on 3-argument calls).
32 | if nargin>3
33 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
34 |     layer.loss=[];
35 | end
36 | layer.act=@(x)act(x,act_fun);
37 | layer.dact=@(x)dact(x,act_fun);
38 | layer.ff=@(layer,prelayer)activation_ff(layer,prelayer);
39 | layer.bp=@(layer,next_layer)activation_bp(layer,next_layer);
40 | layer.configs.type=layer.type;
41 | layer.configs.input_shape=layer.input_shape;
42 | layer.configs.output_shape=layer.output_shape;
43 | layer.configs.act_fun=act_fun;
44 | end
--------------------------------------------------------------------------------
/Functions/dact.m:
--------------------------------------------------------------------------------
1 | function dx=dact(y,fun)
2 | %DACT Derivative of the activation FUN, expressed in terms of the
3 | %   activation OUTPUT y (not the pre-activation input).
4 | switch fun
5 |     case 'sigmoid'
6 |         dx = y .* (1 - y);
7 |     case 'tanh'
8 |         dx = 1 - y.^2;
9 |     case 'Relu'
10 |         dx = (y>single(0));
11 |     case 'linear'
12 |         % BUG FIX: d/dx(x) = 1, not y; 'like' keeps class (single/gpuArray)
13 |         dx = ones(size(y),'like',y);
14 |     case 'softmax'
15 |         % NOTE(review): returns y, not the softmax Jacobian; this pairs
16 |         % with the categorical_cross_entropy df (y_pred - y_true) in
17 |         % loss_handle -- confirm the intended combined gradient.
18 |         dx = y;
19 | end
20 | end
--------------------------------------------------------------------------------
/Functions/loss_handle.m:
--------------------------------------------------------------------------------
1 | function [f,df]=loss_handle(type)
2 | %LOSS_HANDLE Return function handles for a loss and its derivative.
3 | %   [f,df] = LOSS_HANDLE('mse'|'cross_entropy'|'categorical_cross_entropy')
4 | %   f(y_true,y_pred) evaluates the loss; df is its derivative w.r.t.
5 | %   y_pred (for categorical cross-entropy the combined softmax+CE
6 | %   gradient y_pred - y_true is used directly).
7 | syms y_true y_pred
8 | switch type
9 |     case 'mse'
10 |         symsf(y_true,y_pred)=(y_true-y_pred).^2;
11 |         f=matlabFunction(symsf);
12 |         df=matlabFunction(diff(symsf,y_pred));
13 |     case 'cross_entropy'
14 |         % BUG FIX: first term was y_true.*(y_pred) -- the log was missing
15 |         % from the binary cross-entropy formula.
16 |         symsf(y_true,y_pred)=-1.*sum(y_true.*log(y_pred)+(1-y_true).*log(1-y_pred));
17 |         f=matlabFunction(symsf);
18 |         df=matlabFunction(diff(symsf,y_pred));
19 |     case 'categorical_cross_entropy'
20 |         symsf(y_true,y_pred)=-1.*y_true.*log(y_pred);
21 |         f=matlabFunction(symsf);
22 |         df=@(y_true,y_pre)y_pre-y_true;
23 |     otherwise
24 |         error('loss_handle:unknownLoss','Unknown loss type: %s',type);
25 | end
26 | end
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | {one line to give the program's name and a brief idea of what it does.}
635 | Copyright (C) {year} {name of author}
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | {project} Copyright (C) {year} {fullname}
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <http://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>.
675 |
--------------------------------------------------------------------------------
/Layer/activation/activation_bp.m:
--------------------------------------------------------------------------------
1 | function layer=activation_bp(layer,next_layer)
2 | % Backward pass of an activation layer: dx = e .* dact(output).
3 | % When next_layer is a layer struct, its dx becomes this layer's error
4 | % signal after a shape check; otherwise layer.e must already be set.
5 | if isstruct(next_layer)
6 |     if ~isequal(size(next_layer.dx),layer.output_shape)
7 |         error('Shape unmatched!')
8 |     end
9 |     layer.e=next_layer.dx;
10 | end
11 | layer.dx=layer.dact(layer.output).*layer.e;
12 | end
--------------------------------------------------------------------------------
/Layer/activation/activation_ff.m:
--------------------------------------------------------------------------------
1 | function layer=activation_ff(layer,prelayer)
2 | % Forward pass: apply this layer's activation handle to the previous
3 | % layer's output, after validating its shape.
4 | x=prelayer.output;
5 | if ~isequal(size(x),layer.input_shape)
6 |     error('Shape unmatched!')
7 | end
8 | layer.output=layer.act(x);
9 | end
--------------------------------------------------------------------------------
/Layer/activation/activation_init.m:
--------------------------------------------------------------------------------
1 | function layer= activation_init( prelayer,act_fun,flag,loss )
2 | % Build an activation layer that applies ACT_FUN elementwise to the
3 | % output of PRELAYER.
4 | %   prelayer : feeding layer (reads .type, .output_shape, .output)
5 | %   act_fun  : activation name forwarded to act()/dact()
6 | %   flag     : training flag; the gradient buffer dx only exists when set
7 | %   loss     : optional loss name; installs loss handles (output layer)
8 | %% Basic attributes
9 | layer.type='activation';
10 | layer.trainable=0;
11 | layer.flag=flag;
12 | layer.prelayer_type=prelayer.type;
13 | layer.batch=1;
14 | layer.epoch=1;
15 | % An activation never changes the tensor shape.
16 | layer.input_shape=prelayer.output_shape;
17 | layer.output_shape=prelayer.output_shape;
18 | layer.output=prelayer.output;
19 | % dx is only needed when training and a layer below can receive it.
20 | if flag&&~strcmpi(layer.prelayer_type,'input')
21 |     layer.dx=layer.output;
22 | end
23 | layer.e=layer.output;
24 | % Loss handles when this activation terminates the network.
25 | if nargin>3
26 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
27 |     layer.loss=[];
28 | end
29 | %% Function handles
30 | layer.act=@(x)act(x,act_fun);
31 | layer.dact=@(x)dact(x,act_fun);
32 | layer.ff=@(l,p)activation_ff(l,p);
33 | layer.bp=@(l,n)activation_bp(l,n);
34 | %% Config snapshot
35 | layer.configs.type=layer.type;
36 | layer.configs.input_shape=layer.input_shape;
37 | layer.configs.output_shape=layer.output_shape;
38 | layer.configs.act_fun=act_fun;
39 | end
--------------------------------------------------------------------------------
/Layer/activation/activation_init_cpu.m:
--------------------------------------------------------------------------------
1 | function layer= activation_init_cpu( prelayer,act_fun,flag,loss )
2 | % Build an activation layer (CPU variant) that applies ACT_FUN
3 | % elementwise to the output of PRELAYER.
4 | %   prelayer : feeding layer (reads .type, .output_shape, .output)
5 | %   act_fun  : activation name forwarded to act()/dact()
6 | %   flag     : training flag; the gradient buffer dx only exists when set
7 | %   loss     : optional loss name; installs loss handles (output layer)
8 | %% Basic attributes
9 | layer.type='activation';
10 | layer.trainable=0;
11 | layer.flag=flag;
12 | layer.prelayer_type=prelayer.type;
13 | layer.batch=1;
14 | layer.epoch=1;
15 | % An activation never changes the tensor shape.
16 | layer.input_shape=prelayer.output_shape;
17 | layer.output_shape=prelayer.output_shape;
18 | layer.output=prelayer.output;
19 | % dx is only needed when training and a layer below can receive it.
20 | if flag&&~strcmpi(layer.prelayer_type,'input')
21 |     layer.dx=layer.output;
22 | end
23 | layer.e=layer.output;
24 | % Loss handles when this activation terminates the network.
25 | if nargin>3
26 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
27 |     layer.loss=[];
28 | end
29 | %% Function handles
30 | layer.act=@(x)act(x,act_fun);
31 | layer.dact=@(x)dact(x,act_fun);
32 | layer.ff=@(l,p)activation_ff(l,p);
33 | layer.bp=@(l,n)activation_bp(l,n);
34 | %% Config snapshot
35 | layer.configs.type=layer.type;
36 | layer.configs.input_shape=layer.input_shape;
37 | layer.configs.output_shape=layer.output_shape;
38 | layer.configs.act_fun=act_fun;
39 | end
--------------------------------------------------------------------------------
/Layer/activation/activation_init_gpu.m:
--------------------------------------------------------------------------------
1 | function layer= activation_init_gpu( prelayer,act_fun,flag,loss )
2 | % Build an activation layer (GPU variant). Buffers are inherited from
3 | % PRELAYER.output, so they live on the GPU exactly when that does.
4 | %   prelayer : feeding layer (reads .type, .output_shape, .output)
5 | %   act_fun  : activation name forwarded to act()/dact()
6 | %   flag     : training flag; the gradient buffer dx only exists when set
7 | %   loss     : optional loss name; installs loss handles (output layer)
8 | %% Basic attributes
9 | layer.type='activation';
10 | layer.trainable=0;
11 | layer.flag=flag;
12 | layer.prelayer_type=prelayer.type;
13 | layer.batch=1;
14 | layer.epoch=1;
15 | % An activation never changes the tensor shape.
16 | layer.input_shape=prelayer.output_shape;
17 | layer.output_shape=prelayer.output_shape;
18 | layer.output=prelayer.output;
19 | % dx is only needed when training and a layer below can receive it.
20 | if flag&&~strcmpi(layer.prelayer_type,'input')
21 |     layer.dx=layer.output;
22 | end
23 | layer.e=layer.output;
24 | % Loss handles when this activation terminates the network.
25 | if nargin>3
26 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
27 |     layer.loss=[];
28 | end
29 | %% Function handles
30 | layer.act=@(x)act(x,act_fun);
31 | layer.dact=@(x)dact(x,act_fun);
32 | layer.ff=@(l,p)activation_ff(l,p);
33 | layer.bp=@(l,n)activation_bp(l,n);
34 | %% Config snapshot
35 | layer.configs.type=layer.type;
36 | layer.configs.input_shape=layer.input_shape;
37 | layer.configs.output_shape=layer.output_shape;
38 | layer.configs.act_fun=act_fun;
39 | end
--------------------------------------------------------------------------------
/Layer/dense/dense_bp.m:
--------------------------------------------------------------------------------
1 | function layer =dense_bp(layer,next_layer)
2 | % Backward pass of a dense layer (CPU, batch-first layout).
3 | % If next_layer is a layer struct its dx becomes this layer's error
4 | % signal (after a shape check); otherwise layer.e must be pre-filled.
5 | % Produces layer.dW and, unless fed by the input layer, layer.dx.
6 | if isequal(class(next_layer),'struct')
7 |     if ~isequal(size(next_layer.dx),layer.output_shape)
8 |         error('Shape unmatched!')
9 |     end
10 |     layer.e=next_layer.dx;
11 | end
12 | if layer.timedistributed
13 |     % Fold (batch,feature,time) into (feature, batch*time) and reduce
14 |     % over batch and time with one matrix product.
15 |     % BUGFIX: the original read `4*hiddensize`, an undefined variable
16 |     % copied from the LSTM code; a dense layer's error width is
17 |     % layer.hiddensize.
18 |     layer.dW=reshape(permute(layer.input,[2,1,3]),layer.weights_dim,[])*reshape(permute(layer.e,[2,1,3]),layer.hiddensize,[])';
19 |     if ~isequal(layer.prelayer_type,'input')
20 |         layer.dx(:)=mult_3d(layer.e,layer.W(1:end-1,:)');
21 |     end
22 | else
23 |     layer.dW=layer.input'*layer.e;
24 |     if ~isequal(layer.prelayer_type,'input')
25 |         layer.dx=layer.e*layer.W(1:end-1,:)';
26 |     end
27 | end
28 | end
20 | function m=sq(m)
21 | % Flatten all trailing dimensions: (d1,d2,...,dk) -> (d1, d2*...*dk).
22 | m=reshape(m,size(m,1),[]);
23 | end
23 | function c=mult_3d(a,b)
24 | % Multiply every time slice of A by matrix B.
25 | %   a : (batchsize, in_dim, timestep)
26 | %   b : (in_dim, out_dim)
27 | %   c : (batchsize, out_dim, timestep)
28 | % BUGFIX: the output width is size(b,2), not A's feature width; the
29 | % old reshape used the input width and errored whenever B was not
30 | % square (the mult_3d in lstm_bp.m already handles this correctly).
31 | shape=size(a);
32 | timestep=shape(end);
33 | in_dim=shape(2);
34 | out_dim=size(b,2);
35 | batchsize=shape(1);
36 | c=permute(reshape((reshape(permute(a,[2,1,3]),in_dim,[])'*b)',[out_dim,batchsize,timestep]),[2,1,3]);
37 | end
--------------------------------------------------------------------------------
/Layer/dense/dense_bp_gpu.m:
--------------------------------------------------------------------------------
1 | function layer =dense_bp_gpu(layer,next_layer)
2 | % Backward pass of a dense layer, GPU (feature-first) layout.
3 | % Pulls the upstream gradient from next_layer.dx when given a struct,
4 | % then computes layer.dW and (unless fed by the input layer) layer.dx.
5 | if isstruct(next_layer)
6 |     if ~isequal(size(next_layer.dx),layer.output_shape)
7 |         error('Shape unmatched!')
8 |     end
9 |     layer.e=next_layer.dx;
10 | end
11 | % Flatten trailing dims so time-distributed tensors reduce in one matmul.
12 | e2=reshape(layer.e,size(layer.e,1),[]);
13 | x2=reshape(layer.input,size(layer.input,1),[]);
14 | layer.dW=e2*x2';
15 | if ~strcmp(layer.prelayer_type,'input')
16 |     if layer.timedistributed
17 |         layer.dx(:)=layer.W(:,1:end-1)'*e2;
18 |     else
19 |         layer.dx=layer.W(:,1:end-1)'*layer.e;
20 |     end
21 | end
22 | end
17 | function m=sq(m)
18 | % Collapse every dimension after the first into columns.
19 | m=reshape(m,size(m,1),[]);
20 | end
--------------------------------------------------------------------------------
/Layer/dense/dense_ff.m:
--------------------------------------------------------------------------------
1 | function layer=dense_ff(layer,prelayer)
2 | % Forward pass of a dense layer (CPU, batch-first). layer.input keeps
3 | % a trailing column of ones so the bias row of W is applied by the
4 | % same matrix product as the weights.
5 | if isstruct(prelayer)
6 |     if ~isequal(size(prelayer.output),layer.input_shape)
7 |         error('Shape unmatched!')
8 |     end
9 |     x=prelayer.output;
10 | else
11 |     % prelayer supplied as a raw numeric array.
12 |     x=prelayer;
13 | end
14 | if layer.timedistributed
15 |     layer.input(:,1:end-1,:)=x;
16 |     layer.output(:)=mult_3d(layer.input,layer.W);
17 | else
18 |     layer.input(:,1:end-1)=x;
19 |     layer.output=layer.input*layer.W;
20 | end
21 | end
24 | function c=mult_3d(a,b)
25 | % Multiply every time slice of A by matrix B.
26 | %   a : (batchsize, in_dim, timestep)
27 | %   b : (in_dim, out_dim)
28 | %   c : (batchsize, out_dim, timestep)
29 | % BUGFIX: the output width is size(b,2), not A's feature width; the
30 | % old reshape used the input width and errored whenever B was not
31 | % square (the mult_3d in lstm_bp.m already handles this correctly).
32 | shape=size(a);
33 | timestep=shape(end);
34 | in_dim=shape(2);
35 | out_dim=size(b,2);
36 | batchsize=shape(1);
37 | c=permute(reshape((reshape(permute(a,[2,1,3]),in_dim,[])'*b)',[out_dim,batchsize,timestep]),[2,1,3]);
38 | end
--------------------------------------------------------------------------------
/Layer/dense/dense_ff_gpu.m:
--------------------------------------------------------------------------------
1 | function layer=dense_ff_gpu(layer,prelayer)
2 | % Forward pass of a dense layer, GPU (feature-first) layout: rows are
3 | % features, with a trailing all-ones row multiplying the bias column
4 | % of W.
5 | if isstruct(prelayer)
6 |     if ~isequal(size(prelayer.output),layer.input_shape)
7 |         error('Shape unmatched!')
8 |     end
9 |     x=prelayer.output;
10 | else
11 |     % prelayer supplied as a raw numeric array.
12 |     x=prelayer;
13 | end
14 | if layer.timedistributed
15 |     layer.input(1:end-1,:,:)=x;
16 |     layer.output(:)=layer.W*sq(layer.input);
17 | else
18 |     layer.input(1:end-1,:)=x;
19 |     layer.output=layer.W*layer.input;
20 | end
21 | end
24 | function m=sq(m)
25 | % Collapse every dimension after the first into columns.
26 | m=reshape(m,size(m,1),[]);
27 | end
--------------------------------------------------------------------------------
/Layer/dense/dense_init_cpu.m:
--------------------------------------------------------------------------------
1 | function layer=dense_init_cpu(prelayer,hiddensize ,flag,loss)
2 | % Create a fully connected layer with CPU (host) arrays.
3 | %   prelayer   : feeding layer (reads .output_shape and .type)
4 | %   hiddensize : number of units in this layer
5 | %   flag       : training flag; gradient/loss buffers only exist when set
6 | %   loss       : optional loss name (output layers only)
7 | %% Basic attributes and shapes
8 | layer.trainable=1;
9 | layer.flag=flag;
10 | layer.input_shape=prelayer.output_shape;
11 | if numel(prelayer.output_shape)>2
12 |     % 3-D input (batch, feature, time): apply the layer per time step.
13 |     layer.timedistributed=1;
14 |     layer.output_shape=[layer.input_shape(1),hiddensize,layer.input_shape(end)];
15 | else
16 |     layer.timedistributed=0;
17 |     layer.output_shape=[layer.input_shape(1),hiddensize];
18 | end
19 | dim=prelayer.output_shape(2);
20 | batchsize=prelayer.output_shape(1);
21 | layer.type='dense';
22 | layer.prelayer_type=prelayer.type;
23 | layer.hiddensize=hiddensize;
24 | layer.batchsize=batchsize;
25 | layer.batch=1;
26 | layer.epoch=1;
27 | %% Weights and work buffers
28 | % W stacks the bias with the weights; small uniform init in [-0.005,0.005].
29 | layer.weights_dim=dim+1;
30 | layer.W=(rand([layer.weights_dim,hiddensize],'single')-0.5)./100;
31 | % The input buffer carries one extra all-ones column for the bias row.
32 | if layer.timedistributed
33 |     layer.input=ones([layer.input_shape(1),layer.input_shape(2)+1,layer.input_shape(3)],'single');
34 | else
35 |     layer.input=ones([layer.input_shape(1),layer.input_shape(2)+1],'single');
36 | end
37 | layer.output=zeros(layer.output_shape,'single');
38 | if ~strcmpi(layer.prelayer_type,'input')&&flag
39 |     layer.dx=zeros(layer.input_shape,'single');
40 | end
41 | layer.e=layer.output;
42 | if nargin>3&&flag
43 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
44 |     layer.loss=[];
45 | end
46 | layer.ff=@(l,p)dense_ff(l,p);
47 | layer.bp=@(l,n)dense_bp(l,n);
48 | %% Config snapshot
49 | layer.configs.type=layer.type;
50 | layer.configs.input_shape=layer.input_shape;
51 | layer.configs.output_shape=layer.output_shape;
52 | layer.configs.hiddensize=layer.hiddensize;
53 | layer.configs.W=size(layer.W);
54 | end
--------------------------------------------------------------------------------
/Layer/dense/dense_init_gpu.m:
--------------------------------------------------------------------------------
1 | function layer=dense_init_gpu(prelayer,hiddensize ,flag,loss)
2 | % Create a fully connected layer with gpuArray buffers.
3 | %   prelayer   : feeding layer (reads .output_shape and .type)
4 | %   hiddensize : number of units in this layer
5 | %   flag       : training flag; gradient/loss buffers only exist when set
6 | %   loss       : optional loss name (output layers only)
7 | % NOTE(review): ff/bp point at the batch-first dense_ff/dense_bp (not
8 | % the *_gpu variants); the buffers allocated below are batch-first,
9 | % which matches dense_ff — confirm this pairing is intended.
10 | %% Basic attributes and shapes
11 | layer.trainable=1;
12 | layer.flag=flag;
13 | layer.input_shape=prelayer.output_shape;
14 | if numel(prelayer.output_shape)>2
15 |     % 3-D input (batch, feature, time): apply the layer per time step.
16 |     layer.timedistributed=1;
17 |     layer.output_shape=[layer.input_shape(1),hiddensize,layer.input_shape(end)];
18 | else
19 |     layer.timedistributed=0;
20 |     layer.output_shape=[layer.input_shape(1),hiddensize];
21 | end
22 | dim=prelayer.output_shape(2);
23 | batchsize=prelayer.output_shape(1);
24 | layer.type='dense';
25 | layer.prelayer_type=prelayer.type;
26 | layer.hiddensize=hiddensize;
27 | layer.batchsize=batchsize;
28 | layer.batch=1;
29 | layer.epoch=1;
30 | %% Weights and work buffers (device side)
31 | % W stacks the bias with the weights; small uniform init in [-0.005,0.005].
32 | layer.weights_dim=dim+1;
33 | layer.W=(rand([layer.weights_dim,hiddensize],'single','gpuArray')-0.5)./100;
34 | % The input buffer carries one extra all-ones column for the bias row.
35 | if layer.timedistributed
36 |     layer.input=ones([layer.input_shape(1),layer.input_shape(2)+1,layer.input_shape(3)],'single','gpuArray');
37 | else
38 |     layer.input=ones([layer.input_shape(1),layer.input_shape(2)+1],'single','gpuArray');
39 | end
40 | layer.output=zeros(layer.output_shape,'single','gpuArray');
41 | if ~strcmpi(layer.prelayer_type,'input')&&flag
42 |     layer.dx=zeros(layer.input_shape,'single','gpuArray');
43 | end
44 | layer.e=layer.output;
45 | if nargin>3&&flag
46 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
47 |     layer.loss=[];
48 | end
49 | layer.ff=@(l,p)dense_ff(l,p);
50 | layer.bp=@(l,n)dense_bp(l,n);
51 | %% Config snapshot
52 | layer.configs.type=layer.type;
53 | layer.configs.input_shape=layer.input_shape;
54 | layer.configs.output_shape=layer.output_shape;
55 | layer.configs.hiddensize=layer.hiddensize;
56 | layer.configs.W=size(layer.W);
57 | end
--------------------------------------------------------------------------------
/Layer/dropout/drop.m:
--------------------------------------------------------------------------------
1 | function [mask,mask_index]=drop(mask,drop_rate)
2 | % Zero a random DROP_RATE fraction of MASK's entries and return the
3 | % linear indices that were zeroed.
4 | n=numel(mask);
5 | mask_index=randperm(n,floor(n*drop_rate));
6 | mask(mask_index)=0;
7 | end
--------------------------------------------------------------------------------
/Layer/dropout/dropout_bp.m:
--------------------------------------------------------------------------------
1 | function layer=dropout_bp(layer,next_layer)
2 | % Route the upstream gradient through the forward-pass mask:
3 | % units dropped in the forward pass get zero gradient.
4 | layer.dx=layer.mask.*next_layer.dx;
5 | end
--------------------------------------------------------------------------------
/Layer/dropout/dropout_bp_gpu.m:
--------------------------------------------------------------------------------
1 | function layer=dropout_bp_gpu(layer,next_layer)
2 | % GPU variant: route the upstream gradient through the forward-pass
3 | % mask (dropped units receive zero gradient).
4 | layer.dx=layer.mask.*next_layer.dx;
5 | end
--------------------------------------------------------------------------------
/Layer/dropout/dropout_ff.m:
--------------------------------------------------------------------------------
1 | function layer=dropout_ff(layer,prelayer)
2 | % Forward pass of dropout.
3 | % Training (flag set): refresh the random mask and zero a drop_rate
4 | % fraction of the activations (see drop.m).
5 | % Inference: scale by the keep probability (1-drop_rate) so the
6 | % expected activation matches training.
7 | % BUGFIX: the original scaled by drop_rate, which only equals the
8 | % keep probability when drop_rate==0.5.
9 | if layer.flag
10 |     [layer.mask,layer.mask_index]=layer.drop(layer.mask,layer.drop_rate);
11 |     layer.output=prelayer.output.*layer.mask;
12 | else
13 |     layer.output=prelayer.output*(1-layer.drop_rate);
14 | end
15 | end
--------------------------------------------------------------------------------
/Layer/dropout/dropout_ff_gpu.m:
--------------------------------------------------------------------------------
1 | function layer=dropout_ff_gpu(layer,prelayer)
2 | % Forward pass of dropout (GPU variant).
3 | % Training (flag set): refresh the random mask and zero a drop_rate
4 | % fraction of the activations (see drop.m).
5 | % Inference: scale by the keep probability (1-drop_rate) so the
6 | % expected activation matches training.
7 | % BUGFIX: the original scaled by drop_rate, which only equals the
8 | % keep probability when drop_rate==0.5.
9 | if layer.flag
10 |     [layer.mask,layer.mask_index]=layer.drop(layer.mask,layer.drop_rate);
11 |     layer.output=prelayer.output.*layer.mask;
12 | else
13 |     layer.output=prelayer.output*(1-layer.drop_rate);
14 | end
15 | end
--------------------------------------------------------------------------------
/Layer/dropout/dropout_init_cpu.m:
--------------------------------------------------------------------------------
1 | function layer=dropout_init_cpu(prelayer,drop_rate ,flag,loss)
2 | % Create a dropout layer with CPU buffers.
3 | %   drop_rate : fraction of activations zeroed each training pass
4 | %   flag      : training flag; the random mask only exists when set
5 | %   loss      : optional loss name (output layers)
6 | %% Basic attributes
7 | layer.trainable=0;
8 | layer.flag=flag;
9 | layer.type='dropout';
10 | layer.prelayer_type=prelayer.type;
11 | % Dropout keeps the tensor shape unchanged.
12 | layer.input_shape=prelayer.output_shape;
13 | layer.output_shape=layer.input_shape;
14 | batchsize=prelayer.output_shape(1);
15 | layer.batchsize=batchsize;
16 | layer.batch=1;
17 | layer.epoch=1;
18 | %% Dropout-specific buffers
19 | layer.drop_rate=drop_rate;
20 | if layer.flag
21 |     layer.mask=ones(layer.output_shape,'single');
22 | end
23 | layer.output=zeros(layer.output_shape,'single');
24 | if ~strcmpi(layer.prelayer_type,'input')&&flag
25 |     layer.dx=zeros(layer.input_shape,'single');
26 | end
27 | if nargin>3&&flag
28 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
29 |     layer.loss=[];
30 | end
31 | %% Handles
32 | layer.drop=@(m,r)drop(m,r);
33 | layer.ff=@(l,p)dropout_ff(l,p);
34 | layer.bp=@(l,n)dropout_bp(l,n);
35 | %% Config snapshot
36 | layer.configs.type=layer.type;
37 | layer.configs.input_shape=layer.input_shape;
38 | layer.configs.output_shape=layer.output_shape;
39 | layer.configs.drop_rate=layer.drop_rate;
40 | end
--------------------------------------------------------------------------------
/Layer/dropout/dropout_init_gpu.m:
--------------------------------------------------------------------------------
1 | function layer=dropout_init_gpu(prelayer,drop_rate ,flag,loss)
2 | % Create a dropout layer with gpuArray buffers.
3 | %   drop_rate : fraction of activations zeroed each training pass
4 | %   flag      : training flag; the random mask only exists when set
5 | %   loss      : optional loss name (output layers)
6 | % NOTE(review): ff/bp point at dropout_ff/dropout_bp, not the *_gpu
7 | % variants (the bodies are identical in this codebase) — confirm
8 | % intended.
9 | %% Basic attributes
10 | layer.trainable=0;
11 | layer.flag=flag;
12 | layer.type='dropout';
13 | layer.prelayer_type=prelayer.type;
14 | % Dropout keeps the tensor shape unchanged.
15 | layer.input_shape=prelayer.output_shape;
16 | layer.output_shape=layer.input_shape;
17 | batchsize=prelayer.output_shape(1);
18 | layer.batchsize=batchsize;
19 | layer.batch=1;
20 | layer.epoch=1;
21 | %% Dropout-specific buffers (device side)
22 | layer.drop_rate=drop_rate;
23 | if layer.flag
24 |     layer.mask=ones(layer.output_shape,'single','gpuArray');
25 | end
26 | layer.output=zeros(layer.output_shape,'single','gpuArray');
27 | if ~strcmpi(layer.prelayer_type,'input')&&flag
28 |     layer.dx=zeros(layer.input_shape,'single','gpuArray');
29 | end
30 | if nargin>3&&flag
31 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
32 |     layer.loss=[];
33 | end
34 | %% Handles
35 | layer.drop=@(m,r)drop(m,r);
36 | layer.ff=@(l,p)dropout_ff(l,p);
37 | layer.bp=@(l,n)dropout_bp(l,n);
38 | %% Config snapshot
39 | layer.configs.type=layer.type;
40 | layer.configs.input_shape=layer.input_shape;
41 | layer.configs.output_shape=layer.output_shape;
42 | layer.configs.drop_rate=layer.drop_rate;
43 | end
--------------------------------------------------------------------------------
/Layer/lstm/lstm_bp.m:
--------------------------------------------------------------------------------
1 | function layer=lstm_bp(layer,next_layer)
2 | % Backpropagation through time for an LSTM layer (CPU layout:
3 | % activations indexed (batch, feature, timestep)).
4 | % Gate buffers share one feature axis split as [f | i | o | tc]
5 | % (forget, input, output gate, candidate cell), each hiddensize wide.
6 | % Fills layer.dW and (unless fed by the input layer) layer.dx from
7 | % the upstream gradient next_layer.dx, or from a pre-filled layer.e
8 | % when next_layer is not a struct.
9 | if isequal(class(next_layer),'struct')
10 |     if ~isequal(size(next_layer.dx),layer.output_shape)
11 |         error('Shape unmatched!')
12 |     end
13 |     if layer.return_sequence
14 |         layer.e=next_layer.dx;
15 |     else
16 |         % Only the last step receives an external error.
17 |         % NOTE(review): this writes e(:,end,:) — for the batch-first
18 |         % layout e(:,:,end) would be expected; confirm against the
19 |         % buffer shapes in lstm_init_cpu.
20 |         layer.e(:,end,:)=next_layer.dx;
21 |     end
22 | end
23 | timestep=layer.timestep;
24 | hiddensize=layer.hiddensize;
25 | batchsize=layer.batchsize; % (not referenced again in this CPU variant)
26 | dim=layer.input_shape(2);
27 | % Column ranges into the stacked [x;1;h] input and the gate buffers.
28 | r_x=1:dim+1;
29 | r_h=dim+1+(1:hiddensize);
30 | r_ifo=1:3*hiddensize;
31 | r_f=1:hiddensize;
32 | r_i=hiddensize+1:2*hiddensize;
33 | r_o=2*hiddensize+1:3*hiddensize;
34 | r_tc=3*hiddensize+1:4*hiddensize;
35 | %% Backpropagation through time
36 | for t=timestep:-1:2
37 |     % d_h(t) = e(t) + d_a(t+1)*W_h'
38 |     layer.dh(:,:,t)=layer.e(:,:,t)+layer.dma(:,:,t+1)*layer.W(r_h,:)';
39 |     % d_c(t) = d_h(t) .* o(t) .* tanh'(c(t))
40 |     layer.dsc(:,:,t)=layer.dh(:,:,t).*layer.mb(:,r_o,t).*layer.dact_h(layer.sc(:,:,t));
41 |     % db_o(t) = d_h(t) .* bc(t)
42 |     layer.dmb(:,r_o,t)=layer.dh(:,:,t).*layer.bc(:,:,t);
43 |     % db_i(t) = d_c(t) .* tc(t)
44 |     layer.dmb(:,r_i,t)=layer.dsc(:,:,t).*layer.mb(:,r_tc,t);
45 |     % db_tc(t) = d_c(t) .* i(t)
46 |     layer.dmb(:,r_tc,t)=layer.dsc(:,:,t).*layer.mb(:,r_i,t);
47 |     % db_f(t) = d_c(t) .* c(t-1)
48 |     layer.dmb(:,r_f,t)=layer.dsc(:,:,t).*layer.sc(:,:,t-1);
49 |     % da = act'(b) .* db (gate vs candidate activations differ)
50 |     layer.dma(:,r_ifo,t)=layer.dact_f(layer.mb(:,r_ifo,t)).*layer.dmb(:,r_ifo,t);
51 |     layer.dma(:,r_tc,t)=layer.dact_tc(layer.mb(:,r_tc,t)).*layer.dmb(:,r_tc,t);
52 | end
53 | % t==1 unrolled separately: there is no c(0), so the forget-gate
54 | % term (dmb(:,r_f,1)) is skipped.
55 | t=1;
56 | layer.dh(:,:,t)=layer.e(:,:,t)+layer.dma(:,:,t+1)*layer.W(r_h,:)';
57 | layer.dsc(:,:,t)=layer.dh(:,:,t).*layer.mb(:,r_o,t).*layer.dact_h(layer.sc(:,:,t));
58 | layer.dmb(:,r_o,t)=layer.dh(:,:,t).*layer.bc(:,:,t);
59 | layer.dmb(:,r_i,t)=layer.dsc(:,:,t).*layer.mb(:,r_tc,t);
60 | layer.dmb(:,r_tc,t)=layer.dsc(:,:,t).*layer.mb(:,r_i,t);
61 | layer.dma(:,r_ifo,t)=layer.dact_f(layer.mb(:,r_ifo,t)).*layer.dmb(:,r_ifo,t);
62 | layer.dma(:,r_tc,t)=layer.dact_tc(layer.mb(:,r_tc,t)).*layer.dmb(:,r_tc,t);
63 |
64 | if ~isequal(layer.prelayer_type,'input')
65 |     % Push the gradient to the previous layer through the input weights.
66 |     % NOTE(review): W(:,r_x(1:end-1))' selects COLUMNS of W, yet the dW
67 |     % line below shapes W as (weights_dim x 4*hiddensize); row indexing
68 |     % W(r_x(1:end-1),:)' would be expected here (the GPU variant's W is
69 |     % transposed, where column indexing is right) — verify.
70 |     layer.dx(:)=mult_3d(layer.dma(:,:,1:end-1),layer.W(:,r_x(1:end-1))');
71 | end
72 | %layer.dma(:,r_f,2:end)=layer.dma(:,r_f,2:end)./(timestep-1);
73 | %layer.dma(:,hiddensize+1:end,:)=layer.dma(:,hiddensize+1:end,:)./timestep;
74 | % dW reduces over batch and time in one matmul; xh is the stacked
75 | % [x;1;h] input history.
76 | layer.dW=reshape(permute(layer.xh,[2,1,3]),layer.weights_dim,[])*reshape(permute(layer.dma,[2,1,3]),4*hiddensize,[])';
77 | end
57 | function c=mult_3d(a,b)
58 | % Per-time-step matrix product: a is (batch, in_dim, T) and b is
59 | % (in_dim, out_dim); the result is (batch, out_dim, T).
60 | dims=size(a);
61 | timestep=dims(end);
62 | in_dim=dims(2);
63 | out_dim=size(b,2);
64 | batchsize=dims(1);
65 | flat=reshape(permute(a,[2,1,3]),in_dim,[]);  % (in_dim, batch*T)
66 | c=permute(reshape((flat'*b)',[out_dim,batchsize,timestep]),[2,1,3]);
67 | end
--------------------------------------------------------------------------------
/Layer/lstm/lstm_bp_gpu.m:
--------------------------------------------------------------------------------
1 | function layer=lstm_bp_gpu(layer,next_layer)
2 | % Backpropagation through time for an LSTM layer, GPU layout
3 | % (feature-first: buffers indexed (feature, timestep, batch)).
4 | % Gate rows are stacked [f | i | o | tc] (forget, input, output gate,
5 | % candidate cell), each hiddensize tall. Fills layer.dW and (unless
6 | % fed by the input layer) layer.dx.
7 | if isequal(class(next_layer),'struct')
8 |     if ~isequal(size(next_layer.dx),layer.output_shape)
9 |         error('Shape unmatched!')
10 |     end
11 |     if layer.return_sequence
12 |         layer.e=next_layer.dx;
13 |     else
14 |         % Only the final time step receives an external error.
15 |         layer.e(:,end,:)=next_layer.dx;
16 |     end
17 | end
18 | timestep=layer.timestep;
19 | hiddensize=layer.hiddensize;
20 | batchsize=layer.batchsize;
21 | % Feature-first layout: input_shape(1) is the input feature width.
22 | dim=layer.input_shape(1);
23 | % Row ranges into the stacked [x;1;h] input and the gate buffers.
24 | r_x=1:dim+1;
25 | r_h=dim+1+(1:hiddensize);
26 | r_ifo=1:3*hiddensize;
27 | r_f=1:hiddensize;
28 | r_i=hiddensize+1:2*hiddensize;
29 | r_o=2*hiddensize+1:3*hiddensize;
30 | r_tc=3*hiddensize+1:4*hiddensize;
31 | %% Backpropagation through time
32 | for t=timestep:-1:2
33 |     % d_h(t) = e(t) + W_h'*d_a(t+1)
34 |     layer.dh(:,t,:)=sq(layer.e(:,t,:))+layer.W(:,r_h)'*sq(layer.dma(:,t+1,:));
35 |     % d_c(t) = d_h(t) .* o(t) .* tanh'(c(t))
36 |     layer.dsc(:,t,:)=layer.dh(:,t,:).*layer.mb(r_o,t,:).*layer.dact_h(layer.sc(:,t,:));
37 |     % db_o(t) = d_h(t) .* bc(t)
38 |     layer.dmb(r_o,t,:)=layer.dh(:,t,:).*layer.bc(:,t,:);
39 |     % db_i(t) = d_c(t) .* tc(t)
40 |     layer.dmb(r_i,t,:)=layer.dsc(:,t,:).*layer.mb(r_tc,t,:);
41 |     % db_tc(t) = d_c(t) .* i(t)
42 |     layer.dmb(r_tc,t,:)=layer.dsc(:,t,:).*layer.mb(r_i,t,:);
43 |     % db_f(t) = d_c(t) .* c(t-1)
44 |     layer.dmb(r_f,t,:)=layer.dsc(:,t,:).*layer.sc(:,t-1,:);
45 |     % da = act'(b) .* db (gate vs candidate activations differ)
46 |     layer.dma(r_ifo,t,:)=layer.dact_f(layer.mb(r_ifo,t,:)).*layer.dmb(r_ifo,t,:);
47 |     layer.dma(r_tc,t,:)=layer.dact_tc(layer.mb(r_tc,t,:)).*layer.dmb(r_tc,t,:);
48 | end
49 | % t==1 unrolled separately: there is no c(0), so the forget-gate
50 | % term (dmb(r_f,1,:)) is skipped.
51 | t=1;
52 | layer.dh(:,t,:)=sq(layer.e(:,t,:))+layer.W(:,r_h)'*sq(layer.dma(:,t+1,:));
53 | layer.dsc(:,t,:)=layer.dh(:,t,:).*layer.mb(r_o,t,:).*layer.dact_h(layer.sc(:,t,:));
54 | layer.dmb(r_o,t,:)=layer.dh(:,t,:).*layer.bc(:,t,:);
55 | layer.dmb(r_i,t,:)=layer.dsc(:,t,:).*layer.mb(r_tc,t,:);
56 | layer.dmb(r_tc,t,:)=layer.dsc(:,t,:).*layer.mb(r_i,t,:);
57 | layer.dma(r_ifo,t,:)=layer.dact_f(layer.mb(r_ifo,t,:)).*layer.dmb(r_ifo,t,:);
58 | layer.dma(r_tc,t,:)=layer.dact_tc(layer.mb(r_tc,t,:)).*layer.dmb(r_tc,t,:);
59 |
60 | % Normalize gate gradients over time, then dW/dx over the batch.
61 | % NOTE(review): the CPU lstm_bp.m keeps the equivalent two lines
62 | % commented out and does not divide dW by batchsize — confirm which
63 | % convention the optimizer expects.
64 | layer.dma(r_f,2:end,:)=layer.dma(r_f,2:end,:)./(timestep-1);
65 | layer.dma(hiddensize+1:end,:)=layer.dma(hiddensize+1:end,:)./timestep;
66 | layer.dW=layer.dma(:,:)*layer.xh(:,:)'./batchsize;
67 | if ~isequal(layer.prelayer_type,'input')
68 |     % Gradient w.r.t. the layer input through the input-weight columns.
69 |     layer.dx(:)=layer.W(:,r_x(1:end-1))'*sq(layer.dma(:,1:end-1,:))./batchsize;
70 | end
71 | end
57 | function m=sq(t)
58 | %SQ Flatten all trailing dimensions of t into columns (rows preserved).
59 | nrows=size(t,1);
60 | m=reshape(t,nrows,[]);
61 | end
--------------------------------------------------------------------------------
/Layer/lstm/lstm_ff.m:
--------------------------------------------------------------------------------
1 | function layer=lstm_ff(layer,prelayer)
2 | %LSTM_FF CPU feed-forward pass of an LSTM layer.
3 | %   layer    : lstm layer struct built by lstm_init_cpu
4 | %   prelayer : previous layer struct (reads prelayer.output) or a raw
5 | %              [batchsize,dim,timestep] input tensor
6 | %Fixes vs. previous revision:
7 | %   * output is assigned once after the loop (it was re-assigned on every
8 | %     iteration and never assigned at all when timestep==1)
9 | %   * non-return_sequence output is the LAST hidden state xh(:,r_h,end),
10 | %     not the initial state xh(:,r_h,1)
11 | %   * local helper sq() added (it was called but not defined in this file)
12 | timestep=layer.timestep;
13 | hiddensize=layer.hiddensize;
14 | dim=layer.input_shape(2);
15 | r_x=1:dim+1;%range of x and bias
16 | r_h=dim+1+(1:hiddensize);%range of h
17 | r_ifo=1:3*hiddensize;%range of forget,input and output gates
18 | r_f=1:hiddensize;%range of forget gate
19 | r_i=hiddensize+1:2*hiddensize;%~input gate
20 | r_o=2*hiddensize+1:3*hiddensize;%~output gate
21 | r_tc=3*hiddensize+1:4*hiddensize;%range of tilde c gate
22 | %xh is a 3D tensor holding x,bias and h;(:,r_x(1:end-1),1:end-1) is the x area
23 | %assign value from input tensor
24 | if isequal(class(prelayer),'struct')
25 | if ~isequal(size(prelayer.output),layer.input_shape)
26 | error('Shape unmatched!')
27 | end
28 | layer.xh(:,r_x(1:end-1),1:end-1)=prelayer.output;
29 | else
30 | layer.xh(:,r_x(1:end-1),1:end-1)=prelayer;
31 | end
32 | %compute all x(t)*W_x+bias in one time at first
33 | layer.maX(:)=mult_3d(layer.xh(:,r_x,1:end-1),layer.W(r_x,:));
34 |
35 | %% Feed forward
36 | %t=1 : no previous hidden/cell state, so the recurrent and forget terms drop
37 | layer.ma( :,:,1)=layer.maX( :,:,1);
38 | layer.mb( :,r_ifo,1)=layer.act_f(layer.ma(:,r_ifo,1));
39 | layer.mb( :,r_tc,1)=layer.act_tc(layer.ma(:,r_tc,1));
40 | layer.sc( :,:,1)=layer.mb(:,r_i,1).*layer.mb(:,r_tc,1);
41 | layer.bc( :,:,1)=layer.act_h(layer.sc( :,:,1));
42 | layer.xh(:,r_h,2)=layer.bc(:,:,1).*layer.mb(:,r_o,1);
43 | %t>1
44 | for t=2:timestep
45 | % a(t) = W_x * x(t) + W_h * h(t-1)
46 | layer.ma( :,:,t)=layer.maX( :,:,t)+layer.xh( :,r_h,t)*layer.W(r_h,:);
47 | %b(t)=act(a(t))
48 | %the i,f,o gates all use sigmoid, so activate them in one call
49 | layer.mb( :,r_ifo,t)=layer.act_f(layer.ma( :,r_ifo,t));
50 | %the candidate-cell gate uses tanh
51 | layer.mb( :,r_tc,t)=layer.act_tc(layer.ma( :,r_tc,t));
52 | % c(t) = f(t) * c(t-1) + i(t) * tc(t)
53 | layer.sc( :,:,t)=layer.sc( :,:,t-1).*layer.mb( :,r_f,t)+layer.mb( :,r_i,t).*layer.mb( :,r_tc,t);
54 | %tanh(c(t))
55 | layer.bc( :,:,t)=layer.act_h(layer.sc( :,:,t));
56 | % h(t) = o(t) * tanh(c(t))
57 | layer.xh( :,r_h,t+1)=layer.bc( :,:,t).*layer.mb( :,r_o,t);
58 | end
59 | %assign the output once, after all timesteps are computed
60 | if layer.return_sequence
61 | layer.output=layer.xh(:,r_h,2:end);
62 | else
63 | %final hidden state h(timestep), flattened to 2-D [batchsize,hiddensize]
64 | layer.output=sq(layer.xh(:,r_h,end));
65 | end
66 | end
67 | function a=sq(a)
68 | %SQ flatten trailing dims so the output is 2-D [batchsize,hiddensize]
69 | a=reshape(a,size(a,1),[]);
70 | end
55 | function c=mult_3d(a,b)
56 | %MULT_3D Batched matrix product of a 3-D tensor with a 2-D weight matrix.
57 | %   a : [batchsize,input_dim,timestep] tensor (one slice per timestep)
58 | %   b : [input_dim,output_dim] weight matrix
59 | %   c : [batchsize,output_dim,timestep], with c(:,:,t) = a(:,:,t)*b
60 | %Flattens every (sample,timestep) row into one 2-D multiply so the whole
61 | %batch is served by a single matrix product, then restores the 3-D layout.
62 | input_shape=size(a);
63 | output_dim=size(b,2);
64 | timestep=input_shape(end);
65 | input_dim=input_shape(2);
66 | batchsize=input_shape(1);
67 | %permute puts input_dim first so reshape stacks timesteps column-wise
68 | c=permute(reshape((reshape(permute(a,[2,1,3]),input_dim,[])'*b)',[output_dim,batchsize,timestep]),[2,1,3]);
69 | end
--------------------------------------------------------------------------------
/Layer/lstm/lstm_ff_gpu.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QuantumLiu/matDL/ce75b21dd97e9d58c074d50915d5fc000ee46afb/Layer/lstm/lstm_ff_gpu.m
--------------------------------------------------------------------------------
/Layer/lstm/lstm_init_cpu.m:
--------------------------------------------------------------------------------
1 | function layer=lstm_init_cpu(prelayer,hiddensize,return_sequence,flag,loss)
2 | %LSTM_INIT_CPU Build and initialise a CPU LSTM layer struct.
3 | %   prelayer        : previous layer struct (uses output_shape and type)
4 | %   hiddensize      : number of hidden units
5 | %   return_sequence : return all timesteps (1, default) or only last (0)
6 | %   flag            : 1 = training mode (allocates gradients); default 0
7 | %   loss            : optional loss name, attaches loss_f/loss_df handles
8 | %% Optional-argument defaults
9 | %(must run before the first use of flag; previously flag was read at the
10 | %top of the function, so the nargin defaults were unreachable)
11 | if nargin<3
12 |     return_sequence=1;
13 | end
14 | if nargin<4
15 |     flag=0;
16 | end
17 | %% Basic layer attributes
18 | %Input tensor shape
19 | layer.input_shape=prelayer.output_shape;
20 | layer.trainable=1;
21 | layer.flag=flag;
22 |
23 | dim=prelayer.output_shape(2);
24 | timestep=prelayer.output_shape(3);
25 | batchsize=prelayer.output_shape(1);
26 | layer.return_sequence=return_sequence;
27 | if return_sequence
28 |     %Output tensor shape
29 |     layer.output_shape=[batchsize,hiddensize,timestep];
30 | else
31 |     layer.output_shape=[batchsize,hiddensize];
32 | end
33 | %The type of the layer
34 | layer.type='lstm';
35 | %connected (previous) layer type
36 | layer.prelayer_type=prelayer.type;
37 | %The hiddensize of the layer
38 | layer.hiddensize=hiddensize;
39 |
40 | layer.batch=1;
41 | layer.epoch=1;
42 | %% lstm layer attributes
43 | %Timestep
44 | layer.timestep=timestep;
45 | layer.batchsize=batchsize;
46 | %n is the number of unrolled timesteps in one batch
47 | layer.n=batchsize*timestep;
48 | %Put x(t), a bias column and h(t) in one array (ones() pre-fills the bias)
49 | layer.xh=ones([batchsize,dim+1+hiddensize,timestep+1],'single');
50 | %W is the weights of all four gates and bias
51 | layer.weights_dim=dim+1+hiddensize;
52 | layer.W=(rand([layer.weights_dim,4*hiddensize],'single')-0.5)./100;
53 | %Compute the value of x_t*wx_t for all timesteps in one shot
54 | layer.maX=zeros([batchsize,4*hiddensize,timestep],'single');
55 | %pre-activation values
56 | layer.ma=layer.maX;
57 | %post-activation values
58 | layer.mb=layer.maX;
59 | %sc: state of cell
60 | layer.sc=zeros([batchsize,hiddensize,timestep],'single');
61 | layer.bc=layer.sc;
62 | %The output tensor and error
63 | layer.output=zeros(layer.output_shape,'single');
64 | layer.e=layer.sc;
65 | if layer.flag
66 |     %gradient buffers (training mode only)
67 |     layer.dW=zeros(size(layer.W),'single');
68 |     layer.dma=zeros([batchsize,4*hiddensize,timestep+1],'single');
69 |     layer.dmb=layer.dma;
70 |     layer.dsc=layer.sc;
71 |     layer.dh=layer.dsc;
72 | end
73 | if ~strcmpi(layer.prelayer_type,'input')&&layer.flag
74 |     layer.dx=zeros(layer.input_shape,'single');
75 | end
76 | if nargin>4
77 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
78 |     layer.loss=[];
79 | end
80 | %% methods
81 | layer.act_f =@(x)act(x,'sigmoid'); % activation for i,f,o gates
82 | layer.act_tc =@(x)act(x, 'tanh'); % activation for candidate cell
83 | layer.act_h = @(x)act(x, 'tanh');
84 |
85 | layer.dact_f= @(x)dact(x,'sigmoid');
86 | layer.dact_tc =@(x)dact(x, 'tanh'); % derivative for candidate cell
87 | layer.dact_h = @(x)dact(x, 'tanh');
88 | layer.ff=@(layer,prelayer)lstm_ff(layer,prelayer);
89 | layer.bp=@(layer,next_layer)lstm_bp(layer,next_layer);
90 |
91 | layer.configs.type=layer.type;
92 | layer.configs.input_shape=layer.input_shape;
93 | layer.configs.output_shape=layer.output_shape;
94 | layer.configs.hiddensize=layer.hiddensize;
95 | layer.configs.W=size(layer.W);
96 | end
--------------------------------------------------------------------------------
/Layer/lstm/lstm_init_gpu.m:
--------------------------------------------------------------------------------
1 | function layer=lstm_init_gpu(prelayer,hiddensize,return_sequence,flag,loss)
2 | %LSTM_INIT_GPU Build and initialise a GPU (gpuArray) LSTM layer struct.
3 | %   prelayer        : previous layer struct (uses output_shape and type)
4 | %   hiddensize      : number of hidden units
5 | %   return_sequence : return all timesteps (1, default) or only last (0)
6 | %   flag            : 1 = training mode (allocates gradients); default 0
7 | %   loss            : optional loss name, attaches loss_f/loss_df handles
8 | %% Optional-argument defaults
9 | %(must run before the first use of flag; previously flag was read at the
10 | %top of the function, so the nargin defaults were unreachable)
11 | if nargin<3
12 |     return_sequence=1;
13 | end
14 | if nargin<4
15 |     flag=0;
16 | end
17 | %% Basic layer attributes
18 | %Input tensor shape
19 | layer.input_shape=prelayer.output_shape;
20 | layer.trainable=1;
21 | layer.flag=flag;
22 |
23 | dim=prelayer.output_shape(2);
24 | timestep=prelayer.output_shape(3);
25 | batchsize=prelayer.output_shape(1);
26 | layer.return_sequence=return_sequence;
27 | if return_sequence
28 |     %Output tensor shape
29 |     layer.output_shape=[batchsize,hiddensize,timestep];
30 | else
31 |     layer.output_shape=[batchsize,hiddensize];
32 | end
33 | %The type of the layer
34 | layer.type='lstm';
35 | %connected (previous) layer type
36 | layer.prelayer_type=prelayer.type;
37 | %The hiddensize of the layer
38 | layer.hiddensize=hiddensize;
39 |
40 | layer.batch=1;
41 | layer.epoch=1;
42 | %% lstm layer attributes
43 | %Timestep
44 | layer.timestep=timestep;
45 | layer.batchsize=batchsize;
46 | %n is the number of unrolled timesteps in one batch
47 | layer.n=batchsize*timestep;
48 | %Put x(t), a bias column and h(t) in one array (ones() pre-fills the bias)
49 | layer.xh=ones([batchsize,dim+1+hiddensize,timestep+1],'single','gpuArray');
50 | %W is the weights of all four gates and bias
51 | layer.weights_dim=dim+1+hiddensize;
52 | layer.W=(rand([layer.weights_dim,4*hiddensize],'single','gpuArray')-0.5)./100;
53 | %Compute the value of x_t*wx_t for all timesteps in one shot
54 | layer.maX=zeros([batchsize,4*hiddensize,timestep],'single','gpuArray');
55 | %pre-activation values
56 | layer.ma=layer.maX;
57 | %post-activation values
58 | layer.mb=layer.maX;
59 | %sc: state of cell
60 | layer.sc=zeros([batchsize,hiddensize,timestep],'single','gpuArray');
61 | layer.bc=layer.sc;
62 | %The output tensor and error
63 | layer.output=zeros(layer.output_shape,'single','gpuArray');
64 | layer.e=layer.sc;
65 | if layer.flag
66 |     %gradient buffers (training mode only)
67 |     layer.dW=zeros(size(layer.W),'single','gpuArray');
68 |     layer.dma=zeros([batchsize,4*hiddensize,timestep+1],'single','gpuArray');
69 |     layer.dmb=layer.dma;
70 |     layer.dsc=layer.sc;
71 |     layer.dh=layer.dsc;
72 | end
73 | if ~strcmpi(layer.prelayer_type,'input')&&layer.flag
74 |     layer.dx=zeros(layer.input_shape,'single','gpuArray');
75 | end
76 | if nargin>4
77 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
78 |     layer.loss=[];
79 | end
80 | %% methods
81 | layer.act_f =@(x)act(x,'sigmoid'); % activation for i,f,o gates
82 | layer.act_tc =@(x)act(x, 'tanh'); % activation for candidate cell
83 | layer.act_h = @(x)act(x, 'tanh');
84 |
85 | layer.dact_f= @(x)dact(x,'sigmoid');
86 | layer.dact_tc =@(x)dact(x, 'tanh'); % derivative for candidate cell
87 | layer.dact_h = @(x)dact(x, 'tanh');
88 | %NOTE(review): ff/bp call the CPU routines lstm_ff/lstm_bp (gpuArray
89 | %inputs keep the work on the GPU); lstm_ff_gpu/lstm_bp_gpu exist but
90 | %index a different data layout -- confirm before switching handles.
91 | layer.ff=@(layer,prelayer)lstm_ff(layer,prelayer);
92 | layer.bp=@(layer,next_layer)lstm_bp(layer,next_layer);
93 |
94 | layer.configs.type=layer.type;
95 | layer.configs.input_shape=layer.input_shape;
96 | layer.configs.output_shape=layer.output_shape;
97 | layer.configs.hiddensize=layer.hiddensize;
98 | layer.configs.W=size(layer.W);
99 | end
--------------------------------------------------------------------------------
/Layer/tensor/tensor_init.m:
--------------------------------------------------------------------------------
1 | function layer=tensor_init(input_shape,type,loss)
2 | %TENSOR_INIT Build a plain tensor layer (used as an input or output layer).
3 | %   input_shape : shape vector; the output shape equals the input shape
4 | %   type        : layer type string (e.g. 'input')
5 | %   loss        : optional loss name; attaches loss_f/loss_df handles
6 | layer.input_shape=input_shape;
7 | %a tensor layer passes its shape straight through
8 | layer.output_shape=input_shape;
9 | layer.type=type;
10 | has_loss=nargin>2;
11 | if has_loss
12 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
13 |     layer.loss=[];
14 | end
15 | %summary used by model_init's layer printout
16 | layer.configs=struct('type',type,'input_shape',input_shape,'output_shape',input_shape);
17 | end
--------------------------------------------------------------------------------
/Layer/tensor/tensor_init_cpu.m:
--------------------------------------------------------------------------------
1 | function layer=tensor_init_cpu(input_shape,type,loss)
2 | %TENSOR_INIT_CPU Build a plain CPU tensor layer (input or output layer).
3 | %   input_shape : shape vector; the output shape equals the input shape
4 | %   type        : layer type string (e.g. 'input')
5 | %   loss        : optional loss name; attaches loss_f/loss_df handles
6 | %Fix: the function was named tensor_init_gpu, mismatching this file's
7 | %name (tensor_init_cpu.m); MATLAB dispatches by filename, so renaming
8 | %the function to match is backward compatible for all callers.
9 | %Input tensor shape
10 | layer.input_shape=input_shape;
11 | %Output tensor shape
12 | layer.output_shape=input_shape;
13 | %The type of the layer
14 | layer.type=type;
15 | if nargin>2
16 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
17 |     layer.loss=[];
18 | end
19 | layer.configs.type=layer.type;
20 | layer.configs.input_shape=layer.input_shape;
21 | layer.configs.output_shape=layer.output_shape;
22 | end
--------------------------------------------------------------------------------
/Layer/tensor/tensor_init_gpu.m:
--------------------------------------------------------------------------------
1 | function layer=tensor_init_gpu(input_shape,type,loss)
2 | %TENSOR_INIT_GPU Build a plain tensor layer for the GPU pipeline.
3 | %   input_shape : shape vector; the output shape equals the input shape
4 | %   type        : layer type string (e.g. 'input')
5 | %   loss        : optional loss name; attaches loss_f/loss_df handles
6 | layer.input_shape=input_shape;
7 | %a tensor layer passes its shape straight through
8 | layer.output_shape=input_shape;
9 | layer.type=type;
10 | if nargin>2
11 |     [layer.loss_f,layer.loss_df]=loss_handle(loss);
12 |     layer.loss=[];
13 | end
14 | %summary used by model_init's layer printout
15 | layer.configs=struct('type',type,'input_shape',input_shape,'output_shape',input_shape);
16 | end
--------------------------------------------------------------------------------
/Layer/tensor_init_gpu.m:
--------------------------------------------------------------------------------
1 | function layer=tensor_init_gpu(input_shape,type,loss)
2 | %TENSOR_INIT_GPU Build a plain tensor layer (input or output layer).
3 | %NOTE(review): this file (Layer/tensor_init_gpu.m) duplicates
4 | %Layer/tensor/tensor_init_gpu.m byte-for-byte; one of the two shadows
5 | %the other on the MATLAB path and is a candidate for removal.
6 | %% Basic layer attributes
7 | %Input tensor shape
8 | layer.input_shape=input_shape;
9 | %Output tensor shape
10 | layer.output_shape=input_shape;
11 | %The type of the layer
12 | layer.type=type;
13 | %optional loss: attach loss function and its derivative
14 | if nargin>2
15 | [layer.loss_f,layer.loss_df]=loss_handle(loss);
16 | layer.loss=[];
17 | end
18 | layer.configs.type=layer.type;
19 | layer.configs.input_shape=layer.input_shape;
20 | layer.configs.output_shape=layer.output_shape;
21 | end
--------------------------------------------------------------------------------
/Model/eval_loss.m:
--------------------------------------------------------------------------------
1 | function [outputlayer,loss]=eval_loss(outputlayer,y_true,flag)
2 | %EVAL_LOSS Compute the mean batch loss and (when training) the output error.
3 | %   outputlayer : last real layer of the model (holds loss_f/loss_df)
4 | %   y_true      : target tensor
5 | %   flag        : 1 = training mode; records loss history and fills .e
6 | dim=size(y_true,2);
7 | %mean over all elements, scaled by dim so the value is a per-sample sum
8 | loss=dim*feval(@(x)mean(x(:)),outputlayer.loss_f(single(y_true),outputlayer.output));
9 | if flag
10 |     outputlayer.loss=[outputlayer.loss,loss];
11 |     if isequal(outputlayer.type,'lstm')&& ~outputlayer.return_sequence
12 |         %non-sequence LSTM: the error signal only enters the last timestep
13 |         %(cast to single added for consistency with the branch below)
14 |         outputlayer.e(:,:,end)=outputlayer.loss_df(single(y_true),outputlayer.output);
15 |     else
16 |         outputlayer.e=outputlayer.loss_df(single(y_true),outputlayer.output);
17 |     end
18 | end
19 | end
13 |
--------------------------------------------------------------------------------
/Model/layer_optimize.m:
--------------------------------------------------------------------------------
1 | function layer=layer_optimize(layer,pars,batch,epoch)
2 | %LAYER_OPTIMIZE Apply one optimizer step to a trainable layer's weights.
3 | %   pars  : optimizer struct with fields opt, learningrate, momentum
4 | %   batch : current batch index (1 resets the momentum buffer)
5 | %   epoch : current epoch index
6 | %Fix: the nargin<2 path previously set only pars.opt, then crashed
7 | %reading pars.momentum/pars.learningrate; complete the defaults
8 | %(values match the defaults used in model_init).
9 | if nargin<2
10 |     pars=struct();
11 | end
12 | if ~isfield(pars,'opt'),          pars.opt='sgd';           end
13 | if ~isfield(pars,'momentum'),     pars.momentum=0;          end
14 | if ~isfield(pars,'learningrate'), pars.learningrate=0.01;   end
15 | if nargin<3, batch=layer.batch; end
16 | if nargin<4, epoch=layer.epoch; end
17 | switch pars.opt
18 |     case 'sgd'
19 |         if pars.momentum >0
20 |             %first batch (re)initialises the velocity buffer
21 |             if batch==1
22 |                 layer.vW=pars.learningrate*layer.dW;
23 |             else
24 |                 layer.vW=pars.momentum*layer.vW+pars.learningrate*layer.dW;
25 |             end
26 |             layer.W=layer.W-layer.vW;
27 |         else
28 |             %plain SGD step
29 |             layer.W=layer.W-pars.learningrate*layer.dW;
30 |         end
31 | end
32 | layer.batch=batch;
33 | layer.epoch=epoch;
34 | end
--------------------------------------------------------------------------------
/Model/model_evaluate.m:
--------------------------------------------------------------------------------
1 | function mean_loss=model_evaluate(model,x,y_true)
2 | %MODEL_EVALUATE Mean loss of the model's predictions against y_true.
3 | %   Returns dim*mean(elementwise loss), matching the scaling in eval_loss.
4 | y_pred=model.predict(model,x);
5 | dim=size(y_true,2);
6 | elem_loss=model.layers{end-1}.loss_f(single(y_true),y_pred);
7 | mean_loss=dim*mean(elem_loss(:));
8 | end
--------------------------------------------------------------------------------
/Model/model_init.m:
--------------------------------------------------------------------------------
1 | function model=model_init(input_shape,configs ,flag,optimizer,device)
2 | %MODEL_INIT Build a model struct: an input tensor layer, one layer per
3 | %configs entry, and a trailing 0 sentinel used as "next layer" during bp.
4 | %   input_shape : [batchsize,...] shape of one input batch
5 | %   configs     : cell array of per-layer config structs
6 | %   flag        : 1 = training model (optimizer attached); default 0
7 | %   optimizer   : struct with type/momentum/learningrate; defaulted if absent
8 | %   device      : 'cpu' (default) or 'gpu'
9 | %NOTE(review): the switch below and the nargin checks after it set the
10 | %same defaults twice; behavior is unchanged but the duplication looks
11 | %like leftover refactoring.
12 | switch nargin
13 |     case 2
14 |         flag=0;
15 |         device='cpu';
16 |     case 3
17 |         if flag
18 |             optimizer.type='sgd';
19 |             optimizer.momentum=0;
20 |             optimizer.learningrate=0.01;
21 |         end
22 |         device='cpu';
23 |     case 4
24 |         device='cpu';
25 | end
26 | if nargin<3
27 |     flag=0;
28 | end
29 | model.flag=flag;
30 | if nargin<4&&flag
31 |     optimizer.type='sgd';
32 |     optimizer.momentum=0;
33 |     optimizer.learningrate=0.01;
34 | end
35 | %layers{1} is the input tensor layer; each config builds the next layer
36 | model.layers=cell(1,length(configs)+1);
37 | model.layers{1}=tensor_init(input_shape,'input');
38 | switch device
39 |     case 'cpu'
40 | for l=2:length(model.layers)
41 |     model.layers{l}=layer_init_cpu(model.layers{l-1},configs{l-1},flag);
42 | end
43 |     case 'gpu'
44 | for l=2:length(model.layers)
45 |     model.layers{l}=layer_init_gpu(model.layers{l-1},configs{l-1},flag);
46 | end
47 | end
48 | %append a 0 sentinel so the last real layer's bp sees a non-struct next_layer
49 | model.layers=[model.layers,0];
50 | for l=1:length(model.layers)-1
51 |     disp(['layer ' ,num2str(l),' :']);
52 |     disp(model.layers{l}.configs);
53 | end
54 |
55 | %shapes without the leading batch dimension
56 | model.input_shape=model.layers{1}.input_shape(2:end);
57 | model.output_shape=model.layers{end-1}.output_shape(2:end);
58 | model.batchsize=input_shape(1);
59 | model.loss=[];
60 | model.configs=configs;
61 | if flag
62 | model.optimizer=optimizer;
63 | model.optimize=@(layer,optimizer,batch,epoch)layer_optimize(layer,optimizer,batch,epoch);
64 | end
65 | %method handles (OOP-style struct interface)
66 | model.eval_loss=@(outputlayer,y_true,flag)eval_loss(outputlayer,y_true,flag);
67 | model.predict=@(model,x)model_predict(model,x);
68 | model.save=@(model,filename)model_save(model,filename);
69 | model.evaluate=@(model,x,y_true)model_evaluate(model,x,y_true);
70 | if flag
71 | model.train=@(model,x,y,nb_epoch,verbose,filename)model_train(model,x,y,nb_epoch,verbose,filename);
72 | end
73 | end
60 | function layer=layer_init_gpu(prelayer,config,flag)
61 | %Dispatch GPU layer construction according to config.type.
62 | %An optional config.loss marks the layer as an output layer.
63 | has_loss=isfield(config,'loss');
64 | switch config.type
65 |     case 'lstm'
66 |         if has_loss
67 |             layer=lstm_init_gpu(prelayer,config.hiddensize,config.return_sequence,flag,config.loss);
68 |         else
69 |             layer=lstm_init_gpu(prelayer,config.hiddensize,config.return_sequence,flag);
70 |         end
71 |     case 'dense'
72 |         if has_loss
73 |             layer=dense_init_gpu(prelayer,config.hiddensize,flag,config.loss);
74 |         else
75 |             layer=dense_init_gpu(prelayer,config.hiddensize,flag);
76 |         end
77 |     case 'activation'
78 |         if has_loss
79 |             layer=activation_init_gpu(prelayer,config.act_fun,flag,config.loss);
80 |         else
81 |             layer=activation_init_gpu(prelayer,config.act_fun,flag);
82 |         end
83 |     case 'dropout'
84 |         if has_loss
85 |             layer=dropout_init_gpu(prelayer,config.drop_rate,flag,config.loss);
86 |         else
87 |             layer=dropout_init_gpu(prelayer,config.drop_rate,flag);
88 |         end
89 | end
90 | end
88 | function layer=layer_init_cpu(prelayer,config,flag)
89 | %Dispatch CPU layer construction according to config.type.
90 | %An optional config.loss marks the layer as an output layer.
91 | has_loss=isfield(config,'loss');
92 | switch config.type
93 |     case 'lstm'
94 |         if has_loss
95 |             layer=lstm_init_cpu(prelayer,config.hiddensize,config.return_sequence,flag,config.loss);
96 |         else
97 |             layer=lstm_init_cpu(prelayer,config.hiddensize,config.return_sequence,flag);
98 |         end
99 |     case 'dense'
100 |         if has_loss
101 |             layer=dense_init_cpu(prelayer,config.hiddensize,flag,config.loss);
102 |         else
103 |             layer=dense_init_cpu(prelayer,config.hiddensize,flag);
104 |         end
105 |     case 'activation'
106 |         if has_loss
107 |             layer=activation_init_cpu(prelayer,config.act_fun,flag,config.loss);
108 |         else
109 |             layer=activation_init_cpu(prelayer,config.act_fun,flag);
110 |         end
111 |     case 'dropout'
112 |         if has_loss
113 |             layer=dropout_init_cpu(prelayer,config.drop_rate,flag,config.loss);
114 |         else
115 |             layer=dropout_init_cpu(prelayer,config.drop_rate,flag);
116 |         end
117 | end
118 | end
--------------------------------------------------------------------------------
/Model/model_load.m:
--------------------------------------------------------------------------------
1 | function model=model_load(minimodel,batch_size,flag,optimizer,device)
2 | %MODEL_LOAD Rebuild a full model from a saved minimodel.
3 | %   minimodel  : struct from model_save, or a .mat filename containing one
4 | %   batch_size : batch size for the rebuilt model (default 32)
5 | %   flag       : 1 = training model (default 0)
6 | %   optimizer  : optimizer struct; defaulted when flag is set
7 | %   device     : 'cpu' (default) or 'gpu'
8 | %Fix: device was undefined when called with fewer than 5 arguments,
9 | %crashing the model_init call; default to 'cpu' like model_init does.
10 | if nargin<2
11 |     batch_size=32;
12 | end
13 | if nargin<3
14 |     flag=0;
15 | end
16 | if nargin<4&&flag
17 |     optimizer.type='sgd';
18 |     optimizer.momentum=0;
19 |     optimizer.learningrate=0.01;
20 | elseif ~flag
21 |     optimizer=[];
22 | end
23 | if nargin<5
24 |     device='cpu';
25 | end
26 | if isequal(class(minimodel),'char')
27 |     %loads the 'minimodel' variable saved by model_save into this workspace
28 |     load(minimodel);
29 | end
30 | model=model_init([batch_size,minimodel.input_shape],minimodel.configs,flag,optimizer,device);
31 | end
--------------------------------------------------------------------------------
/Model/model_predict.m:
--------------------------------------------------------------------------------
1 | function y_pred=model_predict(model,x)
2 | %MODEL_PREDICT Run the model forward over x in fixed-size batches.
3 | %   x : [nb_samples,dim] or [nb_samples,dim,timestep] input array
4 | %   y_pred : single array [nb_samples,model.output_shape]
5 | %A trailing remainder (< batchsize samples) is covered by re-running the
6 | %last full batchsize-wide window, so the overlap is recomputed.
7 | %NOTE(review): the remainder path needs nb_samples >= batchsize -- confirm
8 | %callers never pass fewer samples than one batch.
9 | batchsize=model.batchsize;
10 | shape_x=size(x);
11 | nb_batch=floor(shape_x(1)/batchsize);
12 | m=mod(shape_x(1),batchsize);
13 | y_pred=zeros([shape_x(1),model.output_shape],'single');
14 | for batch=1:nb_batch
15 |     %% ff
16 |     if numel(shape_x)==2
17 |         model.layers{1}=x((batch-1)*batchsize+1:batch*batchsize,:);
18 |     elseif numel(shape_x)==3
19 |         model.layers{1}=x((batch-1)*batchsize+1:batch*batchsize,:,:);
20 |     else
21 |         error('The number of dims of input data must be 2/3');
22 |     end
23 |     for l=2:length(model.layers)-1
24 |         model.layers{l}=model.layers{l}.ff(model.layers{l},model.layers{l-1});
25 |     end
26 |     %gather() copies gpuArray outputs back to host memory
27 |     if numel(size(y_pred))>2
28 |         y_pred((batch-1)*batchsize+1:batch*batchsize,:,:)=gather(model.layers{end-1}.output);
29 |     else
30 |         y_pred((batch-1)*batchsize+1:batch*batchsize,:)=gather(model.layers{end-1}.output);
31 |     end
32 | end
33 | if m
34 |     %% remainder: rerun the last full-size window ending at the final sample
35 |     if numel(shape_x)==2
36 |         model.layers{1}=x(end-batchsize+1:end,:);
37 |     elseif numel(shape_x)==3
38 |         model.layers{1}=x(end-batchsize+1:end,:,:);
39 |     else
40 |         error('The number of dims of input data must be 2/3');
41 |     end
42 |     for l=2:length(model.layers)-1
43 |         model.layers{l}=model.layers{l}.ff(model.layers{l},model.layers{l-1});
44 |     end
45 |     if numel(size(y_pred))>2
46 |         y_pred(end-batchsize+1:end,:,:)=gather(model.layers{end-1}.output);
47 |     else
48 |         %fixed: this 2-D branch previously used 3-subscript indexing
49 |         y_pred(end-batchsize+1:end,:)=gather(model.layers{end-1}.output);
50 |     end
51 | end
52 | end
--------------------------------------------------------------------------------
/Model/model_save.m:
--------------------------------------------------------------------------------
1 | function model_save(model,filename)
2 | %MODEL_SAVE Persist a lightweight model snapshot to a .mat file.
3 | %Saves shapes, configs and trainable weights (as variable 'minimodel'),
4 | %which model_load can rebuild into a full model.
5 | minimodel.input_shape=model.input_shape;
6 | minimodel.output_shape=model.output_shape;
7 | minimodel.configs=model.configs;
8 | %layers{1} is the input tensor and layers{end} is the 0 sentinel; skip both
9 | nb_real=length(model.layers)-1;
10 | for idx=2:nb_real
11 |     current=model.layers{idx};
12 |     if current.trainable
13 |         %gather() pulls gpuArray weights back to host memory before saving
14 |         minimodel.Ws{idx}=gather(current.W);
15 |     end
16 | end
17 | save(filename,'minimodel','-v7.3');
18 | end
15 |
16 |
--------------------------------------------------------------------------------
/Model/model_train.m:
--------------------------------------------------------------------------------
1 | function model=model_train(model,x,y,nb_epoch,verbose,filename)
2 | %MODEL_TRAIN Train the model on (x,y) for nb_epoch epochs.
3 | %   x        : [nb_samples,dim] or [nb_samples,dim,timestep] input
4 | %   y        : targets, 2-D or 3-D to match the output layer
5 | %   verbose  : 0 silent, 1 waitbar, 2 +epoch figures, 3 +per-batch figure
6 | %   filename : optional .mat path; model is saved there after training
7 | %Fix: the waitbar handle h is only created when verbose is set, so it is
8 | %now also only deleted when verbose is set (verbose=0 used to crash).
9 | if nargin<5
10 |     verbose=0;
11 | end
12 | if nargin<6
13 |     filename=0;
14 | end
15 | batchsize=model.batchsize;
16 | shape_x=size(x);
17 | shape_y=size(y);
18 | g_batch=1;
19 | %total batches across all epochs, for the progress display
20 | nb_batch=floor(shape_x(1)/batchsize)*nb_epoch;
21 | if verbose
22 |     h = waitbar(g_batch/nb_batch,'Training model');
23 | end
24 | model.epoch_loss=[];
25 | model.batch_loss=[];
26 | if verbose>=2
27 |     f_epoch=figure('Name',' epochs loss');
28 |     f_batch=figure('Name',' batches loss');
29 | end
30 | for epoch=1:nb_epoch
31 |     batch=1;
32 |     tic;
33 |     epoch_batch_loss=[];
34 |     while batch*batchsize<=shape_x(1)
35 |         %% ff
36 |         if numel(shape_x)==2
37 |             model.layers{1}=x((batch-1)*batchsize+1:batch*batchsize,:);
38 |         elseif numel(shape_x)==3
39 |             model.layers{1}=x((batch-1)*batchsize+1:batch*batchsize,:,:);
40 |         else
41 |             error('The number of dims of input data must be 2/3');
42 |         end
43 |         for l=2:length(model.layers)-1
44 |             model.layers{l}=model.layers{l}.ff(model.layers{l},model.layers{l-1});
45 |         end
46 |         %% eval
47 |         if numel(shape_y)==2
48 |             model.layers{end-1}=model.eval_loss(model.layers{end-1},y((batch-1)*batchsize+1:batch*batchsize,:),model.flag);
49 |         elseif numel(shape_y)==3
50 |             model.layers{end-1}=model.eval_loss(model.layers{end-1},y((batch-1)*batchsize+1:batch*batchsize,:,:),model.flag);
51 |         else
52 |             error('The number of dims of output data must be 2/3');
53 |         end
54 |         epoch_batch_loss=[epoch_batch_loss,model.layers{end-1}.loss(end)];
55 |         cu_epoch_loss=mean(epoch_batch_loss(:));
56 |         model.batch_loss=model.layers{end-1}.loss;
57 |         if verbose>=3
58 |             set(0,'CurrentFigure',f_batch);
59 |             plot(model.batch_loss,'r-');hold off;
60 |         end
61 |         if verbose
62 |             pro=num2str(100*g_batch/nb_batch);
63 |             message=['Training model ','Epoch: ',num2str(epoch),'/',num2str(nb_epoch), ' Progress: ',pro,'%',' loss: ',num2str(cu_epoch_loss)];
64 |             waitbar(g_batch/nb_batch,h,message);
65 |         end
66 |         %% bp (output layer first, back to the first hidden layer)
67 |         for l=length(model.layers)-1:-1:2
68 |             if model.layers{l}.trainable
69 |                 model.layers{l}=model.optimize(model.layers{l}.bp(model.layers{l},model.layers{l+1}),model.optimizer,batch,epoch);
70 |             else
71 |                 model.layers{l}=model.layers{l}.bp(model.layers{l},model.layers{l+1});
72 |             end
73 |         end
74 |         batch=batch+1;
75 |         g_batch=g_batch+1;
76 |     end
77 |     toc
78 |     model.epoch_loss=[model.epoch_loss,cu_epoch_loss];
79 |     if verbose>=2
80 |         set(0,'CurrentFigure',f_epoch);
81 |         plot(model.epoch_loss,'r-');
82 |         set(0,'CurrentFigure',f_batch);
83 |         plot(model.batch_loss,'r-');
84 |     end
85 | end
86 | if filename
87 |     model.save(model,filename);
88 | end
89 | %only delete the waitbar if it was created (verbose==0 has no handle h)
90 | if verbose
91 |     delete(h);
92 | end
93 | end
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # matDL
2 | 
3 | v0.42 BETA
4 | A lightweight MATLAB deeplearning toolbox,based on gpuArray.
5 | One of the fastest matlab's RNN libs.
6 | ## Performance
7 | model: an LSTM model with hidden sizes [1024,1024,1024] and 10
8 | timesteps, with a 256-dim input.
9 | Device: i7-4710hq,GTX940m
10 | matDL: 60sec/epoch Keras(1.2.2,Tensorflow backend,cudnn5.1): 29sec/epoch
11 | ## Features
12 | High parallel Implementation.
13 |
14 |
15 | * Concatenate the weights of the 4 gates into **W**, and the values of **x** and **h** of every timestep in a batch into a 3D tensor **xh**. Compute **x*W** for all timesteps of all samples in a batch at one time.
16 | * Compute the activated values of **input,forget ,ouput gates** at one time.
17 |
18 | OOP style
19 | * Use `struct` type to define a **layer** class and a **model** class.Define **ff**, **bp**, **optimize** methods by using a `FunctionHandle`.
20 |
21 | ## APIs
22 | ### Model
23 | * A `model` is a set of `layers`,`data` and `optimizer`.
24 | * build
25 | * `model=model_init(input_shape,configs ,flag,optimizer)`
26 | * arguments:
27 | * `input_shape` : a `vector`,`[input_dim,batchsize]` or `[input_dim,timestep,batchsize]`
28 | * `configs` : `cell` ,configures of each layers
29 | * `flag` : `bool`, 0 is predict mode, 1 is train mode
30 | * `optimizer` : `struct` ,keywords: `opt`(type of optimizer) ,`learningrate`
31 | * attributes :
32 | * `model.input_shape`
33 | * `model.output_shape`
34 | * `model.batchsize`
35 | * `model.configs`
36 | * `model.flag`
37 | * `model.layers`
38 | * `model.optimizer` (if `flag`)
39 | * `model.loss`
40 | * methods:
41 | * private:
42 | * `model.eval_loss=@(outputlayer,y_true,flag)eval_loss(outputlayer,y_true,flag)`
43 | * `model.optimize=@(layer,optimizer,batch,epoch)layer_optimize(layer,optimizer,batch,epoch)`
44 | * public:
45 | * `model.train=@(model,x,y,nb_epoch,verbose,filename)model_train(model,x,y,nb_epoch,verbose,filename)`
46 | * `model=model.train(model,x,y,nb_epoch,verbose,filename)`
47 | * arguments:
48 | * `model` : self
49 | * `x`:input,shape:[dim,timestep,nb_samples],or [dim,nb_samples]
50 | * `y`:targets
51 | * `nb_epoch`: how many epochs you want to train
52 | * `verbose` : 0,1,2,3. 0 means no waitbar and figures, 1 means showing the waitbar only, 2 means showing the waitbar and plotting figures every epoch, 3 means showing the waitbar and plotting figures every epoch and batch.
53 | * `model.predict=@(model,x)model_predict(model,x)`
54 | * `y=model.predict(model,x)`
55 | * `model.evaluate=@(model,x,y_true)model_evaluate(model,x,y_true)`
56 | * `mean_loss=model.evaluate(model,x,y_true)`
57 | * `model.save=@(filename)model_save(model,filename)`
58 | * `model.save(filename)`
59 | * Save layer weights and configs to a `.mat` file.
60 | * reload:
61 | * `model=model_load(minimodel,batch_size,flag,optimizer)`
62 | * `minimodel` is the minimodel saved by `model.save()`,can be a `struct` variable or a `string` of filename.
63 | * **example**:
64 | x=rand(100,10,3200,'single','gpuArray');
65 | y=zeros(512,10,3200,'single','gpuArray');
66 | y(1,:,:)=1;
67 | %% Define a model which has 2 lstm layers with 512 hiddenunits,and a timedistrbuted dense layer with 512 hiddenunits
68 | input_shape=[100,10,64];%input dim is 100,timestep is 10,batchsize is 64
69 | hiddensizes=[512,512,512];
70 | for l=1:length(hiddensizes)
71 | configs{l}.type='lstm';
72 | configs{l}.hiddensize=hiddensizes(l);
73 | configs{l}.return_sequence=1;
74 | end
75 | configs{l+1}.type='activation';
76 | configs{l+1}.act_fun='softmax';
77 | configs{l+1}.loss='categorical_cross_entropy';
78 | optimizer.learningrate=0.1;
79 | optimizer.momentum=0.2;
80 | optimizer.opt='sgd';
81 | model=model_init(input_shape,configs,1,optimizer);
82 | %% Train the model
83 | model=model.train(model,x,y,nb_epoch,3,'example/minimodel_f.mat');
84 | or
85 | `test_lstm(50,[512,512,512],256,10,64,5);`
86 |
87 |
88 | ### Layers
89 | #### Layer class:
90 | * attributes:
91 | * `type` : `string`,type of the layer,available types:`input`,`dense`,`lstm`,`activation`
92 | * `prelayer_type` : `string`,type of the previous layer,available types:`input`,`dense`,`lstm`,`activation`
93 | * `trainable` : `bool`,is the layer trainable
94 | * `flag` : train model or predict model
95 | * `configs` :configures of the layer
96 | * `input_shape` : `vector`,`[input_dim,batchsize]` or `[input_dim,timestep,batchsize]`
97 | * `output_shape` : `vector`,`[hiddensize,batchsize]`or`[hiddensize,timestep,batchsize]`
98 | * `batch` : `int`,how many batches have been passed
99 | * `epoch` : same to `batch`
100 | * methods:
101 | * `layer=**layer_init(prelayer,loss,kwgrs)`
102 | * Built and init a layer.If the layer is a `input` layer,`prelayer` argument should be `input_shape`
103 | * `layer=layer.ff(layer,prelayer)`
104 | * `layer=layer.bp(layer,nextlayer)`
105 | ##### LSTM layer(layer)
106 | * `layer=lstm_init_gpu(prelayer,hiddensize,return_sequence,flag,loss)`
107 | * A LSTM(**Long-Short Term Memory unit - Hochreiter 1997**) layer,see [there]:http://deeplearning.net/tutorial/lstm.html for a step-by-step description of the algorithm.
108 | * available configuration options:
109 | * `config.hiddensize` : `int`(`double`),number of hidden units(output dim)
110 | * `config.return_sequence` :`bool`(`double`),return sequences or not.if `return_sequences`,output will be a 3D tensor with shape (hiddensize,timestep,batchsize). Else ,a 2D tensor with shape (hiddensize,batchsize).
111 | * `config.loss` : `string`,type of loss function.Optional,only be used if the layer is an ouput layer.
112 | * **example**
113 |
114 |
115 |
116 |
--------------------------------------------------------------------------------
/cudnn_LSTM_FF.m:
--------------------------------------------------------------------------------
1 | %Benchmark script: call the cuDNN LSTM forward pass 100 times via a MEX/
2 | %shared-library binding. NOTE(review): the library and CUDA include paths
3 | %are hard-coded to one machine -- adjust before running elsewhere.
4 | [e,n]=loadlibrary('C:\projects\mexcuda\matcudnn\mat_cudnn_test','mat_cudnn_test.h','includepath','C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\include','addheader','cudnn.h','addheader','cuda_runtime.h');
5 | %input activations; dims per the binding's expected layout
6 | ax=ones([128,256,20],'single','gpuArray');
7 | %opaque cuDNN reserve-space pointer, filled by the library call
8 | reserve=libpointer('voidPtr');
9 | tic;
10 | for i=1:100
11 | calllib('mat_cudnn_test','MAT_CUDNN_RNN_LSTM_FF',ax,reserve);
12 | end
13 | toc;
14 |
--------------------------------------------------------------------------------
/cumexhelp.h:
--------------------------------------------------------------------------------
1 | #ifndef CUMEXHELP
2 | #define CUMEXHELP
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include "gpu/mxGPUArray.h"
10 | #endif
--------------------------------------------------------------------------------
/example/char_rnn/Christ2FSM.m:
--------------------------------------------------------------------------------
function fsmgospel=Christ2FSM(bibel)
% CHRIST2FSM  Replace Christian keywords in a text with FSM
% (Flying Spaghetti Monster) equivalents.
%   fsmgospel = Christ2FSM(bibel) returns the input text with each keyword
%   in dict{1} replaced by the matching entry of dict{2} (via strrep).
dict{1}={'god','God','LORD','Lord','lord','holy','Holy',' Amen','heaven','Heaven','hell','Hell','angle','Angle'...
    ,'demon','Demon','christ','Christ','water','Water'};
% NOTE(review): 'angle'/'Angle' above look like typos for 'angel'/'Angel';
% confirm before changing the search keys.
dict{2}={'monster','Monster','FSM','FSM','FSM','yummy','Yummy',' RAmen','plate','Plate','sewer','Sewer','ball','Ball'...
    ,'fork','Fork','pasta','Pasta','soup','Soup'};
% Original condition was "~length(a)==length(b)", which MATLAB parses as
% "(~length(a))==length(b)" and (for non-empty dicts) never triggers.
if length(dict{1})~=length(dict{2})
    error('Keywords dict length unmatched');
end
fsmgospel=bibel;
% Apply the replacements sequentially; later replacements see the results
% of earlier ones.
for i=1:length(dict{1})
    fsmgospel=strrep(fsmgospel,dict{1}{i},dict{2}{i});
end
end
--------------------------------------------------------------------------------
/example/char_rnn/char_rnn.m:
--------------------------------------------------------------------------------
function char_rnn(data_filename,hiddensize,timestep,batch_size,nb_epoch)
% CHAR_RNN  Train a character-level stacked-LSTM language model.
%   data_filename : .mat file containing one-hot sequences x and y
%                   (as produced by txt2seq).
%   hiddensize    : vector of layer widths; needs at least 2 entries
%                   (the last entry is the dense output size).
%   timestep      : sequence length fed to the LSTM.
%   batch_size    : minibatch size.
%   nb_epoch      : number of training epochs.
load(data_filename,'x','y');   % one call instead of two loads of the same file
% Trim to a whole number of (timestep*batch_size) columns, then fold the
% time dimension in: x becomes (input_dim, timestep, nb_samples).
% NOTE(review): length() of a matrix is max(size(...)); this assumes the
% sample dimension dominates -- confirm for very small alphabets.
x=reshape(x(:,1:(timestep*batch_size)*floor(length(x)/(timestep*batch_size))),size(x,1),timestep,[]);
y=reshape(y(:,1:(timestep*batch_size)*floor(length(y)/(timestep*batch_size))),size(y,1),timestep,[]);
y=squeeze(y(:,end,:));   % train to predict only the last character of each window
input_shape=[size(x,1),timestep,batch_size];
% Stacked sequence-returning LSTM layers for all but the last two sizes.
l=0;   % pre-set so the increments below work when numel(hiddensize)==2 and the loop body never runs
for l=1:length(hiddensize)-2
    configs{l}.type='lstm';configs{l}.hiddensize=hiddensize(l);configs{l}.return_sequence=1;
end
l=l+1;
configs{l}.type='dropout';configs{l}.drop_rate=0.5;
l=l+1;
% Final LSTM returns only the last timestep, then dense + softmax output.
configs{l}.type='lstm';configs{l}.hiddensize=hiddensize(l-1);configs{l}.return_sequence=0;
configs{l+1}.type='dense';configs{l+1}.hiddensize=hiddensize(l);
configs{l+2}.type='activation';configs{l+2}.act_fun='softmax';configs{l+2}.loss='categorical_cross_entropy';
optimizer.learningrate=0.001;
optimizer.momentum=0;
optimizer.opt='sgd';
model=model_init(input_shape,configs,1,optimizer);
profile on;
model=model.train(model,x,y,nb_epoch,3,'example/minimodel_f.mat');
profile report;
end
--------------------------------------------------------------------------------
/example/char_rnn/textgenerate.m:
--------------------------------------------------------------------------------
function text=textgenerate(model,dic,term)
% TEXTGENERATE  Generate text from a trained char-rnn model.
% NOTE(review): unfinished stub -- the output `text` is never assigned and
% `model`, `dic`, `term` are unused; only the seed sentence is prepared here.
seed=double('In the beginning Monster created the plate and the earth.Ge1:2 And the earth was without form, and void; and darkness was upon the face of the deep.');
seed=seed(1:50) % no semicolon: echoes the 50-char seed to the console
end
function index=sample(pred,temp)
% SAMPLE  Pick a character index from a prediction vector.
% NOTE(review): exp(log(p)./temp) is a monotone transform of p, so the
% argmax below is unaffected by `temp`; for true temperature sampling the
% transformed vector should be normalized and sampled from, not max'ed.
pred=exp(log(double(pred))./temp);
[~,index]=max(pred,[],1);
end
--------------------------------------------------------------------------------
/example/char_rnn/txt2seq.m:
--------------------------------------------------------------------------------
function [x,y,dic]=txt2seq(text,threshold)
% TXT2SEQ  Convert text into one-hot character sequences for next-char prediction.
%   [x,y,dic]=txt2seq(text,threshold)
%   text      : char vector, or a file name (file contents are read and
%               concatenated via importdata)
%   threshold : characters occurring <= length(text)/threshold times are
%               replaced by ',' before encoding (default 50000)
%   x   : one-hot matrix (alphabet x N-1), columns 1..end-1 of the sequence
%   y   : same, shifted one character ahead (the prediction targets)
%   dic : sorted character codes of the final alphabet
if nargin<2
    threshold=50000;
end
if exist(text,'file')
    text=cell2mat(importdata(text)');
end
% First pass: detect rare characters and collapse them all to ','.
[dic,~,index]=unique(double(text));
for i=1:length(dic)
    if numel(find(index==i))<=length(text)/threshold
        text(index==i)=',';
    end
end
% Second pass: rebuild the alphabet and one-hot encode (int8 saves memory).
[dic,~,index]=unique(double(text));
seq=zeros(length(dic),length(index),'int8');
for i=1:length(index)
    seq(index(i),i)=1;
end
x=seq(:,1:end-1);
y=seq(:,2:end);
end
--------------------------------------------------------------------------------
/example/keras_test_lstm.py:
--------------------------------------------------------------------------------
1 | from keras.models import Sequential
2 | from keras.layers import LSTM
3 | import numpy as np
4 | import time
def main(nb_batch=100,hiddensize=512,input_dim=100,timestep=10,batch_size=32,nb_epoch=1):
    """Time one Keras LSTM predict pass over synthetic data (matDL comparison).

    NOTE(review): `y` and `nb_epoch` are unused -- only inference is timed.
    Written against the legacy Keras 1.x API (`output_dim`); newer Keras
    expects `units`.
    """
    x=np.ones((nb_batch*batch_size,timestep,input_dim)).astype('float32')
    y=np.ones((nb_batch*batch_size,timestep,hiddensize))  # unused; leftover from a training variant
    model = Sequential()
    model.add(LSTM(implementation=2,output_dim=hiddensize, input_shape=(timestep,input_dim),return_sequences=True))
#==============================================================================
#     model.add(LSTM(output_dim=hiddensize,return_sequences=True))
#     model.add(LSTM(output_dim=hiddensize,return_sequences=True))
#==============================================================================
    model.compile(loss='mse',optimizer='sgd')
    start=time.time()
    model.predict(x=x,batch_size=batch_size)
    duration=time.time()-start
    print('Duration: ',duration,' sec')
if __name__ == "__main__":
    main(nb_batch=100,hiddensize=512,input_dim=256,timestep=20,batch_size=128,nb_epoch=1)
21 |
--------------------------------------------------------------------------------
/example/test_lstm.asv:
--------------------------------------------------------------------------------
function test_lstm(nb_batch,hiddensizes,input_dim,timestep,batch_size,nb_epoch)
% TEST_LSTM (autosave copy) -- stale MATLAB .asv snapshot of test_lstm.m.
% NOTE(review): this version differs from test_lstm.m: the data layout is
% (input_dim,timestep,samples) here vs (samples,input_dim,timestep) there,
% and model.train below is called without the leading `model` argument that
% test_lstm.m passes. Prefer test_lstm.m; consider deleting this file
% (.asv files are editor artifacts).
optimizer.learningrate=0.01;
optimizer.momentum=0;
optimizer.opt='sgd';
x=rand(input_dim,timestep,batch_size*nb_batch,'single','gpuArray');
y=(zeros(hiddensizes(end),timestep,batch_size*nb_batch,'single','gpuArray'));
y(1,:,:)=1;
input_shape=[input_dim,timestep,batch_size];
for l=1:length(hiddensizes)
configs{l}.type='lstm';configs{l}.hiddensize=hiddensizes(l);configs{l}.return_sequence=1;

end
configs{l+1}.type='activation';configs{l+1}.act_fun='softmax';configs{l+1}.loss='categorical_cross_entropy';
model=model_init(input_shape,configs,1,optimizer);
profile on;
model=model.train(x,y,nb_epoch,2,0);
profile report;
end
--------------------------------------------------------------------------------
/example/test_lstm.m:
--------------------------------------------------------------------------------
function test_lstm(nb_batch,hiddensizes,input_dim,timestep,batch_size,nb_epoch)
% TEST_LSTM  Smoke-test/benchmark a stacked-LSTM model on synthetic GPU data.
%   nb_batch    : number of minibatches of synthetic data
%   hiddensizes : vector of LSTM layer widths (last = output dim)
%   input_dim   : input feature dimension
%   timestep    : sequence length
%   batch_size  : minibatch size
%   nb_epoch    : training epochs
optimizer.learningrate=0.01;
optimizer.momentum=0.2;
optimizer.opt='sgd';
% Synthetic GPU data; the target is class 1 at every timestep.
x=sin(ones(batch_size*nb_batch,input_dim,timestep,'single','gpuArray')+5);
y=(zeros(batch_size*nb_batch,hiddensizes(end),timestep,'single','gpuArray'));
y(:,1,:)=1;
% NOTE(review): README documents input_shape as [input_dim,timestep,batchsize];
% here it is [batch_size,input_dim,timestep] -- confirm which ordering
% model_init actually expects.
input_shape=[batch_size,input_dim,timestep];
for l=1:length(hiddensizes)
configs{l}.type='lstm';configs{l}.hiddensize=hiddensizes(l);configs{l}.return_sequence=1;
end
configs{l+1}.type='dropout';configs{l+1}.drop_rate=0.5;
configs{l+2}.type='activation';configs{l+2}.act_fun='softmax';configs{l+2}.loss='categorical_cross_entropy';
model=model_init(input_shape,configs,1,optimizer);
profile on;
model=model.train(model,x,y,nb_epoch,3,0);
%loss=model.evaluate(model,x,y);
%disp(loss);
profile report;
end
--------------------------------------------------------------------------------
/example/test_mlp.m:
--------------------------------------------------------------------------------
function test_mlp(nb_batch,hiddensize,input_dim,batch_size,nb_epoch)
% TEST_MLP  Build, train and evaluate a small MLP as a smoke test.
%   nb_batch   : number of minibatches of synthetic data to generate
%   hiddensize : vector of dense-layer widths (last entry = output dim)
%   input_dim  : input feature dimension
%   batch_size : minibatch size
%   nb_epoch   : training epochs
input_shape=[batch_size,input_dim];
% Stack dense -> Relu -> dropout for every requested hidden layer.
l=1;
for i=1:length(hiddensize)
    configs{l}.type='dense';configs{l}.hiddensize=hiddensize(i);
    l=l+1;
    configs{l}.type='activation';configs{l}.act_fun='Relu';
    l=l+1;
    configs{l}.type='dropout';configs{l}.drop_rate=0.5;
    l=l+1;   % original omitted this increment, so each new dense layer overwrote the previous dropout config
end
configs{l}.type='activation';configs{l}.act_fun='softmax';configs{l}.loss='categorical_cross_entropy';
optimizer.learningrate=0.01;
optimizer.momentum=0.5;
optimizer.opt='sgd';
model=model_init(input_shape,configs,1,optimizer);
% Synthetic data: random inputs, class 1 everywhere as the target.
x=rand(batch_size*nb_batch,input_dim);
y=zeros(batch_size*nb_batch,hiddensize(end));
y(:,1)=1;   % y is 2-D; the original's 3-D index y(:,1,:) worked only via a trailing singleton
model=model.train(model,x,y,nb_epoch,3,0);%not save
loss=model.evaluate(model,x,y);
disp(loss);
y_pred=model.predict(model,x); %#ok<NASGU> % prediction smoke test; result intentionally unused
end
--------------------------------------------------------------------------------
/matDL_128X128.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QuantumLiu/matDL/ce75b21dd97e9d58c074d50915d5fc000ee46afb/matDL_128X128.ico
--------------------------------------------------------------------------------
/mat_cudnn.cu:
--------------------------------------------------------------------------------
1 | #include"mat_cudnn.h"
// Fetch the read-only device data pointer of a MATLAB gpuArray.
// NOTE(review): broken as an output helper -- `dataPtr` is passed by value,
// so the assignment below changes only the local copy and the caller never
// receives the pointer; the signature would need `float const **dataPtr`.
// Also, the mxGPUArray from mxGPUCreateFromMxArray is never destroyed (leak).
void GET_GPU_CONST_PTR(mxArray const *arrayPtr,float const *dataPtr)
{
dataPtr=(float const *)(mxGPUGetData(mxGPUCreateFromMxArray(arrayPtr)));
}
// Fetch the writable device data pointer of a MATLAB gpuArray.
// NOTE(review): same defect as GET_GPU_CONST_PTR -- `dataPtr` is passed by
// value, so the caller never receives the pointer (needs `float **dataPtr`),
// and the intermediate mxGPUArray wrapper is leaked.
void GET_GPU_PTR(mxArray const *arrayPtr,float *dataPtr)
{
dataPtr=(float *)(mxGPUGetData(mxGPUCreateFromMxArray(arrayPtr)));
}
// NOTE(review): unfinished draft of the cuDNN LSTM forward pass; it does NOT
// compile and is superseded by MAT_CUDNN_RNN_LSTM_FF in mat_cudnn_test.cu:
//  - the parameter is named `minibatch` but the body reads `*miniBatch`;
//  - `numLayers`, `bidirectional`, `x`, `hx`, `cx`, `y`, `hy`, `cy` are never declared;
//  - cudnnSetRNNDescriptor receives the int* `hiddenSize` where an int is expected;
//  - the workspace/reserve/forward calls pass `seqLength` (int*) without dereferencing;
//  - `numLinearLayers` stays 0, so the weight/bias init loop never executes;
//  - cudaMalloc((void**)&reserveSpace, ...) overwrites the local parameter,
//    not the caller's pointer, and nothing allocated here is ever freed;
//  - `x_array`/`w_array` are never read and no result is returned to MATLAB.
void MAT_CUDNN_LSTM_FF(mxArray const *x_array,mxArray const *w_array,void **reserveSpace,int* minibatch,int* hiddenSize,int* inputSize,int* seqLength )
{  // -------------------------
   // Create cudnn context
   // -------------------------
   mxInitGPU();
   cudnnHandle_t cudnnHandle;
   cudnnErrCheck(cudnnCreate(&cudnnHandle));

   // Per-timestep input/output descriptors (and gradient descriptors for a
   // backward pass that was never written).
   cudnnTensorDescriptor_t *xDesc, *yDesc, *dxDesc, *dyDesc;
   cudnnTensorDescriptor_t hxDesc, cxDesc;
   cudnnTensorDescriptor_t hyDesc, cyDesc;
   cudnnTensorDescriptor_t dhxDesc, dcxDesc;
   cudnnTensorDescriptor_t dhyDesc, dcyDesc;

   xDesc = (cudnnTensorDescriptor_t*)malloc(*seqLength * sizeof(cudnnTensorDescriptor_t));
   yDesc = (cudnnTensorDescriptor_t*)malloc(*seqLength * sizeof(cudnnTensorDescriptor_t));
   dxDesc = (cudnnTensorDescriptor_t*)malloc(*seqLength * sizeof(cudnnTensorDescriptor_t));
   dyDesc = (cudnnTensorDescriptor_t*)malloc(*seqLength * sizeof(cudnnTensorDescriptor_t));

   int dimA[3];
   int strideA[3];
   // In this example dimA[1] is constant across the whole sequence
   // This isn't required, all that is required is that it does not increase.
   for (int i = 0; i < *seqLength; i++) {
      cudnnErrCheck(cudnnCreateTensorDescriptor(&xDesc[i]));
      cudnnErrCheck(cudnnCreateTensorDescriptor(&yDesc[i]));
      cudnnErrCheck(cudnnCreateTensorDescriptor(&dxDesc[i]));
      cudnnErrCheck(cudnnCreateTensorDescriptor(&dyDesc[i]));

      dimA[0] = *miniBatch;   // NOTE(review): `miniBatch` undeclared -- the parameter is `minibatch`
      dimA[1] = *inputSize;
      dimA[2] = 1;

      strideA[0] = dimA[2] * dimA[1];
      strideA[1] = dimA[2];
      strideA[2] = 1;

      cudnnErrCheck(cudnnSetTensorNdDescriptor(xDesc[i], CUDNN_DATA_FLOAT, 3, dimA, strideA));
      cudnnErrCheck(cudnnSetTensorNdDescriptor(dxDesc[i], CUDNN_DATA_FLOAT, 3, dimA, strideA));

      dimA[0] = *miniBatch;
      dimA[1] = *hiddenSize;
      dimA[2] = 1;

      strideA[0] = dimA[2] * dimA[1];
      strideA[1] = dimA[2];
      strideA[2] = 1;

      cudnnErrCheck(cudnnSetTensorNdDescriptor(yDesc[i], CUDNN_DATA_FLOAT, 3, dimA, strideA));
      cudnnErrCheck(cudnnSetTensorNdDescriptor(dyDesc[i], CUDNN_DATA_FLOAT, 3, dimA, strideA));
   }
   // Hidden/cell state descriptors: (layers, miniBatch, hiddenSize).
   dimA[0] = 1;
   dimA[1] = *miniBatch;
   dimA[2] = *hiddenSize;

   strideA[0] = dimA[2] * dimA[1];
   strideA[1] = dimA[2];
   strideA[2] = 1;

   cudnnErrCheck(cudnnCreateTensorDescriptor(&hxDesc));
   cudnnErrCheck(cudnnCreateTensorDescriptor(&cxDesc));
   cudnnErrCheck(cudnnCreateTensorDescriptor(&hyDesc));
   cudnnErrCheck(cudnnCreateTensorDescriptor(&cyDesc));
   cudnnErrCheck(cudnnCreateTensorDescriptor(&dhxDesc));
   cudnnErrCheck(cudnnCreateTensorDescriptor(&dcxDesc));
   cudnnErrCheck(cudnnCreateTensorDescriptor(&dhyDesc));
   cudnnErrCheck(cudnnCreateTensorDescriptor(&dcyDesc));

   cudnnErrCheck(cudnnSetTensorNdDescriptor(hxDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
   cudnnErrCheck(cudnnSetTensorNdDescriptor(cxDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
   cudnnErrCheck(cudnnSetTensorNdDescriptor(hyDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
   cudnnErrCheck(cudnnSetTensorNdDescriptor(cyDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
   cudnnErrCheck(cudnnSetTensorNdDescriptor(dhxDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
   cudnnErrCheck(cudnnSetTensorNdDescriptor(dcxDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
   cudnnErrCheck(cudnnSetTensorNdDescriptor(dhyDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
   cudnnErrCheck(cudnnSetTensorNdDescriptor(dcyDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
   // -------------------------
   // Set up the dropout descriptor (needed for the RNN descriptor)
   // -------------------------
   unsigned long long seed = 1337ull; // Pick a seed.

   cudnnDropoutDescriptor_t dropoutDesc;
   cudnnErrCheck(cudnnCreateDropoutDescriptor(&dropoutDesc));

   // How much memory does dropout need for states?
   // These states are used to generate random numbers internally
   // and should not be freed until the RNN descriptor is no longer used
   size_t stateSize;
   void *states;
   cudnnErrCheck(cudnnDropoutGetStatesSize(cudnnHandle, &stateSize));

   cudaErrCheck(cudaMalloc(&states, stateSize));
   float dropout=0;
   cudnnErrCheck(cudnnSetDropoutDescriptor(dropoutDesc,
                             cudnnHandle,
                             dropout,
                             states,
                             stateSize,
                             seed));

   // -------------------------
   // Set up the RNN descriptor
   // -------------------------
   cudnnRNNDescriptor_t rnnDesc;
   cudnnRNNMode_t RNNMode;

   cudnnErrCheck(cudnnCreateRNNDescriptor(&rnnDesc));

   RNNMode = CUDNN_LSTM;

   // NOTE(review): `hiddenSize` is an int* here (needs *hiddenSize) and
   // `numLayers` is never declared.
   cudnnErrCheck(cudnnSetRNNDescriptor(rnnDesc,
                         hiddenSize,
                         numLayers,
                         dropoutDesc,
                         CUDNN_LINEAR_INPUT, // We can also skip the input matrix transformation
                         CUDNN_UNIDIRECTIONAL,
                         RNNMode,
                         CUDNN_DATA_FLOAT));
   // -------------------------
   // Set up parameters
   // -------------------------
   // This needs to be done after the rnn descriptor is set as otherwise
   // we don't know how many parameters we have to allocate
   void *w;
   void *dw;

   cudnnFilterDescriptor_t wDesc, dwDesc;

   cudnnErrCheck(cudnnCreateFilterDescriptor(&wDesc));
   cudnnErrCheck(cudnnCreateFilterDescriptor(&dwDesc));

   size_t weightsSize;
   cudnnErrCheck(cudnnGetRNNParamsSize(cudnnHandle, rnnDesc, xDesc[0], &weightsSize, CUDNN_DATA_FLOAT));

   int dimW[3];
   dimW[0] = weightsSize / sizeof(float);
   dimW[1] = 1;
   dimW[2] = 1;

   cudnnErrCheck(cudnnSetFilterNdDescriptor(wDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 3, dimW));
   cudnnErrCheck(cudnnSetFilterNdDescriptor(dwDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 3, dimW));

   cudaErrCheck(cudaMalloc((void**)&w, weightsSize));
   cudaErrCheck(cudaMalloc((void**)&dw, weightsSize));


   // -------------------------
   // Set up work space and reserved memory
   // -------------------------
   void *workspace;

   size_t workSize;
   size_t reserveSize;

   // Need for every pass
   // NOTE(review): `seqLength` is int* in these three calls; should be *seqLength.
   cudnnErrCheck(cudnnGetRNNWorkspaceSize(cudnnHandle, rnnDesc, seqLength, xDesc, &workSize));
   // Only needed in training, shouldn't be touched between passes.
   cudnnErrCheck(cudnnGetRNNTrainingReserveSize(cudnnHandle, rnnDesc, seqLength, xDesc, &reserveSize));

   cudaErrCheck(cudaMalloc((void**)&workspace, workSize));
   // NOTE(review): writes to the local `reserveSpace` variable, not *reserveSpace;
   // the caller's pointer is never updated.
   cudaErrCheck(cudaMalloc((void**)&reserveSpace, reserveSize));
   // Weights
   // NOTE(review): numLinearLayers should be 8 for LSTM (see the test file);
   // with 0 the init loop below never runs, and `bidirectional` is undeclared.
   int numLinearLayers = 0;
   for (int layer = 0; layer < numLayers * (bidirectional ? 2 : 1); layer++) {
      for (int linLayerID = 0; linLayerID < numLinearLayers; linLayerID++) {
         cudnnFilterDescriptor_t linLayerMatDesc;
         cudnnErrCheck(cudnnCreateFilterDescriptor(&linLayerMatDesc));
         float *linLayerMat;

         cudnnErrCheck(cudnnGetRNNLinLayerMatrixParams( cudnnHandle,
                                          rnnDesc,
                                          layer,
                                          xDesc[0],
                                          wDesc,
                                          w,
                                          linLayerID,
                                          linLayerMatDesc,
                                          (void**)&linLayerMat));

         cudnnDataType_t dataType;
         cudnnTensorFormat_t format;
         int nbDims;
         int filterDimA[3];
         cudnnErrCheck(cudnnGetFilterNdDescriptor(linLayerMatDesc,
                                      3,
                                      &dataType,
                                      &format,
                                      &nbDims,
                                      filterDimA));

         initGPUData(linLayerMat, filterDimA[0] * filterDimA[1] * filterDimA[2], 1.f / (float)(filterDimA[0] * filterDimA[1] * filterDimA[2]));

         cudnnErrCheck(cudnnDestroyFilterDescriptor(linLayerMatDesc));

         cudnnFilterDescriptor_t linLayerBiasDesc;
         cudnnErrCheck(cudnnCreateFilterDescriptor(&linLayerBiasDesc));
         float *linLayerBias;

         cudnnErrCheck(cudnnGetRNNLinLayerBiasParams( cudnnHandle,
                                        rnnDesc,
                                        layer,
                                        xDesc[0],
                                        wDesc,
                                        w,
                                        linLayerID,
                                        linLayerBiasDesc,
                                        (void**)&linLayerBias));

         cudnnErrCheck(cudnnGetFilterNdDescriptor(linLayerBiasDesc,
                                      3,
                                      &dataType,
                                      &format,
                                      &nbDims,
                                      filterDimA));

         initGPUData(linLayerBias, filterDimA[0] * filterDimA[1] * filterDimA[2], 1.f);

         cudnnErrCheck(cudnnDestroyFilterDescriptor(linLayerBiasDesc));
      }
   }
   // *********************************************************************************************************
   // At this point all of the setup is done. We now need to pass through the RNN.
   // *********************************************************************************************************



   cudaErrCheck(cudaDeviceSynchronize());

   cudaEvent_t start, stop;
   float timeForward, timeBackward1, timeBackward2;
   cudaErrCheck(cudaEventCreate(&start));
   cudaErrCheck(cudaEventCreate(&stop));

   cudaErrCheck(cudaEventRecord(start));

   // If we're not training we use this instead
   // cudnnErrCheck(cudnnRNNForwardInference(cudnnHandle,
                                     // rnnDesc,
                                     // xDesc,
                                     // x,
                                     // hxDesc,
                                     // hx,
                                     // cxDesc,
                                     // cx,
                                     // wDesc,
                                     // w,
                                     // yDesc,
                                     // y,
                                     // hyDesc,
                                     // hy,
                                     // cyDesc,
                                     // cy,
                                     // workspace,
                                     // workSize));

   // NOTE(review): `x`, `hx`, `cx`, `y`, `hy`, `cy` are never declared or
   // allocated in this function; `seqLength` is again passed undereferenced.
   cudnnErrCheck(cudnnRNNForwardTraining(cudnnHandle,
                           rnnDesc,
                           seqLength,
                           xDesc,
                           x,
                           hxDesc,
                           hx,
                           cxDesc,
                           cx,
                           wDesc,
                           w,
                           yDesc,
                           y,
                           hyDesc,
                           hy,
                           cyDesc,
                           cy,
                           workspace,
                           workSize,
                           reserveSpace,
                           reserveSize));


}
--------------------------------------------------------------------------------
/mat_cudnn.h:
--------------------------------------------------------------------------------
1 | #define EXPORT_FCNS
2 | #ifndef SHRHELP
3 | #include "shrhelp.h"
4 | #endif
5 |
6 | #include "cumexhelp.h"
7 |
8 | #ifndef MATCUDNN
9 | #define MATCUDNN
10 | #endif
11 |
12 | #ifdef __cplusplus
13 | #include
14 | extern "C"
15 | {
16 | #endif
17 |
18 | EXPORTED_FUNCTION void MAT_CUDNN_test(void* x);
19 |
20 | #ifdef __cplusplus
21 | }
22 | #endif
23 |
--------------------------------------------------------------------------------
/mat_cudnn_test.cu:
--------------------------------------------------------------------------------
1 | #include "cumexhelp.h"
2 | #define EXPORT_FCNS
3 | #ifndef SHRHELP
4 | #include "shrhelp.h"
5 | #endif
6 | #include "mat_cudnn_test.h"
7 |
// Abort the MEX call with a readable message when a CUDA runtime call fails.
#define cudaErrCheck(stat) { cudaErrCheck_((stat)); }
void cudaErrCheck_(cudaError_t stat) {
    if (cudaSuccess == stat) return;           // fast path: nothing to report
    mexPrintf("CUDA Error: %s\n", cudaGetErrorString(stat));
    mexErrMsgTxt("CUDA Error");                // long-jumps back into MATLAB
}
// Abort the MEX call with a readable message when a cuDNN call fails.
#define cudnnErrCheck(stat) { cudnnErrCheck_((stat)); }
void cudnnErrCheck_(cudnnStatus_t stat) {
    if (CUDNN_STATUS_SUCCESS == stat) return;  // fast path: nothing to report
    mexPrintf( "cuDNN Error: %s\n", cudnnGetErrorString(stat));
    mexErrMsgTxt("cuDNN Error");               // long-jumps back into MATLAB
}
// Device kernel: each thread writes `value` into one element of `data`,
// guarded against the partially-filled last block.
__global__ void initGPUData_ker(float *data, int numElements, float value) {
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
    if (idx >= numElements) return;
    data[idx] = value;
}
// Fill `numElements` floats at `data` (device memory) with `value`.
// Launches a 1-D grid of 1024-thread blocks; the kernel bounds-checks the tail.
void initGPUData(float *data, int numElements, float value) {
   // Guard: numElements == 0 would yield gridDim.x == 0, which is an
   // invalid launch configuration (the original launched unconditionally).
   if (numElements <= 0) return;

   dim3 gridDim;
   dim3 blockDim;

   blockDim.x = 1024;
   gridDim.x = (numElements + blockDim.x - 1) / blockDim.x;  // ceil-div

   initGPUData_ker <<< gridDim, blockDim >>> (data, numElements, value);
}
37 | // void GET_GPU_CONST_PTR(mxArray *arrayPtr,float const *dataPtr)
38 | // {
39 | // dataPtr=(float const *)(mxGPUGetDataReadOnly (mxGPUCreateFromMxArray(arrayPtr)));
40 | // }
41 | // void GET_GPU_PTR(mxArray *arrayPtr,float *dataPtr)
42 | // {
43 | // dataPtr=(float *)(mxGPUGetData(mxGPUCreateFromMxArray(arrayPtr)));
44 | // }
45 |
// Sanity check callable from MATLAB: create a cuDNN handle, allocate a
// buffer, then tear both down. `x` is only used as scratch for the device
// pointer; the caller's value is ignored.
EXPORTED_FUNCTION void MAT_CUDNN_test(void* x)
{
   // Plain ints: the original wrote "int(*seqLength)=10;" etc., which forces
   // the integer constants into pointer variables and then dereferences them.
   int seqLength = 10;
   int inputSize = 128;
   int miniBatch = 64;
   cudnnHandle_t cudnnHandle;
   cudnnErrCheck(cudnnCreate(&cudnnHandle));
   cudaErrCheck(cudaMalloc((void**)&x, seqLength * inputSize * miniBatch * sizeof(float)));
   cudnnDestroy(cudnnHandle);
   cudaFree(x);
}
// Run one cuDNN LSTM forward-training pass over the gpuArray `ax`
// (per timestep: x is miniBatch x inputSize, y is miniBatch x hiddenSize).
// NOTE(review): benchmark scaffolding, not a usable layer yet:
//  - `aw`, `ah`, `ac` are unused: weights are filled with constants via
//    initGPUData and the outputs y/hy/cy are freed without being copied back;
//  - `reserveSpace` is overwritten by a local cudaMalloc and freed before
//    return, so the caller never receives it;
//  - mat_cudnn_test.h declares this function with only (ax, reserveSpace);
//    calling through that prototype (as cudnn_LSTM_FF.m does) leaves the
//    remaining seven parameters undefined -- reconcile the signatures;
//  - `states`, the xDesc/yDesc arrays, all descriptors and the mxGPUArray
//    wrapper from mxGPUCreateFromMxArray are leaked.
EXPORTED_FUNCTION void MAT_CUDNN_RNN_LSTM_FF(mxArray const *ax,mxArray const *aw,mxArray *ah,mxArray *ac,int *hiddenSize,int *miniBatch,int *inputSize,int *seqLength,void *reserveSpace)
{
   //int(*seqLength)=20;
   int numLayers=1;
   //int hiddenSize=256;
   //int (*inputSize)=128;
   //int (*miniBatch)=64;
   float dropout=0.0;
   bool bidirectional=0;
   int mode=2;   // selects CUDNN_LSTM below
   cudnnHandle_t cudnnHandle;
   cudnnErrCheck(cudnnCreate(&cudnnHandle));
   // Read-only device pointer to the MATLAB input gpuArray.
   float const *x=(float const *)mxGPUGetDataReadOnly(mxGPUCreateFromMxArray(ax));
   void *hx = NULL;
   void *cx = NULL;
   void *y;
   void *hy = NULL;
   void *cy = NULL;
   // Scratch buffers for initial/final hidden and cell state and the output.
   cudaErrCheck(cudaMalloc((void**)&hx, numLayers * (*hiddenSize) * (*miniBatch) * (bidirectional ? 2 : 1) * sizeof(float)));
   cudaErrCheck(cudaMalloc((void**)&cx, numLayers * (*hiddenSize) * (*miniBatch) * (bidirectional ? 2 : 1) * sizeof(float)));
   cudaErrCheck(cudaMalloc((void**)&y,(*seqLength) * (*hiddenSize) * (*miniBatch) * (bidirectional ? 2 : 1) * sizeof(float)));
   cudaErrCheck(cudaMalloc((void**)&hy, numLayers * (*hiddenSize) * (*miniBatch) * (bidirectional ? 2 : 1) * sizeof(float)));
   cudaErrCheck(cudaMalloc((void**)&cy, numLayers * (*hiddenSize) * (*miniBatch) * (bidirectional ? 2 : 1) * sizeof(float)));
   cudnnTensorDescriptor_t *xDesc, *yDesc;
   cudnnTensorDescriptor_t hxDesc, cxDesc;
   cudnnTensorDescriptor_t hyDesc, cyDesc;
   xDesc = (cudnnTensorDescriptor_t*)malloc((*seqLength) * sizeof(cudnnTensorDescriptor_t));
   yDesc = (cudnnTensorDescriptor_t*)malloc((*seqLength) * sizeof(cudnnTensorDescriptor_t));
   int dimA[3];
   int strideA[3];
   // One fully-packed (miniBatch, dim, 1) descriptor per timestep.
   for (int i = 0; i <(*seqLength); i++) {
      cudnnErrCheck(cudnnCreateTensorDescriptor(&xDesc[i]));
      cudnnErrCheck(cudnnCreateTensorDescriptor(&yDesc[i]));

      dimA[0] = (*miniBatch);
      dimA[1] = (*inputSize);
      dimA[2] = 1;

      strideA[0] = dimA[2] * dimA[1];
      strideA[1] = dimA[2];
      strideA[2] = 1;

      cudnnErrCheck(cudnnSetTensorNdDescriptor(xDesc[i], CUDNN_DATA_FLOAT, 3, dimA, strideA));

      dimA[0] = (*miniBatch);
      dimA[1] = bidirectional ? (*hiddenSize) * 2 : (*hiddenSize);
      dimA[2] = 1;

      strideA[0] = dimA[2] * dimA[1];
      strideA[1] = dimA[2];
      strideA[2] = 1;

      cudnnErrCheck(cudnnSetTensorNdDescriptor(yDesc[i], CUDNN_DATA_FLOAT, 3, dimA, strideA));
   }

   // Hidden/cell state descriptors: (layers, miniBatch, hiddenSize).
   dimA[0] = numLayers * (bidirectional ? 2 : 1);
   dimA[1] = (*miniBatch);
   dimA[2] = (*hiddenSize);

   strideA[0] = dimA[2] * dimA[1];
   strideA[1] = dimA[2];
   strideA[2] = 1;

   cudnnErrCheck(cudnnCreateTensorDescriptor(&hxDesc));
   cudnnErrCheck(cudnnCreateTensorDescriptor(&cxDesc));
   cudnnErrCheck(cudnnCreateTensorDescriptor(&hyDesc));
   cudnnErrCheck(cudnnCreateTensorDescriptor(&cyDesc));
   cudnnErrCheck(cudnnSetTensorNdDescriptor(hxDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
   cudnnErrCheck(cudnnSetTensorNdDescriptor(cxDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
   cudnnErrCheck(cudnnSetTensorNdDescriptor(hyDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
   cudnnErrCheck(cudnnSetTensorNdDescriptor(cyDesc, CUDNN_DATA_FLOAT, 3, dimA, strideA));
   unsigned long long seed = 1337ull; // Pick a seed.

   cudnnDropoutDescriptor_t dropoutDesc;
   cudnnErrCheck(cudnnCreateDropoutDescriptor(&dropoutDesc));

   // How much memory does dropout need for states?
   // These states are used to generate random numbers internally
   // and should not be freed until the RNN descriptor is no longer used
   size_t stateSize;
   void *states;
   cudnnErrCheck(cudnnDropoutGetStatesSize(cudnnHandle, &stateSize));

   cudaErrCheck(cudaMalloc(&states, stateSize));

   cudnnErrCheck(cudnnSetDropoutDescriptor(dropoutDesc,
                             cudnnHandle,
                             dropout,
                             states,
                             stateSize,
                             seed));

   // -------------------------
   // Set up the RNN descriptor
   // -------------------------
   cudnnRNNDescriptor_t rnnDesc;
   cudnnRNNMode_t RNNMode;

   cudnnErrCheck(cudnnCreateRNNDescriptor(&rnnDesc));

   if (mode == 0) RNNMode = CUDNN_RNN_RELU;
   else if (mode == 1) RNNMode = CUDNN_RNN_TANH;
   else if (mode == 2) RNNMode = CUDNN_LSTM;
   else if (mode == 3) RNNMode = CUDNN_GRU;

   cudnnErrCheck(cudnnSetRNNDescriptor(rnnDesc,
                         (*hiddenSize),
                         numLayers,
                         dropoutDesc,
                         CUDNN_LINEAR_INPUT, // We can also skip the input matrix transformation
                         bidirectional ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL,
                         RNNMode,
                         CUDNN_DATA_FLOAT));
   // Parameter blob sized by cuDNN once the RNN descriptor is known.
   void *w;
   cudnnFilterDescriptor_t wDesc;
   cudnnErrCheck(cudnnCreateFilterDescriptor(&wDesc));
   size_t weightsSize;
   cudnnErrCheck(cudnnGetRNNParamsSize(cudnnHandle, rnnDesc, xDesc[0], &weightsSize, CUDNN_DATA_FLOAT));

   int dimW[3];
   dimW[0] = weightsSize / sizeof(float);
   dimW[1] = 1;
   dimW[2] = 1;

   cudnnErrCheck(cudnnSetFilterNdDescriptor(wDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 3, dimW));

   cudaErrCheck(cudaMalloc((void**)&w, weightsSize));
   void *workspace;
   size_t workSize;
   size_t reserveSize;
   cudnnErrCheck(cudnnGetRNNWorkspaceSize(cudnnHandle, rnnDesc,(*seqLength), xDesc, &workSize));
   // Only needed in training, shouldn't be touched between passes.
   cudnnErrCheck(cudnnGetRNNTrainingReserveSize(cudnnHandle, rnnDesc,(*seqLength), xDesc, &reserveSize));

   cudaErrCheck(cudaMalloc((void**)&workspace, workSize));
   // NOTE(review): this writes the allocation into the local `reserveSpace`
   // parameter (and it is freed below), so the caller never receives it.
   cudaErrCheck(cudaMalloc((void**)&reserveSpace, reserveSize));
   //if (hx != NULL) initGPUData((float*)hx, numLayers * hiddenSize * (*miniBatch) * (bidirectional ? 2 : 1), 1.f);
   //if (cx != NULL) initGPUData((float*)cx, numLayers * hiddenSize * (*miniBatch) * (bidirectional ? 2 : 1), 1.f);
   // Number of weight matrices per layer depends on the cell type.
   int numLinearLayers = 0;
   if (RNNMode == CUDNN_RNN_RELU || RNNMode == CUDNN_RNN_TANH) {
      numLinearLayers = 2;
   }
   else if (RNNMode == CUDNN_LSTM) {
      numLinearLayers = 8;
   }
   else if (RNNMode == CUDNN_GRU) {
      numLinearLayers = 6;
   }
   // Fill every weight matrix and bias with constants (benchmark data);
   // NOTE(review): `aw` is ignored -- real weights are never loaded.
   for (int layer = 0; layer < numLayers * (bidirectional ? 2 : 1); layer++) {
      for (int linLayerID = 0; linLayerID < numLinearLayers; linLayerID++) {
         cudnnFilterDescriptor_t linLayerMatDesc;
         cudnnErrCheck(cudnnCreateFilterDescriptor(&linLayerMatDesc));
         float *linLayerMat;

         cudnnErrCheck(cudnnGetRNNLinLayerMatrixParams( cudnnHandle,
                                          rnnDesc,
                                          layer,
                                          xDesc[0],
                                          wDesc,
                                          w,
                                          linLayerID,
                                          linLayerMatDesc,
                                          (void**)&linLayerMat));

         cudnnDataType_t dataType;
         cudnnTensorFormat_t format;
         int nbDims;
         int filterDimA[3];
         cudnnErrCheck(cudnnGetFilterNdDescriptor(linLayerMatDesc,
                                      3,
                                      &dataType,
                                      &format,
                                      &nbDims,
                                      filterDimA));

         initGPUData(linLayerMat, filterDimA[0] * filterDimA[1] * filterDimA[2], 1.f / (float)(filterDimA[0] * filterDimA[1] * filterDimA[2]));

         cudnnErrCheck(cudnnDestroyFilterDescriptor(linLayerMatDesc));

         cudnnFilterDescriptor_t linLayerBiasDesc;
         cudnnErrCheck(cudnnCreateFilterDescriptor(&linLayerBiasDesc));
         float *linLayerBias;

         cudnnErrCheck(cudnnGetRNNLinLayerBiasParams( cudnnHandle,
                                        rnnDesc,
                                        layer,
                                        xDesc[0],
                                        wDesc,
                                        w,
                                        linLayerID,
                                        linLayerBiasDesc,
                                        (void**)&linLayerBias));

         cudnnErrCheck(cudnnGetFilterNdDescriptor(linLayerBiasDesc,
                                      3,
                                      &dataType,
                                      &format,
                                      &nbDims,
                                      filterDimA));

         initGPUData(linLayerBias, filterDimA[0] * filterDimA[1] * filterDimA[2], 1.f);

         cudnnErrCheck(cudnnDestroyFilterDescriptor(linLayerBiasDesc));
      }
   }
   cudaErrCheck(cudaDeviceSynchronize());

   // NULL hx/cx mean "initial state is zero" to cuDNN.
   cudnnErrCheck(cudnnRNNForwardTraining(cudnnHandle,
                           rnnDesc,
                           (*seqLength),
                           xDesc,
                           x,
                           hxDesc,
                           hx,
                           cxDesc,
                           cx,
                           wDesc,
                           w,
                           yDesc,
                           y,
                           hyDesc,
                           hy,
                           cyDesc,
                           cy,
                           workspace,
                           workSize,
                           reserveSpace,
                           reserveSize));



   // NOTE(review): y/hy/cy are freed without being copied into ah/ac,
   // so no result ever reaches MATLAB.
   cudaFree(hx);
   cudaFree(cx);
   cudaFree(y);
   cudaFree(hy);
   cudaFree(cy);
   cudaFree(workspace);
   cudaFree(reserveSpace);
   cudaFree(w);
   cudnnDestroy(cudnnHandle);

}
// MEX gateway: intentionally empty. NOTE(review): this file is presumably
// used as a shared library through loadlibrary/calllib (see cudnn_LSTM_FF.m);
// the empty gateway appears to exist only so mexcuda will build the binary --
// confirm against the build setup.
void mexFunction( int nlhs, mxArray *plhs[],
                  int nrhs, const mxArray*prhs[] )
{
}
304 |
--------------------------------------------------------------------------------
/mat_cudnn_test.h:
--------------------------------------------------------------------------------
1 | #include "shrhelp.h"
2 | #ifndef CUMEXHELP
3 | #include
4 | #endif
5 | #ifdef __cplusplus
6 | #include
7 | extern "C"
8 | {
9 | #endif
10 | EXPORTED_FUNCTION void MAT_CUDNN_test(void* x);
11 | EXPORTED_FUNCTION void MAT_CUDNN_RNN_LSTM_FF(mxArray const *ax,void *reserveSpace);
12 | #ifdef __cplusplus
13 | }
14 | #endif
--------------------------------------------------------------------------------
/mat_cudnn_test.mexw64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QuantumLiu/matDL/ce75b21dd97e9d58c074d50915d5fc000ee46afb/mat_cudnn_test.mexw64
--------------------------------------------------------------------------------