├── LICENSE.txt
├── README.md
├── environment.yml
├── keras.json.for_TensorFlow
├── keras.json.for_Theano
├── nbs
    ├── char-rnn.ipynb
    ├── convolution-intro.ipynb
    ├── dogs_cats_redux.ipynb
    ├── dogscats-ensemble.ipynb
    ├── imagenet_batchnorm.ipynb
    ├── lesson1.ipynb
    ├── lesson2.ipynb
    ├── lesson3.ipynb
    ├── lesson4.ipynb
    ├── lesson5.ipynb
    ├── lesson6.ipynb
    ├── lesson7.ipynb
    ├── mnist.ipynb
    ├── resnet50.py
    ├── sgd-intro.ipynb
    ├── statefarm-sample.ipynb
    ├── statefarm.ipynb
    ├── utils.py
    ├── vgg16.py
    ├── vgg16bn.py
    └── wordvectors.ipynb
└── nbs2
    ├── DCGAN.ipynb
    ├── Keras-Tensorflow-Tutorial.ipynb
    ├── attention_wrapper.py
    ├── babi-memnn.ipynb
    ├── batcher.py
    ├── bcolz_array_iterator.py
    ├── bcolz_iter_test.ipynb
    ├── dcgan.py
    ├── densenet-keras.ipynb
    ├── imagenet_process.ipynb
    ├── kmeans.py
    ├── kmeans_test.ipynb
    ├── meanshift.ipynb
    ├── neural-sr.ipynb
    ├── neural-style-pytorch.ipynb
    ├── neural-style.ipynb
    ├── pytorch-tut.ipynb
    ├── rossman.ipynb
    ├── rossman_exp.py
    ├── seq2seq-translation.ipynb
    ├── spelling_bee_RNN.ipynb
    ├── taxi.ipynb
    ├── taxi_data_prep_and_mlp.ipynb
    ├── tf-basics.ipynb
    ├── tiramisu-keras.ipynb
    ├── tiramisu-pytorch.ipynb
    ├── torch_utils.py
    ├── translate-pytorch.ipynb
    ├── translate.ipynb
    ├── utils2.py
    ├── vgg16.py
    ├── vgg16_avg.py
    └── wgan-pytorch.ipynb


/LICENSE.txt:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Modified notebooks and Python files for Keras 2 and Python 3 from the fast.ai Deep Learning course v.1
 2 | The repository includes modified copies of the original Jupyter notebooks and Python files from the excellent
 3 | (and really unique) deep learning course "Practical Deep Learning For Coders" Part 1 and Part 2, v.1,
 4 | created by [fast.ai](http://fast.ai).
 5 | 
 6 | The [original files](https://github.com/fastai/courses) require Keras 1. One main goal has been to modify the original files to the minimum extent possible. The comments added to the modules generally start with *"# -"* when they are not just *"# Keras 2"*.
 7 | 
 8 | The current version of the repository has been tested with **_Keras 2.1.2_**.
 9 | The previous version, tested with _Keras 2.0.6_, is available [here](https://github.com/roebius/deeplearning_keras2/releases).
10 | ### Part 1
11 | Located in the _nbs_ folder. Tested on _Ubuntu 16.04_ and _Python 3.5_, installed through [Anaconda](https://www.anaconda.com), using the [Theano](http://deeplearning.net/software/theano/) 1.0.1 backend.  
12 | 
13 | ### Part 2
14 | Located in the _nbs2_ folder. Tested on _Ubuntu 16.04_ and _Python 3.5_, installed through [Anaconda](https://www.anaconda.com), using the [TensorFlow](https://www.tensorflow.org/) 1.3.0 backend.
15 | A few modules requiring PyTorch were also tested, using [PyTorch](http://pytorch.org/) 0.3.0.  
16 | 
17 | The files _keras.json.for\_TensorFlow_ and _keras.json.for\_Theano_ provide a template for the appropriate _keras.json_ file, based on which one of the two backends needs to be used by Keras.
18 | 
19 | An _environment.yml_ file for creating a suitable [conda environment](https://conda.io/docs/user-guide/tasks/manage-environments.html) is provided. 
20 | 
21 | 
22 | ### Notes and issues about Part 2
23 | *neural-style.ipynb*: due to a function parameter change in _Keras 2.1_, the _VGG16_ provided by _Keras 2.1_ has been used instead of the original custom module _vgg16\_avg.py_
24 | 
25 | *rossman.ipynb*: section "Using 3rd place data" has been left out for lack of the required data
26 | 
27 | *spelling_bee_RNN.ipynb* and *attention_wrapper.py*: due to the changed implementation of the recurrent.py module in Keras 2.1, the attention part of the notebook doesn't work anymore
28 | 
29 | *taxi_data_prep_and_mlp.ipynb*: section "Uh oh ..." has been left out. Caveat: running all the notebook at once exhausted 128 GB RAM; I was able to run each section individually only after resetting the notebook kernel each time
30 | 
31 | *tiramisu-keras.ipynb*: in order to run the larger size model I had to reset the notebook kernel in order to free up enough GPU memory (almost 12 GB) and jump directly to the model
32 | 
33 | 
34 | #### Left-out modules
35 | *neural-style-pytorch.ipynb* (found no way to load the VGG weights; it looks like some version compatibility issue)
36 | 
37 | *rossman_exp.py*
38 | 
39 | *seq2seq-translation.ipynb*
40 | 
41 | *taxi.ipynb*
42 | 
43 | *tiramisu-pytorch.ipynb*
44 | 


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
  1 | name: p3
  2 | channels:
  3 |   - pytorch
  4 |   - conda-forge
  5 |   - defaults
  6 | dependencies:
  7 |   - backports.weakref=1.0rc1=py35_0
  8 |   - bleach=1.5.0=py35_0
  9 |   - distributed=1.20.2=py35_0
 10 |   - html5lib=0.9999999=py35_0
 11 |   - jupyter_contrib_core=0.3.3=py35_1
 12 |   - jupyter_nbextensions_configurator=0.3.0=py35_0
 13 |   - markdown=2.6.9=py35_0
 14 |   - asn1crypto=0.23.0=py35h4ab26a5_0
 15 |   - backports=1.0=py35hd471ac7_1
 16 |   - bcolz=1.1.2=py35hcb27967_0
 17 |   - binutils_impl_linux-64=2.28.1=h04c84fa_2
 18 |   - binutils_linux-64=7.2.0=25
 19 |   - bokeh=0.12.13=py35h2f9c1c0_0
 20 |   - boto=2.48.0=py35h2cfd601_1
 21 |   - bz2file=0.98=py35_0
 22 |   - bzip2=1.0.6=h6d464ef_2
 23 |   - ca-certificates=2017.08.26=h1d4fec5_0
 24 |   - certifi=2017.11.5=py35h9749603_0
 25 |   - cffi=1.11.2=py35hc7b2db7_0
 26 |   - chardet=3.0.4=py35hb6e9ddf_1
 27 |   - click=6.7=py35h353a69f_0
 28 |   - cloudpickle=0.5.2=py35hbe86bc5_0
 29 |   - cryptography=2.1.4=py35hbeb2da1_0
 30 |   - cudatoolkit=8.0=3
 31 |   - cudnn=6.0.21=cuda8.0_0
 32 |   - cycler=0.10.0=py35hc4d5149_0
 33 |   - cython=0.27.3=py35h6cdc64b_0
 34 |   - dask=0.16.0=py35hcb8ecc8_0
 35 |   - dask-core=0.16.0=py35hfc66869_0
 36 |   - dbus=1.10.22=h3b5a359_0
 37 |   - decorator=4.1.2=py35h3a268aa_0
 38 |   - entrypoints=0.2.3=py35h48174a2_2
 39 |   - expat=2.2.5=he0dffb1_0
 40 |   - fastcache=1.0.2=py35hec2bbaa_0
 41 |   - fontconfig=2.12.4=h88586e7_1
 42 |   - freetype=2.8=hab7d2ae_1
 43 |   - gcc_impl_linux-64=7.2.0=hc5ce805_2
 44 |   - gcc_linux-64=7.2.0=25
 45 |   - gensim=3.1.0=py35h7300b16_0
 46 |   - glib=2.53.6=h5d9569c_2
 47 |   - gmp=6.1.2=h6c8ec71_1
 48 |   - gmpy2=2.0.8=py35hd0a1c9a_2
 49 |   - gst-plugins-base=1.12.2=he3457e5_0
 50 |   - gstreamer=1.12.2=h4f93127_0
 51 |   - gxx_impl_linux-64=7.2.0=hd3faf3d_2
 52 |   - gxx_linux-64=7.2.0=25
 53 |   - h5py=2.7.1=py35h8d53cdc_0
 54 |   - hdf5=1.10.1=h9caa474_1
 55 |   - heapdict=1.0.0=py35h51e6c10_0
 56 |   - icu=58.2=h9c2bf20_1
 57 |   - idna=2.6=py35h8605a33_1
 58 |   - imageio=2.2.0=py35hd0a6de2_0
 59 |   - intel-openmp=2018.0.0=hc7b2577_8
 60 |   - ipykernel=4.7.0=py35h2f9c1c0_0
 61 |   - ipython=6.2.1=py35hd850d2a_1
 62 |   - ipython_genutils=0.2.0=py35hc9e07d0_0
 63 |   - ipywidgets=7.0.5=py35h8147dc1_0
 64 |   - jedi=0.11.0=py35_2
 65 |   - jinja2=2.10=py35h480ab6d_0
 66 |   - jpeg=9b=h024ee3a_2
 67 |   - jsonschema=2.6.0=py35h4395190_0
 68 |   - jupyter=1.0.0=py35hd38625c_0
 69 |   - jupyter_client=5.1.0=py35h2bff583_0
 70 |   - jupyter_console=5.2.0=py35h4044a63_1
 71 |   - jupyter_core=4.4.0=py35ha89e94b_0
 72 |   - keras=2.1.2=py35_0
 73 |   - libedit=3.1=heed3624_0
 74 |   - libffi=3.2.1=hd88cf55_4
 75 |   - libgcc=7.2.0=h69d50b8_2
 76 |   - libgcc-ng=7.2.0=h7cc24e2_2
 77 |   - libgfortran-ng=7.2.0=h9f7466a_2
 78 |   - libgpuarray=0.7.5=h14c3975_0
 79 |   - libpng=1.6.32=hbd3595f_4
 80 |   - libprotobuf=3.4.1=h5b8497f_0
 81 |   - libsodium=1.0.15=hf101ebd_0
 82 |   - libstdcxx-ng=7.2.0=h7a57d05_2
 83 |   - libtiff=4.0.9=h28f6b97_0
 84 |   - libxcb=1.12=hcd93eb1_4
 85 |   - libxml2=2.9.4=h2e8b1d7_6
 86 |   - locket=0.2.0=py35h170bc82_1
 87 |   - lzo=2.10=h49e0be7_2
 88 |   - mako=1.0.7=py35h69899ea_0
 89 |   - markupsafe=1.0=py35h4f4fcf6_1
 90 |   - matplotlib=2.1.1=py35ha26af80_0
 91 |   - mistune=0.8.1=py35h9251d8c_0
 92 |   - mkl=2018.0.1=h19d6760_4
 93 |   - mkl-service=1.1.2=py35h0fc7090_4
 94 |   - mpc=1.0.3=hec55b23_5
 95 |   - mpfr=3.1.5=h11a74b3_2
 96 |   - mpmath=1.0.0=py35h7ce6e34_2
 97 |   - msgpack-python=0.4.8=py35h783f4c8_0
 98 |   - nbconvert=5.3.1=py35hc5194e3_0
 99 |   - nbformat=4.4.0=py35h12e6e07_0
100 |   - ncurses=6.0=h9df7e31_2
101 |   - networkx=2.0=py35hc690e10_0
102 |   - nltk=3.2.5=py35h09ad193_0
103 |   - notebook=5.2.2=py35he644770_0
104 |   - numexpr=2.6.4=py35h119f745_0
105 |   - numpy=1.13.3=py35hd829ed6_0
106 |   - olefile=0.44=py35h2c86149_0
107 |   - openssl=1.0.2n=hb7f436b_0
108 |   - pandas=0.22.0=py35hf484d3e_0
109 |   - pandoc=1.19.2.1=hea2e7c5_1
110 |   - pandocfilters=1.4.2=py35h1565a15_1
111 |   - parso=0.1.1=py35h1b200a3_0
112 |   - partd=0.3.8=py35h68187f2_0
113 |   - pcre=8.41=hc27e229_1
114 |   - pexpect=4.3.0=py35hf410859_0
115 |   - pickleshare=0.7.4=py35hd57304d_0
116 |   - pillow=5.0.0=py35h3deb7b8_0
117 |   - pip=9.0.1=py35h7e7da9d_4
118 |   - prompt_toolkit=1.0.15=py35hc09de7a_0
119 |   - protobuf=3.4.1=py35he6b9134_0
120 |   - psutil=5.4.1=py35h2e39a06_0
121 |   - ptyprocess=0.5.2=py35h38ce0a3_0
122 |   - pycparser=2.18=py35h61b3040_1
123 |   - pygments=2.2.0=py35h0f41973_0
124 |   - pygpu=0.7.5=py35h14c3975_0
125 |   - pyopenssl=17.5.0=py35h4f8b8c8_0
126 |   - pyparsing=2.2.0=py35h041ed72_1
127 |   - pyqt=5.6.0=py35h0e41ada_5
128 |   - pysocks=1.6.7=py35h6aefbb0_1
129 |   - pytables=3.4.2=py35hfa98db7_2
130 |   - python=3.5.4=h417fded_24
131 |   - python-dateutil=2.6.1=py35h90d5b31_1
132 |   - pytz=2017.3=py35hb13c558_0
133 |   - pywavelets=0.5.2=py35h53ec731_0
134 |   - pyyaml=3.12=py35h46ef4ae_1
135 |   - pyzmq=16.0.3=py35ha889422_0
136 |   - qt=5.6.2=h974d657_12
137 |   - qtconsole=4.3.1=py35h4626a06_0
138 |   - readline=7.0=ha6073c6_4
139 |   - requests=2.18.4=py35hb9e6ad1_1
140 |   - scikit-image=0.13.1=py35h14c3975_1
141 |   - scikit-learn=0.19.1=py35hbf1f462_0
142 |   - scipy=1.0.0=py35hcbbe4a2_0
143 |   - setuptools=36.5.0=py35ha8c1747_0
144 |   - simplegeneric=0.8.1=py35h2ec4104_0
145 |   - sip=4.18.1=py35h9eaea60_2
146 |   - six=1.11.0=py35h423b573_1
147 |   - smart_open=1.5.3=py35_0
148 |   - sortedcontainers=1.5.7=py35h683703c_0
149 |   - sqlite=3.20.1=hb898158_2
150 |   - sympy=1.1.1=py35h919b29a_0
151 |   - tblib=1.3.2=py35hf1eb0b4_0
152 |   - tensorflow=1.3.0=0
153 |   - tensorflow-base=1.3.0=py35h79a3156_1
154 |   - tensorflow-gpu=1.3.0=0
155 |   - tensorflow-gpu-base=1.3.0=py35cuda8.0cudnn6.0_1
156 |   - tensorflow-tensorboard=0.1.5=py35_0
157 |   - terminado=0.6=py35hce234ed_0
158 |   - testpath=0.3.1=py35had42eaf_0
159 |   - theano=1.0.1=py35h6bb024c_0
160 |   - tk=8.6.7=hc745277_3
161 |   - toolz=0.8.2=py35h90f1797_0
162 |   - tornado=4.5.2=py35hf879e1d_0
163 |   - tqdm=4.19.4=py35h68e51d2_0
164 |   - traitlets=4.3.2=py35ha522a97_0
165 |   - ujson=1.35=py35_0
166 |   - urllib3=1.22=py35h2ab6e29_0
167 |   - wcwidth=0.1.7=py35hcd08066_0
168 |   - webencodings=0.5.1=py35hb6cf162_1
169 |   - werkzeug=0.12.2=py35hbfc1ea6_0
170 |   - wheel=0.30.0=py35hd3883cf_1
171 |   - widgetsnbextension=3.0.8=py35h84cb72a_0
172 |   - xz=5.2.3=h55aa19d_2
173 |   - yaml=0.1.7=had09818_2
174 |   - zeromq=4.2.2=hbedb6e5_2
175 |   - zict=0.1.3=py35h29275ca_0
176 |   - zlib=1.2.11=ha838bed_2
177 |   - pytorch=0.3.0=py35_cuda8.0.61_cudnn7.0.3hb362f6e_4
178 |   - torchvision=0.2.0=py35heaa392f_1
179 |   - pip:
180 |     - keras-tqdm==2.0.1
181 |     - tables==3.4.2
182 |     - torch==0.3.0.post4
183 |     - xgboost==0.7.post3
184 | prefix: /home/roebius/anaconda/envs/p3
185 | 
186 | 


--------------------------------------------------------------------------------
/keras.json.for_TensorFlow:
--------------------------------------------------------------------------------
1 | {
2 |     "epsilon": 1e-07,
3 |     "backend": "tensorflow",
4 |     "floatx": "float32",
5 |     "image_data_format": "channels_last"
6 | }
7 | 


--------------------------------------------------------------------------------
/keras.json.for_Theano:
--------------------------------------------------------------------------------
1 | {
2 |     "image_data_format": "channels_first",
3 |     "epsilon": 1e-07,
4 |     "floatx": "float32",
5 |     "backend": "theano"
6 | }
7 | 


--------------------------------------------------------------------------------
/nbs/char-rnn.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "ExecuteTime": {
  8 |      "end_time": "2018-01-09T17:37:36.320493Z",
  9 |      "start_time": "2018-01-09T17:37:32.715223Z"
 10 |     }
 11 |    },
 12 |    "outputs": [
 13 |     {
 14 |      "name": "stderr",
 15 |      "output_type": "stream",
 16 |      "text": [
 17 |       "Using cuDNN version 6021 on context None\n",
 18 |       "Mapped name None to device cuda0: GeForce GTX TITAN X (0000:04:00.0)\n",
 19 |       "Using Theano backend.\n"
 20 |      ]
 21 |     }
 22 |    ],
 23 |    "source": [
 24 |     "from __future__ import division, print_function\n",
 25 |     "%matplotlib inline\n",
 26 |     "from importlib import reload  # Python 3\n",
 27 |     "import utils; reload(utils)\n",
 28 |     "from utils import *"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 2,
 34 |    "metadata": {
 35 |     "ExecuteTime": {
 36 |      "end_time": "2018-01-09T17:37:38.078225Z",
 37 |      "start_time": "2018-01-09T17:37:38.073874Z"
 38 |     }
 39 |    },
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "from keras.layers import TimeDistributed, Activation\n",
 43 |     "from numpy.random import choice"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "markdown",
 48 |    "metadata": {},
 49 |    "source": [
 50 |     "## Setup"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "markdown",
 55 |    "metadata": {},
 56 |    "source": [
 57 |     "We haven't really looked into the detail of how this works yet - so this is provided for self-study for those who are interested. We'll look at it closely next week."
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": 3,
 63 |    "metadata": {
 64 |     "ExecuteTime": {
 65 |      "end_time": "2018-01-09T17:37:39.530495Z",
 66 |      "start_time": "2018-01-09T17:37:39.513647Z"
 67 |     }
 68 |    },
 69 |    "outputs": [
 70 |     {
 71 |      "name": "stdout",
 72 |      "output_type": "stream",
 73 |      "text": [
 74 |       "corpus length: 600893\n"
 75 |      ]
 76 |     }
 77 |    ],
 78 |    "source": [
 79 |     "path = get_file('nietzsche.txt', origin=\"https://s3.amazonaws.com/text-datasets/nietzsche.txt\")\n",
 80 |     "text = open(path).read().lower()\n",
 81 |     "print('corpus length:', len(text))"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": 4,
 87 |    "metadata": {
 88 |     "ExecuteTime": {
 89 |      "end_time": "2018-01-09T17:37:40.553853Z",
 90 |      "start_time": "2018-01-09T17:37:40.408768Z"
 91 |     }
 92 |    },
 93 |    "outputs": [
 94 |     {
 95 |      "name": "stdout",
 96 |      "output_type": "stream",
 97 |      "text": [
 98 |       "are thinkers who believe in the saints.\r\n",
 99 |       "\r\n",
100 |       "\r\n",
101 |       "144\r\n",
102 |       "\r\n",
103 |       "It stands to reason that this sketch of the saint, made upon the model\r\n",
104 |       "of the whole species, can be confronted with many opposing sketches that\r\n",
105 |       "would create a more agreeable impression. There are certain exceptions\r\n",
106 |       "among the species who distinguish themselves either by especial\r\n",
107 |       "gentleness or especial humanity, and perhaps by the strength of their\r\n",
108 |       "own personality. Others are in the highest degree fascinating because\r\n",
109 |       "certain of their delusions shed a particular glow over their whole\r\n",
110 |       "being, as is the case with the founder of christianity who took himself\r\n",
111 |       "for the only begotten son of God and hence felt himself sinless; so that\r\n",
112 |       "through his imagination--that should not be too harshly judged since the\r\n",
113 |       "whole of antiquity swarmed with sons of god--he attained the same goal,\r\n",
114 |       "the sense of complete sinlessness, complete irresponsibility, that can\r\n",
115 |       "now be attained by every individual through science.--In the same manner\r\n",
116 |       "I have viewed the saints of India who occupy an intermediate station\r\n",
117 |       "between the christian saints and the Greek philosophers and hence are\r\n",
118 |       "not to be regarded as a pure type. Knowledge and science--as far as they\r\n",
119 |       "existed--and superiority to the rest of mankind by logical discipline\r\n",
120 |       "and training of the intellectual powers were insisted upon by the\r\n",
121 |       "Buddhists as essential to sanctity, just as they were denounced by the\r\n",
122 |       "christian world as the indications of sinfulness."
123 |      ]
124 |     }
125 |    ],
126 |    "source": [
127 |     "!tail -n 25 {path}"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": 5,
133 |    "metadata": {
134 |     "ExecuteTime": {
135 |      "end_time": "2018-01-09T17:37:42.261626Z",
136 |      "start_time": "2018-01-09T17:37:42.232982Z"
137 |     }
138 |    },
139 |    "outputs": [
140 |     {
141 |      "name": "stdout",
142 |      "output_type": "stream",
143 |      "text": [
144 |       "total chars: 58\n"
145 |      ]
146 |     }
147 |    ],
148 |    "source": [
149 |     "chars = sorted(list(set(text)))\n",
150 |     "vocab_size = len(chars)+1\n",
151 |     "print('total chars:', vocab_size)"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": 6,
157 |    "metadata": {
158 |     "ExecuteTime": {
159 |      "end_time": "2018-01-09T17:37:42.673825Z",
160 |      "start_time": "2018-01-09T17:37:42.670388Z"
161 |     }
162 |    },
163 |    "outputs": [],
164 |    "source": [
165 |     "chars.insert(0, \"\\0\")"
166 |    ]
167 |   },
168 |   {
169 |    "cell_type": "code",
170 |    "execution_count": 7,
171 |    "metadata": {
172 |     "ExecuteTime": {
173 |      "end_time": "2018-01-09T17:37:43.405865Z",
174 |      "start_time": "2018-01-09T17:37:43.393184Z"
175 |     }
176 |    },
177 |    "outputs": [
178 |     {
179 |      "data": {
180 |       "text/plain": [
181 |        "'\\n !\"\\'(),-.0123456789:;=?[]_abcdefghijklmnopqrstuvwx'"
182 |       ]
183 |      },
184 |      "execution_count": 7,
185 |      "metadata": {},
186 |      "output_type": "execute_result"
187 |     }
188 |    ],
189 |    "source": [
190 |     "''.join(chars[1:-6])"
191 |    ]
192 |   },
193 |   {
194 |    "cell_type": "code",
195 |    "execution_count": 8,
196 |    "metadata": {
197 |     "ExecuteTime": {
198 |      "end_time": "2018-01-09T17:37:43.653291Z",
199 |      "start_time": "2018-01-09T17:37:43.648297Z"
200 |     }
201 |    },
202 |    "outputs": [],
203 |    "source": [
204 |     "char_indices = dict((c, i) for i, c in enumerate(chars))\n",
205 |     "indices_char = dict((i, c) for i, c in enumerate(chars))"
206 |    ]
207 |   },
208 |   {
209 |    "cell_type": "code",
210 |    "execution_count": 9,
211 |    "metadata": {
212 |     "ExecuteTime": {
213 |      "end_time": "2018-01-09T17:37:43.970560Z",
214 |      "start_time": "2018-01-09T17:37:43.875090Z"
215 |     }
216 |    },
217 |    "outputs": [],
218 |    "source": [
219 |     "idx = [char_indices[c] for c in text]"
220 |    ]
221 |   },
222 |   {
223 |    "cell_type": "code",
224 |    "execution_count": 10,
225 |    "metadata": {
226 |     "ExecuteTime": {
227 |      "end_time": "2018-01-09T17:37:44.088043Z",
228 |      "start_time": "2018-01-09T17:37:44.081181Z"
229 |     }
230 |    },
231 |    "outputs": [
232 |     {
233 |      "data": {
234 |       "text/plain": [
235 |        "[43, 45, 32, 33, 28, 30, 32, 1, 1, 1]"
236 |       ]
237 |      },
238 |      "execution_count": 10,
239 |      "metadata": {},
240 |      "output_type": "execute_result"
241 |     }
242 |    ],
243 |    "source": [
244 |     "idx[:10]"
245 |    ]
246 |   },
247 |   {
248 |    "cell_type": "code",
249 |    "execution_count": 11,
250 |    "metadata": {
251 |     "ExecuteTime": {
252 |      "end_time": "2018-01-09T17:37:44.278125Z",
253 |      "start_time": "2018-01-09T17:37:44.272800Z"
254 |     }
255 |    },
256 |    "outputs": [
257 |     {
258 |      "data": {
259 |       "text/plain": [
260 |        "'preface\\n\\n\\nsupposing that truth is a woman--what then? is there not gro'"
261 |       ]
262 |      },
263 |      "execution_count": 11,
264 |      "metadata": {},
265 |      "output_type": "execute_result"
266 |     }
267 |    ],
268 |    "source": [
269 |     "''.join(indices_char[i] for i in idx[:70])"
270 |    ]
271 |   },
272 |   {
273 |    "cell_type": "markdown",
274 |    "metadata": {},
275 |    "source": [
276 |     "## Preprocess and create model"
277 |    ]
278 |   },
279 |   {
280 |    "cell_type": "code",
281 |    "execution_count": 12,
282 |    "metadata": {
283 |     "ExecuteTime": {
284 |      "end_time": "2018-01-09T17:37:50.507182Z",
285 |      "start_time": "2018-01-09T17:37:48.167841Z"
286 |     }
287 |    },
288 |    "outputs": [
289 |     {
290 |      "name": "stdout",
291 |      "output_type": "stream",
292 |      "text": [
293 |       "nb sequences: 600854\n"
294 |      ]
295 |     }
296 |    ],
297 |    "source": [
298 |     "maxlen = 40\n",
299 |     "sentences = []\n",
300 |     "next_chars = []\n",
301 |     "for i in range(0, len(idx) - maxlen+1):\n",
302 |     "    sentences.append(idx[i: i + maxlen])\n",
303 |     "    next_chars.append(idx[i+1: i+maxlen+1])\n",
304 |     "print('nb sequences:', len(sentences))"
305 |    ]
306 |   },
307 |   {
308 |    "cell_type": "code",
309 |    "execution_count": 13,
310 |    "metadata": {
311 |     "ExecuteTime": {
312 |      "end_time": "2018-01-09T17:37:57.204305Z",
313 |      "start_time": "2018-01-09T17:37:50.508646Z"
314 |     }
315 |    },
316 |    "outputs": [],
317 |    "source": [
318 |     "sentences = np.concatenate([[np.array(o)] for o in sentences[:-2]])\n",
319 |     "next_chars = np.concatenate([[np.array(o)] for o in next_chars[:-2]])"
320 |    ]
321 |   },
322 |   {
323 |    "cell_type": "code",
324 |    "execution_count": 14,
325 |    "metadata": {
326 |     "ExecuteTime": {
327 |      "end_time": "2018-01-09T17:37:57.208861Z",
328 |      "start_time": "2018-01-09T17:37:57.205817Z"
329 |     }
330 |    },
331 |    "outputs": [
332 |     {
333 |      "data": {
334 |       "text/plain": [
335 |        "((600852, 40), (600852, 40))"
336 |       ]
337 |      },
338 |      "execution_count": 14,
339 |      "metadata": {},
340 |      "output_type": "execute_result"
341 |     }
342 |    ],
343 |    "source": [
344 |     "sentences.shape, next_chars.shape"
345 |    ]
346 |   },
347 |   {
348 |    "cell_type": "code",
349 |    "execution_count": 15,
350 |    "metadata": {
351 |     "ExecuteTime": {
352 |      "end_time": "2018-01-09T17:37:57.249341Z",
353 |      "start_time": "2018-01-09T17:37:57.209999Z"
354 |     }
355 |    },
356 |    "outputs": [],
357 |    "source": [
358 |     "n_fac = 24"
359 |    ]
360 |   },
361 |   {
362 |    "cell_type": "code",
363 |    "execution_count": 16,
364 |    "metadata": {
365 |     "ExecuteTime": {
366 |      "end_time": "2018-01-09T17:38:10.121159Z",
367 |      "start_time": "2018-01-09T17:37:57.250999Z"
368 |     }
369 |    },
370 |    "outputs": [],
371 |    "source": [
372 |     "model=Sequential([\n",
373 |     "        Embedding(vocab_size, n_fac, input_length=maxlen),\n",
374 |     "        LSTM(units=512, input_shape=(n_fac,),return_sequences=True, dropout=0.2, recurrent_dropout=0.2,\n",
375 |     "             implementation=2),\n",
376 |     "        Dropout(0.2),\n",
377 |     "        LSTM(512, return_sequences=True, dropout=0.2, recurrent_dropout=0.2,\n",
378 |     "             implementation=2),\n",
379 |     "        Dropout(0.2),\n",
380 |     "        TimeDistributed(Dense(vocab_size)),\n",
381 |     "        Activation('softmax')\n",
382 |     "    ])    "
383 |    ]
384 |   },
385 |   {
386 |    "cell_type": "code",
387 |    "execution_count": 17,
388 |    "metadata": {
389 |     "ExecuteTime": {
390 |      "end_time": "2018-01-09T17:38:10.153817Z",
391 |      "start_time": "2018-01-09T17:38:10.123477Z"
392 |     }
393 |    },
394 |    "outputs": [],
395 |    "source": [
396 |     "model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam())"
397 |    ]
398 |   },
399 |   {
400 |    "cell_type": "markdown",
401 |    "metadata": {},
402 |    "source": [
403 |     "## Train"
404 |    ]
405 |   },
406 |   {
407 |    "cell_type": "code",
408 |    "execution_count": 18,
409 |    "metadata": {
410 |     "ExecuteTime": {
411 |      "end_time": "2018-01-09T17:38:12.858009Z",
412 |      "start_time": "2018-01-09T17:38:12.840547Z"
413 |     }
414 |    },
415 |    "outputs": [],
416 |    "source": [
417 |     "def print_example():\n",
418 |     "    seed_string=\"ethics is a basic foundation of all that\"\n",
419 |     "    for i in range(320):\n",
420 |     "        x=np.array([char_indices[c] for c in seed_string[-40:]])[np.newaxis,:]  # [-40] picks up the last 40 chars\n",
421 |     "        preds = model.predict(x, verbose=0)[0][-1]  # [-1] picks up the last char\n",
422 |     "        preds = preds/np.sum(preds)\n",
423 |     "        next_char = choice(chars, p=preds)\n",
424 |     "        seed_string = seed_string + next_char\n",
425 |     "    print(seed_string)"
426 |    ]
427 |   },
428 |   {
429 |    "cell_type": "code",
430 |    "execution_count": 19,
431 |    "metadata": {
432 |     "ExecuteTime": {
433 |      "end_time": "2018-01-09T17:53:01.861777Z",
434 |      "start_time": "2018-01-09T17:38:13.104719Z"
435 |     }
436 |    },
437 |    "outputs": [
438 |     {
439 |      "name": "stdout",
440 |      "output_type": "stream",
441 |      "text": [
442 |       "Epoch 1/1\n",
443 |       "600852/600852 [==============================] - 795s 1ms/step - loss: 1.4965\n"
444 |      ]
445 |     },
446 |     {
447 |      "data": {
448 |       "text/plain": [
449 |        "<keras.callbacks.History at 0x7efe35698240>"
450 |       ]
451 |      },
452 |      "execution_count": 19,
453 |      "metadata": {},
454 |      "output_type": "execute_result"
455 |     }
456 |    ],
457 |    "source": [
458 |     "model.fit(sentences, np.expand_dims(next_chars,-1), batch_size=64, epochs=1)"
459 |    ]
460 |   },
461 |   {
462 |    "cell_type": "code",
463 |    "execution_count": 20,
464 |    "metadata": {
465 |     "ExecuteTime": {
466 |      "end_time": "2018-01-09T17:53:16.668682Z",
467 |      "start_time": "2018-01-09T17:53:01.863269Z"
468 |     },
469 |     "scrolled": true
470 |    },
471 |    "outputs": [
472 |     {
473 |      "name": "stdout",
474 |      "output_type": "stream",
475 |      "text": [
476 |       "ethics is a basic foundation of all that which principle. there is i have said gon to fight on the responsibility\n",
477 |       "of intercourse is\n",
478 |       "is not subsequently possible that one\n",
479 |       "can not promise solitude, neither with all this over the half. the whole mewaphysical philosophers have were this requirement to his even failure as his power; even in love comes to be it, d\n"
480 |      ]
481 |     }
482 |    ],
483 |    "source": [
484 |     "print_example()"
485 |    ]
486 |   },
487 |   {
488 |    "cell_type": "code",
489 |    "execution_count": 21,
490 |    "metadata": {
491 |     "ExecuteTime": {
492 |      "end_time": "2018-01-09T18:06:27.312422Z",
493 |      "start_time": "2018-01-09T17:53:16.670290Z"
494 |     }
495 |    },
496 |    "outputs": [
497 |     {
498 |      "name": "stdout",
499 |      "output_type": "stream",
500 |      "text": [
501 |       "Epoch 1/1\n",
502 |       "600852/600852 [==============================] - 791s 1ms/step - loss: 1.2726\n"
503 |      ]
504 |     },
505 |     {
506 |      "data": {
507 |       "text/plain": [
508 |        "<keras.callbacks.History at 0x7efdf7aee6a0>"
509 |       ]
510 |      },
511 |      "execution_count": 21,
512 |      "metadata": {},
513 |      "output_type": "execute_result"
514 |     }
515 |    ],
516 |    "source": [
517 |     "model.fit(sentences, np.expand_dims(next_chars,-1), batch_size=64, epochs=1)"
518 |    ]
519 |   },
520 |   {
521 |    "cell_type": "code",
522 |    "execution_count": 22,
523 |    "metadata": {
524 |     "ExecuteTime": {
525 |      "end_time": "2018-01-09T18:06:34.396111Z",
526 |      "start_time": "2018-01-09T18:06:27.314283Z"
527 |     },
528 |     "scrolled": true
529 |    },
530 |    "outputs": [
531 |     {
532 |      "name": "stdout",
533 |      "output_type": "stream",
534 |      "text": [
535 |       "ethics is a basic foundation of all that he realized how can the same\n",
536 |       "degree, and\n",
537 |       "bitter! everywhere may not\n",
538 |       "be pessimistic time. it sympathy and of our dull things, one may demand and would not have reaction also a kind of the advance of the brute\", this deenest race: it is necessary to understand\n",
539 |       "to contradict it; but the just as a\n",
540 |       "being which does not bel\n"
541 |      ]
542 |     }
543 |    ],
544 |    "source": [
545 |     "print_example()"
546 |    ]
547 |   },
548 |   {
549 |    "cell_type": "code",
550 |    "execution_count": 23,
551 |    "metadata": {
552 |     "ExecuteTime": {
553 |      "end_time": "2018-01-09T18:06:34.400512Z",
554 |      "start_time": "2018-01-09T18:06:34.398029Z"
555 |     }
556 |    },
557 |    "outputs": [],
558 |    "source": [
559 |     "model.optimizer.lr=0.001"
560 |    ]
561 |   },
562 |   {
563 |    "cell_type": "code",
564 |    "execution_count": 24,
565 |    "metadata": {
566 |     "ExecuteTime": {
567 |      "end_time": "2018-01-09T18:19:44.658226Z",
568 |      "start_time": "2018-01-09T18:06:34.402291Z"
569 |     }
570 |    },
571 |    "outputs": [
572 |     {
573 |      "name": "stdout",
574 |      "output_type": "stream",
575 |      "text": [
576 |       "Epoch 1/1\n",
577 |       "600852/600852 [==============================] - 790s 1ms/step - loss: 1.2383\n"
578 |      ]
579 |     },
580 |     {
581 |      "data": {
582 |       "text/plain": [
583 |        "<keras.callbacks.History at 0x7efdf7d51d68>"
584 |       ]
585 |      },
586 |      "execution_count": 24,
587 |      "metadata": {},
588 |      "output_type": "execute_result"
589 |     }
590 |    ],
591 |    "source": [
592 |     "model.fit(sentences, np.expand_dims(next_chars,-1), batch_size=64, epochs=1)"
593 |    ]
594 |   },
595 |   {
596 |    "cell_type": "code",
597 |    "execution_count": 25,
598 |    "metadata": {
599 |     "ExecuteTime": {
600 |      "end_time": "2018-01-09T18:19:51.757814Z",
601 |      "start_time": "2018-01-09T18:19:44.659529Z"
602 |     }
603 |    },
604 |    "outputs": [
605 |     {
606 |      "name": "stdout",
607 |      "output_type": "stream",
608 |      "text": [
609 |       "ethics is a basic foundation of all that originates him\n",
610 |       "\n",
611 |       "instance, it true impulses and belief\n",
612 |       "in christianity, results, easily allowed to\n",
613 |       "regard our principle.--one dests inspire concerning the logical is termination; and that the\n",
614 |       "contrary to puritante and attain.\n",
615 |       "\n",
616 |       "162. from deveropment and little itself we have deceived ourselves to\n",
617 |       "action, and without dec\n"
618 |      ]
619 |     }
620 |    ],
621 |    "source": [
622 |     "print_example()"
623 |    ]
624 |   },
625 |   {
626 |    "cell_type": "code",
627 |    "execution_count": 26,
628 |    "metadata": {
629 |     "ExecuteTime": {
630 |      "end_time": "2018-01-09T18:19:51.761120Z",
631 |      "start_time": "2018-01-09T18:19:51.759261Z"
632 |     }
633 |    },
634 |    "outputs": [],
635 |    "source": [
636 |     "model.optimizer.lr=0.0001"
637 |    ]
638 |   },
639 |   {
640 |    "cell_type": "code",
641 |    "execution_count": 27,
642 |    "metadata": {
643 |     "ExecuteTime": {
644 |      "end_time": "2018-01-09T18:33:08.856328Z",
645 |      "start_time": "2018-01-09T18:19:51.762328Z"
646 |     },
647 |     "scrolled": true
648 |    },
649 |    "outputs": [
650 |     {
651 |      "name": "stdout",
652 |      "output_type": "stream",
653 |      "text": [
654 |       "Epoch 1/1\n",
655 |       "600852/600852 [==============================] - 797s 1ms/step - loss: 1.2193\n"
656 |      ]
657 |     },
658 |     {
659 |      "data": {
660 |       "text/plain": [
661 |        "<keras.callbacks.History at 0x7efdf7d51908>"
662 |       ]
663 |      },
664 |      "execution_count": 27,
665 |      "metadata": {},
666 |      "output_type": "execute_result"
667 |     }
668 |    ],
669 |    "source": [
670 |     "model.fit(sentences, np.expand_dims(next_chars,-1), batch_size=64, epochs=1)"
671 |    ]
672 |   },
673 |   {
674 |    "cell_type": "code",
675 |    "execution_count": 28,
676 |    "metadata": {
677 |     "ExecuteTime": {
678 |      "end_time": "2018-01-09T18:33:15.941120Z",
679 |      "start_time": "2018-01-09T18:33:08.857628Z"
680 |     }
681 |    },
682 |    "outputs": [
683 |     {
684 |      "name": "stdout",
685 |      "output_type": "stream",
686 |      "text": [
687 |       "ethics is a basic foundation of all that \"ego,\" is craceful easy, and through the trainly left itself until feelings, makes this very pleasure to shiftand\n",
688 |       "an emotion of their gutting, mopling and skepcicism--he would like to brighten men and them as france of\n",
689 |       "humanity.\n",
690 |       "\n",
691 |       "\n",
692 |       "54\n",
693 |       "\n",
694 |       "=justice, or even of the foundation of causality which always \"does not know about w\n"
695 |      ]
696 |     }
697 |    ],
698 |    "source": [
699 |     "print_example()"
700 |    ]
701 |   },
702 |   {
703 |    "cell_type": "code",
704 |    "execution_count": 29,
705 |    "metadata": {
706 |     "ExecuteTime": {
707 |      "end_time": "2018-01-09T18:33:15.995753Z",
708 |      "start_time": "2018-01-09T18:33:15.942509Z"
709 |     }
710 |    },
711 |    "outputs": [],
712 |    "source": [
713 |     "model.save_weights('data/char_rnn.h5')"
714 |    ]
715 |   },
716 |   {
717 |    "cell_type": "code",
718 |    "execution_count": 30,
719 |    "metadata": {
720 |     "ExecuteTime": {
721 |      "end_time": "2018-01-09T18:33:16.029984Z",
722 |      "start_time": "2018-01-09T18:33:15.998784Z"
723 |     }
724 |    },
725 |    "outputs": [],
726 |    "source": [
727 |     "model.optimizer.lr=0.00001"
728 |    ]
729 |   },
730 |   {
731 |    "cell_type": "code",
732 |    "execution_count": 31,
733 |    "metadata": {
734 |     "ExecuteTime": {
735 |      "end_time": "2018-01-09T18:46:26.796768Z",
736 |      "start_time": "2018-01-09T18:33:16.033101Z"
737 |     }
738 |    },
739 |    "outputs": [
740 |     {
741 |      "name": "stdout",
742 |      "output_type": "stream",
743 |      "text": [
744 |       "Epoch 1/1\n",
745 |       "600852/600852 [==============================] - 791s 1ms/step - loss: 1.2049\n"
746 |      ]
747 |     },
748 |     {
749 |      "data": {
750 |       "text/plain": [
751 |        "<keras.callbacks.History at 0x7efdf7d51588>"
752 |       ]
753 |      },
754 |      "execution_count": 31,
755 |      "metadata": {},
756 |      "output_type": "execute_result"
757 |     }
758 |    ],
759 |    "source": [
760 |     "model.fit(sentences, np.expand_dims(next_chars,-1), batch_size=64, epochs=1)"
761 |    ]
762 |   },
763 |   {
764 |    "cell_type": "code",
765 |    "execution_count": 32,
766 |    "metadata": {
767 |     "ExecuteTime": {
768 |      "end_time": "2018-01-09T18:46:33.857340Z",
769 |      "start_time": "2018-01-09T18:46:26.798046Z"
770 |     }
771 |    },
772 |    "outputs": [
773 |     {
774 |      "name": "stdout",
775 |      "output_type": "stream",
776 |      "text": [
777 |       "ethics is a basic foundation of all that sympathy thinks which they may be the most customary or else,\n",
778 |       "owing to\n",
779 |       "horror, to a new\n",
780 |       "riddle-like experience (and is there why learnt\n",
781 |       "to bring at the\n",
782 |       "immense, and have long still profound, dissatisfied in that neighbour and incapacity for\n",
783 |       "me?\" in\n",
784 |       "spite of the proper people, an intercourse, still upself--subtlety and\n"
785 |      ]
786 |     }
787 |    ],
788 |    "source": [
789 |     "print_example()"
790 |    ]
791 |   },
792 |   {
793 |    "cell_type": "code",
794 |    "execution_count": 33,
795 |    "metadata": {
796 |     "ExecuteTime": {
797 |      "end_time": "2018-01-09T18:59:44.358611Z",
798 |      "start_time": "2018-01-09T18:46:33.858823Z"
799 |     }
800 |    },
801 |    "outputs": [
802 |     {
803 |      "name": "stdout",
804 |      "output_type": "stream",
805 |      "text": [
806 |       "Epoch 1/1\n",
807 |       "600852/600852 [==============================] - 790s 1ms/step - loss: 1.1925\n"
808 |      ]
809 |     },
810 |     {
811 |      "data": {
812 |       "text/plain": [
813 |        "<keras.callbacks.History at 0x7efdf7d51860>"
814 |       ]
815 |      },
816 |      "execution_count": 33,
817 |      "metadata": {},
818 |      "output_type": "execute_result"
819 |     }
820 |    ],
821 |    "source": [
822 |     "model.fit(sentences, np.expand_dims(next_chars,-1), batch_size=64, epochs=1)"
823 |    ]
824 |   },
825 |   {
826 |    "cell_type": "code",
827 |    "execution_count": 34,
828 |    "metadata": {
829 |     "ExecuteTime": {
830 |      "end_time": "2018-01-09T18:59:51.517817Z",
831 |      "start_time": "2018-01-09T18:59:44.360741Z"
832 |     }
833 |    },
834 |    "outputs": [
835 |     {
836 |      "name": "stdout",
837 |      "output_type": "stream",
838 |      "text": [
839 |       "ethics is a basic foundation of all that is called \"higher,\" inspire the permanent thing, at once and strive, remains the most, but that the new\n",
840 |       "construction which do not believe in germany. in music only a shamed through the discipline of mind whone same goethe) perhaps they have its personality\n",
841 |       "itself, they are responsible, and it seems to\n",
842 |       "feel them is\n",
843 |       "don\n"
844 |      ]
845 |     }
846 |    ],
847 |    "source": [
848 |     "print_example()"
849 |    ]
850 |   },
851 |   {
852 |    "cell_type": "code",
853 |    "execution_count": 35,
854 |    "metadata": {
855 |     "ExecuteTime": {
856 |      "end_time": "2018-01-09T18:59:58.596503Z",
857 |      "start_time": "2018-01-09T18:59:51.519361Z"
858 |     }
859 |    },
860 |    "outputs": [
861 |     {
862 |      "name": "stdout",
863 |      "output_type": "stream",
864 |      "text": [
865 |       "ethics is a basic foundation of all that is always vained by the reward. if one should grew back again acknowledge with their semi-barbarity,--they are avlided to life.--we have\n",
866 |       "finds a contradictory?--so the ascetic judgs a defect in every\n",
867 |       "deception change away something that is remained by\n",
868 |       "means of community, in fact, it is precisely through napoleon's sen\n"
869 |      ]
870 |     }
871 |    ],
872 |    "source": [
873 |     "print_example()"
874 |    ]
875 |   },
876 |   {
877 |    "cell_type": "code",
878 |    "execution_count": 36,
879 |    "metadata": {
880 |     "ExecuteTime": {
881 |      "end_time": "2018-01-09T18:59:58.615619Z",
882 |      "start_time": "2018-01-09T18:59:58.597957Z"
883 |     }
884 |    },
885 |    "outputs": [],
886 |    "source": [
887 |     "model.save_weights('data/char_rnn.h5')"
888 |    ]
889 |   },
890 |   {
891 |    "cell_type": "code",
892 |    "execution_count": null,
893 |    "metadata": {},
894 |    "outputs": [],
895 |    "source": []
896 |   }
897 |  ],
898 |  "metadata": {
899 |   "kernelspec": {
900 |    "display_name": "Python 3",
901 |    "language": "python",
902 |    "name": "python3"
903 |   },
904 |   "language_info": {
905 |    "codemirror_mode": {
906 |     "name": "ipython",
907 |     "version": 3
908 |    },
909 |    "file_extension": ".py",
910 |    "mimetype": "text/x-python",
911 |    "name": "python",
912 |    "nbconvert_exporter": "python",
913 |    "pygments_lexer": "ipython3",
914 |    "version": "3.5.4"
915 |   },
916 |   "nav_menu": {},
917 |   "toc": {
918 |    "navigate_menu": true,
919 |    "number_sections": true,
920 |    "sideBar": true,
921 |    "threshold": 6,
922 |    "toc_cell": true,
923 |    "toc_section_display": "block",
924 |    "toc_window_display": false
925 |   },
926 |   "widgets": {
927 |    "state": {},
928 |    "version": "1.1.2"
929 |   }
930 |  },
931 |  "nbformat": 4,
932 |  "nbformat_minor": 1
933 | }
934 | 


--------------------------------------------------------------------------------
/nbs/imagenet_batchnorm.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "This notebook explains how to add batch normalization to VGG.  The code shown here is implemented in [vgg_bn.py](https://github.com/fastai/courses/blob/master/deeplearning1/nbs/vgg16bn.py), and there is a version of ``vgg_ft`` (our fine tuning function) with batch norm called ``vgg_ft_bn`` in [utils.py](https://github.com/fastai/courses/blob/master/deeplearning1/nbs/utils.py)."
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": null,
 13 |    "metadata": {
 14 |     "collapsed": true
 15 |    },
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "from __future__ import division, print_function\n",
 19 |     "%matplotlib inline\n",
 20 |     "from importlib import reload\n",
 21 |     "import utils; reload(utils)\n",
 22 |     "from utils import *"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "# The problem, and the solution"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "markdown",
 34 |    "metadata": {},
 35 |    "source": [
 36 |     "## The problem"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "markdown",
 41 |    "metadata": {},
 42 |    "source": [
 43 |     "The problem that we faced in the lesson 3 is that when we wanted to add batch normalization, we initialized *all* the dense layers of the model to random weights, and then tried to train them with our cats v dogs dataset. But that's a lot of weights to initialize to random - out of 134m params, around 119m are in the dense layers! Take a moment to think about why this is, and convince yourself that dense layers are where most of the weights will be. Also, think about whether this implies that most of the *time* will be spent training these weights. What do you think?\n",
 44 |     "\n",
 45 |     "Trying to train 120m params using just 23k images is clearly an unreasonable expectation. The reason we haven't had this problem before is that the dense layers were not random, but were trained to recognize imagenet categories (other than the very last layer, which only has 8194 params)."
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "markdown",
 50 |    "metadata": {},
 51 |    "source": [
 52 |     "## The solution"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "markdown",
 57 |    "metadata": {},
 58 |    "source": [
 59 |     "The solution, obviously enough, is to add batch normalization to the VGG model! To do so, we have to be careful - we can't just insert batchnorm layers, since their parameters (*gamma* - which is used to multiply by each activation, and *beta* - which is used to add to each activation) will not be set correctly. Without setting these correctly, the new batchnorm layers will normalize the previous layer's activations, meaning that the next layer will receive totally different activations to what it would have without new batchnorm layer. And that means that all the pre-trained weights are no longer of any use!\n",
 60 |     "\n",
 61 |     "So instead, we need to figure out what beta and gamma to choose when we insert the layers. The answer to this turns out to be pretty simple - we need to calculate what the mean and standard deviation of that activations for that layer are when calculated on all of imagenet, and then set beta and gamma to these values. That means that the new batchnorm layer will normalize the data with the mean and standard deviation, and then immediately un-normalize the data using the beta and gamma parameters we provide. So the output of the batchnorm layer will be identical to it's input - which means that all the pre-trained weights will continue to work just as well as before.\n",
 62 |     "\n",
 63 |     "The benefit of this is that when we wish to fine-tune our own networks, we will have all the benefits of batch normalization (higher learning rates, more resiliant training, and less need for dropout) plus all the benefits of a pre-trained network."
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "markdown",
 68 |    "metadata": {},
 69 |    "source": [
 70 |     "To calculate the mean and standard deviation of the activations on imagenet, we need to download imagenet. You can download imagenet from http://www.image-net.org/download-images . The file you want is the one titled **Download links to ILSVRC2013 image data**. You'll need to request access from the imagenet admins for this, although it seems to be an automated system - I've always found that access is provided instantly. Once you're logged in and have gone to that page, look for the **CLS-LOC dataset** section. Both training and validation images are available, and you should download both. There's not much reason to download the test images, however.\n",
 71 |     "\n",
 72 |     "Note that this will not be the entire imagenet archive, but just the 1000 categories that are used in the annual competition. Since that's what VGG16 was originally trained on, that seems like a good choice - especially since the full dataset is 1.1 terabytes, whereas the 1000 category dataset is 138 gigabytes."
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "markdown",
 77 |    "metadata": {},
 78 |    "source": [
 79 |     "# Adding batchnorm to Imagenet"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "markdown",
 84 |    "metadata": {},
 85 |    "source": [
 86 |     "## Setup"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "markdown",
 91 |    "metadata": {},
 92 |    "source": [
 93 |     "### Sample"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "markdown",
 98 |    "metadata": {},
 99 |    "source": [
100 |     "As per usual, we create a sample so we can experiment more rapidly."
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": null,
106 |    "metadata": {
107 |     "collapsed": true
108 |    },
109 |    "outputs": [],
110 |    "source": [
111 |     "# %pushd data/imagenet\n",
112 |     "%pushd data/imagenet\n",
113 |     "%cd train"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": null,
119 |    "metadata": {
120 |     "collapsed": true
121 |    },
122 |    "outputs": [],
123 |    "source": [
124 |     "%mkdir ../sample\n",
125 |     "%mkdir ../sample/train\n",
126 |     "%mkdir ../sample/valid\n",
127 |     "\n",
128 |     "from shutil import copyfile\n",
129 |     "\n",
130 |     "g = glob('*')\n",
131 |     "for d in g: \n",
132 |     "    os.mkdir('../sample/train/'+d)\n",
133 |     "    os.mkdir('../sample/valid/'+d)"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "code",
138 |    "execution_count": null,
139 |    "metadata": {
140 |     "collapsed": true
141 |    },
142 |    "outputs": [],
143 |    "source": [
144 |     "g = glob('*/*.JPEG')\n",
145 |     "shuf = np.random.permutation(g)\n",
146 |     "for i in range(25000): copyfile(shuf[i], '../sample/train/' + shuf[i])"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "code",
151 |    "execution_count": null,
152 |    "metadata": {
153 |     "collapsed": true,
154 |     "scrolled": true
155 |    },
156 |    "outputs": [],
157 |    "source": [
158 |     "%cd ../valid\n",
159 |     "\n",
160 |     "g = glob('*/*.JPEG')\n",
161 |     "shuf = np.random.permutation(g)\n",
162 |     "for i in range(5000): copyfile(shuf[i], '../sample/valid/' + shuf[i])\n",
163 |     "\n",
164 |     "%cd .."
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "code",
169 |    "execution_count": null,
170 |    "metadata": {
171 |     "collapsed": true
172 |    },
173 |    "outputs": [],
174 |    "source": [
175 |     "%mkdir sample/results"
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "code",
180 |    "execution_count": null,
181 |    "metadata": {
182 |     "collapsed": true
183 |    },
184 |    "outputs": [],
185 |    "source": [
186 |     "%popd"
187 |    ]
188 |   },
189 |   {
190 |    "cell_type": "markdown",
191 |    "metadata": {},
192 |    "source": [
193 |     "### Data setup"
194 |    ]
195 |   },
196 |   {
197 |    "cell_type": "markdown",
198 |    "metadata": {},
199 |    "source": [
200 |     "We set up our paths, data, and labels in the usual way. Note that we don't try to read all of Imagenet into memory! We only load the sample into memory."
201 |    ]
202 |   },
203 |   {
204 |    "cell_type": "code",
205 |    "execution_count": null,
206 |    "metadata": {
207 |     "collapsed": true
208 |    },
209 |    "outputs": [],
210 |    "source": [
211 |     "sample_path = \"data/imagenet/sample/\"\n",
212 |     "path = \"data/imagenet/\"\n",
213 |     "\n",
214 |     "#sample_path = 'data/jhoward/imagenet/sample/'\n",
215 |     "# This is the path to my fast SSD - I put datasets there when I can to get the speed benefit\n",
216 |     "#fast_path = '/home/jhoward/ILSVRC2012_img_proc/'\n",
217 |     "#path = '/data/jhoward/imagenet/sample/'\n",
218 |     "#path = 'data/jhoward/imagenet/'"
219 |    ]
220 |   },
221 |   {
222 |    "cell_type": "code",
223 |    "execution_count": null,
224 |    "metadata": {
225 |     "collapsed": true
226 |    },
227 |    "outputs": [],
228 |    "source": [
229 |     "batch_size=64"
230 |    ]
231 |   },
232 |   {
233 |    "cell_type": "code",
234 |    "execution_count": null,
235 |    "metadata": {
236 |     "collapsed": true
237 |    },
238 |    "outputs": [],
239 |    "source": [
240 |     "samp_trn = get_data(sample_path+'train')\n",
241 |     "samp_val = get_data(sample_path+'valid')"
242 |    ]
243 |   },
244 |   {
245 |    "cell_type": "code",
246 |    "execution_count": null,
247 |    "metadata": {
248 |     "collapsed": true
249 |    },
250 |    "outputs": [],
251 |    "source": [
252 |     "save_array(sample_path+'results/trn.dat', samp_trn)\n",
253 |     "save_array(sample_path+'results/val.dat', samp_val)"
254 |    ]
255 |   },
256 |   {
257 |    "cell_type": "code",
258 |    "execution_count": null,
259 |    "metadata": {
260 |     "collapsed": true
261 |    },
262 |    "outputs": [],
263 |    "source": [
264 |     "samp_trn = load_array(sample_path+'results/trn.dat')\n",
265 |     "samp_val = load_array(sample_path+'results/val.dat')"
266 |    ]
267 |   },
268 |   {
269 |    "cell_type": "code",
270 |    "execution_count": null,
271 |    "metadata": {
272 |     "collapsed": true,
273 |     "scrolled": true
274 |    },
275 |    "outputs": [],
276 |    "source": [
277 |     "(val_classes, trn_classes, val_labels, trn_labels, \n",
278 |     "    val_filenames, filenames, test_filenames) = get_classes(path)"
279 |    ]
280 |   },
281 |   {
282 |    "cell_type": "code",
283 |    "execution_count": null,
284 |    "metadata": {
285 |     "collapsed": true
286 |    },
287 |    "outputs": [],
288 |    "source": [
289 |     "(samp_val_classes, samp_trn_classes, samp_val_labels, samp_trn_labels, \n",
290 |     "    samp_val_filenames, samp_filenames, samp_test_filenames) = get_classes(sample_path)"
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "markdown",
295 |    "metadata": {},
296 |    "source": [
297 |     "### Model setup"
298 |    ]
299 |   },
300 |   {
301 |    "cell_type": "markdown",
302 |    "metadata": {},
303 |    "source": [
304 |     "Since we're just working with the dense layers, we should pre-compute the output of the convolutional layers."
305 |    ]
306 |   },
307 |   {
308 |    "cell_type": "code",
309 |    "execution_count": null,
310 |    "metadata": {
311 |     "collapsed": true,
312 |     "scrolled": true
313 |    },
314 |    "outputs": [],
315 |    "source": [
316 |     "vgg = Vgg16()\n",
317 |     "model = vgg.model"
318 |    ]
319 |   },
320 |   {
321 |    "cell_type": "code",
322 |    "execution_count": null,
323 |    "metadata": {
324 |     "collapsed": true
325 |    },
326 |    "outputs": [],
327 |    "source": [
328 |     "layers = model.layers\n",
329 |     "last_conv_idx = [index for index,layer in enumerate(layers) \n",
330 |     "                     if type(layer) is Conv2D][-1]\n",
331 |     "conv_layers = layers[:last_conv_idx+1]"
332 |    ]
333 |   },
334 |   {
335 |    "cell_type": "code",
336 |    "execution_count": null,
337 |    "metadata": {
338 |     "collapsed": true
339 |    },
340 |    "outputs": [],
341 |    "source": [
342 |     "dense_layers = layers[last_conv_idx+1:]"
343 |    ]
344 |   },
345 |   {
346 |    "cell_type": "code",
347 |    "execution_count": null,
348 |    "metadata": {
349 |     "collapsed": true
350 |    },
351 |    "outputs": [],
352 |    "source": [
353 |     "conv_model = Sequential(conv_layers)"
354 |    ]
355 |   },
356 |   {
357 |    "cell_type": "code",
358 |    "execution_count": null,
359 |    "metadata": {
360 |     "collapsed": true
361 |    },
362 |    "outputs": [],
363 |    "source": [
364 |     "samp_conv_val_feat = conv_model.predict(samp_val, batch_size=batch_size*2)\n",
365 |     "samp_conv_feat = conv_model.predict(samp_trn, batch_size=batch_size*2)"
366 |    ]
367 |   },
368 |   {
369 |    "cell_type": "code",
370 |    "execution_count": null,
371 |    "metadata": {
372 |     "collapsed": true
373 |    },
374 |    "outputs": [],
375 |    "source": [
376 |     "save_array(sample_path+'results/conv_val_feat.dat', samp_conv_val_feat)\n",
377 |     "save_array(sample_path+'results/conv_feat.dat', samp_conv_feat)"
378 |    ]
379 |   },
380 |   {
381 |    "cell_type": "code",
382 |    "execution_count": null,
383 |    "metadata": {
384 |     "collapsed": true
385 |    },
386 |    "outputs": [],
387 |    "source": [
388 |     "samp_conv_feat = load_array(sample_path+'results/conv_feat.dat')\n",
389 |     "samp_conv_val_feat = load_array(sample_path+'results/conv_val_feat.dat')"
390 |    ]
391 |   },
392 |   {
393 |    "cell_type": "code",
394 |    "execution_count": null,
395 |    "metadata": {
396 |     "collapsed": true,
397 |     "scrolled": true
398 |    },
399 |    "outputs": [],
400 |    "source": [
401 |     "samp_conv_val_feat.shape"
402 |    ]
403 |   },
404 |   {
405 |    "cell_type": "markdown",
406 |    "metadata": {},
407 |    "source": [
408 |     "This is our usual Vgg network just covering the dense layers:"
409 |    ]
410 |   },
411 |   {
412 |    "cell_type": "code",
413 |    "execution_count": null,
414 |    "metadata": {
415 |     "collapsed": true
416 |    },
417 |    "outputs": [],
418 |    "source": [
419 |     "def get_dense_layers():\n",
420 |     "    return [\n",
421 |     "        MaxPooling2D(input_shape=conv_layers[-1].output_shape[1:]),\n",
422 |     "        Flatten(),\n",
423 |     "        Dense(4096, activation='relu'),\n",
424 |     "        Dropout(0.5),\n",
425 |     "        Dense(4096, activation='relu'),\n",
426 |     "        Dropout(0.5),\n",
427 |     "        # Dense(1000, activation='softmax')\n",
428 |     "        Dense(1000, activation='relu')\n",
429 |     "        ]"
430 |    ]
431 |   },
432 |   {
433 |    "cell_type": "code",
434 |    "execution_count": null,
435 |    "metadata": {
436 |     "collapsed": true
437 |    },
438 |    "outputs": [],
439 |    "source": [
440 |     "dense_model = Sequential(get_dense_layers())"
441 |    ]
442 |   },
443 |   {
444 |    "cell_type": "code",
445 |    "execution_count": null,
446 |    "metadata": {
447 |     "collapsed": true
448 |    },
449 |    "outputs": [],
450 |    "source": [
451 |     "for l1, l2 in zip(dense_layers, dense_model.layers):\n",
452 |     "    l2.set_weights(l1.get_weights())"
453 |    ]
454 |   },
455 |   {
456 |    "cell_type": "code",
457 |    "execution_count": null,
458 |    "metadata": {
459 |     "collapsed": true
460 |    },
461 |    "outputs": [],
462 |    "source": [
463 |     "dense_model.add(Dense(763, activation='softmax'))"
464 |    ]
465 |   },
466 |   {
467 |    "cell_type": "markdown",
468 |    "metadata": {},
469 |    "source": [
470 |     "### Check model"
471 |    ]
472 |   },
473 |   {
474 |    "cell_type": "markdown",
475 |    "metadata": {},
476 |    "source": [
477 |     "It's a good idea to check that your models are giving reasonable answers, before using them."
478 |    ]
479 |   },
480 |   {
481 |    "cell_type": "code",
482 |    "execution_count": null,
483 |    "metadata": {
484 |     "collapsed": true
485 |    },
486 |    "outputs": [],
487 |    "source": [
488 |     "dense_model.compile(Adam(), 'categorical_crossentropy', ['accuracy'])"
489 |    ]
490 |   },
491 |   {
492 |    "cell_type": "code",
493 |    "execution_count": null,
494 |    "metadata": {
495 |     "collapsed": true
496 |    },
497 |    "outputs": [],
498 |    "source": [
499 |     "dense_model.evaluate(samp_conv_val_feat, samp_val_labels)"
500 |    ]
501 |   },
502 |   {
503 |    "cell_type": "code",
504 |    "execution_count": null,
505 |    "metadata": {
506 |     "collapsed": true
507 |    },
508 |    "outputs": [],
509 |    "source": [
510 |     "model.compile(Adam(), 'categorical_crossentropy', ['accuracy'])"
511 |    ]
512 |   },
513 |   {
514 |    "cell_type": "code",
515 |    "execution_count": null,
516 |    "metadata": {
517 |     "collapsed": true
518 |    },
519 |    "outputs": [],
520 |    "source": [
521 |     "# should be identical to above\n",
522 |     "# model.evaluate(val, val_labels)"
523 |    ]
524 |   },
525 |   {
526 |    "cell_type": "code",
527 |    "execution_count": null,
528 |    "metadata": {
529 |     "collapsed": true
530 |    },
531 |    "outputs": [],
532 |    "source": [
533 |     "# should be a little better than above, since VGG authors overfit\n",
534 |     "# dense_model.evaluate(conv_feat, trn_labels)"
535 |    ]
536 |   },
537 |   {
538 |    "cell_type": "markdown",
539 |    "metadata": {
540 |     "collapsed": true
541 |    },
542 |    "source": [
543 |     "## Adding our new layers"
544 |    ]
545 |   },
546 |   {
547 |    "cell_type": "markdown",
548 |    "metadata": {},
549 |    "source": [
550 |     "### Calculating batchnorm params"
551 |    ]
552 |   },
553 |   {
554 |    "cell_type": "markdown",
555 |    "metadata": {},
556 |    "source": [
557 |     "To calculate the output of a layer in a Keras sequential model, we have to create a function that defines the input layer and the output layer, like this:"
558 |    ]
559 |   },
560 |   {
561 |    "cell_type": "code",
562 |    "execution_count": null,
563 |    "metadata": {
564 |     "collapsed": true
565 |    },
566 |    "outputs": [],
567 |    "source": [
568 |     "k_layer_out = K.function([dense_model.layers[0].input, K.learning_phase()], \n",
569 |     "                         [dense_model.layers[2].output])"
570 |    ]
571 |   },
572 |   {
573 |    "cell_type": "markdown",
574 |    "metadata": {},
575 |    "source": [
576 |     "Then we can call the function to get our layer activations:"
577 |    ]
578 |   },
579 |   {
580 |    "cell_type": "code",
581 |    "execution_count": null,
582 |    "metadata": {
583 |     "collapsed": true
584 |    },
585 |    "outputs": [],
586 |    "source": [
587 |     "d0_out = k_layer_out([samp_conv_val_feat, 0])[0]"
588 |    ]
589 |   },
590 |   {
591 |    "cell_type": "code",
592 |    "execution_count": null,
593 |    "metadata": {
594 |     "collapsed": true
595 |    },
596 |    "outputs": [],
597 |    "source": [
598 |     "k_layer_out = K.function([dense_model.layers[0].input, K.learning_phase()], \n",
599 |     "                         [dense_model.layers[4].output])"
600 |    ]
601 |   },
602 |   {
603 |    "cell_type": "code",
604 |    "execution_count": null,
605 |    "metadata": {
606 |     "collapsed": true
607 |    },
608 |    "outputs": [],
609 |    "source": [
610 |     "d2_out = k_layer_out([samp_conv_val_feat, 0])[0]"
611 |    ]
612 |   },
613 |   {
614 |    "cell_type": "markdown",
615 |    "metadata": {},
616 |    "source": [
617 |     "Now that we've got our activations, we can calculate the mean and standard deviation for each (note that due to a bug in keras, it's actually the variance that we'll need)."
618 |    ]
619 |   },
620 |   {
621 |    "cell_type": "code",
622 |    "execution_count": null,
623 |    "metadata": {
624 |     "collapsed": true
625 |    },
626 |    "outputs": [],
627 |    "source": [
628 |     "mu0,var0 = d0_out.mean(axis=0), d0_out.var(axis=0)\n",
629 |     "mu2,var2 = d2_out.mean(axis=0), d2_out.var(axis=0)"
630 |    ]
631 |   },
632 |   {
633 |    "cell_type": "markdown",
634 |    "metadata": {},
635 |    "source": [
636 |     "### Creating batchnorm model"
637 |    ]
638 |   },
639 |   {
640 |    "cell_type": "markdown",
641 |    "metadata": {},
642 |    "source": [
643 |     "Now we're ready to create and insert our layers just after each dense layer."
644 |    ]
645 |   },
646 |   {
647 |    "cell_type": "code",
648 |    "execution_count": null,
649 |    "metadata": {
650 |     "collapsed": true
651 |    },
652 |    "outputs": [],
653 |    "source": [
654 |     "nl1 = BatchNormalization()\n",
655 |     "nl2 = BatchNormalization()"
656 |    ]
657 |   },
658 |   {
659 |    "cell_type": "code",
660 |    "execution_count": null,
661 |    "metadata": {
662 |     "collapsed": true
663 |    },
664 |    "outputs": [],
665 |    "source": [
666 |     "bn_model = insert_layer(dense_model, nl2, 5)\n",
667 |     "bn_model = insert_layer(bn_model, nl1, 3)"
668 |    ]
669 |   },
670 |   {
671 |    "cell_type": "code",
672 |    "execution_count": null,
673 |    "metadata": {
674 |     "collapsed": true
675 |    },
676 |    "outputs": [],
677 |    "source": [
678 |     "bnl1 = bn_model.layers[3]\n",
679 |     "bnl4 = bn_model.layers[6]"
680 |    ]
681 |   },
682 |   {
683 |    "cell_type": "markdown",
684 |    "metadata": {},
685 |    "source": [
686 |     "After inserting the layers, we can set their weights to the variance and mean we just calculated."
687 |    ]
688 |   },
689 |   {
690 |    "cell_type": "code",
691 |    "execution_count": null,
692 |    "metadata": {
693 |     "collapsed": true
694 |    },
695 |    "outputs": [],
696 |    "source": [
697 |     "bnl1.set_weights([var0, mu0, mu0, var0])\n",
698 |     "bnl4.set_weights([var2, mu2, mu2, var2])"
699 |    ]
700 |   },
701 |   {
702 |    "cell_type": "code",
703 |    "execution_count": null,
704 |    "metadata": {
705 |     "collapsed": true
706 |    },
707 |    "outputs": [],
708 |    "source": [
709 |     "bn_model.compile(Adam(1e-5), 'categorical_crossentropy', ['accuracy'])"
710 |    ]
711 |   },
712 |   {
713 |    "cell_type": "markdown",
714 |    "metadata": {},
715 |    "source": [
716 |     "We should find that the new model gives identical results to those provided by the original VGG model."
717 |    ]
718 |   },
719 |   {
720 |    "cell_type": "code",
721 |    "execution_count": null,
722 |    "metadata": {
723 |     "collapsed": true
724 |    },
725 |    "outputs": [],
726 |    "source": [
727 |     "bn_model.evaluate(samp_conv_val_feat, samp_val_labels)"
728 |    ]
729 |   },
730 |   {
731 |    "cell_type": "code",
732 |    "execution_count": null,
733 |    "metadata": {
734 |     "collapsed": true
735 |    },
736 |    "outputs": [],
737 |    "source": [
738 |     "bn_model.evaluate(samp_conv_feat, samp_trn_labels)"
739 |    ]
740 |   },
741 |   {
742 |    "cell_type": "markdown",
743 |    "metadata": {},
744 |    "source": [
745 |     "### Optional - additional fine-tuning"
746 |    ]
747 |   },
748 |   {
749 |    "cell_type": "markdown",
750 |    "metadata": {},
751 |    "source": [
752 |     "Now that we have a VGG model with batchnorm, we might expect that the optimal weights would be a little different to what they were when originally created without batchnorm. So we fine tune the weights for one epoch."
753 |    ]
754 |   },
755 |   {
756 |    "cell_type": "code",
757 |    "execution_count": null,
758 |    "metadata": {
759 |     "collapsed": true
760 |    },
761 |    "outputs": [],
762 |    "source": [
763 |     "feat_bc = bcolz.open(fast_path+'trn_features.dat')"
764 |    ]
765 |   },
766 |   {
767 |    "cell_type": "code",
768 |    "execution_count": null,
769 |    "metadata": {
770 |     "collapsed": true
771 |    },
772 |    "outputs": [],
773 |    "source": [
774 |     "labels = load_array(fast_path+'trn_labels.dat')"
775 |    ]
776 |   },
777 |   {
778 |    "cell_type": "code",
779 |    "execution_count": null,
780 |    "metadata": {
781 |     "collapsed": true
782 |    },
783 |    "outputs": [],
784 |    "source": [
785 |     "val_feat_bc = bcolz.open(fast_path+'val_features.dat')"
786 |    ]
787 |   },
788 |   {
789 |    "cell_type": "code",
790 |    "execution_count": null,
791 |    "metadata": {
792 |     "collapsed": true
793 |    },
794 |    "outputs": [],
795 |    "source": [
796 |     "val_labels = load_array(fast_path+'val_labels.dat')"
797 |    ]
798 |   },
799 |   {
800 |    "cell_type": "code",
801 |    "execution_count": null,
802 |    "metadata": {
803 |     "collapsed": true
804 |    },
805 |    "outputs": [],
806 |    "source": [
807 |     "bn_model.fit(feat_bc, labels, nb_epoch=1, batch_size=batch_size,\n",
808 |     "             validation_data=(val_feat_bc, val_labels))"
809 |    ]
810 |   },
811 |   {
812 |    "cell_type": "markdown",
813 |    "metadata": {},
814 |    "source": [
815 |     "The results look quite encouraging! Note that these VGG weights are now specific to how keras handles image scaling - that is, it squashes and stretches images, rather than adding black borders. So this model is best used on images created in that way."
816 |    ]
817 |   },
818 |   {
819 |    "cell_type": "code",
820 |    "execution_count": null,
821 |    "metadata": {
822 |     "collapsed": true
823 |    },
824 |    "outputs": [],
825 |    "source": [
826 |     "bn_model.save_weights(path+'models/bn_model2.h5')"
827 |    ]
828 |   },
829 |   {
830 |    "cell_type": "code",
831 |    "execution_count": null,
832 |    "metadata": {
833 |     "collapsed": true
834 |    },
835 |    "outputs": [],
836 |    "source": [
837 |     "bn_model.load_weights(path+'models/bn_model2.h5')"
838 |    ]
839 |   },
840 |   {
841 |    "cell_type": "markdown",
842 |    "metadata": {
843 |     "collapsed": true
844 |    },
845 |    "source": [
846 |     "### Create combined model"
847 |    ]
848 |   },
849 |   {
850 |    "cell_type": "markdown",
851 |    "metadata": {},
852 |    "source": [
853 |     "Our last step is simply to copy our new dense layers on to the end of the convolutional part of the network, and save the new complete set of weights, so we can use them in the future when using VGG. (Of course, we'll also need to update our VGG architecture to add the batchnorm layers)."
854 |    ]
855 |   },
856 |   {
857 |    "cell_type": "code",
858 |    "execution_count": null,
859 |    "metadata": {
860 |     "collapsed": true
861 |    },
862 |    "outputs": [],
863 |    "source": [
864 |     "new_layers = copy_layers(bn_model.layers)\n",
865 |     "for layer in new_layers:\n",
866 |     "    conv_model.add(layer)"
867 |    ]
868 |   },
869 |   {
870 |    "cell_type": "code",
871 |    "execution_count": null,
872 |    "metadata": {
873 |     "collapsed": true
874 |    },
875 |    "outputs": [],
876 |    "source": [
877 |     "copy_weights(bn_model.layers, new_layers)"
878 |    ]
879 |   },
880 |   {
881 |    "cell_type": "code",
882 |    "execution_count": null,
883 |    "metadata": {
884 |     "collapsed": true
885 |    },
886 |    "outputs": [],
887 |    "source": [
888 |     "conv_model.compile(Adam(1e-5), 'categorical_crossentropy', ['accuracy'])"
889 |    ]
890 |   },
891 |   {
892 |    "cell_type": "code",
893 |    "execution_count": null,
894 |    "metadata": {
895 |     "collapsed": true
896 |    },
897 |    "outputs": [],
898 |    "source": [
899 |     "conv_model.evaluate(samp_val, samp_val_labels)"
900 |    ]
901 |   },
902 |   {
903 |    "cell_type": "code",
904 |    "execution_count": null,
905 |    "metadata": {
906 |     "collapsed": true
907 |    },
908 |    "outputs": [],
909 |    "source": [
910 |     "conv_model.save_weights(path+'models/inet_224squash_bn.h5')"
911 |    ]
912 |   },
913 |   {
914 |    "cell_type": "markdown",
915 |    "metadata": {
916 |     "collapsed": true
917 |    },
918 |    "source": [
919 |     "The code shown here is implemented in [vgg_bn.py](https://github.com/fastai/courses/blob/master/deeplearning1/nbs/vgg16bn.py), and there is a version of ``vgg_ft`` (our fine tuning function) with batch norm called ``vgg_ft_bn`` in [utils.py](https://github.com/fastai/courses/blob/master/deeplearning1/nbs/utils.py)."
920 |    ]
921 |   },
922 |   {
923 |    "cell_type": "code",
924 |    "execution_count": null,
925 |    "metadata": {
926 |     "collapsed": true
927 |    },
928 |    "outputs": [],
929 |    "source": []
930 |   }
931 |  ],
932 |  "metadata": {
933 |   "anaconda-cloud": {},
934 |   "kernelspec": {
935 |    "display_name": "Python 3",
936 |    "language": "python",
937 |    "name": "python3"
938 |   },
939 |   "language_info": {
940 |    "codemirror_mode": {
941 |     "name": "ipython",
942 |     "version": 3
943 |    },
944 |    "file_extension": ".py",
945 |    "mimetype": "text/x-python",
946 |    "name": "python",
947 |    "nbconvert_exporter": "python",
948 |    "pygments_lexer": "ipython3",
949 |    "version": "3.5.4"
950 |   },
951 |   "nav_menu": {},
952 |   "toc": {
953 |    "navigate_menu": true,
954 |    "number_sections": true,
955 |    "sideBar": true,
956 |    "threshold": 6,
957 |    "toc_cell": false,
958 |    "toc_section_display": "block",
959 |    "toc_window_display": false
960 |   }
961 |  },
962 |  "nbformat": 4,
963 |  "nbformat_minor": 1
964 | }
965 | 


--------------------------------------------------------------------------------
/nbs/lesson5.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "code",
   5 |    "execution_count": 1,
   6 |    "metadata": {
   7 |     "ExecuteTime": {
   8 |      "end_time": "2018-01-09T15:36:28.456263Z",
   9 |      "start_time": "2018-01-09T15:36:25.246783Z"
  10 |     }
  11 |    },
  12 |    "outputs": [
  13 |     {
  14 |      "name": "stderr",
  15 |      "output_type": "stream",
  16 |      "text": [
  17 |       "Using cuDNN version 6021 on context None\n",
  18 |       "Mapped name None to device cuda0: GeForce GTX TITAN X (0000:04:00.0)\n",
  19 |       "Using Theano backend.\n"
  20 |      ]
  21 |     }
  22 |    ],
  23 |    "source": [
  24 |     "from __future__ import division, print_function\n",
  25 |     "%matplotlib inline\n",
  26 |     "from importlib import reload  # Python 3\n",
  27 |     "import utils; reload(utils)\n",
  28 |     "from utils import *"
  29 |    ]
  30 |   },
  31 |   {
  32 |    "cell_type": "code",
  33 |    "execution_count": 2,
  34 |    "metadata": {
  35 |     "ExecuteTime": {
  36 |      "end_time": "2018-01-09T15:36:28.461234Z",
  37 |      "start_time": "2018-01-09T15:36:28.458339Z"
  38 |     }
  39 |    },
  40 |    "outputs": [],
  41 |    "source": [
  42 |     "path = \"data/imdb/\"\n",
  43 |     "model_path = path + 'models/'\n",
  44 |     "if not os.path.exists(model_path): os.mkdir(model_path)"
  45 |    ]
  46 |   },
  47 |   {
  48 |    "cell_type": "markdown",
  49 |    "metadata": {},
  50 |    "source": [
  51 |     "## Setup data"
  52 |    ]
  53 |   },
  54 |   {
  55 |    "cell_type": "markdown",
  56 |    "metadata": {},
  57 |    "source": [
  58 |     "We're going to look at the IMDB dataset, which contains movie reviews from IMDB, along with their sentiment. Keras comes with some helpers for this dataset."
  59 |    ]
  60 |   },
  61 |   {
  62 |    "cell_type": "code",
  63 |    "execution_count": 3,
  64 |    "metadata": {
  65 |     "ExecuteTime": {
  66 |      "end_time": "2018-01-09T15:36:28.541861Z",
  67 |      "start_time": "2018-01-09T15:36:28.463022Z"
  68 |     }
  69 |    },
  70 |    "outputs": [],
  71 |    "source": [
  72 |     "from keras.datasets import imdb\n",
  73 |     "idx = imdb.get_word_index()"
  74 |    ]
  75 |   },
  76 |   {
  77 |    "cell_type": "markdown",
  78 |    "metadata": {},
  79 |    "source": [
  80 |     "This is the word list:"
  81 |    ]
  82 |   },
  83 |   {
  84 |    "cell_type": "code",
  85 |    "execution_count": 4,
  86 |    "metadata": {
  87 |     "ExecuteTime": {
  88 |      "end_time": "2018-01-09T15:36:28.602069Z",
  89 |      "start_time": "2018-01-09T15:36:28.543828Z"
  90 |     }
  91 |    },
  92 |    "outputs": [
  93 |     {
  94 |      "data": {
  95 |       "text/plain": [
  96 |        "['the', 'and', 'a', 'of', 'to', 'is', 'br', 'in', 'it', 'i']"
  97 |       ]
  98 |      },
  99 |      "execution_count": 4,
 100 |      "metadata": {},
 101 |      "output_type": "execute_result"
 102 |     }
 103 |    ],
 104 |    "source": [
 105 |     "idx_arr = sorted(idx, key=idx.get)\n",
 106 |     "idx_arr[:10]"
 107 |    ]
 108 |   },
 109 |   {
 110 |    "cell_type": "markdown",
 111 |    "metadata": {},
 112 |    "source": [
 113 |     "...and this is the mapping from id to word"
 114 |    ]
 115 |   },
 116 |   {
 117 |    "cell_type": "code",
 118 |    "execution_count": 5,
 119 |    "metadata": {
 120 |     "ExecuteTime": {
 121 |      "end_time": "2018-01-09T15:36:28.644220Z",
 122 |      "start_time": "2018-01-09T15:36:28.603536Z"
 123 |     },
 124 |     "scrolled": false
 125 |    },
 126 |    "outputs": [],
 127 |    "source": [
 128 |     "idx2word = {v: k for k, v in idx.items()}"
 129 |    ]
 130 |   },
 131 |   {
 132 |    "cell_type": "markdown",
 133 |    "metadata": {},
 134 |    "source": [
 135 |     "We download the reviews using code copied from keras.datasets:"
 136 |    ]
 137 |   },
 138 |   {
 139 |    "cell_type": "code",
 140 |    "execution_count": 6,
 141 |    "metadata": {
 142 |     "ExecuteTime": {
 143 |      "end_time": "2018-01-09T15:36:30.774903Z",
 144 |      "start_time": "2018-01-09T15:36:28.645649Z"
 145 |     }
 146 |    },
 147 |    "outputs": [],
 148 |    "source": [
 149 |     "path = get_file('imdb_full.pkl',\n",
 150 |     "                origin='https://s3.amazonaws.com/text-datasets/imdb_full.pkl',\n",
 151 |     "                md5_hash='d091312047c43cf9e4e38fef92437263')\n",
 152 |     "f = open(path, 'rb')\n",
 153 |     "(x_train, labels_train), (x_test, labels_test) = pickle.load(f)"
 154 |    ]
 155 |   },
 156 |   {
 157 |    "cell_type": "code",
 158 |    "execution_count": 7,
 159 |    "metadata": {
 160 |     "ExecuteTime": {
 161 |      "end_time": "2018-01-09T15:36:30.779253Z",
 162 |      "start_time": "2018-01-09T15:36:30.776488Z"
 163 |     }
 164 |    },
 165 |    "outputs": [
 166 |     {
 167 |      "data": {
 168 |       "text/plain": [
 169 |        "25000"
 170 |       ]
 171 |      },
 172 |      "execution_count": 7,
 173 |      "metadata": {},
 174 |      "output_type": "execute_result"
 175 |     }
 176 |    ],
 177 |    "source": [
 178 |     "len(x_train)"
 179 |    ]
 180 |   },
 181 |   {
 182 |    "cell_type": "markdown",
 183 |    "metadata": {},
 184 |    "source": [
 185 |     "Here's the 1st review. As you see, the words have been replaced by ids. The ids can be looked up in idx2word."
 186 |    ]
 187 |   },
 188 |   {
 189 |    "cell_type": "code",
 190 |    "execution_count": 8,
 191 |    "metadata": {
 192 |     "ExecuteTime": {
 193 |      "end_time": "2018-01-09T15:36:30.817217Z",
 194 |      "start_time": "2018-01-09T15:36:30.780486Z"
 195 |     },
 196 |     "scrolled": false
 197 |    },
 198 |    "outputs": [
 199 |     {
 200 |      "data": {
 201 |       "text/plain": [
 202 |        "'23022, 309, 6, 3, 1069, 209, 9, 2175, 30, 1, 169, 55, 14, 46, 82, 5869, 41, 393, 110, 138, 14, 5359, 58, 4477, 150, 8, 1, 5032, 5948, 482, 69, 5, 261, 12, 23022, 73935, 2003, 6, 73, 2436, 5, 632, 71, 6, 5359, 1, 25279, 5, 2004, 10471, 1, 5941, 1534, 34, 67, 64, 205, 140, 65, 1232, 63526, 21145, 1, 49265, 4, 1, 223, 901, 29, 3024, 69, 4, 1, 5863, 10, 694, 2, 65, 1534, 51, 10, 216, 1, 387, 8, 60, 3, 1472, 3724, 802, 5, 3521, 177, 1, 393, 10, 1238, 14030, 30, 309, 3, 353, 344, 2989, 143, 130, 5, 7804, 28, 4, 126, 5359, 1472, 2375, 5, 23022, 309, 10, 532, 12, 108, 1470, 4, 58, 556, 101, 12, 23022, 309, 6, 227, 4187, 48, 3, 2237, 12, 9, 215'"
 203 |       ]
 204 |      },
 205 |      "execution_count": 8,
 206 |      "metadata": {},
 207 |      "output_type": "execute_result"
 208 |     }
 209 |    ],
 210 |    "source": [
 211 |     "', '.join(map(str, x_train[0]))"
 212 |    ]
 213 |   },
 214 |   {
 215 |    "cell_type": "markdown",
 216 |    "metadata": {},
 217 |    "source": [
 218 |     "The first word of the first review is 23022. Let's see what that is."
 219 |    ]
 220 |   },
 221 |   {
 222 |    "cell_type": "code",
 223 |    "execution_count": 9,
 224 |    "metadata": {
 225 |     "ExecuteTime": {
 226 |      "end_time": "2018-01-09T15:36:30.839786Z",
 227 |      "start_time": "2018-01-09T15:36:30.819303Z"
 228 |     }
 229 |    },
 230 |    "outputs": [
 231 |     {
 232 |      "data": {
 233 |       "text/plain": [
 234 |        "'bromwell'"
 235 |       ]
 236 |      },
 237 |      "execution_count": 9,
 238 |      "metadata": {},
 239 |      "output_type": "execute_result"
 240 |     }
 241 |    ],
 242 |    "source": [
 243 |     "idx2word[23022]"
 244 |    ]
 245 |   },
 246 |   {
 247 |    "cell_type": "markdown",
 248 |    "metadata": {},
 249 |    "source": [
 250 |     "Here's the whole review, mapped from ids to words."
 251 |    ]
 252 |   },
 253 |   {
 254 |    "cell_type": "code",
 255 |    "execution_count": 10,
 256 |    "metadata": {
 257 |     "ExecuteTime": {
 258 |      "end_time": "2018-01-09T15:36:30.870150Z",
 259 |      "start_time": "2018-01-09T15:36:30.841763Z"
 260 |     },
 261 |     "scrolled": false
 262 |    },
 263 |    "outputs": [
 264 |     {
 265 |      "data": {
 266 |       "text/plain": [
 267 |        "\"bromwell high is a cartoon comedy it ran at the same time as some other programs about school life such as teachers my 35 years in the teaching profession lead me to believe that bromwell high's satire is much closer to reality than is teachers the scramble to survive financially the insightful students who can see right through their pathetic teachers' pomp the pettiness of the whole situation all remind me of the schools i knew and their students when i saw the episode in which a student repeatedly tried to burn down the school i immediately recalled at high a classic line inspector i'm here to sack one of your teachers student welcome to bromwell high i expect that many adults of my age think that bromwell high is far fetched what a pity that it isn't\""
 268 |       ]
 269 |      },
 270 |      "execution_count": 10,
 271 |      "metadata": {},
 272 |      "output_type": "execute_result"
 273 |     }
 274 |    ],
 275 |    "source": [
 276 |     "' '.join([idx2word[o] for o in x_train[0]])"
 277 |    ]
 278 |   },
 279 |   {
 280 |    "cell_type": "markdown",
 281 |    "metadata": {},
 282 |    "source": [
 283 |     "The labels are 1 for positive, 0 for negative."
 284 |    ]
 285 |   },
 286 |   {
 287 |    "cell_type": "code",
 288 |    "execution_count": 11,
 289 |    "metadata": {
 290 |     "ExecuteTime": {
 291 |      "end_time": "2018-01-09T15:36:30.910789Z",
 292 |      "start_time": "2018-01-09T15:36:30.872255Z"
 293 |     }
 294 |    },
 295 |    "outputs": [
 296 |     {
 297 |      "data": {
 298 |       "text/plain": [
 299 |        "[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]"
 300 |       ]
 301 |      },
 302 |      "execution_count": 11,
 303 |      "metadata": {},
 304 |      "output_type": "execute_result"
 305 |     }
 306 |    ],
 307 |    "source": [
 308 |     "labels_train[:10]"
 309 |    ]
 310 |   },
 311 |   {
 312 |    "cell_type": "markdown",
 313 |    "metadata": {},
 314 |    "source": [
 315 |     "Reduce vocab size by setting rare words to max index."
 316 |    ]
 317 |   },
 318 |   {
 319 |    "cell_type": "code",
 320 |    "execution_count": 12,
 321 |    "metadata": {
 322 |     "ExecuteTime": {
 323 |      "end_time": "2018-01-09T15:36:32.664917Z",
 324 |      "start_time": "2018-01-09T15:36:30.914144Z"
 325 |     }
 326 |    },
 327 |    "outputs": [
 328 |     {
 329 |      "data": {
 330 |       "text/plain": [
 331 |        "'bergman'"
 332 |       ]
 333 |      },
 334 |      "execution_count": 12,
 335 |      "metadata": {},
 336 |      "output_type": "execute_result"
 337 |     }
 338 |    ],
 339 |    "source": [
 340 |     "vocab_size = 5000\n",
 341 |     "\n",
 342 |     "trn = [np.array([i if i<vocab_size-1 else vocab_size-1 for i in s]) for s in x_train]\n",
 343 |     "test = [np.array([i if i<vocab_size-1 else vocab_size-1 for i in s]) for s in x_test]\n",
 344 |     "# check the \"max index\" word out of curiosity\n",
 345 |     "idx2word[vocab_size-1]"
 346 |    ]
 347 |   },
 348 |   {
 349 |    "cell_type": "markdown",
 350 |    "metadata": {},
 351 |    "source": [
 352 |     "Look at distribution of lengths of sentences."
 353 |    ]
 354 |   },
 355 |   {
 356 |    "cell_type": "code",
 357 |    "execution_count": 13,
 358 |    "metadata": {
 359 |     "ExecuteTime": {
 360 |      "end_time": "2018-01-09T15:36:32.674699Z",
 361 |      "start_time": "2018-01-09T15:36:32.666264Z"
 362 |     },
 363 |     "scrolled": true
 364 |    },
 365 |    "outputs": [
 366 |     {
 367 |      "data": {
 368 |       "text/plain": [
 369 |        "(2493, 10, 237.71364)"
 370 |       ]
 371 |      },
 372 |      "execution_count": 13,
 373 |      "metadata": {},
 374 |      "output_type": "execute_result"
 375 |     }
 376 |    ],
 377 |    "source": [
 378 |     "lens = np.array([*map(len, trn)])\n",
 379 |     "(lens.max(), lens.min(), lens.mean())"
 380 |    ]
 381 |   },
 382 |   {
 383 |    "cell_type": "markdown",
 384 |    "metadata": {},
 385 |    "source": [
 386 |     "Pad (with zero) or truncate each sentence to make consistent length."
 387 |    ]
 388 |   },
 389 |   {
 390 |    "cell_type": "code",
 391 |    "execution_count": 14,
 392 |    "metadata": {
 393 |     "ExecuteTime": {
 394 |      "end_time": "2018-01-09T15:36:32.955702Z",
 395 |      "start_time": "2018-01-09T15:36:32.675824Z"
 396 |     }
 397 |    },
 398 |    "outputs": [],
 399 |    "source": [
 400 |     "seq_len = 500\n",
 401 |     "\n",
 402 |     "trn = sequence.pad_sequences(trn, maxlen=seq_len, value=0)\n",
 403 |     "test = sequence.pad_sequences(test, maxlen=seq_len, value=0)"
 404 |    ]
 405 |   },
 406 |   {
 407 |    "cell_type": "markdown",
 408 |    "metadata": {},
 409 |    "source": [
 410 |     "This results in nice rectangular matrices that can be passed to ML algorithms. Reviews shorter than 500 words are pre-padded with zeros, those greater are truncated."
 411 |    ]
 412 |   },
 413 |   {
 414 |    "cell_type": "code",
 415 |    "execution_count": 15,
 416 |    "metadata": {
 417 |     "ExecuteTime": {
 418 |      "end_time": "2018-01-09T15:36:32.960220Z",
 419 |      "start_time": "2018-01-09T15:36:32.957458Z"
 420 |     },
 421 |     "scrolled": false
 422 |    },
 423 |    "outputs": [
 424 |     {
 425 |      "data": {
 426 |       "text/plain": [
 427 |        "(25000, 500)"
 428 |       ]
 429 |      },
 430 |      "execution_count": 15,
 431 |      "metadata": {},
 432 |      "output_type": "execute_result"
 433 |     }
 434 |    ],
 435 |    "source": [
 436 |     "trn.shape"
 437 |    ]
 438 |   },
 439 |   {
 440 |    "cell_type": "code",
 441 |    "execution_count": 16,
 442 |    "metadata": {
 443 |     "ExecuteTime": {
 444 |      "end_time": "2018-01-09T15:36:32.987884Z",
 445 |      "start_time": "2018-01-09T15:36:32.961433Z"
 446 |     }
 447 |    },
 448 |    "outputs": [],
 449 |    "source": [
 450 |     "batch_size = 64"
 451 |    ]
 452 |   },
 453 |   {
 454 |    "cell_type": "markdown",
 455 |    "metadata": {},
 456 |    "source": [
 457 |     "## Create simple models"
 458 |    ]
 459 |   },
 460 |   {
 461 |    "cell_type": "markdown",
 462 |    "metadata": {},
 463 |    "source": [
 464 |     "### Single hidden layer NN"
 465 |    ]
 466 |   },
 467 |   {
 468 |    "cell_type": "markdown",
 469 |    "metadata": {},
 470 |    "source": [
 471 |     "The simplest model that tends to give reasonable results is a single hidden layer net. So let's try that. Note that we can't expect to get any useful results by feeding word ids directly into a neural net - so instead we use an embedding to replace them with a vector of 32 (initially random) floats for each word in the vocab."
 472 |    ]
 473 |   },
 474 |   {
 475 |    "cell_type": "code",
 476 |    "execution_count": 17,
 477 |    "metadata": {
 478 |     "ExecuteTime": {
 479 |      "end_time": "2018-01-09T15:36:35.985539Z",
 480 |      "start_time": "2018-01-09T15:36:32.989027Z"
 481 |     }
 482 |    },
 483 |    "outputs": [],
 484 |    "source": [
 485 |     "model = Sequential([\n",
 486 |     "    Embedding(vocab_size, 32, input_length=seq_len),\n",
 487 |     "    Flatten(),\n",
 488 |     "    Dense(100, activation='relu'),\n",
 489 |     "    Dropout(0.7),\n",
 490 |     "    Dense(1, activation='sigmoid')])"
 491 |    ]
 492 |   },
 493 |   {
 494 |    "cell_type": "code",
 495 |    "execution_count": 18,
 496 |    "metadata": {
 497 |     "ExecuteTime": {
 498 |      "end_time": "2018-01-09T15:36:36.115636Z",
 499 |      "start_time": "2018-01-09T15:36:35.987561Z"
 500 |     },
 501 |     "scrolled": true
 502 |    },
 503 |    "outputs": [
 504 |     {
 505 |      "name": "stdout",
 506 |      "output_type": "stream",
 507 |      "text": [
 508 |       "_________________________________________________________________\n",
 509 |       "Layer (type)                 Output Shape              Param #   \n",
 510 |       "=================================================================\n",
 511 |       "embedding_1 (Embedding)      (None, 500, 32)           160000    \n",
 512 |       "_________________________________________________________________\n",
 513 |       "flatten_1 (Flatten)          (None, 16000)             0         \n",
 514 |       "_________________________________________________________________\n",
 515 |       "dense_1 (Dense)              (None, 100)               1600100   \n",
 516 |       "_________________________________________________________________\n",
 517 |       "dropout_1 (Dropout)          (None, 100)               0         \n",
 518 |       "_________________________________________________________________\n",
 519 |       "dense_2 (Dense)              (None, 1)                 101       \n",
 520 |       "=================================================================\n",
 521 |       "Total params: 1,760,201\n",
 522 |       "Trainable params: 1,760,201\n",
 523 |       "Non-trainable params: 0\n",
 524 |       "_________________________________________________________________\n"
 525 |      ]
 526 |     }
 527 |    ],
 528 |    "source": [
 529 |     "model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])\n",
 530 |     "model.summary()"
 531 |    ]
 532 |   },
 533 |   {
 534 |    "cell_type": "code",
 535 |    "execution_count": 19,
 536 |    "metadata": {
 537 |     "ExecuteTime": {
 538 |      "end_time": "2018-01-09T15:36:44.389198Z",
 539 |      "start_time": "2018-01-09T15:36:36.117096Z"
 540 |     }
 541 |    },
 542 |    "outputs": [
 543 |     {
 544 |      "name": "stdout",
 545 |      "output_type": "stream",
 546 |      "text": [
 547 |       "Train on 25000 samples, validate on 25000 samples\n",
 548 |       "Epoch 1/2\n",
 549 |       "25000/25000 [==============================] - 2s 61us/step - loss: 0.4545 - acc: 0.7578 - val_loss: 0.2835 - val_acc: 0.8808\n",
 550 |       "Epoch 2/2\n",
 551 |       "25000/25000 [==============================] - 1s 46us/step - loss: 0.1969 - acc: 0.9259 - val_loss: 0.2988 - val_acc: 0.8748\n"
 552 |      ]
 553 |     },
 554 |     {
 555 |      "data": {
 556 |       "text/plain": [
 557 |        "<keras.callbacks.History at 0x7fb7692e7e48>"
 558 |       ]
 559 |      },
 560 |      "execution_count": 19,
 561 |      "metadata": {},
 562 |      "output_type": "execute_result"
 563 |     }
 564 |    ],
 565 |    "source": [
 566 |     "model.fit(trn, labels_train, validation_data=(test, labels_test), epochs=2, batch_size=batch_size)"
 567 |    ]
 568 |   },
 569 |   {
 570 |    "cell_type": "markdown",
 571 |    "metadata": {},
 572 |    "source": [
 573 |     "The [stanford paper](http://ai.stanford.edu/~amaas/papers/wvSent_acl2011.pdf) that this dataset is from cites a state of the art accuracy (without unlabelled data) of 0.883. So we're short of that, but on the right track."
 574 |    ]
 575 |   },
 576 |   {
 577 |    "cell_type": "markdown",
 578 |    "metadata": {},
 579 |    "source": [
 580 |     "### Single conv layer with max pooling"
 581 |    ]
 582 |   },
 583 |   {
 584 |    "cell_type": "markdown",
 585 |    "metadata": {},
 586 |    "source": [
 587 |     "A CNN is likely to work better, since it's designed to take advantage of ordered data. We'll need to use a 1D CNN, since a sequence of words is 1D."
 588 |    ]
 589 |   },
 590 |   {
 591 |    "cell_type": "code",
 592 |    "execution_count": 20,
 593 |    "metadata": {
 594 |     "ExecuteTime": {
 595 |      "end_time": "2018-01-09T15:36:44.940338Z",
 596 |      "start_time": "2018-01-09T15:36:44.391487Z"
 597 |     }
 598 |    },
 599 |    "outputs": [],
 600 |    "source": [
 601 |     "conv1 = Sequential([\n",
 602 |     "    Embedding(vocab_size, 32, input_length=seq_len),\n",
 603 |     "    SpatialDropout1D(0.2),\n",
 604 |     "    Dropout(0.2),\n",
 605 |     "    Conv1D(64, 5, padding='same', activation='relu'),\n",
 606 |     "    Dropout(0.2),\n",
 607 |     "    MaxPooling1D(),\n",
 608 |     "    Flatten(),\n",
 609 |     "    Dense(100, activation='relu'),\n",
 610 |     "    Dropout(0.7),\n",
 611 |     "    Dense(1, activation='sigmoid')])"
 612 |    ]
 613 |   },
 614 |   {
 615 |    "cell_type": "code",
 616 |    "execution_count": 21,
 617 |    "metadata": {
 618 |     "ExecuteTime": {
 619 |      "end_time": "2018-01-09T15:36:44.953110Z",
 620 |      "start_time": "2018-01-09T15:36:44.941598Z"
 621 |     }
 622 |    },
 623 |    "outputs": [],
 624 |    "source": [
 625 |     "conv1.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])"
 626 |    ]
 627 |   },
 628 |   {
 629 |    "cell_type": "code",
 630 |    "execution_count": 22,
 631 |    "metadata": {
 632 |     "ExecuteTime": {
 633 |      "end_time": "2018-01-09T15:37:08.911067Z",
 634 |      "start_time": "2018-01-09T15:36:44.954333Z"
 635 |     },
 636 |     "scrolled": false
 637 |    },
 638 |    "outputs": [
 639 |     {
 640 |      "name": "stdout",
 641 |      "output_type": "stream",
 642 |      "text": [
 643 |       "Train on 25000 samples, validate on 25000 samples\n",
 644 |       "Epoch 1/4\n",
 645 |       "25000/25000 [==============================] - 3s 139us/step - loss: 0.5199 - acc: 0.7041 - val_loss: 0.2844 - val_acc: 0.8822\n",
 646 |       "Epoch 2/4\n",
 647 |       "25000/25000 [==============================] - 3s 140us/step - loss: 0.2783 - acc: 0.8921 - val_loss: 0.2593 - val_acc: 0.8923\n",
 648 |       "Epoch 3/4\n",
 649 |       "25000/25000 [==============================] - 3s 140us/step - loss: 0.2209 - acc: 0.9164 - val_loss: 0.2680 - val_acc: 0.8878\n",
 650 |       "Epoch 4/4\n",
 651 |       "25000/25000 [==============================] - 3s 140us/step - loss: 0.1886 - acc: 0.9315 - val_loss: 0.2783 - val_acc: 0.8883\n"
 652 |      ]
 653 |     },
 654 |     {
 655 |      "data": {
 656 |       "text/plain": [
 657 |        "<keras.callbacks.History at 0x7fb75f144400>"
 658 |       ]
 659 |      },
 660 |      "execution_count": 22,
 661 |      "metadata": {},
 662 |      "output_type": "execute_result"
 663 |     }
 664 |    ],
 665 |    "source": [
 666 |     "conv1.fit(trn, labels_train, validation_data=(test, labels_test), epochs=4, batch_size=batch_size)"
 667 |    ]
 668 |   },
 669 |   {
 670 |    "cell_type": "markdown",
 671 |    "metadata": {},
 672 |    "source": [
 673 |     "That's well past the Stanford paper's accuracy - another win for CNNs!"
 674 |    ]
 675 |   },
 676 |   {
 677 |    "cell_type": "code",
 678 |    "execution_count": 23,
 679 |    "metadata": {
 680 |     "ExecuteTime": {
 681 |      "end_time": "2018-01-09T15:37:08.927645Z",
 682 |      "start_time": "2018-01-09T15:37:08.913264Z"
 683 |     }
 684 |    },
 685 |    "outputs": [],
 686 |    "source": [
 687 |     "conv1.save_weights(model_path + 'conv1.h5')"
 688 |    ]
 689 |   },
 690 |   {
 691 |    "cell_type": "code",
 692 |    "execution_count": 24,
 693 |    "metadata": {
 694 |     "ExecuteTime": {
 695 |      "end_time": "2018-01-09T15:37:08.991050Z",
 696 |      "start_time": "2018-01-09T15:37:08.929144Z"
 697 |     }
 698 |    },
 699 |    "outputs": [],
 700 |    "source": [
 701 |     "conv1.load_weights(model_path + 'conv1.h5')"
 702 |    ]
 703 |   },
 704 |   {
 705 |    "cell_type": "markdown",
 706 |    "metadata": {},
 707 |    "source": [
 708 |     "## Pre-trained vectors"
 709 |    ]
 710 |   },
 711 |   {
 712 |    "cell_type": "markdown",
 713 |    "metadata": {},
 714 |    "source": [
 715 |     "You may want to look at wordvectors.ipynb before moving on.\n",
 716 |     "\n",
 717 |     "In this section, we replicate the previous CNN, but using pre-trained embeddings."
 718 |    ]
 719 |   },
 720 |   {
 721 |    "cell_type": "code",
 722 |    "execution_count": 25,
 723 |    "metadata": {
 724 |     "ExecuteTime": {
 725 |      "end_time": "2018-01-09T15:37:09.011614Z",
 726 |      "start_time": "2018-01-09T15:37:08.992708Z"
 727 |     }
 728 |    },
 729 |    "outputs": [],
 730 |    "source": [
 731 |     "def load_vectors(loc):\n",
 732 |     "    return (load_array(loc+'.dat'),\n",
 733 |     "        pickle.load(open(loc+'_words.pkl','rb')),\n",
 734 |     "        pickle.load(open(loc+'_idx.pkl','rb')))"
 735 |    ]
 736 |   },
 737 |   {
 738 |    "cell_type": "code",
 739 |    "execution_count": 26,
 740 |    "metadata": {
 741 |     "ExecuteTime": {
 742 |      "end_time": "2018-01-09T15:37:09.239444Z",
 743 |      "start_time": "2018-01-09T15:37:09.012989Z"
 744 |     },
 745 |     "scrolled": true
 746 |    },
 747 |    "outputs": [],
 748 |    "source": [
 749 |     "vecs, words, wordidx = load_vectors('data/glove/results/6B.50d')"
 750 |    ]
 751 |   },
 752 |   {
 753 |    "cell_type": "markdown",
 754 |    "metadata": {},
 755 |    "source": [
 756 |     "The glove word ids and imdb word ids use different indexes. So we create a simple function that creates an embedding matrix using the indexes from imdb, and the embeddings from glove (where they exist)."
 757 |    ]
 758 |   },
 759 |   {
 760 |    "cell_type": "code",
 761 |    "execution_count": 27,
 762 |    "metadata": {
 763 |     "ExecuteTime": {
 764 |      "end_time": "2018-01-09T15:37:09.252893Z",
 765 |      "start_time": "2018-01-09T15:37:09.240872Z"
 766 |     }
 767 |    },
 768 |    "outputs": [],
 769 |    "source": [
 770 |     "def create_emb():\n",
 771 |     "    n_fact = vecs.shape[1]\n",
 772 |     "    emb = np.zeros((vocab_size, n_fact))\n",
 773 |     "\n",
 774 |     "    for i in range(1,len(emb)):\n",
 775 |     "        word = idx2word[i]\n",
 776 |     "        if word and re.match(r\"^[a-zA-Z0-9\\-]*$\", word):\n",
 777 |     "            src_idx = wordidx[word]\n",
 778 |     "            emb[i] = vecs[src_idx]\n",
 779 |     "        else:\n",
 780 |     "            # If we can't find the word in glove, randomly initialize\n",
 781 |     "            emb[i] = normal(scale=0.6, size=(n_fact,))\n",
 782 |     "\n",
 783 |     "    # This is our \"rare word\" id - we want to randomly initialize\n",
 784 |     "    emb[-1] = normal(scale=0.6, size=(n_fact,))\n",
 785 |     "    emb/=3\n",
 786 |     "    return emb"
 787 |    ]
 788 |   },
 789 |   {
 790 |    "cell_type": "code",
 791 |    "execution_count": 28,
 792 |    "metadata": {
 793 |     "ExecuteTime": {
 794 |      "end_time": "2018-01-09T15:37:09.310781Z",
 795 |      "start_time": "2018-01-09T15:37:09.255346Z"
 796 |     }
 797 |    },
 798 |    "outputs": [],
 799 |    "source": [
 800 |     "emb = create_emb()"
 801 |    ]
 802 |   },
 803 |   {
 804 |    "cell_type": "markdown",
 805 |    "metadata": {},
 806 |    "source": [
 807 |     "We pass our embedding matrix to the Embedding constructor, and set it to non-trainable."
 808 |    ]
 809 |   },
 810 |   {
 811 |    "cell_type": "code",
 812 |    "execution_count": 29,
 813 |    "metadata": {
 814 |     "ExecuteTime": {
 815 |      "end_time": "2018-01-09T15:37:09.791497Z",
 816 |      "start_time": "2018-01-09T15:37:09.312813Z"
 817 |     }
 818 |    },
 819 |    "outputs": [],
 820 |    "source": [
 821 |     "model = Sequential([\n",
 822 |     "    Embedding(vocab_size, 50, input_length=seq_len,\n",
 823 |     "              weights=[emb], trainable=False),\n",
 824 |     "    SpatialDropout1D(0.2),\n",
 825 |     "    Dropout(0.25),\n",
 826 |     "    Convolution1D(64, 5, padding='same', activation='relu'),\n",
 827 |     "    Dropout(0.25),\n",
 828 |     "    MaxPooling1D(),\n",
 829 |     "    Flatten(),\n",
 830 |     "    Dense(100, activation='relu'),\n",
 831 |     "    Dropout(0.7),\n",
 832 |     "    Dense(1, activation='sigmoid')])"
 833 |    ]
 834 |   },
 835 |   {
 836 |    "cell_type": "code",
 837 |    "execution_count": 30,
 838 |    "metadata": {
 839 |     "ExecuteTime": {
 840 |      "end_time": "2018-01-09T15:37:09.805095Z",
 841 |      "start_time": "2018-01-09T15:37:09.793122Z"
 842 |     }
 843 |    },
 844 |    "outputs": [],
 845 |    "source": [
 846 |     "model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])"
 847 |    ]
 848 |   },
 849 |   {
 850 |    "cell_type": "code",
 851 |    "execution_count": 31,
 852 |    "metadata": {
 853 |     "ExecuteTime": {
 854 |      "end_time": "2018-01-09T15:37:17.713147Z",
 855 |      "start_time": "2018-01-09T15:37:09.806293Z"
 856 |     },
 857 |     "scrolled": true
 858 |    },
 859 |    "outputs": [
 860 |     {
 861 |      "name": "stdout",
 862 |      "output_type": "stream",
 863 |      "text": [
 864 |       "Train on 25000 samples, validate on 25000 samples\n",
 865 |       "Epoch 1/2\n",
 866 |       "25000/25000 [==============================] - 3s 114us/step - loss: 0.6864 - acc: 0.5476 - val_loss: 0.6292 - val_acc: 0.6946\n",
 867 |       "Epoch 2/2\n",
 868 |       "25000/25000 [==============================] - 3s 115us/step - loss: 0.6047 - acc: 0.6765 - val_loss: 0.5632 - val_acc: 0.7289\n"
 869 |      ]
 870 |     },
 871 |     {
 872 |      "data": {
 873 |       "text/plain": [
 874 |        "<keras.callbacks.History at 0x7fb7496c1588>"
 875 |       ]
 876 |      },
 877 |      "execution_count": 31,
 878 |      "metadata": {},
 879 |      "output_type": "execute_result"
 880 |     }
 881 |    ],
 882 |    "source": [
 883 |     "model.fit(trn, labels_train, validation_data=(test, labels_test), epochs=2, batch_size=batch_size)"
 884 |    ]
 885 |   },
 886 |   {
 887 |    "cell_type": "markdown",
 888 |    "metadata": {},
 889 |    "source": [
 890 |     "We already have beaten our previous model! But let's fine-tune the embedding weights - especially since the words we couldn't find in glove just have random embeddings."
 891 |    ]
 892 |   },
 893 |   {
 894 |    "cell_type": "code",
 895 |    "execution_count": 32,
 896 |    "metadata": {
 897 |     "ExecuteTime": {
 898 |      "end_time": "2018-01-09T15:37:17.716541Z",
 899 |      "start_time": "2018-01-09T15:37:17.714588Z"
 900 |     }
 901 |    },
 902 |    "outputs": [],
 903 |    "source": [
 904 |     "model.layers[0].trainable=True"
 905 |    ]
 906 |   },
 907 |   {
 908 |    "cell_type": "code",
 909 |    "execution_count": 33,
 910 |    "metadata": {
 911 |     "ExecuteTime": {
 912 |      "end_time": "2018-01-09T15:37:17.753775Z",
 913 |      "start_time": "2018-01-09T15:37:17.717705Z"
 914 |     }
 915 |    },
 916 |    "outputs": [],
 917 |    "source": [
 918 |     "#- added compile instruction in order to avoid a Keras 2.1 warning message\n",
 919 |     "model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])"
 920 |    ]
 921 |   },
 922 |   {
 923 |    "cell_type": "code",
 924 |    "execution_count": 34,
 925 |    "metadata": {
 926 |     "ExecuteTime": {
 927 |      "end_time": "2018-01-09T15:37:17.779437Z",
 928 |      "start_time": "2018-01-09T15:37:17.756560Z"
 929 |     }
 930 |    },
 931 |    "outputs": [],
 932 |    "source": [
 933 |     "model.optimizer.lr=1e-4"
 934 |    ]
 935 |   },
 936 |   {
 937 |    "cell_type": "code",
 938 |    "execution_count": 35,
 939 |    "metadata": {
 940 |     "ExecuteTime": {
 941 |      "end_time": "2018-01-09T15:37:25.264865Z",
 942 |      "start_time": "2018-01-09T15:37:17.781913Z"
 943 |     },
 944 |     "scrolled": true
 945 |    },
 946 |    "outputs": [
 947 |     {
 948 |      "name": "stdout",
 949 |      "output_type": "stream",
 950 |      "text": [
 951 |       "Train on 25000 samples, validate on 25000 samples\n",
 952 |       "Epoch 1/1\n",
 953 |       "25000/25000 [==============================] - 4s 161us/step - loss: 0.5459 - acc: 0.7395 - val_loss: 0.4819 - val_acc: 0.8041\n"
 954 |      ]
 955 |     },
 956 |     {
 957 |      "data": {
 958 |       "text/plain": [
 959 |        "<keras.callbacks.History at 0x7fb749ca7fd0>"
 960 |       ]
 961 |      },
 962 |      "execution_count": 35,
 963 |      "metadata": {},
 964 |      "output_type": "execute_result"
 965 |     }
 966 |    ],
 967 |    "source": [
 968 |     "model.fit(trn, labels_train, validation_data=(test, labels_test), epochs=1, batch_size=batch_size)"
 969 |    ]
 970 |   },
 971 |   {
 972 |    "cell_type": "markdown",
 973 |    "metadata": {},
 974 |    "source": [
 975 |     "As expected, that's given us a nice little boost. :)"
 976 |    ]
 977 |   },
 978 |   {
 979 |    "cell_type": "code",
 980 |    "execution_count": 36,
 981 |    "metadata": {
 982 |     "ExecuteTime": {
 983 |      "end_time": "2018-01-09T15:37:25.285694Z",
 984 |      "start_time": "2018-01-09T15:37:25.266237Z"
 985 |     }
 986 |    },
 987 |    "outputs": [],
 988 |    "source": [
 989 |     "model.save_weights(model_path+'glove50.h5')"
 990 |    ]
 991 |   },
 992 |   {
 993 |    "cell_type": "markdown",
 994 |    "metadata": {},
 995 |    "source": [
 996 |     "## Multi-size CNN"
 997 |    ]
 998 |   },
 999 |   {
1000 |    "cell_type": "markdown",
1001 |    "metadata": {},
1002 |    "source": [
1003 |     "This is an implementation of a multi-size CNN as shown in Ben Bowles' [excellent blog post](https://quid.com/feed/how-quid-uses-deep-learning-with-small-data)."
1004 |    ]
1005 |   },
1006 |   {
1007 |    "cell_type": "code",
1008 |    "execution_count": 37,
1009 |    "metadata": {
1010 |     "ExecuteTime": {
1011 |      "end_time": "2018-01-09T15:37:25.331618Z",
1012 |      "start_time": "2018-01-09T15:37:25.288099Z"
1013 |     }
1014 |    },
1015 |    "outputs": [],
1016 |    "source": [
1017 |     "from keras.layers import Merge"
1018 |    ]
1019 |   },
1020 |   {
1021 |    "cell_type": "markdown",
1022 |    "metadata": {},
1023 |    "source": [
1024 |     "We use the functional API to create multiple conv layers of different sizes, and then concatenate them."
1025 |    ]
1026 |   },
1027 |   {
1028 |    "cell_type": "code",
1029 |    "execution_count": 38,
1030 |    "metadata": {
1031 |     "ExecuteTime": {
1032 |      "end_time": "2018-01-09T15:37:25.663878Z",
1033 |      "start_time": "2018-01-09T15:37:25.333959Z"
1034 |     }
1035 |    },
1036 |    "outputs": [],
1037 |    "source": [
1038 |     "graph_in = Input ((vocab_size, 50))\n",
1039 |     "convs = [ ] \n",
1040 |     "for fsz in range (3, 6): \n",
1041 |     "    x = Conv1D(64, fsz, padding='same', activation=\"relu\")(graph_in)\n",
1042 |     "    x = MaxPooling1D()(x) \n",
1043 |     "    x = Flatten()(x) \n",
1044 |     "    convs.append(x)\n",
1045 |     "out = Concatenate(axis=-1)(convs) \n",
1046 |     "graph = Model(graph_in, out) "
1047 |    ]
1048 |   },
1049 |   {
1050 |    "cell_type": "code",
1051 |    "execution_count": 39,
1052 |    "metadata": {
1053 |     "ExecuteTime": {
1054 |      "end_time": "2018-01-09T15:37:25.680572Z",
1055 |      "start_time": "2018-01-09T15:37:25.665333Z"
1056 |     }
1057 |    },
1058 |    "outputs": [],
1059 |    "source": [
1060 |     "emb = create_emb()"
1061 |    ]
1062 |   },
1063 |   {
1064 |    "cell_type": "markdown",
1065 |    "metadata": {},
1066 |    "source": [
1067 |     "We then replace the conv/max-pool layer in our original CNN with the concatenated conv layers."
1068 |    ]
1069 |   },
1070 |   {
1071 |    "cell_type": "code",
1072 |    "execution_count": 40,
1073 |    "metadata": {
1074 |     "ExecuteTime": {
1075 |      "end_time": "2018-01-09T15:37:26.077190Z",
1076 |      "start_time": "2018-01-09T15:37:25.681892Z"
1077 |     }
1078 |    },
1079 |    "outputs": [],
1080 |    "source": [
1081 |     "model = Sequential ([\n",
1082 |     "    Embedding(vocab_size, 50, input_length=seq_len, weights=[emb]),\n",
1083 |     "    SpatialDropout1D(0.2),\n",
1084 |     "    Dropout (0.2),\n",
1085 |     "    graph,\n",
1086 |     "    Dropout (0.5),\n",
1087 |     "    Dense (100, activation=\"relu\"),\n",
1088 |     "    Dropout (0.7),\n",
1089 |     "    Dense (1, activation='sigmoid')\n",
1090 |     "    ])"
1091 |    ]
1092 |   },
1093 |   {
1094 |    "cell_type": "code",
1095 |    "execution_count": 41,
1096 |    "metadata": {
1097 |     "ExecuteTime": {
1098 |      "end_time": "2018-01-09T15:37:26.091788Z",
1099 |      "start_time": "2018-01-09T15:37:26.078778Z"
1100 |     }
1101 |    },
1102 |    "outputs": [],
1103 |    "source": [
1104 |     "model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])"
1105 |    ]
1106 |   },
1107 |   {
1108 |    "cell_type": "code",
1109 |    "execution_count": 42,
1110 |    "metadata": {
1111 |     "ExecuteTime": {
1112 |      "end_time": "2018-01-09T15:37:49.236254Z",
1113 |      "start_time": "2018-01-09T15:37:26.093178Z"
1114 |     },
1115 |     "scrolled": false
1116 |    },
1117 |    "outputs": [
1118 |     {
1119 |      "name": "stdout",
1120 |      "output_type": "stream",
1121 |      "text": [
1122 |       "Train on 25000 samples, validate on 25000 samples\n",
1123 |       "Epoch 1/2\n",
1124 |       "25000/25000 [==============================] - 9s 358us/step - loss: 0.7064 - acc: 0.5109 - val_loss: 0.6907 - val_acc: 0.6127\n",
1125 |       "Epoch 2/2\n",
1126 |       "25000/25000 [==============================] - 9s 360us/step - loss: 0.6307 - acc: 0.5772 - val_loss: 0.4123 - val_acc: 0.8628\n"
1127 |      ]
1128 |     },
1129 |     {
1130 |      "data": {
1131 |       "text/plain": [
1132 |        "<keras.callbacks.History at 0x7fb74950b4a8>"
1133 |       ]
1134 |      },
1135 |      "execution_count": 42,
1136 |      "metadata": {},
1137 |      "output_type": "execute_result"
1138 |     }
1139 |    ],
1140 |    "source": [
1141 |     "model.fit(trn, labels_train, validation_data=(test, labels_test), epochs=2, batch_size=batch_size)"
1142 |    ]
1143 |   },
1144 |   {
1145 |    "cell_type": "markdown",
1146 |    "metadata": {},
1147 |    "source": [
1148 |     "Interestingly, I found that in this case I got best results when I started the embedding layer as being trainable, and then set it to non-trainable after a couple of epochs. I have no idea why!"
1149 |    ]
1150 |   },
1151 |   {
1152 |    "cell_type": "code",
1153 |    "execution_count": 43,
1154 |    "metadata": {
1155 |     "ExecuteTime": {
1156 |      "end_time": "2018-01-09T15:37:49.240107Z",
1157 |      "start_time": "2018-01-09T15:37:49.237728Z"
1158 |     }
1159 |    },
1160 |    "outputs": [],
1161 |    "source": [
1162 |     "model.layers[0].trainable=False"
1163 |    ]
1164 |   },
1165 |   {
1166 |    "cell_type": "code",
1167 |    "execution_count": 44,
1168 |    "metadata": {
1169 |     "ExecuteTime": {
1170 |      "end_time": "2018-01-09T15:37:49.290696Z",
1171 |      "start_time": "2018-01-09T15:37:49.241432Z"
1172 |     }
1173 |    },
1174 |    "outputs": [],
1175 |    "source": [
1176 |     "#- added compile instruction in order to avoid a Keras 2.1 warning message\n",
1177 |     "model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])"
1178 |    ]
1179 |   },
1180 |   {
1181 |    "cell_type": "code",
1182 |    "execution_count": 45,
1183 |    "metadata": {
1184 |     "ExecuteTime": {
1185 |      "end_time": "2018-01-09T15:37:49.311712Z",
1186 |      "start_time": "2018-01-09T15:37:49.293566Z"
1187 |     }
1188 |    },
1189 |    "outputs": [],
1190 |    "source": [
1191 |     "model.optimizer.lr=1e-5"
1192 |    ]
1193 |   },
1194 |   {
1195 |    "cell_type": "code",
1196 |    "execution_count": 46,
1197 |    "metadata": {
1198 |     "ExecuteTime": {
1199 |      "end_time": "2018-01-09T15:38:06.992397Z",
1200 |      "start_time": "2018-01-09T15:37:49.313168Z"
1201 |     },
1202 |     "scrolled": true
1203 |    },
1204 |    "outputs": [
1205 |     {
1206 |      "name": "stdout",
1207 |      "output_type": "stream",
1208 |      "text": [
1209 |       "Train on 25000 samples, validate on 25000 samples\n",
1210 |       "Epoch 1/2\n",
1211 |       "25000/25000 [==============================] - 7s 281us/step - loss: 0.5267 - acc: 0.7285 - val_loss: 0.4087 - val_acc: 0.8636\n",
1212 |       "Epoch 2/2\n",
1213 |       "25000/25000 [==============================] - 7s 280us/step - loss: 0.5247 - acc: 0.7324 - val_loss: 0.4069 - val_acc: 0.8637\n"
1214 |      ]
1215 |     },
1216 |     {
1217 |      "data": {
1218 |       "text/plain": [
1219 |        "<keras.callbacks.History at 0x7fb7437547f0>"
1220 |       ]
1221 |      },
1222 |      "execution_count": 46,
1223 |      "metadata": {},
1224 |      "output_type": "execute_result"
1225 |     }
1226 |    ],
1227 |    "source": [
1228 |     "model.fit(trn, labels_train, validation_data=(test, labels_test), epochs=2, batch_size=batch_size)"
1229 |    ]
1230 |   },
1231 |   {
1232 |    "cell_type": "markdown",
1233 |    "metadata": {},
1234 |    "source": [
1235 |     "This more complex architecture has given us another boost in accuracy."
1236 |    ]
1237 |   },
1238 |   {
1239 |    "cell_type": "markdown",
1240 |    "metadata": {},
1241 |    "source": [
1242 |     "## LSTM"
1243 |    ]
1244 |   },
1245 |   {
1246 |    "cell_type": "markdown",
1247 |    "metadata": {},
1248 |    "source": [
1249 |     "We haven't covered this bit yet!"
1250 |    ]
1251 |   },
1252 |   {
1253 |    "cell_type": "code",
1254 |    "execution_count": 47,
1255 |    "metadata": {
1256 |     "ExecuteTime": {
1257 |      "end_time": "2018-01-09T15:38:08.142541Z",
1258 |      "start_time": "2018-01-09T15:38:06.995148Z"
1259 |     },
1260 |     "scrolled": true
1261 |    },
1262 |    "outputs": [
1263 |     {
1264 |      "name": "stdout",
1265 |      "output_type": "stream",
1266 |      "text": [
1267 |       "_________________________________________________________________\n",
1268 |       "Layer (type)                 Output Shape              Param #   \n",
1269 |       "=================================================================\n",
1270 |       "embedding_5 (Embedding)      (None, 500, 32)           160000    \n",
1271 |       "_________________________________________________________________\n",
1272 |       "spatial_dropout1d_4 (Spatial (None, 500, 32)           0         \n",
1273 |       "_________________________________________________________________\n",
1274 |       "lstm_1 (LSTM)                (None, 100)               53200     \n",
1275 |       "_________________________________________________________________\n",
1276 |       "dense_9 (Dense)              (None, 1)                 101       \n",
1277 |       "=================================================================\n",
1278 |       "Total params: 213,301\n",
1279 |       "Trainable params: 213,301\n",
1280 |       "Non-trainable params: 0\n",
1281 |       "_________________________________________________________________\n"
1282 |      ]
1283 |     }
1284 |    ],
1285 |    "source": [
1286 |     "model = Sequential([\n",
1287 |     "    Embedding(vocab_size, 32, input_length=seq_len, mask_zero=True,\n",
1288 |     "              embeddings_regularizer=l2(1e-6)),\n",
1289 |     "    SpatialDropout1D(0.2),\n",
1290 |     "    LSTM(100, implementation=2),\n",
1291 |     "    Dense(1, activation='sigmoid')])\n",
1292 |     "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
1293 |     "model.summary()"
1294 |    ]
1295 |   },
1296 |   {
1297 |    "cell_type": "code",
1298 |    "execution_count": 48,
1299 |    "metadata": {
1300 |     "ExecuteTime": {
1301 |      "end_time": "2018-01-09T15:44:42.078954Z",
1302 |      "start_time": "2018-01-09T15:38:08.144494Z"
1303 |     },
1304 |     "scrolled": false
1305 |    },
1306 |    "outputs": [
1307 |     {
1308 |      "name": "stdout",
1309 |      "output_type": "stream",
1310 |      "text": [
1311 |       "Train on 25000 samples, validate on 25000 samples\n",
1312 |       "Epoch 1/5\n",
1313 |       "25000/25000 [==============================] - 77s 3ms/step - loss: 0.4662 - acc: 0.7686 - val_loss: 0.3269 - val_acc: 0.8674\n",
1314 |       "Epoch 2/5\n",
1315 |       "25000/25000 [==============================] - 77s 3ms/step - loss: 0.2928 - acc: 0.8818 - val_loss: 0.3047 - val_acc: 0.8713\n",
1316 |       "Epoch 3/5\n",
1317 |       "25000/25000 [==============================] - 77s 3ms/step - loss: 0.2508 - acc: 0.9010 - val_loss: 0.3173 - val_acc: 0.8752\n",
1318 |       "Epoch 4/5\n",
1319 |       "25000/25000 [==============================] - 77s 3ms/step - loss: 0.2326 - acc: 0.9106 - val_loss: 0.3075 - val_acc: 0.8769\n",
1320 |       "Epoch 5/5\n",
1321 |       "25000/25000 [==============================] - 77s 3ms/step - loss: 0.1997 - acc: 0.9239 - val_loss: 0.3182 - val_acc: 0.8736\n"
1322 |      ]
1323 |     },
1324 |     {
1325 |      "data": {
1326 |       "text/plain": [
1327 |        "<keras.callbacks.History at 0x7fb71c1bf1d0>"
1328 |       ]
1329 |      },
1330 |      "execution_count": 48,
1331 |      "metadata": {},
1332 |      "output_type": "execute_result"
1333 |     }
1334 |    ],
1335 |    "source": [
1336 |     "model.fit(trn, labels_train, validation_data=(test, labels_test), epochs=5, batch_size=batch_size)"
1337 |    ]
1338 |   }
1339 |  ],
1340 |  "metadata": {
1341 |   "anaconda-cloud": {},
1342 |   "kernelspec": {
1343 |    "display_name": "Python 3",
1344 |    "language": "python",
1345 |    "name": "python3"
1346 |   },
1347 |   "language_info": {
1348 |    "codemirror_mode": {
1349 |     "name": "ipython",
1350 |     "version": 3
1351 |    },
1352 |    "file_extension": ".py",
1353 |    "mimetype": "text/x-python",
1354 |    "name": "python",
1355 |    "nbconvert_exporter": "python",
1356 |    "pygments_lexer": "ipython3",
1357 |    "version": "3.5.4"
1358 |   }
1359 |  },
1360 |  "nbformat": 4,
1361 |  "nbformat_minor": 1
1362 | }
1363 | 


--------------------------------------------------------------------------------
/nbs/resnet50.py:
--------------------------------------------------------------------------------
  1 | from __future__ import division, print_function
  2 | 
  3 | import os, json
  4 | from glob import glob
  5 | import numpy as np
  6 | from scipy import misc, ndimage
  7 | from scipy.ndimage.interpolation import zoom
  8 | 
  9 | import keras
 10 | from keras import backend as K
 11 | from keras.models import Sequential, Model
 12 | from keras.layers.core import Flatten, Dense, Dropout, Lambda
 13 | from keras.layers import Input, Activation, merge
 14 | from keras.optimizers import RMSprop
 15 | from keras.layers.normalization import BatchNormalization
 16 | from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding2D, AveragePooling2D  # Conv2D: Keras2
 17 | import keras.preprocessing.image as image
 18 | from keras.utils.data_utils import get_file
 19 | from keras.utils.layer_utils import convert_all_kernels_in_model
 20 | from keras.applications.resnet50 import identity_block, conv_block
 21 | 
 22 | 
 23 | class Resnet50():
 24 |     """The Resnet 50 Imagenet model"""
 25 | 
 26 | 
 27 |     def __init__(self, size=(224,224), include_top=True):
 28 |         self.FILE_PATH = 'http://files.fast.ai/models/'
 29 |         self.vgg_mean = np.array([123.68, 116.779, 103.939]).reshape((3,1,1))
 30 |         self.create(size, include_top)
 31 |         self.get_classes()
 32 | 
 33 | 
 34 |     def get_classes(self):
 35 |         fname = 'imagenet_class_index.json'
 36 |         fpath = get_file(fname, self.FILE_PATH+fname, cache_subdir='models')
 37 |         with open(fpath) as f:
 38 |             class_dict = json.load(f)
 39 |         self.classes = [class_dict[str(i)][1] for i in range(len(class_dict))]
 40 | 
 41 |     def predict(self, imgs, details=False):
 42 |         all_preds = self.model.predict(imgs)
 43 |         idxs = np.argmax(all_preds, axis=1)
 44 |         preds = [all_preds[i, idxs[i]] for i in range(len(idxs))]
 45 |         classes = [self.classes[idx] for idx in idxs]
 46 |         return np.array(preds), idxs, classes
 47 | 
 48 | 
 49 |     def vgg_preprocess(self, x):
 50 |         x = x - self.vgg_mean
 51 |         return x[:, ::-1] # reverse axis bgr->rgb
 52 | 
 53 | 
 54 |     def create(self, size, include_top):
 55 |         input_shape = (3,)+size
 56 |         img_input = Input(shape=input_shape)
 57 |         bn_axis = 1
 58 | 
 59 |         x = Lambda(self.vgg_preprocess)(img_input)
 60 |         x = ZeroPadding2D((3, 3))(x)
 61 |         x = Conv2D(64, 7, 7, subsample=(2, 2), name='conv1')(x)  # Keras2
 62 |         x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
 63 |         x = Activation('relu')(x)
 64 |         x = MaxPooling2D((3, 3), strides=(2, 2))(x)
 65 | 
 66 |         x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
 67 |         x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
 68 |         x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
 69 | 
 70 |         x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
 71 |         for n in ['b','c','d']: x = identity_block(x, 3, [128, 128, 512], stage=3, block=n)
 72 |         x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
 73 |         for n in ['b','c','d', 'e', 'f']: x = identity_block(x, 3, [256, 256, 1024], stage=4, block=n)
 74 | 
 75 |         x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
 76 |         x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
 77 |         x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
 78 | 
 79 |         if include_top:
 80 |             x = AveragePooling2D((7, 7), name='avg_pool')(x)
 81 |             x = Flatten()(x)
 82 |             x = Dense(1000, activation='softmax', name='fc1000')(x)
 83 |             fname = 'resnet50.h5'
 84 |         else:
 85 |             fname = 'resnet_nt.h5'
 86 | 
 87 |         self.img_input = img_input
 88 |         self.model = Model(self.img_input, x)
 89 |         convert_all_kernels_in_model(self.model)
 90 |         self.model.load_weights(get_file(fname, self.FILE_PATH+fname, cache_subdir='models'))
 91 | 
 92 | 
 93 |     def get_batches(self, path, gen=image.ImageDataGenerator(),class_mode='categorical', shuffle=True, batch_size=8):
 94 |         return gen.flow_from_directory(path, target_size=(224,224),
 95 |                 class_mode=class_mode, shuffle=shuffle, batch_size=batch_size)
 96 | 
 97 | 
 98 |     def finetune(self, batches):
 99 |         model = self.model
100 |         model.layers.pop()
101 |         for layer in model.layers: layer.trainable=False
102 |         m = Dense(batches.num_classes, activation='softmax')(model.layers[-1].output)  # Keras 2.1
103 |         self.model = Model(model.input, m)
104 |         self.model.compile(optimizer=RMSprop(lr=0.1), loss='categorical_crossentropy', metrics=['accuracy']
105 |  
106 | 
107 |     # Keras2
108 |     def fit(self, batches, val_batches, batch_size, nb_epoch=1):
109 |         # Keras 1
110 |         # self.model.fit_generator(batches, samples_per_epoch=batches.nb_sample, nb_epoch=nb_epoch,
111 |         #        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)
112 |         # Keras 2
113 |         self.model.fit_generator(batches, steps_per_epoch=int(np.ceil(batches.samples/batch_size)), epochs=nb_epoch,
114 |                                  validation_data=val_batches, validation_steps=int(np.ceil(val_batches.samples/batch_size)))
115 | 
116 |                            
117 |     # Keras2
118 |     def test(self, path, batch_size=8):
119 |         test_batches = self.get_batches(path, shuffle=False, batch_size=batch_size, class_mode=None)
120 |         return test_batches, self.model.predict_generator(test_batches, int(np.ceil(test_batches.samples/batch_size)))


--------------------------------------------------------------------------------
/nbs/utils.py:
--------------------------------------------------------------------------------
  1 | from __future__ import division,print_function
  2 | import math, os, json, sys, re
  3 | 
  4 | # import cPickle as pickle  # Python 2
  5 | import pickle  # Python3
  6 | 
  7 | from glob import glob
  8 | import numpy as np
  9 | from matplotlib import pyplot as plt
 10 | from operator import itemgetter, attrgetter, methodcaller
 11 | from collections import OrderedDict
 12 | import itertools
 13 | from itertools import chain
 14 | 
 15 | import pandas as pd
 16 | import PIL
 17 | from PIL import Image
 18 | from numpy.random import random, permutation, randn, normal, uniform, choice
 19 | from numpy import newaxis
 20 | import scipy
 21 | from scipy import misc, ndimage
 22 | from scipy.ndimage.interpolation import zoom
 23 | from scipy.ndimage import imread
 24 | from sklearn.metrics import confusion_matrix
 25 | import bcolz
 26 | from sklearn.preprocessing import OneHotEncoder
 27 | from sklearn.manifold import TSNE
 28 | 
 29 | from IPython.lib.display import FileLink
 30 | 
 31 | import theano
 32 | from theano import shared, tensor as T
 33 | from theano.tensor.nnet import conv2d, nnet
 34 | from theano.tensor.signal import pool
 35 | 
 36 | import keras
 37 | from keras import backend as K
 38 | from keras.utils.data_utils import get_file
 39 | from keras.utils import np_utils
 40 | from keras.utils.np_utils import to_categorical
 41 | from keras.models import Sequential, Model
 42 | from keras.layers import Input, Embedding, Reshape, merge, LSTM, Bidirectional
 43 | from keras.layers import SpatialDropout1D, Concatenate  # Keras2
 44 | 
 45 | from keras.layers import TimeDistributed, Activation, SimpleRNN, GRU
 46 | from keras.layers.core import Flatten, Dense, Dropout, Lambda
 47 | 
 48 | # from keras.regularizers import l2, activity_l2, l1, activity_l1  # Keras1
 49 | from keras.regularizers import l2, l1  # Keras2
 50 | 
 51 | from keras.layers.normalization import BatchNormalization
 52 | from keras.optimizers import SGD, RMSprop, Adam
 53 | 
 54 | # from keras.utils.layer_utils import layer_from_config  # Keras1
 55 | from keras.layers import deserialize  # Keras 2
 56 | from keras.layers.merge import dot, add, concatenate  # Keras2
 57 | from keras.metrics import categorical_crossentropy, categorical_accuracy
 58 | from keras.layers.convolutional import *
 59 | from keras.preprocessing import image, sequence
 60 | from keras.preprocessing.text import Tokenizer
 61 | 
 62 | from vgg16 import *
 63 | from vgg16bn import *
 64 | np.set_printoptions(precision=4, linewidth=100)
 65 | 
 66 | 
 67 | to_bw = np.array([0.299, 0.587, 0.114])
 68 | 
 69 | def gray(img):
 70 |     if K.image_dim_ordering() == 'tf':
 71 |         return np.rollaxis(img, 0, 1).dot(to_bw)
 72 |     else:
 73 |         return np.rollaxis(img, 0, 3).dot(to_bw)
 74 | 
 75 | def to_plot(img):
 76 |     if K.image_dim_ordering() == 'tf':
 77 |         return np.rollaxis(img, 0, 1).astype(np.uint8)
 78 |     else:
 79 |         return np.rollaxis(img, 0, 3).astype(np.uint8)
 80 | 
 81 | def plot(img):
 82 |     plt.imshow(to_plot(img))
 83 | 
 84 | 
 85 | def floor(x):
 86 |     return int(math.floor(x))
 87 | def ceil(x):
 88 |     return int(math.ceil(x))
 89 | 
 90 | def plots(ims, figsize=(12,6), rows=1, interp=False, titles=None):
 91 |     if type(ims[0]) is np.ndarray:
 92 |         ims = np.array(ims).astype(np.uint8)
 93 |         if (ims.shape[-1] != 3):
 94 |             ims = ims.transpose((0,2,3,1))
 95 |     f = plt.figure(figsize=figsize)
 96 |     for i in range(len(ims)):
 97 |         sp = f.add_subplot(rows, len(ims)//rows, i+1)
 98 |         sp.axis('Off')
 99 |         if titles is not None:
100 |             sp.set_title(titles[i], fontsize=16)
101 |         plt.imshow(ims[i], interpolation=None if interp else 'none')
102 | 
103 | 
104 | def do_clip(arr, mx):
105 |     clipped = np.clip(arr, (1-mx)/1, mx)
106 |     return clipped/clipped.sum(axis=1)[:, np.newaxis]
107 | 
108 | 
109 | def get_batches(dirname, gen=image.ImageDataGenerator(), shuffle=True, batch_size=4, class_mode='categorical',
110 |                 target_size=(224,224)):
111 |     return gen.flow_from_directory(dirname, target_size=target_size,
112 |             class_mode=class_mode, shuffle=shuffle, batch_size=batch_size)
113 | 
114 | 
115 | def onehot(x):
116 |     return to_categorical(x)
117 | 
118 | 
119 | def wrap_config(layer):
120 |     return {'class_name': layer.__class__.__name__, 'config': layer.get_config()}
121 | 
122 | 
123 | def copy_layer(layer): return deserialize(wrap_config(layer))  # Keras2
124 | 
125 | 
126 | def copy_layers(layers): return [copy_layer(layer) for layer in layers]
127 | 
128 | 
129 | def copy_weights(from_layers, to_layers):
130 |     for from_layer,to_layer in zip(from_layers, to_layers):
131 |         to_layer.set_weights(from_layer.get_weights())
132 | 
133 | 
134 | def copy_model(m):
135 |     res = Sequential(copy_layers(m.layers))
136 |     copy_weights(m.layers, res.layers)
137 |     return res
138 | 
139 | 
140 | def insert_layer(model, new_layer, index):
141 |     res = Sequential()
142 |     for i,layer in enumerate(model.layers):
143 |         if i==index: res.add(new_layer)
144 |         copied = deserialize(wrap_config(layer))  # Keras2
145 |         res.add(copied)
146 |         copied.set_weights(layer.get_weights())
147 |     return res
148 | 
149 | 
150 | def adjust_dropout(weights, prev_p, new_p):
151 |     scal = (1-prev_p)/(1-new_p)
152 |     return [o*scal for o in weights]
153 | 
154 | 
155 | def get_data(path, target_size=(224,224)):
156 |     batches = get_batches(path, shuffle=False, batch_size=1, class_mode=None, target_size=target_size)
157 |     return np.concatenate([batches.next() for i in range(batches.samples)])  # Keras2
158 | 
159 | 
160 | def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
161 |     """
162 |     This function prints and plots the confusion matrix.
163 |     Normalization can be applied by setting `normalize=True`.
164 |     (This function is copied from the scikit docs.)
165 |     """
166 |     plt.figure()
167 |     plt.imshow(cm, interpolation='nearest', cmap=cmap)
168 |     plt.title(title)
169 |     plt.colorbar()
170 |     tick_marks = np.arange(len(classes))
171 |     plt.xticks(tick_marks, classes, rotation=45)
172 |     plt.yticks(tick_marks, classes)
173 | 
174 |     if normalize:
175 |         cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
176 |     print(cm)
177 |     thresh = cm.max() / 2.
178 |     for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
179 |         plt.text(j, i, cm[i, j], horizontalalignment="center", color="white" if cm[i, j] > thresh else "black")
180 | 
181 |     plt.tight_layout()
182 |     plt.ylabel('True label')
183 |     plt.xlabel('Predicted label')
184 | 
185 | 
186 | def save_array(fname, arr):
187 |     c=bcolz.carray(arr, rootdir=fname, mode='w')
188 |     c.flush()
189 | 
190 | 
191 | def load_array(fname):
192 |     return bcolz.open(fname)[:]
193 | 
194 | 
195 | def mk_size(img, r2c):
196 |     r,c,_ = img.shape
197 |     curr_r2c = r/c
198 |     new_r, new_c = r,c
199 |     if r2c>curr_r2c:
200 |         new_r = floor(c*r2c)
201 |     else:
202 |         new_c = floor(r/r2c)
203 |     arr = np.zeros((new_r, new_c, 3), dtype=np.float32)
204 |     r2=(new_r-r)//2
205 |     c2=(new_c-c)//2
206 |     arr[floor(r2):floor(r2)+r,floor(c2):floor(c2)+c] = img
207 |     return arr
208 | 
209 | 
210 | def mk_square(img):
211 |     x,y,_ = img.shape
212 |     maxs = max(img.shape[:2])
213 |     y2=(maxs-y)//2
214 |     x2=(maxs-x)//2
215 |     arr = np.zeros((maxs,maxs,3), dtype=np.float32)
216 |     arr[floor(x2):floor(x2)+x,floor(y2):floor(y2)+y] = img
217 |     return arr
218 | 
219 | 
220 | def vgg_ft(out_dim):
221 |     vgg = Vgg16()
222 |     vgg.ft(out_dim)
223 |     model = vgg.model
224 |     return model
225 | 
226 | def vgg_ft_bn(out_dim):
227 |     vgg = Vgg16BN()
228 |     vgg.ft(out_dim)
229 |     model = vgg.model
230 |     return model
231 | 
232 | 
233 | def get_classes(path):
234 |     batches = get_batches(path+'train', shuffle=False, batch_size=1)
235 |     val_batches = get_batches(path+'valid', shuffle=False, batch_size=1)
236 |     test_batches = get_batches(path+'test', shuffle=False, batch_size=1)
237 |     return (val_batches.classes, batches.classes, onehot(val_batches.classes), onehot(batches.classes),
238 |         val_batches.filenames, batches.filenames, test_batches.filenames)
239 | 
240 | 
241 | def split_at(model, layer_type):
242 |     layers = model.layers
243 |     layer_idx = [index for index,layer in enumerate(layers)
244 |                  if type(layer) is layer_type][-1]
245 |     return layers[:layer_idx+1], layers[layer_idx+1:]
246 | 
247 | 
248 | class MixIterator(object):
249 |     def __init__(self, iters):
250 |         self.iters = iters
251 |         self.multi = type(iters) is list
252 |         if self.multi:
253 |             self.N = sum([it[0].N for it in self.iters])
254 |         else:
255 |             self.N = sum([it.N for it in self.iters])
256 | 
257 |     def reset(self):
258 |         for it in self.iters: it.reset()
259 | 
260 |     def __iter__(self):
261 |         return self
262 | 
263 |     def next(self, *args, **kwargs):
264 |         if self.multi:
265 |             nexts = [[next(it) for it in o] for o in self.iters]
266 |             n0 = np.concatenate([n[0] for n in nexts])
267 |             n1 = np.concatenate([n[1] for n in nexts])
268 |             return (n0, n1)
269 |         else:
270 |             nexts = [next(it) for it in self.iters]
271 |             n0 = np.concatenate([n[0] for n in nexts])
272 |             n1 = np.concatenate([n[1] for n in nexts])
273 |             return (n0, n1)
274 | 
275 | 


--------------------------------------------------------------------------------
/nbs/vgg16.py:
--------------------------------------------------------------------------------
  1 | from __future__ import division, print_function
  2 | 
  3 | import os, json
  4 | from glob import glob
  5 | import numpy as np
  6 | from scipy import misc, ndimage
  7 | from scipy.ndimage.interpolation import zoom
  8 | 
  9 | from keras import backend as K
 10 | from keras.layers.normalization import BatchNormalization
 11 | from keras.utils.data_utils import get_file
 12 | from keras.models import Sequential
 13 | from keras.layers.core import Flatten, Dense, Dropout, Lambda
 14 | from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding2D  # Conv2D: Keras2
 15 | from keras.layers.pooling import GlobalAveragePooling2D
 16 | from keras.optimizers import SGD, RMSprop, Adam
 17 | from keras.preprocessing import image
 18 | 
 19 | 
 20 | vgg_mean = np.array([123.68, 116.779, 103.939], dtype=np.float32).reshape((3,1,1))
 21 | def vgg_preprocess(x):
 22 |     x = x - vgg_mean
 23 |     return x[:, ::-1] # reverse axis rgb->bgr
 24 | 
 25 | 
 26 | class Vgg16():
 27 |     """The VGG 16 Imagenet model"""
 28 | 
 29 | 
 30 |     def __init__(self):
 31 |         self.FILE_PATH = 'http://files.fast.ai/models/'
 32 |         self.create()
 33 |         self.get_classes()
 34 | 
 35 | 
 36 |     def get_classes(self):
 37 |         fname = 'imagenet_class_index.json'
 38 |         fpath = get_file(fname, self.FILE_PATH+fname, cache_subdir='models')
 39 |         with open(fpath) as f:
 40 |             class_dict = json.load(f)
 41 |         self.classes = [class_dict[str(i)][1] for i in range(len(class_dict))]
 42 | 
 43 |     def predict(self, imgs, details=False):
 44 |         all_preds = self.model.predict(imgs)
 45 |         idxs = np.argmax(all_preds, axis=1)
 46 |         preds = [all_preds[i, idxs[i]] for i in range(len(idxs))]
 47 |         classes = [self.classes[idx] for idx in idxs]
 48 |         return np.array(preds), idxs, classes
 49 | 
 50 | 
 51 |     def ConvBlock(self, layers, filters):
 52 |         model = self.model
 53 |         for i in range(layers):
 54 |             model.add(ZeroPadding2D((1, 1)))
 55 |             model.add(Conv2D(filters, kernel_size=(3, 3), activation='relu'))  # Keras2
 56 |         model.add(MaxPooling2D((2, 2), strides=(2, 2)))
 57 | 
 58 | 
 59 |     def FCBlock(self):
 60 |         model = self.model
 61 |         model.add(Dense(4096, activation='relu'))
 62 |         model.add(Dropout(0.5))
 63 | 
 64 | 
 65 |     def create(self):
 66 |         model = self.model = Sequential()
 67 |         model.add(Lambda(vgg_preprocess, input_shape=(3,224,224), output_shape=(3,224,224)))
 68 | 
 69 |         self.ConvBlock(2, 64)
 70 |         self.ConvBlock(2, 128)
 71 |         self.ConvBlock(3, 256)
 72 |         self.ConvBlock(3, 512)
 73 |         self.ConvBlock(3, 512)
 74 | 
 75 |         model.add(Flatten())
 76 |         self.FCBlock()
 77 |         self.FCBlock()
 78 |         model.add(Dense(1000, activation='softmax'))
 79 | 
 80 |         fname = 'vgg16.h5'
 81 |         model.load_weights(get_file(fname, self.FILE_PATH+fname, cache_subdir='models'))
 82 | 
 83 | 
 84 |     def get_batches(self, path, gen=image.ImageDataGenerator(), shuffle=True, batch_size=8, class_mode='categorical'):
 85 |         return gen.flow_from_directory(path, target_size=(224,224),
 86 |                 class_mode=class_mode, shuffle=shuffle, batch_size=batch_size)
 87 | 
 88 | 
 89 |     def ft(self, num):
 90 |         model = self.model
 91 |         model.pop()
 92 |         for layer in model.layers: layer.trainable=False
 93 |         model.add(Dense(num, activation='softmax'))
 94 |         self.compile()
 95 | 
 96 |     def finetune(self, batches):
 97 |         self.ft(batches.num_classes)  # Keras 2.1
 98 |         classes = list(iter(batches.class_indices))
 99 |         for c in batches.class_indices:
100 |             classes[batches.class_indices[c]] = c
101 |         self.classes = classes
102 | 
103 | 
104 |     def compile(self, lr=0.001):
105 |         self.model.compile(optimizer=Adam(lr=lr),
106 |                 loss='categorical_crossentropy', metrics=['accuracy'])
107 | 
108 | 
109 |     # Keras2
110 |     def fit_data(self, trn, labels,  val, val_labels,  nb_epoch=1, batch_size=64):
111 |         self.model.fit(trn, labels, epochs=nb_epoch,
112 |                 validation_data=(val, val_labels), batch_size=batch_size)
113 | 
114 | 
115 |     # Keras2
116 |     def fit(self, batches, val_batches, batch_size, nb_epoch=1):
117 |         self.model.fit_generator(batches, steps_per_epoch=int(np.ceil(batches.samples/batch_size)), epochs=nb_epoch,
118 |                 validation_data=val_batches, validation_steps=int(np.ceil(val_batches.samples/batch_size)))
119 | 
120 |         
121 |     # Keras2
122 |     def test(self, path, batch_size=8):
123 |         test_batches = self.get_batches(path, shuffle=False, batch_size=batch_size, class_mode=None)
124 |         return test_batches, self.model.predict_generator(test_batches, int(np.ceil(test_batches.samples/batch_size)))
125 | 


--------------------------------------------------------------------------------
/nbs/vgg16bn.py:
--------------------------------------------------------------------------------
  1 | from __future__ import division, print_function
  2 | 
  3 | import os, json
  4 | from glob import glob
  5 | import numpy as np
  6 | from scipy import misc, ndimage
  7 | from scipy.ndimage.interpolation import zoom
  8 | 
  9 | from keras import backend as K
 10 | from keras.layers.normalization import BatchNormalization
 11 | from keras.utils.data_utils import get_file
 12 | from keras.models import Sequential
 13 | from keras.layers.core import Flatten, Dense, Dropout, Lambda
 14 | from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding2D  # Conv2D: Keras2
 15 | from keras.layers.pooling import GlobalAveragePooling2D
 16 | from keras.optimizers import SGD, RMSprop, Adam
 17 | from keras.preprocessing import image
 18 | 
 19 | 
 20 | vgg_mean = np.array([123.68, 116.779, 103.939], dtype=np.float32).reshape((3,1,1))
 21 | def vgg_preprocess(x):
 22 |     x = x - vgg_mean
 23 |     return x[:, ::-1] # reverse axis rgb->bgr
 24 | 
 25 | 
 26 | class Vgg16BN():
 27 |     """The VGG 16 Imagenet model with Batch Normalization for the Dense Layers"""
 28 | 
 29 | 
 30 |     def __init__(self, size=(224,224), include_top=True):
 31 |         self.FILE_PATH = 'http://files.fast.ai/models/'
 32 |         self.create(size, include_top)
 33 |         self.get_classes()
 34 | 
 35 | 
 36 |     def get_classes(self):
 37 |         fname = 'imagenet_class_index.json'
 38 |         fpath = get_file(fname, self.FILE_PATH+fname, cache_subdir='models')
 39 |         with open(fpath) as f:
 40 |             class_dict = json.load(f)
 41 |         self.classes = [class_dict[str(i)][1] for i in range(len(class_dict))]
 42 | 
 43 |     def predict(self, imgs, details=False):
 44 |         all_preds = self.model.predict(imgs)
 45 |         idxs = np.argmax(all_preds, axis=1)
 46 |         preds = [all_preds[i, idxs[i]] for i in range(len(idxs))]
 47 |         classes = [self.classes[idx] for idx in idxs]
 48 |         return np.array(preds), idxs, classes
 49 | 
 50 | 
 51 |     def ConvBlock(self, layers, filters):
 52 |         model = self.model
 53 |         for i in range(layers):
 54 |             model.add(ZeroPadding2D((1, 1)))
 55 |             model.add(Conv2D(filters, kernel_size=(3, 3), activation='relu'))  # Keras2
 56 |         model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
 57 | 
 58 | 
 59 |     def FCBlock(self):
 60 |         model = self.model
 61 |         model.add(Dense(4096, activation='relu'))
 62 |         model.add(BatchNormalization())
 63 |         model.add(Dropout(0.5))
 64 | 
 65 | 
 66 |     def create(self, size, include_top):
 67 |         if size != (224,224):
 68 |             include_top=False
 69 | 
 70 |         model = self.model = Sequential()
 71 |         model.add(Lambda(vgg_preprocess, input_shape=(3,)+size, output_shape=(3,)+size))
 72 | 
 73 |         self.ConvBlock(2, 64)
 74 |         self.ConvBlock(2, 128)
 75 |         self.ConvBlock(3, 256)
 76 |         self.ConvBlock(3, 512)
 77 |         self.ConvBlock(3, 512)
 78 | 
 79 |         if not include_top:
 80 |             fname = 'vgg16_bn_conv.h5'
 81 |             model.load_weights(get_file(fname, self.FILE_PATH+fname, cache_subdir='models'))
 82 |             return
 83 | 
 84 |         model.add(Flatten())
 85 |         self.FCBlock()
 86 |         self.FCBlock()
 87 |         model.add(Dense(1000, activation='softmax'))
 88 | 
 89 |         fname = 'vgg16_bn.h5'
 90 |         model.load_weights(get_file(fname, self.FILE_PATH+fname, cache_subdir='models'))
 91 | 
 92 | 
 93 |     def get_batches(self, path, gen=image.ImageDataGenerator(), shuffle=True, batch_size=8, class_mode='categorical'):
 94 |         return gen.flow_from_directory(path, target_size=(224,224),
 95 |                 class_mode=class_mode, shuffle=shuffle, batch_size=batch_size)
 96 | 
 97 | 
 98 |     def ft(self, num):
 99 |         model = self.model
100 |         model.pop()
101 |         for layer in model.layers: layer.trainable=False
102 |         model.add(Dense(num, activation='softmax'))
103 |         self.compile()
104 | 
105 |     def finetune(self, batches):
106 |         model = self.model
107 |         model.pop()
108 |         for layer in model.layers: layer.trainable=False
109 |         model.add(Dense(batches.num_classes, activation='softmax'))  # Keras 2.1
110 |         self.compile()
111 | 
112 | 
113 |     def compile(self, lr=0.001):
114 |         self.model.compile(optimizer=Adam(lr=lr),
115 |                 loss='categorical_crossentropy', metrics=['accuracy'])
116 | 
117 | 
118 |     # Keras2
119 |     def fit_data(self, trn, labels,  val, val_labels,  nb_epoch=1, batch_size=64):
120 |         self.model.fit(trn, labels, epochs=nb_epoch,
121 |                 validation_data=(val, val_labels), batch_size=batch_size)
122 | 
123 |         
124 |     # Keras2
125 |     def fit(self, batches, val_batches, batch_size, nb_epoch=1):
126 |         self.model.fit_generator(batches, steps_per_epoch=int(np.ceil(batches.samples/batch_size)), epochs=nb_epoch,
127 |                 validation_data=val_batches, validation_steps=int(np.ceil(val_batches.samples/batch_size)))
128 |         
129 | 
130 |     # Keras2
131 |     def test(self, path, batch_size=8):
132 |         test_batches = self.get_batches(path, shuffle=False, batch_size=batch_size, class_mode=None)
133 |         return test_batches, self.model.predict_generator(test_batches, int(np.ceil(test_batches.samples/batch_size)))
134 | 


--------------------------------------------------------------------------------
/nbs2/Keras-Tensorflow-Tutorial.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "ExecuteTime": {
  8 |      "end_time": "2017-09-20T09:46:27.105499Z",
  9 |      "start_time": "2017-09-20T09:46:17.701636Z"
 10 |     }
 11 |    },
 12 |    "outputs": [
 13 |     {
 14 |      "name": "stderr",
 15 |      "output_type": "stream",
 16 |      "text": [
 17 |       "Using TensorFlow backend.\n"
 18 |      ]
 19 |     }
 20 |    ],
 21 |    "source": [
 22 |     "import tensorflow as tf\n",
 23 |     "sess = tf.Session()\n",
 24 |     "\n",
 25 |     "from keras import backend as K\n",
 26 |     "K.set_session(sess)"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 2,
 32 |    "metadata": {
 33 |     "ExecuteTime": {
 34 |      "end_time": "2017-09-20T09:46:29.009523Z",
 35 |      "start_time": "2017-09-20T09:46:29.003665Z"
 36 |     },
 37 |     "collapsed": true
 38 |    },
 39 |    "outputs": [],
 40 |    "source": [
 41 |     "# this placeholder will contain our input digits, as flat vectors\n",
 42 |     "img = tf.placeholder(tf.float32, shape=(None, 784))"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": 3,
 48 |    "metadata": {
 49 |     "ExecuteTime": {
 50 |      "end_time": "2017-09-20T09:46:29.653563Z",
 51 |      "start_time": "2017-09-20T09:46:29.595156Z"
 52 |     },
 53 |     "collapsed": true
 54 |    },
 55 |    "outputs": [],
 56 |    "source": [
 57 |     "from keras.layers import Dense\n",
 58 |     "\n",
 59 |     "# Keras layers can be called on TensorFlow tensors:\n",
 60 |     "x = Dense(128, activation='relu')(img)  # fully-connected layer with 128 units and ReLU activation\n",
 61 |     "x = Dense(128, activation='relu')(x)\n",
 62 |     "preds = Dense(10, activation='softmax')(x)  # output layer with 10 units and a softmax activation"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 4,
 68 |    "metadata": {
 69 |     "ExecuteTime": {
 70 |      "end_time": "2017-09-20T09:46:30.315748Z",
 71 |      "start_time": "2017-09-20T09:46:30.291351Z"
 72 |     },
 73 |     "collapsed": true
 74 |    },
 75 |    "outputs": [],
 76 |    "source": [
 77 |     "labels = tf.placeholder(tf.float32, shape=(None, 10))\n",
 78 |     "\n",
 79 |     "from keras.objectives import categorical_crossentropy\n",
 80 |     "loss = tf.reduce_mean(categorical_crossentropy(labels, preds))"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "code",
 85 |    "execution_count": 5,
 86 |    "metadata": {
 87 |     "ExecuteTime": {
 88 |      "end_time": "2017-09-20T09:46:39.950492Z",
 89 |      "start_time": "2017-09-20T09:46:30.955584Z"
 90 |     }
 91 |    },
 92 |    "outputs": [
 93 |     {
 94 |      "name": "stdout",
 95 |      "output_type": "stream",
 96 |      "text": [
 97 |       "Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.\n",
 98 |       "Extracting MNIST_data/train-images-idx3-ubyte.gz\n",
 99 |       "Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.\n",
100 |       "Extracting MNIST_data/train-labels-idx1-ubyte.gz\n",
101 |       "Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.\n",
102 |       "Extracting MNIST_data/t10k-images-idx3-ubyte.gz\n",
103 |       "Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.\n",
104 |       "Extracting MNIST_data/t10k-labels-idx1-ubyte.gz\n"
105 |      ]
106 |     }
107 |    ],
108 |    "source": [
109 |     "from tensorflow.examples.tutorials.mnist import input_data\n",
110 |     "mnist_data = input_data.read_data_sets('MNIST_data', one_hot=True)\n",
111 |     "\n",
112 |     "train_step = tf.train.GradientDescentOptimizer(0.5).minimize(loss)\n",
113 |     "\n",
114 |     "sess.run(tf.global_variables_initializer())  # - required for Tensorflow variable initialization\n",
115 |     "\n",
116 |     "with sess.as_default():\n",
117 |     "    for i in range(100):\n",
118 |     "        batch = mnist_data.train.next_batch(50)\n",
119 |     "        train_step.run(feed_dict={img: batch[0],\n",
120 |     "                                  labels: batch[1]})"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": null,
126 |    "metadata": {
127 |     "collapsed": true
128 |    },
129 |    "outputs": [],
130 |    "source": []
131 |   }
132 |  ],
133 |  "metadata": {
134 |   "anaconda-cloud": {},
135 |   "kernelspec": {
136 |    "display_name": "p3",
137 |    "language": "python",
138 |    "name": "p3"
139 |   },
140 |   "language_info": {
141 |    "codemirror_mode": {
142 |     "name": "ipython",
143 |     "version": 3
144 |    },
145 |    "file_extension": ".py",
146 |    "mimetype": "text/x-python",
147 |    "name": "python",
148 |    "nbconvert_exporter": "python",
149 |    "pygments_lexer": "ipython3",
150 |    "version": "3.5.2"
151 |   },
152 |   "toc": {
153 |    "colors": {
154 |     "hover_highlight": "#DAA520",
155 |     "navigate_num": "#000000",
156 |     "navigate_text": "#333333",
157 |     "running_highlight": "#FF0000",
158 |     "selected_highlight": "#FFD700",
159 |     "sidebar_border": "#EEEEEE",
160 |     "wrapper_background": "#FFFFFF"
161 |    },
162 |    "moveMenuLeft": true,
163 |    "nav_menu": {
164 |     "height": "12px",
165 |     "width": "252px"
166 |    },
167 |    "navigate_menu": true,
168 |    "number_sections": true,
169 |    "sideBar": true,
170 |    "threshold": 4,
171 |    "toc_cell": false,
172 |    "toc_section_display": "block",
173 |    "toc_window_display": false,
174 |    "widenNotebook": false
175 |   }
176 |  },
177 |  "nbformat": 4,
178 |  "nbformat_minor": 2
179 | }
180 | 


--------------------------------------------------------------------------------
/nbs2/attention_wrapper.py:
--------------------------------------------------------------------------------
  1 | # - Introduced a few changes for Keras 2
  2 | from keras import backend as K
  3 | # from keras.initializations import zero  # - Not used with Keras 2
  4 | from keras.engine import InputSpec
  5 | from keras.models import Sequential
  6 | from keras.layers import LSTM, activations, Wrapper, Recurrent, Layer
  7 | 
  8 | class Attention(Layer):
  9 |     def __init__(self, fn_rnn, nlayers=1, **kwargs):
 10 |         self.supports_masking = True
 11 |         self.fn_rnn = fn_rnn
 12 |         self.nlayers = nlayers
 13 |         self.input_spec = [InputSpec(ndim=3), InputSpec(ndim=3)]
 14 |         super(Attention, self).__init__(**kwargs)
 15 | 
 16 | 
 17 |     def all_attrs(self, name):
 18 |         return sum([getattr(layer, name, []) for layer in self.layers], [])
 19 | 
 20 | 
 21 |     def w(self, dims, init, name):
 22 |         # - Keras 2 different parameter order
 23 |         return self.add_weight(dims, name=name.format(self.name), initializer=init) 
 24 | 
 25 | 
 26 |     def build(self, input_shape):
 27 |         self.enc_shape, self.dec_shape = input_shape
 28 |         assert len(self.enc_shape) >= 3
 29 |         self.layers = [self.fn_rnn() for i in range(self.nlayers)]
 30 |         nb_samples, nb_time, nb_dims = self.dec_shape
 31 |         l0 = self.layers[0]
 32 | 
 33 |         out_shape = self.compute_output_shape(input_shape)  # - changed from self.get_output_shape_for
 34 |         for layer in self.layers:
 35 |             if not layer.built: layer.build(out_shape)
 36 | 
 37 |         init = l0.kernel_initializer  # - changed from l0.init
 38 |         out_dim = l0.units  # - changed from l0.output_dim
 39 |         self.W1 = self.w((self.enc_shape[-1], nb_dims), init, '{}_W1')
 40 |         self.W2 = self.w((out_dim, nb_dims), init, '{}_W2')
 41 |         self.b2 = self.w((nb_dims,), 'zeros', '{}_b2') # - changed from zero
 42 |         self.V =  self.w((nb_dims,), init, '{}_V')
 43 |         self.W3 = self.w((nb_dims+out_dim, out_dim), init, '{}_W3')
 44 |         self.b3 = self.w((out_dim,), 'zeros', '{}_b3') # - changed from zero
 45 | 
 46 |         self.trainable_weights += self.all_attrs( 'trainable_weights')
 47 |         self.non_trainable_weights += self.all_attrs( 'non_trainable_weights')
 48 |         #self.losses += self.all_attrs( 'losses')  # - seems not available for layer in Keras 2
 49 |         #self.updates = self.all_attrs( 'updates') # - seems not available for layer in Keras 2
 50 |         self.constraints = getattr(self.layers[0], 'constraints', {}) # FIXME
 51 |         super(Attention, self).build(input_shape)
 52 | 
 53 |         
 54 |     def compute_output_shape(self, input_shape): # - changed from self.get_output_shape_for
 55 |         return self.layers[0].compute_output_shape(input_shape[1])  # - changed from self.get_output_shape_for
 56 | 
 57 |     def step(self, x, states):
 58 |         h = states[0]
 59 |         enc_output = states[-1]
 60 |         xW1 = states[-2]
 61 | 
 62 |         hW2 = K.expand_dims(K.dot(h,self.W2)+self.b2, 1)
 63 |         u = K.tanh(xW1+hW2)
 64 |         a = K.expand_dims(K.softmax(K.sum(self.V*u,2)), -1)
 65 |         Xa = K.sum(a*enc_output,1)
 66 |         h = K.dot(K.concatenate([x,Xa],1),self.W3)+self.b3
 67 | 
 68 |         for layer in self.layers: h, new_states = layer.step(h, states)
 69 |         return h, new_states
 70 | 
 71 | 
 72 |     def get_constants(self, enc_output, constants):
 73 |         constants.append(K.dot(enc_output,self.W1))
 74 |         constants.append(enc_output)
 75 |         return constants
 76 | 
 77 | 
 78 |     def compute_mask(self, input, mask):
 79 |         return self.layers[0].compute_mask(input, mask[1])
 80 | 
 81 | 
 82 |     def call(self, x, mask=None):
 83 |         l0 = self.layers[0]
 84 |         enc_output, dec_input = x
 85 | 
 86 |         if l0.stateful: initial_states = l0.states
 87 |         else: initial_states = l0.get_initial_state(dec_input)  # - changed from get_initial_states
 88 |         constants = l0.get_constants(dec_input)
 89 |         constants = self.get_constants(enc_output, constants)
 90 |         preprocessed_input = l0.preprocess_input(dec_input)
 91 |         last_output, outputs, states = K.rnn(self.step, preprocessed_input,
 92 |              initial_states, go_backwards=l0.go_backwards,  mask=mask, # - changed from mask=mask[1]
 93 |              constants=constants, unroll=l0.unroll, input_length=self.dec_shape[1])
 94 |         if l0.stateful:
 95 |             self.updates = []
 96 |             for i in range(len(states)):
 97 |                 self.updates.append((l0.states[i], states[i]))
 98 | 
 99 |         return outputs if l0.return_sequences else last_output
100 | 
101 | 


--------------------------------------------------------------------------------
/nbs2/batcher.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | from __future__ import print_function
 3 | 
 4 | import numpy as np
 5 | import re
 6 | from scipy import linalg
 7 | import scipy.ndimage as ndi
 8 | from six.moves import range
 9 | import os
10 | import threading
11 | import warnings
12 | 
13 | class Iterator(object):
14 | 
15 |     def __init__(self, n, batch_size, shuffle, seed):
16 |         self.n = n
17 |         self.batch_size = batch_size
18 |         self.shuffle = shuffle
19 |         self.batch_index = 0
20 |         self.total_batches_seen = 0
21 |         self.lock = threading.Lock()
22 |         self.index_generator = self._flow_index(n, batch_size, shuffle, seed)
23 | 
24 |     def reset(self):
25 |         self.batch_index = 0
26 | 
27 |     def _flow_index(self, n, batch_size=32, shuffle=False, seed=None):
28 |         # ensure self.batch_index is 0
29 |         self.reset()
30 |         while 1:
31 |             if seed is not None:
32 |                 np.random.seed(seed + self.total_batches_seen)
33 |             if self.batch_index == 0:
34 |                 index_array = np.arange(n)
35 |                 if shuffle:
36 |                     index_array = np.random.permutation(n)
37 | 
38 |             current_index = (self.batch_index * batch_size) % n
39 |             if n >= current_index + batch_size:
40 |                 current_batch_size = batch_size
41 |                 self.batch_index += 1
42 |             else:
43 |                 current_batch_size = n - current_index
44 |                 self.batch_index = 0
45 |             self.total_batches_seen += 1
46 |             yield (index_array[current_index: current_index + current_batch_size],
47 |                    current_index, current_batch_size)
48 | 
49 |     def __iter__(self):
50 |         # needed if we want to do something like:
51 |         # for x, y in data_gen.flow(...):
52 |         return self
53 | 
54 |     def __next__(self, *args, **kwargs):
55 |         return self.next(*args, **kwargs)
56 | 
57 | class Batcher(Iterator):
58 |     def __init__(self, x, y, batch_size=64, shuffle=False, seed=None, proc_fn=None):
59 |         if y is not None and len(x) != len(y):
60 |             raise ValueError('X (images tensor) and y (labels) '
61 |                              'should have the same length. '
62 |                              'Found: X.shape = %s, y.shape = %s' %
63 |                              (np.asarray(x).shape, np.asarray(y).shape))
64 |         self.x = np.asarray(x)
65 |         self.y = np.asarray(y) if y is not None else None
66 |         self.proc_fn = proc_fn
67 |         super(Batcher, self).__init__(x.shape[0], batch_size, shuffle, seed)
68 | 
69 | 
70 |     def next(self):
71 |         with self.lock:
72 |             index_array, current_index, current_batch_size = next(self.index_generator)
73 |         batch_x = np.zeros(tuple([current_batch_size] + list(self.x.shape)[1:]), self.x.dtype)
74 |         for i, j in enumerate(index_array):
75 |             x = self.x[j]
76 |             #x = self.image_data_generator.standardize(x)
77 |             batch_x[i] = x
78 |         res = batch_x if self.y is None else batch_x, self.y[index_array]
79 |         if self.proc_fn: res=self.proc_fn(res)
80 |         return res
81 | 
82 | 


--------------------------------------------------------------------------------
/nbs2/bcolz_array_iterator.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import bcolz
  3 | import threading
  4 | 
  5 | class BcolzArrayIterator(object):
  6 |     """
  7 |     Returns an iterator object into Bcolz carray files
  8 |     Original version by Thiago Ramon Gonçalves Montoya
  9 |     Docs (and discovery) by @MPJansen
 10 |     Refactoring, performance improvements, fixes by Jeremy Howard j@fast.ai
 11 |         :Example:
 12 |         X = bcolz.open('file_path/feature_file.bc', mode='r')
 13 |         y = bcolz.open('file_path/label_file.bc', mode='r')
 14 |         trn_batches = BcolzArrayIterator(X, y, batch_size=64, shuffle=True)
 15 |         model.fit_generator(generator=trn_batches, samples_per_epoch=trn_batches.N, nb_epoch=1)
 16 |         :param X: Input features
 17 |         :param y: (optional) Input labels
 18 |         :param w: (optional) Input feature weights
 19 |         :param batch_size: (optional) Batch size, defaults to 32
 20 |         :param shuffle: (optional) Shuffle batches, defaults to false
 21 |         :param seed: (optional) Provide a seed to shuffle, defaults to a random seed
 22 |         :rtype: BcolzArrayIterator
 23 |         >>> A = np.random.random((32*10 + 17, 10, 10))
 24 |         >>> c = bcolz.carray(A, rootdir='test.bc', mode='w', expectedlen=A.shape[0], chunklen=16)
 25 |         >>> c.flush()
 26 |         >>> Bc = bcolz.open('test.bc')
 27 |         >>> bc_it = BcolzArrayIterator(Bc, shuffle=True)
 28 |         >>> C_list = [next(bc_it) for i in range(11)]
 29 |         >>> C = np.concatenate(C_list)
 30 |         >>> np.allclose(sorted(A.flatten()), sorted(C.flatten()))
 31 |         True
 32 |     """
 33 | 
 34 |     def __init__(self, X, y=None, w=None, batch_size=32, shuffle=False, seed=None):
 35 |         if y is not None and len(X) != len(y):
 36 |             raise ValueError('X (features) and y (labels) should have the same length'
 37 |                              'Found: X.shape = %s, y.shape = %s' % (X.shape, y.shape))
 38 |         if w is not None and len(X) != len(w):
 39 |             raise ValueError('X (features) and w (weights) should have the same length'
 40 |                              'Found: X.shape = %s, w.shape = %s' % (X.shape, w.shape))
 41 |         if batch_size % X.chunklen != 0:
 42 |             raise ValueError('batch_size needs to be a multiple of X.chunklen')
 43 | 
 44 |         self.chunks_per_batch = batch_size // X.chunklen
 45 |         self.X = X
 46 |         self.y = y if y is not None else None
 47 |         self.w = w if w is not None else None
 48 |         self.N = X.shape[0]
 49 |         self.batch_size = batch_size
 50 |         self.batch_index = 0
 51 |         self.total_batches_seen = 0
 52 |         self.lock = threading.Lock()
 53 |         self.shuffle = shuffle
 54 |         self.seed = seed
 55 | 
 56 | 
 57 |     def reset(self): self.batch_index = 0
 58 | 
 59 | 
 60 |     def next(self):
 61 |         with self.lock:
 62 |             if self.batch_index == 0:
 63 |                 if self.seed is not None:
 64 |                     np.random.seed(self.seed + self.total_batches_seen)
 65 |                 self.index_array = (np.random.permutation(self.X.nchunks + 1) if self.shuffle
 66 |                     else np.arange(self.X.nchunks + 1))
 67 | 
 68 |             #batches_x = np.zeros((self.batch_size,)+self.X.shape[1:])
 69 |             batches_x, batches_y, batches_w = [],[],[]
 70 |             for i in range(self.chunks_per_batch):
 71 |                 current_index = self.index_array[self.batch_index]
 72 |                 if current_index == self.X.nchunks:
 73 |                     batches_x.append(self.X.leftover_array[:self.X.leftover_elements])
 74 |                     current_batch_size = self.X.leftover_elements
 75 |                 else:
 76 |                     batches_x.append(self.X.chunks[current_index][:])
 77 |                     current_batch_size = self.X.chunklen
 78 |                 self.batch_index += 1
 79 |                 self.total_batches_seen += 1
 80 | 
 81 |                 idx = current_index * self.X.chunklen
 82 |                 if not self.y is None: batches_y.append(self.y[idx: idx + current_batch_size])
 83 |                 if not self.w is None: batches_w.append(self.w[idx: idx + current_batch_size])
 84 |                 if self.batch_index >= len(self.index_array):
 85 |                     self.batch_index = 0
 86 |                     break
 87 | 
 88 |             batch_x = np.concatenate(batches_x)
 89 |             if self.y is None: return batch_x
 90 | 
 91 |             batch_y = np.concatenate(batches_y)
 92 |             if self.w is None: return batch_x, batch_y
 93 | 
 94 |             batch_w = np.concatenate(batches_w)
 95 |             return batch_x, batch_y, batch_w
 96 | 
 97 | 
 98 |     def __iter__(self): return self
 99 | 
100 |     def __next__(self, *args, **kwargs): return self.next(*args, **kwargs)
101 | 
102 | 


--------------------------------------------------------------------------------
/nbs2/bcolz_iter_test.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "ExecuteTime": {
  8 |      "end_time": "2018-01-10T14:31:18.850522Z",
  9 |      "start_time": "2018-01-10T14:31:18.576965Z"
 10 |     }
 11 |    },
 12 |    "outputs": [],
 13 |    "source": [
 14 |     "# - module bcolz_array_iterator2 not available, using bcolz_array_iterator\n",
 15 |     "#from bcolz_array_iterator2 import BcolzArrayIterator2  \n",
 16 |     "from bcolz_array_iterator import BcolzArrayIterator "
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 2,
 22 |    "metadata": {
 23 |     "ExecuteTime": {
 24 |      "end_time": "2018-01-10T14:31:18.854116Z",
 25 |      "start_time": "2018-01-10T14:31:18.852109Z"
 26 |     }
 27 |    },
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "from bcolz import carray"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 3,
 36 |    "metadata": {
 37 |     "ExecuteTime": {
 38 |      "end_time": "2018-01-10T14:31:18.877578Z",
 39 |      "start_time": "2018-01-10T14:31:18.855511Z"
 40 |     }
 41 |    },
 42 |    "outputs": [],
 43 |    "source": [
 44 |     "import numpy as np"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": 4,
 50 |    "metadata": {
 51 |     "ExecuteTime": {
 52 |      "end_time": "2018-01-10T14:31:18.907061Z",
 53 |      "start_time": "2018-01-10T14:31:18.878994Z"
 54 |     }
 55 |    },
 56 |    "outputs": [
 57 |     {
 58 |      "data": {
 59 |       "text/plain": [
 60 |        "array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13])"
 61 |       ]
 62 |      },
 63 |      "execution_count": 4,
 64 |      "metadata": {},
 65 |      "output_type": "execute_result"
 66 |     }
 67 |    ],
 68 |    "source": [
 69 |     "x = np.arange(14); x"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 5,
 75 |    "metadata": {
 76 |     "ExecuteTime": {
 77 |      "end_time": "2018-01-10T14:31:18.929684Z",
 78 |      "start_time": "2018-01-10T14:31:18.908318Z"
 79 |     }
 80 |    },
 81 |    "outputs": [
 82 |     {
 83 |      "data": {
 84 |       "text/plain": [
 85 |        "array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13])"
 86 |       ]
 87 |      },
 88 |      "execution_count": 5,
 89 |      "metadata": {},
 90 |      "output_type": "execute_result"
 91 |     }
 92 |    ],
 93 |    "source": [
 94 |     "y = np.arange(14); y"
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "code",
 99 |    "execution_count": 6,
100 |    "metadata": {
101 |     "ExecuteTime": {
102 |      "end_time": "2018-01-10T14:31:18.952866Z",
103 |      "start_time": "2018-01-10T14:31:18.932399Z"
104 |     }
105 |    },
106 |    "outputs": [],
107 |    "source": [
108 |     "x = carray(x, chunklen=3)\n",
109 |     "y = carray(y, chunklen=3)"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "code",
114 |    "execution_count": 7,
115 |    "metadata": {
116 |     "ExecuteTime": {
117 |      "end_time": "2018-01-10T14:31:18.975310Z",
118 |      "start_time": "2018-01-10T14:31:18.955000Z"
119 |     }
120 |    },
121 |    "outputs": [],
122 |    "source": [
123 |     "# - module bcolz_array_iterator2 not available, using bcolz_array_iterator\n",
124 |     "#b = BcolzArrayIterator2(x, y, shuffle=True, batch_size=3)\n",
125 |     "b = BcolzArrayIterator(x, y, shuffle=True, batch_size=3)"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": 8,
131 |    "metadata": {
132 |     "ExecuteTime": {
133 |      "end_time": "2018-01-10T14:31:19.003191Z",
134 |      "start_time": "2018-01-10T14:31:18.977356Z"
135 |     }
136 |    },
137 |    "outputs": [
138 |     {
139 |      "data": {
140 |       "text/plain": [
141 |        "14"
142 |       ]
143 |      },
144 |      "execution_count": 8,
145 |      "metadata": {},
146 |      "output_type": "execute_result"
147 |     }
148 |    ],
149 |    "source": [
150 |     "b.N"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "code",
155 |    "execution_count": 9,
156 |    "metadata": {
157 |     "ExecuteTime": {
158 |      "end_time": "2018-01-10T14:31:19.025683Z",
159 |      "start_time": "2018-01-10T14:31:19.005221Z"
160 |     }
161 |    },
162 |    "outputs": [
163 |     {
164 |      "data": {
165 |       "text/plain": [
166 |        "5"
167 |       ]
168 |      },
169 |      "execution_count": 9,
170 |      "metadata": {},
171 |      "output_type": "execute_result"
172 |     }
173 |    ],
174 |    "source": [
175 |     "nit = len(x)//b.batch_size+1; nit"
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "code",
180 |    "execution_count": 10,
181 |    "metadata": {
182 |     "ExecuteTime": {
183 |      "end_time": "2018-01-10T14:31:20.407336Z",
184 |      "start_time": "2018-01-10T14:31:19.027557Z"
185 |     }
186 |    },
187 |    "outputs": [],
188 |    "source": [
189 |     "for j in range(10000):\n",
190 |     "    bx,by = list(zip(*[next(b) for i in range(nit)]))\n",
191 |     "    nx = np.concatenate(bx)\n",
192 |     "    ny = np.concatenate(by)\n",
193 |     "    assert(np.allclose(nx,ny))\n",
194 |     "    assert(len(np.unique(nx))==len(nx))"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": 11,
200 |    "metadata": {
201 |     "ExecuteTime": {
202 |      "end_time": "2018-01-10T14:31:20.414801Z",
203 |      "start_time": "2018-01-10T14:31:20.408619Z"
204 |     }
205 |    },
206 |    "outputs": [
207 |     {
208 |      "data": {
209 |       "text/plain": [
210 |        "[(array([0, 1, 2]), array([0, 1, 2])),\n",
211 |        " (array([6, 7, 8]), array([6, 7, 8])),\n",
212 |        " (array([3, 4, 5]), array([3, 4, 5])),\n",
213 |        " (array([ 9, 10, 11]), array([ 9, 10, 11])),\n",
214 |        " (array([12, 13]), array([12, 13])),\n",
215 |        " (array([ 9, 10, 11]), array([ 9, 10, 11])),\n",
216 |        " (array([12, 13]), array([12, 13])),\n",
217 |        " (array([3, 4, 5]), array([3, 4, 5])),\n",
218 |        " (array([0, 1, 2]), array([0, 1, 2])),\n",
219 |        " (array([6, 7, 8]), array([6, 7, 8])),\n",
220 |        " (array([0, 1, 2]), array([0, 1, 2])),\n",
221 |        " (array([3, 4, 5]), array([3, 4, 5])),\n",
222 |        " (array([6, 7, 8]), array([6, 7, 8])),\n",
223 |        " (array([12, 13]), array([12, 13])),\n",
224 |        " (array([ 9, 10, 11]), array([ 9, 10, 11])),\n",
225 |        " (array([ 9, 10, 11]), array([ 9, 10, 11])),\n",
226 |        " (array([3, 4, 5]), array([3, 4, 5])),\n",
227 |        " (array([0, 1, 2]), array([0, 1, 2])),\n",
228 |        " (array([12, 13]), array([12, 13])),\n",
229 |        " (array([6, 7, 8]), array([6, 7, 8]))]"
230 |       ]
231 |      },
232 |      "execution_count": 11,
233 |      "metadata": {},
234 |      "output_type": "execute_result"
235 |     }
236 |    ],
237 |    "source": [
238 |     "[next(b) for i in range(20)]"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "code",
243 |    "execution_count": null,
244 |    "metadata": {},
245 |    "outputs": [],
246 |    "source": []
247 |   }
248 |  ],
249 |  "metadata": {
250 |   "kernelspec": {
251 |    "display_name": "p3",
252 |    "language": "python",
253 |    "name": "p3"
254 |   },
255 |   "language_info": {
256 |    "codemirror_mode": {
257 |     "name": "ipython",
258 |     "version": 3
259 |    },
260 |    "file_extension": ".py",
261 |    "mimetype": "text/x-python",
262 |    "name": "python",
263 |    "nbconvert_exporter": "python",
264 |    "pygments_lexer": "ipython3",
265 |    "version": "3.5.2"
266 |   },
267 |   "toc": {
268 |    "colors": {
269 |     "hover_highlight": "#DAA520",
270 |     "navigate_num": "#000000",
271 |     "navigate_text": "#333333",
272 |     "running_highlight": "#FF0000",
273 |     "selected_highlight": "#FFD700",
274 |     "sidebar_border": "#EEEEEE",
275 |     "wrapper_background": "#FFFFFF"
276 |    },
277 |    "moveMenuLeft": true,
278 |    "nav_menu": {
279 |     "height": "12px",
280 |     "width": "252px"
281 |    },
282 |    "navigate_menu": true,
283 |    "number_sections": true,
284 |    "sideBar": true,
285 |    "threshold": 4,
286 |    "toc_cell": false,
287 |    "toc_section_display": "block",
288 |    "toc_window_display": false,
289 |    "widenNotebook": false
290 |   }
291 |  },
292 |  "nbformat": 4,
293 |  "nbformat_minor": 2
294 | }
295 | 


--------------------------------------------------------------------------------
/nbs2/dcgan.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torch.nn.parallel
 4 | 
 5 | # - modified: changed string formatting to Python 3
 6 | class DCGAN_D(nn.Module):
 7 |     def conv_block(self, main, name, inf, of, a, b, c, bn=True):
 8 |         main.add_module('{}-{}.{}.conv'.format(name, inf, of), nn.Conv2d(inf, of, a, b, c, bias=False))
 9 |         main.add_module('{}-{}.batchnorm'.format(name, of), nn.BatchNorm2d(of))
10 |         main.add_module('{}-{}.relu'.format(name, of), nn.LeakyReLU(0.2, inplace=True))
11 | 
12 |     def __init__(self, isize, nc, ndf, ngpu, n_extra_layers=0):
13 |         super(DCGAN_D, self).__init__()
14 |         self.ngpu = ngpu
15 |         assert isize % 16 == 0, "isize has to be a multiple of 16"
16 | 
17 |         main = nn.Sequential()
18 |         # input is nc x isize x isize
19 |         self.conv_block(main, 'initial', nc, ndf, 4, 2, 1, False)
20 |         csize, cndf = isize / 2, ndf
21 | 
22 |         for t in range(n_extra_layers):
23 |             self.conv_block(main, 'extra-{}'.format(t), cndf, cndf, 3, 1, 1)
24 | 
25 |         while csize > 4:
26 |             self.conv_block(main, 'pyramid', cndf, cndf*2, 4, 2, 1)
27 |             cndf *= 2; csize /= 2
28 | 
29 |         # state size. K x 4 x 4
30 |         main.add_module('final.{}-1.conv'.format(cndf), nn.Conv2d(cndf, 1, 4, 1, 0, bias=False))
31 |         self.main = main
32 | 
33 | 
34 |     def forward(self, input):
35 |         gpu_ids = None
36 |         if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1:
37 |             gpu_ids = range(self.ngpu)
38 |         output = nn.parallel.data_parallel(self.main, input, gpu_ids)
39 |         output = output.mean(0)
40 |         return output.view(1)
41 | 
42 | class DCGAN_G(nn.Module):
43 |     def deconv_block(self, main, name, inf, of, a, b, c, bn=True):
44 |         main.add_module('{}-{}.{}.convt'.format(name, inf, of), nn.ConvTranspose2d(inf, of, a, b, c, bias=False))
45 |         main.add_module('{}-{}.batchnorm'.format(name, of), nn.BatchNorm2d(of))
46 |         main.add_module('{}-{}.relu'.format(name, of), nn.ReLU(inplace=True))
47 | 
48 |     def __init__(self, isize, nz, nc, ngf, ngpu, n_extra_layers=0):
49 |         super(DCGAN_G, self).__init__()
50 |         self.ngpu = ngpu
51 |         assert isize % 16 == 0, "isize has to be a multiple of 16"
52 | 
53 |         cngf, tisize = ngf//2, 4
54 |         while tisize != isize: cngf *= 2; tisize *= 2
55 | 
56 |         main = nn.Sequential()
57 |         self.deconv_block(main, 'initial', nz, cngf, 4, 1, 0)
58 | 
59 |         csize, cndf = 4, cngf
60 |         while csize < isize//2:
61 |             self.deconv_block(main, 'pyramid', cngf, cngf//2, 4, 2, 1)
62 |             cngf //= 2; csize *= 2
63 | 
64 |         for t in range(n_extra_layers):
65 |             self.deconv_block(main, 'extra-{}'.format(t), cngf, cngf, 3, 1, 1)
66 | 
67 |         main.add_module('final.{}-{}.convt'.format(cngf, nc), nn.ConvTranspose2d(cngf, nc, 4, 2, 1, bias=False))
68 |         main.add_module('final.{}.tanh'.format(nc), nn.Tanh())
69 |         self.main = main
70 | 
71 |     def forward(self, input):
72 |         gpu_ids = None
73 |         if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1:
74 |             gpu_ids = range(self.ngpu)
75 |         return nn.parallel.data_parallel(self.main, input, gpu_ids)
76 | 
77 | 


--------------------------------------------------------------------------------
/nbs2/kmeans.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import math, numpy as np
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | 
 6 | def plot_data(centroids, data, n_samples):
 7 |     colour = plt.cm.rainbow(np.linspace(0,1,len(centroids)))
 8 |     for i, centroid in enumerate(centroids):
 9 |         samples = data[i*n_samples:(i+1)*n_samples]
10 |         plt.scatter(samples[:,0], samples[:,1], c=colour[i], s=1)
11 |         plt.plot(centroid[0], centroid[1], markersize=10, marker="x", color='k', mew=5)
12 |         plt.plot(centroid[0], centroid[1], markersize=5, marker="x", color='m', mew=2)
13 | 
14 |         
15 | def all_distances(a, b):
16 |     diff = tf.squared_difference(tf.expand_dims(a, 0), tf.expand_dims(b,1))
17 |     return tf.reduce_sum(diff, axis=2)
18 |         
19 |         
20 | class Kmeans(object):
21 | 
22 |     def __init__(self, data, n_clusters):
23 |         self.n_data, self.n_dim = data.shape
24 |         self.n_clusters = n_clusters
25 |         self.data = data
26 |         self.v_data = tf.Variable(data)
27 |         self.n_samples = self.n_data//self.n_clusters
28 | 
29 |     def run(self):
30 |         tf.global_variables_initializer().run()
31 |         initial_centroids = self.find_initial_centroids(self.n_clusters).eval()
32 |         curr_centroids = tf.Variable(initial_centroids)
33 |         nearest_indices = self.assign_to_nearest(curr_centroids)
34 |         updated_centroids = self.update_centroids(nearest_indices)
35 |         # Begin main algorithm
36 |         tf.global_variables_initializer().run()
37 |         c = initial_centroids
38 |         for i in range(10):
39 |             c2 = curr_centroids.assign(updated_centroids).eval()
40 |             if np.allclose(c,c2): break
41 |             c=c2
42 |         return c2
43 | 
44 | 
45 |     def find_initial_centroids(self, k):
46 |         r_index = tf.random_uniform([1], 0, self.n_data, dtype=tf.int32)
47 |         r = tf.expand_dims(self.v_data[tf.squeeze(r_index)], dim=1)
48 |         initial_centroids = []
49 |         for i in range(k):
50 |             dist = all_distances(self.v_data, r)
51 |             farthest_index = tf.argmax(tf.reduce_min(dist, axis=0), 0)
52 |             farthest_point = self.v_data[tf.to_int32(farthest_index)]
53 |             initial_centroids.append(farthest_point)
54 |             r = tf.stack(initial_centroids)
55 |         return r
56 | 
57 |     def choose_random_centroids(self):
58 |         n_samples = tf.shape(self.v_data)[0]  # - Changed from "v_data" to "self.v_data"
59 |         random_indices = tf.random_shuffle(tf.range(0, n_samples))
60 |         centroid_indices = random_indices[:self.n_clusters]
61 |         return tf.gather(self.v_data, centroid_indices)
62 | 
63 |     def assign_to_nearest(self, centroids):
64 |         return tf.argmin(all_distances(self.v_data, centroids), 0)
65 | 
66 |     def update_centroids(self, nearest_indices):
67 |         partitions = tf.dynamic_partition(self.v_data, tf.to_int32(nearest_indices), self.n_clusters)
68 |         return tf.concat([tf.expand_dims(tf.reduce_mean(partition, 0), 0)
69 |                                       for partition in partitions], 0)
70 |    
71 |         
72 | 


--------------------------------------------------------------------------------
/nbs2/rossman_exp.py:
--------------------------------------------------------------------------------
  1 | train_ratio = 0.9
  2 | use_dict = True
  3 | use_scaler = False
  4 | init_emb = False
  5 | split_contins = True
  6 | samp_size = 100000
  7 | # samp_size = 0
  8 | 
  9 | import math, keras, datetime, pandas as pd, numpy as np, keras.backend as K
 10 | import matplotlib.pyplot as plt, xgboost, operator, random, pickle, os
 11 | from sklearn_pandas import DataFrameMapper
 12 | from sklearn.preprocessing import LabelEncoder, Imputer, StandardScaler
 13 | from keras.models import Model
 14 | from keras.layers import merge, Input
 15 | from keras.layers.core import Dense, Activation, Reshape, Flatten, Dropout
 16 | from keras.layers.embeddings import Embedding
 17 | from keras.optimizers import Adam
 18 | from keras.layers.normalization import BatchNormalization
 19 | from keras.regularizers import l2
 20 | 
 21 | # from keras import initializations  # Keras 2 does not use this module
 22 | np.set_printoptions(4)
 23 | 
 24 | cfg = K.tf.ConfigProto()
 25 | cfg.gpu_options.allow_growth = True
 26 | K.set_session(K.tf.Session(config=cfg))
 27 | 
 28 | os.chdir('data/rossman')
 29 | cat_var_dict = {'Store': 50, 'DayOfWeek': 6, 'Year': 2, 'Month': 6,
 30 |                 'Day': 10, 'StateHoliday': 3, 'CompetitionMonthsOpen': 2,
 31 |                 'Promo2Weeks': 1, 'StoreType': 2, 'Assortment': 3, 'PromoInterval': 3,
 32 |                 'CompetitionOpenSinceYear': 4, 'Promo2SinceYear': 4, 'State': 6,
 33 |                 'Week': 2, 'Events': 4, 'Promo_fw': 1,
 34 |                 'Promo_bw': 1, 'StateHoliday_fw': 1,
 35 |                 'StateHoliday_bw': 1, 'SchoolHoliday_fw': 1,
 36 |                 'SchoolHoliday_bw': 1}
 37 | 
 38 | cats, contins = [o for n, o in np.load('vars.npz').items()]
 39 | y = np.load('deps.npz').items()[0][1]
 40 | 
 41 | if samp_size != 0:
 42 |     np.random.seed(42)
 43 |     idxs = sorted(np.random.choice(len(y), samp_size, replace=False))
 44 |     cats = cats[idxs]
 45 |     contins = contins[idxs]
 46 |     y = y[idxs]
 47 | 
 48 | n = len(y)
 49 | train_size = int(n * train_ratio)
 50 | 
 51 | contins_trn_orig, contins_val_orig = contins[:train_size], contins[train_size:]
 52 | cats_trn, cats_val = cats[:train_size], cats[train_size:]
 53 | y_trn, y_val = y[:train_size], y[train_size:]
 54 | 
 55 | contin_map_fit = pickle.load(open('contin_maps.pickle', 'rb'))
 56 | cat_map_fit = pickle.load(open('cat_maps.pickle', 'rb'))
 57 | 
 58 | 
 59 | def cat_map_info(feat): return feat[0], len(feat[1].classes_)
 60 | 
 61 | 
 62 | co_enc = StandardScaler().fit(contins_trn_orig)
 63 | tf_contins_trn = co_enc.transform(contins_trn_orig)
 64 | tf_contins_val = co_enc.transform(contins_val_orig)
 65 | 
 66 | """
 67 | def rmspe(y_pred, targ = y_valid_orig):
 68 |     return math.sqrt(np.square((targ - y_pred)/targ).mean())
 69 | def log_max_inv(preds, mx = max_log_y): return np.exp(preds * mx)
 70 | def normalize_inv(preds): return preds * ystd + ymean
 71 | """
 72 | 
 73 | 
 74 | def split_cols(arr): return np.hsplit(arr, arr.shape[1])
 75 | 
 76 | 
 77 | def emb_init(shape, name=None):
 78 |     # - In Keras 2 the "initializations" module is not available.
 79 |     # - To keep here the custom initializer the code from Keras 1 "uniform" initializer is exploited
 80 |     # return initializations.uniform(shape, scale=0.6/shape[1], name=name)
 81 |     return K.variable(np.random.uniform(low=-2 / (shape[1] + 1), high=2 / (shape[1] + 1), size=shape),
 82 |                       name=name)
 83 | 
 84 | 
 85 | def get_emb(feat):
 86 |     name, c = cat_map_info(feat)
 87 |     if use_dict:
 88 |         c2 = cat_var_dict[name]
 89 |     else:
 90 |         c2 = (c + 2) // 3
 91 |         if c2 > 50: c2 = 50
 92 |     inp = Input((1,), dtype='int64', name=name + '_in')
 93 |     if init_emb:
 94 |         u = Flatten(name=name + '_flt')(Embedding(c, c2, input_length=1)(inp))
 95 |     else:
 96 |         u = Flatten(name=name + '_flt')(Embedding(c, c2, input_length=1, init=emb_init)(inp))
 97 |     return inp, u
 98 | 
 99 | 
100 | def get_contin(feat):
101 |     name = feat[0][0]
102 |     inp = Input((1,), name=name + '_in')
103 |     return inp, Dense(1, name=name + '_d')(inp)
104 | 
105 | 
106 | def split_data():
107 |     if split_contins:
108 |         map_train = split_cols(cats_trn) + split_cols(contins_trn)
109 |         map_valid = split_cols(cats_val) + split_cols(contins_val)
110 |     else:
111 |         map_train = split_cols(cats_trn) + [contins_trn]
112 |         map_valid = split_cols(cats_val) + [contins_val]
113 |     return (map_train, map_valid)
114 | 
115 | 
116 | def get_contin_one():
117 |     n_contin = contins_trn.shape[1]
118 |     contin_inp = Input((n_contin,), name='contin')
119 |     contin_out = BatchNormalization()(contin_inp)
120 |     return contin_inp, contin_out
121 | 
122 | 
123 | def train(model, map_train, map_valid, bs=128, ne=10):
124 |     return model.fit(map_train, y_trn, batch_size=bs, nb_epoch=ne,
125 |                      verbose=0, validation_data=(map_valid, y_val))
126 | 
127 | 
128 | def get_model():
129 |     if split_contins:
130 |         conts = [get_contin(feat) for feat in contin_map_fit.features]
131 |         cont_out = [d for inp, d in conts]
132 |         cont_inp = [inp for inp, d in conts]
133 |     else:
134 |         contin_inp, contin_out = get_contin_one()
135 |         cont_out = [contin_out]
136 |         cont_inp = [contin_inp]
137 | 
138 |     embs = [get_emb(feat) for feat in cat_map_fit.features]
139 |     x = merge([emb for inp, emb in embs] + cont_out, mode='concat')
140 | 
141 |     x = Dropout(0.02)(x)
142 |     x = Dense(1000, activation='relu', init='uniform')(x)
143 |     x = Dense(500, activation='relu', init='uniform')(x)
144 |     x = Dense(1, activation='sigmoid')(x)
145 | 
146 |     model = Model([inp for inp, emb in embs] + cont_inp, x)
147 |     model.compile('adam', 'mean_absolute_error')
148 |     # model.compile(Adam(), 'mse')
149 |     return model
150 | 
151 | 
152 | for split_contins in [True, False]:
153 |     for use_dict in [True, False]:
154 |         for use_scaler in [True, False]:
155 |             for init_emb in [True, False]:
156 |                 print({'split_contins': split_contins, 'use_dict': use_dict,
157 |                        'use_scaler': use_scaler, 'init_emb': init_emb})
158 |                 if use_scaler:
159 |                     contins_trn = tf_contins_trn
160 |                     contins_val = tf_contins_val
161 |                 else:
162 |                     contins_trn = contins_trn_orig
163 |                     contins_val = contins_val_orig
164 | 
165 |                 map_train, map_valid = split_data()
166 |                 model = get_model()
167 |                 hist = np.array(train(model, map_train, map_valid, 128, 10)
168 |                                 .history['val_loss'])
169 |                 print(hist)
170 |                 print(hist.min())
171 | 


--------------------------------------------------------------------------------
/nbs2/tiramisu-pytorch.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# One Hundred Layers Tiramisu"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {
 14 |     "collapsed": true
 15 |    },
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import torch\n",
 19 |     "import torch.nn as nn\n",
 20 |     "import torch.nn.functional as F\n",
 21 |     "from collections import OrderedDict"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "markdown",
 26 |    "metadata": {},
 27 |    "source": [
 28 |     "### Initial Conv Block"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 29,
 34 |    "metadata": {
 35 |     "collapsed": true
 36 |    },
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "class _FirstConv(nn.Sequential):\n",
 40 |     "    def __init__(self, num_input_features):\n",
 41 |     "        super(_FirstConv, self).__init__()\n",
 42 |     "        self.add_module('conv0', nn.Conv2d(3, num_input_features, kernel_size=7, stride=2, padding=3, bias=False))\n",
 43 |     "        self.add_module('norm0', nn.BatchNorm2d(num_input_features))\n",
 44 |     "        self.add_module('relu0', nn.ReLU(inplace=True))\n",
 45 |     "        self.add_module('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1))"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "code",
 50 |    "execution_count": 30,
 51 |    "metadata": {
 52 |     "collapsed": false
 53 |    },
 54 |    "outputs": [],
 55 |    "source": [
 56 |     "# Test\n",
 57 |     "conv1 = _FirstConv(5)"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "markdown",
 62 |    "metadata": {},
 63 |    "source": [
 64 |     "### Dense Layer"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "code",
 69 |    "execution_count": 31,
 70 |    "metadata": {
 71 |     "collapsed": true
 72 |    },
 73 |    "outputs": [],
 74 |    "source": [
 75 |     "class _DenseLayer(nn.Sequential):\n",
 76 |     "    def __init__(self, num_input_features, growth_rate, bn_size, drop_rate):\n",
 77 |     "        super(_DenseLayer, self).__init__()\n",
 78 |     "        self.add_module('norm.1', nn.BatchNorm2d(num_input_features)),\n",
 79 |     "        self.add_module('relu.1', nn.ReLU(inplace=True)),\n",
 80 |     "        self.add_module('conv.1', nn.Conv2d(num_input_features, bn_size *\n",
 81 |     "                        growth_rate, kernel_size=1, stride=1, bias=False)),\n",
 82 |     "        self.add_module('norm.2', nn.BatchNorm2d(bn_size * growth_rate)),\n",
 83 |     "        self.add_module('relu.2', nn.ReLU(inplace=True)),\n",
 84 |     "        self.add_module('conv.2', nn.Conv2d(bn_size * growth_rate, growth_rate,\n",
 85 |     "                                            kernel_size=3, stride=1, padding=1, bias=False)),\n",
 86 |     "        self.drop_rate = drop_rate\n",
 87 |     "\n",
 88 |     "    def forward(self, x):\n",
 89 |     "        new_features = super(_DenseLayer, self).forward(x)\n",
 90 |     "        if self.drop_rate > 0:\n",
 91 |     "            new_features = F.dropout(new_features, p=self.drop_rate, training=self.training)\n",
 92 |     "        return torch.cat([x, new_features], 1)"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "execution_count": 32,
 98 |    "metadata": {
 99 |     "collapsed": true
100 |    },
101 |    "outputs": [],
102 |    "source": [
103 |     "# Test\n",
104 |     "dense1 = _DenseLayer(5,3,1,.5)"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "markdown",
109 |    "metadata": {},
110 |    "source": [
111 |     "### Dense Block"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": 33,
117 |    "metadata": {
118 |     "collapsed": true
119 |    },
120 |    "outputs": [],
121 |    "source": [
122 |     "class _DenseBlock(nn.Sequential):\n",
123 |     "    def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate):\n",
124 |     "        super(_DenseBlock, self).__init__()\n",
125 |     "        for i in range(num_layers):\n",
126 |     "            layer = _DenseLayer(num_input_features + i * growth_rate, growth_rate, bn_size, drop_rate)\n",
127 |     "            self.add_module('denselayer%d' % (i + 1), layer)"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": 34,
133 |    "metadata": {
134 |     "collapsed": true
135 |    },
136 |    "outputs": [],
137 |    "source": [
138 |     "# Test\n",
139 |     "denseBlock1 = _DenseBlock(4,4,1,4,.5)"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "markdown",
144 |    "metadata": {},
145 |    "source": [
146 |     "### Transition Up"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "code",
151 |    "execution_count": 35,
152 |    "metadata": {
153 |     "collapsed": true
154 |    },
155 |    "outputs": [],
156 |    "source": [
157 |     "class _TransitionUp(nn.Sequential):\n",
158 |     "    def __init__(self, num_input_features, num_output_features):\n",
159 |     "        super(_TransitionUp, self).__init__()\n",
160 |     "        self.add_module('norm', nn.BatchNorm2d(num_input_features))\n",
161 |     "        self.add_module('relu', nn.ReLU(inplace=True))\n",
162 |     "        self.add_module('conv', nn.Conv2d(num_input_features, num_output_features,\n",
163 |     "                                          kernel_size=1, stride=1, bias=False))\n",
164 |     "        self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2))"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "code",
169 |    "execution_count": 36,
170 |    "metadata": {
171 |     "collapsed": false
172 |    },
173 |    "outputs": [],
174 |    "source": [
175 |     "# Test\n",
176 |     "transUp = _TransitionUp(5,10)"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "markdown",
181 |    "metadata": {},
182 |    "source": [
183 |     "### Transition Down"
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "code",
188 |    "execution_count": 37,
189 |    "metadata": {
190 |     "collapsed": true
191 |    },
192 |    "outputs": [],
193 |    "source": [
194 |     "class _TransitionDown(nn.Sequential):\n",
195 |     "    def __init__(self, num_input_features, num_output_features):\n",
196 |     "        super(_TransitionDown, self).__init__()\n",
197 |     "        self.add_module('norm', nn.BatchNorm2d(num_input_features))\n",
198 |     "        self.add_module('relu', nn.ReLU(inplace=True))\n",
199 |     "        self.add_module('conv', nn.Conv2d(num_input_features, num_output_features,\n",
200 |     "                                          kernel_size=1, stride=1, bias=False))\n",
201 |     "        self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2))"
202 |    ]
203 |   },
204 |   {
205 |    "cell_type": "code",
206 |    "execution_count": 38,
207 |    "metadata": {
208 |     "collapsed": true
209 |    },
210 |    "outputs": [],
211 |    "source": [
212 |     "# Test\n",
213 |     "transDown = _TransitionDown(5,10)"
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "markdown",
218 |    "metadata": {},
219 |    "source": [
220 |     "### Final Model"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "code",
225 |    "execution_count": 45,
226 |    "metadata": {
227 |     "collapsed": true
228 |    },
229 |    "outputs": [],
230 |    "source": [
231 |     "class FCDenseNet(nn.Module):\n",
232 |     "    r\"\"\"FC-DenseNet model class, based on\n",
233 |     "    `\"The One Hundred Layers Tiramisu: Fully Convolutional DenseNets for Semantic Segmentation\" <https://arxiv.org/pdf/1611.09326>`\n",
234 |     "\n",
235 |     "    Args:\n",
236 |     "        growth_rate (int) - how many filters to add each layer (`k` in paper)\n",
237 |     "        block_config (list of 4 ints) - how many layers in each pooling block\n",
238 |     "        num_init_features (int) - the number of filters to learn in the first convolution layer\n",
239 |     "        bn_size (int) - multiplicative factor for number of bottle neck layers\n",
240 |     "          (i.e. bn_size * k features in the bottleneck layer)\n",
241 |     "        drop_rate (float) - dropout rate after each dense layer\n",
242 |     "        num_classes (int) - number of classification classes\n",
243 |     "    \"\"\"\n",
244 |     "    def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16),\n",
245 |     "                 num_init_features=64, bn_size=4, drop_rate=0, num_classes=1000):\n",
246 |     "\n",
247 |     "        super(FCDenseNet, self).__init__()\n",
248 |     "        \n",
249 |     "        self.features = nn.Sequential()\n",
250 |     "        self.features.add_module('firstConv', _FirstConv(num_init_features))\n",
251 |     "\n",
252 |     "        # Each denseblock\n",
253 |     "        num_features = num_init_features\n",
254 |     "        for i, num_layers in enumerate(block_config):\n",
255 |     "            block = _DenseBlock(num_layers=num_layers, num_input_features=num_features,\n",
256 |     "                                bn_size=bn_size, growth_rate=growth_rate, drop_rate=drop_rate)\n",
257 |     "            self.features.add_module('denseblock%d' % (i + 1), block)\n",
258 |     "            num_features = num_features + num_layers * growth_rate\n",
259 |     "            if i != len(block_config) - 1:\n",
260 |     "                trans = _Transition(num_input_features=num_features, num_output_features=num_features // 2)\n",
261 |     "                self.features.add_module('transition%d' % (i + 1), trans)\n",
262 |     "                num_features = num_features // 2\n",
263 |     "\n",
264 |     "        # Final batch norm\n",
265 |     "        self.features.add_module('norm5', nn.BatchNorm2d(num_features))\n",
266 |     "\n",
267 |     "        # Linear layer\n",
268 |     "        self.classifier = nn.Linear(num_features, num_classes)\n",
269 |     "\n",
270 |     "    def forward(self, x):\n",
271 |     "        features = self.features(x)\n",
272 |     "        out = F.relu(features, inplace=True)\n",
273 |     "        out = F.avg_pool2d(out, kernel_size=7).view(features.size(0), -1)\n",
274 |     "        out = self.classifier(out)\n",
275 |     "        return out"
276 |    ]
277 |   },
278 |   {
279 |    "cell_type": "code",
280 |    "execution_count": 46,
281 |    "metadata": {
282 |     "collapsed": false
283 |    },
284 |    "outputs": [],
285 |    "source": [
286 |     "# Test\n",
287 |     "model = FCDenseNet()"
288 |    ]
289 |   },
290 |   {
291 |    "cell_type": "code",
292 |    "execution_count": null,
293 |    "metadata": {
294 |     "collapsed": true
295 |    },
296 |    "outputs": [],
297 |    "source": []
298 |   }
299 |  ],
300 |  "metadata": {
301 |   "kernelspec": {
302 |    "display_name": "Python 3",
303 |    "language": "python",
304 |    "name": "python3"
305 |   },
306 |   "language_info": {
307 |    "codemirror_mode": {
308 |     "name": "ipython",
309 |     "version": 3
310 |    },
311 |    "file_extension": ".py",
312 |    "mimetype": "text/x-python",
313 |    "name": "python",
314 |    "nbconvert_exporter": "python",
315 |    "pygments_lexer": "ipython3",
316 |    "version": "3.6.2"
317 |   },
318 |   "latex_envs": {
319 |    "LaTeX_envs_menu_present": true,
320 |    "autocomplete": true,
321 |    "bibliofile": "biblio.bib",
322 |    "cite_by": "apalike",
323 |    "current_citInitial": 1,
324 |    "eqLabelWithNumbers": true,
325 |    "eqNumInitial": 1,
326 |    "hotkeys": {
327 |     "equation": "Ctrl-E",
328 |     "itemize": "Ctrl-I"
329 |    },
330 |    "labels_anchors": false,
331 |    "latex_user_defs": false,
332 |    "report_style_numbering": false,
333 |    "user_envs_cfg": false
334 |   }
335 |  },
336 |  "nbformat": 4,
337 |  "nbformat_minor": 2
338 | }
339 | 


--------------------------------------------------------------------------------
/nbs2/torch_utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch import optim, nn, FloatTensor as FT
 3 | import torch.nn.parallel
 4 | import torch.utils.data
 5 | from torch.backends import cudnn
 6 | from torchvision import datasets, transforms, utils as vutils
 7 | from torch.autograd import Variable
 8 | 
 9 | import operator
10 | 
11 | def unit_prefix(x, n=1):
12 |     for i in range(n): x = x.unsqueeze(0)
13 |     return x
14 | 
15 | def align(x, y, start_dim=2):
16 |     xd, yd = x.dim(), y.dim()
17 |     if xd > yd: y = unit_prefix(y, xd - yd)
18 |     elif yd > xd: x = unit_prefix(x, yd - xd)
19 | 
20 |     xs, ys = list(x.size()), list(y.size())
21 |     nd = len(ys)
22 |     for i in range(start_dim, nd):
23 |         td = nd-i-1
24 |         if   ys[td]==1: ys[td] = xs[td]
25 |         elif xs[td]==1: xs[td] = ys[td]
26 |     return x.expand(*xs), y.expand(*ys)
27 | 
28 | def dot(x, y):
29 |     assert(1<y.dim()<5)
30 |     x, y = align(x, y)
31 |     
32 |     if y.dim() == 2: return x.mm(y)
33 |     elif y.dim() == 3: return x.bmm(y)
34 |     else:
35 |         xs,ys = x.size(), y.size()
36 |         res = torch.zeros(*(xs[:-1] + (ys[-1],)))
37 |         for i in range(xs[0]): res[i].baddbmm_(x[i], (y[i]))
38 |         return res
39 | 
40 | 
41 | def aligned_op(x,y,f): return f(*align(x,y,0))
42 | 
43 | def add(x, y): return aligned_op(x, y, operator.add)
44 | def sub(x, y): return aligned_op(x, y, operator.sub)
45 | def mul(x, y): return aligned_op(x, y, operator.mul)
46 | def div(x, y): return aligned_op(x, y, operator.truediv)


--------------------------------------------------------------------------------
/nbs2/utils2.py:
--------------------------------------------------------------------------------
  1 | import math, keras, datetime, pandas as pd, numpy as np, keras.backend as K, threading, json, re, collections
  2 | import tarfile, tensorflow as tf, matplotlib.pyplot as plt, xgboost, operator, random, pickle, glob, os, bcolz
  3 | import shutil, sklearn, functools, itertools, scipy
  4 | from PIL import Image
  5 | from concurrent.futures import ProcessPoolExecutor, as_completed, ThreadPoolExecutor
  6 | import matplotlib.patheffects as PathEffects
  7 | from sklearn.preprocessing import LabelEncoder, StandardScaler
  8 | from sklearn.neighbors import NearestNeighbors, LSHForest
  9 | import IPython
 10 | from IPython.display import display, Audio
 11 | from numpy.random import normal
 12 | from gensim.models import word2vec
 13 | from keras.preprocessing.text import Tokenizer
 14 | #from nltk.tokenize import ToktokTokenizer, StanfordTokenizer  # - changed for compatibility with conda-installed nltk
 15 | from nltk.tokenize import ToktokTokenizer  # - changed for compatibility with conda-installed nltk
 16 | from nltk.tokenize.stanford import StanfordTokenizer  # - changed for compatibility with conda-installed nltk
 17 | from functools import reduce
 18 | from itertools import chain
 19 | 
 20 | from tensorflow.python.framework import ops
 21 | #from tensorflow.contrib import rnn, legacy_seq2seq as seq2seq
 22 | 
 23 | from keras_tqdm import TQDMNotebookCallback
 24 | #from keras import initializations  # Keras 1
 25 | from keras.applications.resnet50 import ResNet50, decode_predictions, conv_block, identity_block
 26 | from keras.applications.vgg16 import VGG16
 27 | from keras.preprocessing import image
 28 | from keras.preprocessing.sequence import pad_sequences
 29 | from keras.models import Model, Sequential
 30 | from keras.layers import *
 31 | from keras.optimizers import Adam
 32 | from keras.regularizers import l2
 33 | from keras.utils.data_utils import get_file
 34 | from keras.applications.imagenet_utils import decode_predictions, preprocess_input
 35 | 
 36 | 
 37 | np.set_printoptions(threshold=50, edgeitems=20)
 38 | def beep(): return Audio(filename='/home/jhoward/beep.mp3', autoplay=True)
 39 | def dump(obj, fname): pickle.dump(obj, open(fname, 'wb'))
 40 | def load(fname): return pickle.load(open(fname, 'rb'))
 41 | 
 42 | 
 43 | def limit_mem():
 44 |     K.get_session().close()
 45 |     cfg = K.tf.ConfigProto()
 46 |     cfg.gpu_options.allow_growth = True
 47 |     K.set_session(K.tf.Session(config=cfg))
 48 | 
 49 | 
 50 | def autolabel(plt, fmt='%.2f'):
 51 |     rects = plt.patches
 52 |     ax = rects[0].axes
 53 |     y_bottom, y_top = ax.get_ylim()
 54 |     y_height = y_top - y_bottom
 55 |     for rect in rects:
 56 |         height = rect.get_height()
 57 |         if height / y_height > 0.95:
 58 |             label_position = height - (y_height * 0.06)
 59 |         else:
 60 |             label_position = height + (y_height * 0.01)
 61 |         txt = ax.text(rect.get_x() + rect.get_width()/2., label_position,
 62 |                 fmt % height, ha='center', va='bottom')
 63 |         txt.set_path_effects([PathEffects.withStroke(linewidth=3, foreground='w')])
 64 | 
 65 | 
 66 | def column_chart(lbls, vals, val_lbls='%.2f'):
 67 |     n = len(lbls)
 68 |     p = plt.bar(np.arange(n), vals)
 69 |     plt.xticks(np.arange(n), lbls)
 70 |     if val_lbls: autolabel(p, val_lbls)
 71 | 
 72 | 
 73 | def save_array(fname, arr):
 74 |     c=bcolz.carray(arr, rootdir=fname, mode='w')
 75 |     c.flush()
 76 | 
 77 | 
 78 | def load_array(fname): return bcolz.open(fname)[:]
 79 | 
 80 | 
 81 | def load_glove(loc):
 82 |     return (load_array(loc+'.dat'),
 83 |         pickle.load(open(loc+'_words.pkl','rb'), encoding='latin1'),
 84 |         pickle.load(open(loc+'_idx.pkl','rb'), encoding='latin1'))
 85 | 
 86 | def plot_multi(im, dim=(4,4), figsize=(6,6), **kwargs ):
 87 |     plt.figure(figsize=figsize)
 88 |     for i,img in enumerate(im):
 89 |         plt.subplot(*dim, i+1)
 90 |         plt.imshow(img, **kwargs)
 91 |         plt.axis('off')
 92 |     plt.tight_layout()
 93 | 
 94 | 
 95 | def plot_train(hist):
 96 |     h = hist.history
 97 |     if 'acc' in h:
 98 |         meas='acc'
 99 |         loc='lower right'
100 |     else:
101 |         meas='loss'
102 |         loc='upper right'
103 |     plt.plot(hist.history[meas])
104 |     plt.plot(hist.history['val_'+meas])
105 |     plt.title('model '+meas)
106 |     plt.ylabel(meas)
107 |     plt.xlabel('epoch')
108 |     plt.legend(['train', 'validation'], loc=loc)
109 | 
110 | 
111 | def fit_gen(gen, fn, eval_fn, nb_iter):
112 |     for i in range(nb_iter):
113 |         fn(*next(gen))
114 |         if i % (nb_iter//10) == 0: eval_fn()
115 | 
116 | 
117 | def wrap_config(layer):
118 |     return {'class_name': layer.__class__.__name__, 'config': layer.get_config()}
119 | 
120 | 
121 | def copy_layer(layer): return layer_from_config(wrap_config(layer))
122 | 
123 | 
124 | def copy_layers(layers): return [copy_layer(layer) for layer in layers]
125 | 
126 | 
127 | def copy_weights(from_layers, to_layers):
128 |     for from_layer,to_layer in zip(from_layers, to_layers):
129 |         to_layer.set_weights(from_layer.get_weights())
130 | 
131 | 
132 | def copy_model(m):
133 |     res = Sequential(copy_layers(m.layers))
134 |     copy_weights(m.layers, res.layers)
135 |     return res
136 | 
137 | 
138 | def insert_layer(model, new_layer, index):
139 |     res = Sequential()
140 |     for i,layer in enumerate(model.layers):
141 |         if i==index: res.add(new_layer)
142 |         copied = layer_from_config(wrap_config(layer))
143 |         res.add(copied)
144 |         copied.set_weights(layer.get_weights())
145 |     return res
146 | 
147 | 


--------------------------------------------------------------------------------
/nbs2/vgg16.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from __future__ import print_function
 3 | from __future__ import absolute_import
 4 | 
 5 | import warnings
 6 | 
 7 | from keras.models import Model
 8 | from keras.layers import Flatten, Dense, Input, Dropout
 9 | from keras.layers import Conv2D, MaxPooling2D
10 | from keras.engine.topology import get_source_inputs
11 | from keras.utils.layer_utils import convert_all_kernels_in_model
12 | from keras.utils.data_utils import get_file
13 | from keras import backend as K
14 | from keras.applications.imagenet_utils import decode_predictions, preprocess_input, _obtain_input_shape
15 | 
16 | 
17 | PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/'
18 | TF_WEIGHTS = 'vgg16_weights_tf_dim_ordering_tf_kernels.h5'
19 | TF_WEIGHTS_NO_TOP = 'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
20 | 
21 | 
22 | def VGG16(include_top=True, weights='imagenet',
23 |           input_tensor=None, input_shape=None):
24 |     if weights not in {'imagenet', None}:
25 |         raise ValueError('The `weights` argument should be either '
26 |                          '`None` (random initialization) or `imagenet` '
27 |                          '(pre-training on ImageNet).')
28 |     # Determine proper input shape
29 |     input_shape = _obtain_input_shape(input_shape,
30 |                                       default_size=224,
31 |                                       min_size=48,
32 |                                       dim_ordering=K.image_dim_ordering(),
33 |                                       include_top=include_top)
34 | 
35 |     if input_tensor is None:
36 |         img_input = Input(shape=input_shape)
37 |     else:
38 |         if not K.is_keras_tensor(input_tensor):
39 |             img_input = Input(tensor=input_tensor, shape=input_shape)
40 |         else:
41 |             img_input = input_tensor
42 |     # Block 1
43 |     x = Conv2D(64, 3, 3, activation='relu', padding='same', name='block1_conv1')(img_input)
44 |     x = Conv2D(64, 3, 3, activation='relu', padding='same', name='block1_conv2')(x)
45 |     x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)
46 | 
47 |     # Block 2
48 |     x = Conv2D(128, 3, 3, activation='relu', padding='same', name='block2_conv1')(x)
49 |     x = Conv2D(128, 3, 3, activation='relu', padding='same', name='block2_conv2')(x)
50 |     x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)
51 | 
52 |     # Block 3
53 |     x = Conv2D(256, 3, 3, activation='relu', padding='same', name='block3_conv1')(x)
54 |     x = Conv2D(256, 3, 3, activation='relu', padding='same', name='block3_conv2')(x)
55 |     x = Conv2D(256, 3, 3, activation='relu', padding='same', name='block3_conv3')(x)
56 |     x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)
57 | 
58 |     # Block 4
59 |     x = Conv2D(512, 3, 3, activation='relu', padding='same', name='block4_conv1')(x)
60 |     x = Conv2D(512, 3, 3, activation='relu', padding='same', name='block4_conv2')(x)
61 |     x = Conv2D(512, 3, 3, activation='relu', padding='same', name='block4_conv3')(x)
62 |     x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)
63 | 
64 |     # Block 5
65 |     x = Conv2D(512, 3, 3, activation='relu', padding='same', name='block5_conv1')(x)
66 |     x = Conv2D(512, 3, 3, activation='relu', padding='same', name='block5_conv2')(x)
67 |     x = Conv2D(512, 3, 3, activation='relu', padding='same', name='block5_conv3')(x)
68 |     x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)
69 | 
70 |     if include_top:
71 |         # Classification block
72 |         x = Flatten(name='flatten')(x)
73 |         x = Dense(4096, activation='relu', name='fc1')(x)
74 |         x = Dropout(0.5)(x)
75 |         x = Dense(4096, activation='relu', name='fc2')(x)
76 |         x = Dropout(0.5)(x)
77 |         x = Dense(1000, activation='softmax', name='predictions')(x)
78 | 
79 |     # Ensure that the model takes into account
80 |     # any potential predecessors of `input_tensor`.
81 |     if input_tensor is not None:
82 |         inputs = get_source_inputs(input_tensor)
83 |     else:
84 |         inputs = img_input
85 |     # Create model.
86 |     model = Model(inputs, x, name='vgg16')
87 | 
88 |     # load weights
89 |     if weights == 'imagenet':
90 |         if K.image_dim_ordering() == 'tf':
91 |             wname = TF_WEIGHTS if include_top else TF_WEIGHTS_NO_TOP
92 |             weights_path = get_file(wname, PATH+wname, cache_subdir='models')
93 |             model.load_weights(weights_path)
94 |     return model
95 | 


--------------------------------------------------------------------------------
/nbs2/vgg16_avg.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | from __future__ import absolute_import
  3 | 
  4 | import warnings
  5 | 
  6 | from keras.models import Model
  7 | from keras.layers import Flatten, Dense, Input
  8 | from keras.layers import Conv2D, AveragePooling2D
  9 | from keras.engine.topology import get_source_inputs
 10 | from keras.utils.layer_utils import convert_all_kernels_in_model
 11 | from keras.utils.data_utils import get_file
 12 | from keras import backend as K
 13 | from keras.applications.imagenet_utils import decode_predictions, preprocess_input, _obtain_input_shape
 14 | 
 15 | 
 16 | TH_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_th_dim_ordering_th_kernels.h5'
 17 | TF_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5'
 18 | TH_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_th_dim_ordering_th_kernels_notop.h5'
 19 | TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
 20 | 
 21 | 
 22 | def VGG16_Avg(include_top=True, weights='imagenet',
 23 |           input_tensor=None, input_shape=None,
 24 |           classes=1000):
 25 |     if weights not in {'imagenet', None}:
 26 |         raise ValueError('The `weights` argument should be either '
 27 |                          '`None` (random initialization) or `imagenet` '
 28 |                          '(pre-training on ImageNet).')
 29 | 
 30 |     if weights == 'imagenet' and include_top and classes != 1000:
 31 |         raise ValueError('If using `weights` as imagenet with `include_top`'
 32 |                          ' as true, `classes` should be 1000')
 33 |     # Determine proper input shape
 34 |     input_shape = _obtain_input_shape(input_shape,
 35 |                                       default_size=224,
 36 |                                       min_size=48,
 37 |                                       data_format=K.image_data_format(),  # Keras2 dim_ordering=K.image_dim_ordering(),
 38 |                                       include_top=include_top)
 39 | 
 40 |     if input_tensor is None:
 41 |         img_input = Input(shape=input_shape)
 42 |     else:
 43 |         if not K.is_keras_tensor(input_tensor):
 44 |             img_input = Input(tensor=input_tensor, shape=input_shape)
 45 |         else:
 46 |             img_input = input_tensor
 47 |     # Block 1
 48 |     x = Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
 49 |     x = Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same', name='block1_conv2')(x)
 50 |     x = AveragePooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)
 51 | 
 52 |     # Block 2
 53 |     x = Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same', name='block2_conv1')(x)
 54 |     x = Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same', name='block2_conv2')(x)
 55 |     x = AveragePooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)
 56 | 
 57 |     # Block 3
 58 |     x = Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same', name='block3_conv1')(x)
 59 |     x = Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same', name='block3_conv2')(x)
 60 |     x = Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same', name='block3_conv3')(x)
 61 |     x = AveragePooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)
 62 | 
 63 |     # Block 4
 64 |     x = Conv2D(512, kernel_size=(3, 3), activation='relu', padding='same', name='block4_conv1')(x)
 65 |     x = Conv2D(512, kernel_size=(3, 3), activation='relu', padding='same', name='block4_conv2')(x)
 66 |     x = Conv2D(512, kernel_size=(3, 3), activation='relu', padding='same', name='block4_conv3')(x)
 67 |     x = AveragePooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)
 68 | 
 69 |     # Block 5
 70 |     x = Conv2D(512, kernel_size=(3, 3), activation='relu', padding='same', name='block5_conv1')(x)
 71 |     x = Conv2D(512, kernel_size=(3, 3), activation='relu', padding='same', name='block5_conv2')(x)
 72 |     x = Conv2D(512, kernel_size=(3, 3), activation='relu', padding='same', name='block5_conv3')(x)
 73 |     x = AveragePooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)
 74 | 
 75 |     if include_top:
 76 |         # Classification block
 77 |         x = Flatten(name='flatten')(x)
 78 |         x = Dense(4096, activation='relu', name='fc1')(x)
 79 |         x = Dense(4096, activation='relu', name='fc2')(x)
 80 |         x = Dense(classes, activation='softmax', name='predictions')(x)
 81 | 
 82 |     # Ensure that the model takes into account
 83 |     # any potential predecessors of `input_tensor`.
 84 |     if input_tensor is not None:
 85 |         inputs = get_source_inputs(input_tensor)
 86 |     else:
 87 |         inputs = img_input
 88 |     # Create model.
 89 |     model = Model(inputs, x, name='vgg16')
 90 | 
 91 |     # load weights
 92 |     if weights == 'imagenet':
 93 |         if K.image_data_format() == 'channels_first':
 94 |             if include_top:
 95 |                 weights_path = get_file('vgg16_weights_th_dim_ordering_th_kernels.h5',
 96 |                                         TH_WEIGHTS_PATH,
 97 |                                         cache_subdir='models')
 98 |             else:
 99 |                 weights_path = get_file('vgg16_weights_th_dim_ordering_th_kernels_notop.h5',
100 |                                         TH_WEIGHTS_PATH_NO_TOP,
101 |                                         cache_subdir='models')
102 |             model.load_weights(weights_path)
103 |             if K.backend() == 'tensorflow':
104 |                 warnings.warn('You are using the TensorFlow backend, yet you '
105 |                               'are using the Theano '
106 |                               'image dimension ordering convention '
107 |                               '(`image_data_format="channels_first"`). '
108 |                               'For best performance, set '
109 |                               '`image_data_format="channels_last"` in '
110 |                               'your Keras config '
111 |                               'at ~/.keras/keras.json.')
112 |                 convert_all_kernels_in_model(model)
113 |         else:
114 |             if include_top:
115 |                 weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels.h5',
116 |                                         TF_WEIGHTS_PATH,
117 |                                         cache_subdir='models')
118 |             else:
119 |                 weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',
120 |                                         TF_WEIGHTS_PATH_NO_TOP,
121 |                                         cache_subdir='models')
122 |             model.load_weights(weights_path)
123 |             if K.backend() == 'theano':
124 |                 convert_all_kernels_in_model(model)
125 |     return model
126 | 


--------------------------------------------------------------------------------