├── .gitignore
├── .travis.yml
├── README.md
├── docs
│   ├── Makefile
│   ├── images
│   │   ├── convolution_filter.png
│   │   ├── dropout.png
│   │   ├── generalization_error.png
│   │   ├── laplacian_filter.png
│   │   ├── max_pooling.png
│   │   └── mlp.png
│   ├── requirements.txt
│   ├── source
│   │   ├── _ext
│   │   │   └── show_on_colaboratory.py
│   │   ├── _static
│   │   │   └── colaboratory.png
│   │   ├── _templates
│   │   │   └── searchbox.html
│   │   ├── begginers_hands_on.rst
│   │   ├── conf.py
│   │   ├── index.rst
│   │   ├── official_examples.rst
│   │   ├── other_examples.rst
│   │   └── other_hands_on.rst
│   └── source_ja
│       ├── _ext
│       │   └── show_on_colaboratory.py
│       ├── _static
│       ├── _templates
│       ├── begginers_hands_on.rst
│       ├── conf.py
│       ├── index.rst
│       ├── official_examples.rst
│       ├── other_examples.rst
│       └── other_hands_on.rst
├── example
│   ├── chainer
│   │   ├── OfficialTutorial
│   │   │   └── 02_1_How_to_Write_a_New_Network.ipynb
│   │   └── mnist
│   │       ├── Chainer_MNIST_Example_en.ipynb
│   │       └── Chainer_MNIST_Example_ja.ipynb
│   └── cupy
│       └── prml
│           ├── LICENSE
│           └── ch01_Introduction_ipynb.ipynb
├── hands_on_en
│   ├── chainer
│   │   ├── begginers_hands_on
│   │   │   ├── 01_Write_the_training_loop.ipynb
│   │   │   ├── 02_Try_Trainer_class.ipynb
│   │   │   ├── 03_Write_your_own_network.ipynb
│   │   │   └── 04_Write_your_own_dataset_class.ipynb
│   │   ├── chainer.ipynb
│   │   └── classify_anime_characters.ipynb
│   ├── chainercv
│   │   └── object_detection_tutorial.ipynb
│   └── chainerrl
│       ├── README.md
│       └── quickstart.ipynb
├── hands_on_ja
│   ├── chainer
│   │   ├── begginers_hands_on
│   │   │   ├── 00_How_to_use_chainer_on_colaboratory.ipynb
│   │   │   ├── 01_Chainer_basic_tutorial.ipynb
│   │   │   ├── 02_Chainer_kaggle_tutorial.ipynb
│   │   │   ├── 11_Write_the_training_loop.ipynb
│   │   │   ├── 12_Try_Trainer_class.ipynb
│   │   │   ├── 13_Write_your_own_network.ipynb
│   │   │   └── 14_Write_your_own_dataset_class.ipynb
│   │   ├── chainer.ipynb
│   │   ├── chainer_tutorial_book.ipynb
│   │   └── classify_anime_characters.ipynb
│   └── chainerrl
│       ├── README.md
│       ├── atari_sample.ipynb
│       └── quickstart.ipynb
├── official_example_en
│   ├── dcgan.ipynb
│   ├── sentiment.ipynb
│   └── word2vec.ipynb
├── official_example_ja
│   ├── sentiment.ipynb
│   ├── wavenet.ipynb
│   └── word2vec.ipynb
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
103 | # Mac
104 | .DS_Store
105 |
106 | # for this repo
107 | notebook/
108 | notebook_ja/
109 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 | - '3.6'
4 | before_install:
5 | - sudo apt-get install -y pandoc
6 | install: pip install -q -r requirements.txt
7 | script: make -C docs html
8 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # chainer-colab-notebook
2 |
3 | ## Overview
4 | You can quickly start Chainer hands-on with Colaboratory:
5 |
6 | - with no setup,
7 | - with a GPU,
8 | - and without MONEY!
9 |
10 | ## Description
11 | The GitHub repository [chainer-community/chainer-colab-notebook](https://github.com/chainer-community/chainer-colab-notebook) is synchronized with [ReadTheDocs](https://chainer-colab-notebook.readthedocs.io/en/latest/).
12 | All the notebooks here can be run on Colaboratory.
13 |
14 |
15 | ## Requirement
16 | You can use the notebooks anywhere Colaboratory is available.
17 | See the [Colaboratory FAQ](https://research.google.com/colaboratory/faq.html) for details.
18 |
19 | ## Usage
20 | 1. Access [Chainer Colab Notebook](https://chainer-colab-notebook.readthedocs.io/en/latest/) on ReadTheDocs.
21 |
22 | 2. Find the notebook you want to run.
23 |
24 | 3. Click it to open it in Colaboratory.
25 |
26 | 4. Copy the notebook to your Google Drive.
27 |
28 |
29 | 5. Just run the notebook!
30 |
31 |
32 | ## Contribution
33 | 1. Create your notebook. Please use the Python 3 runtime with a GPU.
34 |
35 |
36 | 2. Download it as a Jupyter notebook (.ipynb).
37 |
38 |
39 | 3. Push it to the GitHub repository [chainer-community](https://github.com/chainer-community/chainer-colab-notebook) and open a PR!
40 |
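41 | For example, a typical first cell of your notebook installs Chainer and CuPy with the one-line installer that the notebooks in this repository use:
42 |
43 | ```
44 | # Install Chainer and CuPy on Colaboratory (run as a notebook cell)
45 | !curl https://colab.chainer.org/install | sh -
46 | ```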
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = ChainerColabNotebook
8 | SOURCEDIR = ./source
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/images/convolution_filter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chainer-community/chainer-colab-notebook/7e8a134279d362859a6600d4c9ca9a2699d00338/docs/images/convolution_filter.png
--------------------------------------------------------------------------------
/docs/images/dropout.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chainer-community/chainer-colab-notebook/7e8a134279d362859a6600d4c9ca9a2699d00338/docs/images/dropout.png
--------------------------------------------------------------------------------
/docs/images/generalization_error.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chainer-community/chainer-colab-notebook/7e8a134279d362859a6600d4c9ca9a2699d00338/docs/images/generalization_error.png
--------------------------------------------------------------------------------
/docs/images/laplacian_filter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chainer-community/chainer-colab-notebook/7e8a134279d362859a6600d4c9ca9a2699d00338/docs/images/laplacian_filter.png
--------------------------------------------------------------------------------
/docs/images/max_pooling.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chainer-community/chainer-colab-notebook/7e8a134279d362859a6600d4c9ca9a2699d00338/docs/images/max_pooling.png
--------------------------------------------------------------------------------
/docs/images/mlp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chainer-community/chainer-colab-notebook/7e8a134279d362859a6600d4c9ca9a2699d00338/docs/images/mlp.png
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | nbsphinx
2 |
--------------------------------------------------------------------------------
/docs/source/_ext/show_on_colaboratory.py:
--------------------------------------------------------------------------------
1 | """
2 | Sphinx extension to add ReadTheDocs-style "Show on Colaboratory" links to the
3 | sidebar.
4 | Loosely based on https://github.com/astropy/astropy/pull/347
5 | """
6 |
7 | import os
8 | import warnings
9 |
10 |
11 | __licence__ = 'BSD (3 clause)'
12 |
13 |
14 | def get_colaboratory_url(path):
15 | fromto = [('hands_on', 'hands_on_en'), ('official_example', 'official_example_en')]
16 | for f, t in fromto:
17 | if path.startswith(f):
18 | path = path.replace(f, t, 1)
19 | return 'https://colab.research.google.com/github/chainer-community/chainer-colab-notebook/blob/master/{path}'.format(
20 | path=path)
21 |
22 |
23 | def html_page_context(app, pagename, templatename, context, doctree):
24 | if templatename != 'page.html':
25 | return
26 |
27 | path = os.path.relpath(doctree.get('source'), app.builder.srcdir).replace('notebook/', '')
28 | show_url = get_colaboratory_url(path)
29 |
30 | context['show_on_colaboratory_url'] = show_url
31 |
32 |
33 | def setup(app):
34 | app.connect('html-page-context', html_page_context)
35 |
--------------------------------------------------------------------------------
/docs/source/_static/colaboratory.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chainer-community/chainer-colab-notebook/7e8a134279d362859a6600d4c9ca9a2699d00338/docs/source/_static/colaboratory.png
--------------------------------------------------------------------------------
/docs/source/_templates/searchbox.html:
--------------------------------------------------------------------------------
1 | {%- if builder != 'singlehtml' %}
2 |
3 |
8 |
9 | {%- endif %}
10 | {%- if show_source and has_source and sourcename %}
11 | {%- if show_on_colaboratory_url %}
12 |
13 |
14 |
17 | {%- endif %}
18 | {%- endif %}
19 |
--------------------------------------------------------------------------------
/docs/source/begginers_hands_on.rst:
--------------------------------------------------------------------------------
1 | Chainer Beginner's Hands-on
2 | ----------------------------
3 |
4 | .. toctree::
5 | :glob:
6 | :titlesonly:
7 |
8 | notebook/hands_on/chainer/begginers_hands_on/*
9 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # Chainer Colab Notebook documentation build configuration file, created by
5 | # sphinx-quickstart on Tue Jun 12 09:34:10 2018.
6 | #
7 | # This file is execfile()d with the current directory set to its
8 | # containing dir.
9 | #
10 | # Note that not all possible configuration values are present in this
11 | # autogenerated file.
12 | #
13 | # All configuration values have a default; values that are commented out
14 | # serve to show the default.
15 |
16 | # If extensions (or modules to document with autodoc) are in another directory,
17 | # add these directories to sys.path here. If the directory is relative to the
18 | # documentation root, use os.path.abspath to make it absolute, like shown here.
19 | #
20 | import os
21 | import sys
22 | sys.path.insert(0, os.path.abspath('_ext'))
23 |
24 |
25 | # COPY FILES
26 | import shutil
27 |
28 | try:
29 | shutil.rmtree('notebook')
30 | except OSError:
31 | pass  # the 'notebook' directory may not exist yet
32 | os.mkdir('notebook')
33 | shutil.copytree('../../hands_on_en', 'notebook/hands_on')
34 | shutil.copytree('../../official_example_en', 'notebook/official_example')
35 | shutil.copytree('../../example', 'notebook/example')
36 |
37 | # -- General configuration ------------------------------------------------
38 |
39 | # If your documentation needs a minimal Sphinx version, state it here.
40 | #
41 | # needs_sphinx = '1.0'
42 |
43 | # Add any Sphinx extension module names here, as strings. They can be
44 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
45 | # ones.
46 | extensions = ['sphinx.ext.autodoc',
47 | 'sphinx.ext.doctest',
48 | 'sphinx.ext.intersphinx',
49 | 'sphinx.ext.mathjax',
50 | 'nbsphinx',
51 | 'show_on_colaboratory',]
52 |
53 | # Add any paths that contain templates here, relative to this directory.
54 | templates_path = ['_templates']
55 |
56 | # The suffix(es) of source filenames.
57 | # You can specify multiple suffix as a list of string:
58 | #
59 | # source_suffix = ['.rst', '.md']
60 | source_suffix = '.rst'
61 |
62 | # The master toctree document.
63 | master_doc = 'index'
64 |
65 | # General information about the project.
66 | project = 'Chainer Colab Notebook'
67 | copyright = '2018, Chainer User Group'
68 | author = 'Chainer User Group'
69 |
70 | # The version info for the project you're documenting, acts as replacement for
71 | # |version| and |release|, also used in various other places throughout the
72 | # built documents.
73 | #
74 | # The short X.Y version.
75 | version = '0.0'
76 | # The full version, including alpha/beta/rc tags.
77 | release = '0.0'
78 |
79 | # The language for content autogenerated by Sphinx. Refer to documentation
80 | # for a list of supported languages.
81 | #
82 | # This is also used if you do content translation via gettext catalogs.
83 | # Usually you set "language" from the command line for these cases.
84 | language = None
85 |
86 | # List of patterns, relative to source directory, that match files and
87 | # directories to ignore when looking for source files.
88 | # This patterns also effect to html_static_path and html_extra_path
89 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '**.ipynb_checkpoints']
90 |
91 | # The name of the Pygments (syntax highlighting) style to use.
92 | pygments_style = 'sphinx'
93 |
94 | # If true, `todo` and `todoList` produce output, else they produce nothing.
95 | todo_include_todos = False
96 |
97 |
98 | # -- Options for HTML output ----------------------------------------------
99 |
100 | # The theme to use for HTML and HTML Help pages. See the documentation for
101 | # a list of builtin themes.
102 | #
103 | html_theme = 'sphinx_rtd_theme'
104 |
105 | # Theme options are theme-specific and customize the look and feel of a theme
106 | # further. For a list of options available for each theme, see the
107 | # documentation.
108 | #
109 | # html_theme_options = {}
110 |
111 | # Add any paths that contain custom static files (such as style sheets) here,
112 | # relative to this directory. They are copied after the builtin static files,
113 | # so a file named "default.css" will overwrite the builtin "default.css".
114 | html_static_path = ['_static']
115 |
116 |
117 | # -- Options for HTMLHelp output ------------------------------------------
118 |
119 | # Output file base name for HTML help builder.
120 | htmlhelp_basename = 'ChainerColabNotebookdoc'
121 |
122 |
123 | # -- Options for LaTeX output ---------------------------------------------
124 |
125 | latex_elements = {
126 | # The paper size ('letterpaper' or 'a4paper').
127 | #
128 | # 'papersize': 'letterpaper',
129 |
130 | # The font size ('10pt', '11pt' or '12pt').
131 | #
132 | # 'pointsize': '10pt',
133 |
134 | # Additional stuff for the LaTeX preamble.
135 | #
136 | # 'preamble': '',
137 |
138 | # Latex figure (float) alignment
139 | #
140 | # 'figure_align': 'htbp',
141 | }
142 |
143 | # Grouping the document tree into LaTeX files. List of tuples
144 | # (source start file, target name, title,
145 | # author, documentclass [howto, manual, or own class]).
146 | latex_documents = [
147 | (master_doc, 'ChainerColabNotebook.tex', 'Chainer Colab Notebook Documentation',
148 | 'Chainer User Group', 'manual'),
149 | ]
150 |
151 |
152 | # -- Options for manual page output ---------------------------------------
153 |
154 | # One entry per manual page. List of tuples
155 | # (source start file, name, description, authors, manual section).
156 | man_pages = [
157 | (master_doc, 'chainercolabnotebook', 'Chainer Colab Notebook Documentation',
158 | [author], 1)
159 | ]
160 |
161 |
162 | # -- Options for Texinfo output -------------------------------------------
163 |
164 | # Grouping the document tree into Texinfo files. List of tuples
165 | # (source start file, target name, title, author,
166 | # dir menu entry, description, category)
167 | texinfo_documents = [
168 | (master_doc, 'ChainerColabNotebook', 'Chainer Colab Notebook Documentation',
169 | author, 'ChainerColabNotebook', 'One line description of project.',
170 | 'Miscellaneous'),
171 | ]
172 |
173 |
174 |
175 |
176 | # Example configuration for intersphinx: refer to the Python standard library.
177 | intersphinx_mapping = {'https://docs.python.org/': None}
178 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | Chainer Colab Notebooks : An easy way to learn and use Deep Learning
2 | =====================================================================
3 |
4 | You can run any notebook on Colaboratory right away by clicking the
5 | "Show on Colaboratory" link on each page.
6 |
7 |
8 | .. toctree::
9 | :maxdepth: 2
10 | :caption: Hands-on
11 |
12 | begginers_hands_on
13 | other_hands_on
14 |
15 | .. toctree::
16 | :maxdepth: 2
17 | :caption: Examples
18 |
19 | official_examples
20 | other_examples
21 |
22 | Other Languages
23 | ================
24 |
25 | - `日本語 `_
26 |
27 | Links
28 | ======
29 |
30 | - `GitHub Issues `_
31 |
--------------------------------------------------------------------------------
/docs/source/official_examples.rst:
--------------------------------------------------------------------------------
1 | Official Example
2 | -----------------
3 |
4 | .. toctree::
5 | :glob:
6 | :titlesonly:
7 |
8 | notebook/official_example/*
9 |
10 |
--------------------------------------------------------------------------------
/docs/source/other_examples.rst:
--------------------------------------------------------------------------------
1 | Other Examples
2 | ---------------
3 |
4 | CuPy
5 | ^^^^^
6 |
7 | .. toctree::
8 | :glob:
9 | :titlesonly:
10 |
11 | notebook/example/cupy/prml/*
12 |
--------------------------------------------------------------------------------
/docs/source/other_hands_on.rst:
--------------------------------------------------------------------------------
1 | Other Hands-on
2 | ---------------
3 |
4 | Chainer
5 | ^^^^^^^^^^^
6 |
7 | .. toctree::
8 | :glob:
9 | :titlesonly:
10 |
11 | notebook/hands_on/chainer/*
12 |
13 | Chainer RL
14 | ^^^^^^^^^^^
15 |
16 | .. toctree::
17 | :glob:
18 | :titlesonly:
19 |
20 | notebook/hands_on/chainerrl/*
21 |
--------------------------------------------------------------------------------
/docs/source_ja/_ext/show_on_colaboratory.py:
--------------------------------------------------------------------------------
1 | """
2 | Sphinx extension to add ReadTheDocs-style "Show on Colaboratory" links to the
3 | sidebar.
4 | Loosely based on https://github.com/astropy/astropy/pull/347
5 | """
6 |
7 | import os
8 | import warnings
9 |
10 |
11 | __licence__ = 'BSD (3 clause)'
12 |
13 |
14 | def get_colaboratory_url(path):
15 | fromto = [('hands_on', 'hands_on_ja'), ('official_example', 'official_example_ja')]
16 | for f, t in fromto:
17 | if path.startswith(f):
18 | path = path.replace(f, t, 1)
19 | return 'https://colab.research.google.com/github/chainer-community/chainer-colab-notebook/blob/master/{path}'.format(
20 | path=path)
21 |
22 |
23 | def html_page_context(app, pagename, templatename, context, doctree):
24 | if templatename != 'page.html':
25 | return
26 |
27 | path = os.path.relpath(doctree.get('source'), app.builder.srcdir).replace('notebook/', '')
28 | show_url = get_colaboratory_url(path)
29 |
30 | context['show_on_colaboratory_url'] = show_url
31 |
32 |
33 | def setup(app):
34 | app.connect('html-page-context', html_page_context)
35 |
--------------------------------------------------------------------------------
/docs/source_ja/_static:
--------------------------------------------------------------------------------
1 | ../source/_static/
--------------------------------------------------------------------------------
/docs/source_ja/_templates:
--------------------------------------------------------------------------------
1 | ../source/_templates/
--------------------------------------------------------------------------------
/docs/source_ja/begginers_hands_on.rst:
--------------------------------------------------------------------------------
1 | Chainer Beginner's Hands-on
2 | ============================
3 |
4 | Overview
5 | ---------
6 |
7 | This is a free hands-on course in which you do exercises with the deep learning framework
8 | `Chainer `_
9 | on Google's
10 | `Colaboratory `_.
11 | By modifying and running the programs, you learn how to actually solve problems,
12 | and detailed explanations are added wherever questions are likely to arise, so that you can learn more efficiently.
13 |
14 | Note: Colaboratory is a Jupyter notebook environment that runs entirely in the cloud. It requires no setup and is free to use.
15 |
16 | The course also aims at building models that can solve real problems, not merely models that run: in addition to simply training models, you practice rigorous machine learning methodology using training/validation/test sets, and you learn how to evaluate models correctly.
17 |
18 | Goals of the course
19 | --------------------
20 |
21 | - Explain the components a deep learning framework needs and how Chainer implements them
22 | - Practice the machine learning methodology required to evaluate models
23 | - Implement models that use basic networks such as CNNs and RNNs
24 | - Solve problems in application areas such as image processing and natural language processing with Chainer
25 | - Build applications with the models you have created
26 |
27 | Contents
28 | -----------------------
29 |
30 | .. toctree::
31 | :glob:
32 | :titlesonly:
33 |
34 | notebook/hands_on/chainer/begginers_hands_on/*
35 |
--------------------------------------------------------------------------------
/docs/source_ja/conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # Chainer Colab Notebook documentation build configuration file, created by
5 | # sphinx-quickstart on Tue Jun 12 09:34:10 2018.
6 | #
7 | # This file is execfile()d with the current directory set to its
8 | # containing dir.
9 | #
10 | # Note that not all possible configuration values are present in this
11 | # autogenerated file.
12 | #
13 | # All configuration values have a default; values that are commented out
14 | # serve to show the default.
15 |
16 | # If extensions (or modules to document with autodoc) are in another directory,
17 | # add these directories to sys.path here. If the directory is relative to the
18 | # documentation root, use os.path.abspath to make it absolute, like shown here.
19 | #
20 | import os
21 | import sys
22 | sys.path.insert(0, os.path.abspath('_ext'))
23 |
24 |
25 | # COPY FILES
26 | import shutil
27 |
28 |
29 | try:
30 | shutil.rmtree('notebook')
31 | except OSError:
32 | pass  # the 'notebook' directory may not exist yet
33 | os.mkdir('notebook')
34 | shutil.copytree('../../hands_on_ja', 'notebook/hands_on')
35 | shutil.copytree('../../official_example_ja', 'notebook/official_example')
36 | shutil.copytree('../../example', 'notebook/example')
37 |
38 | # -- General configuration ------------------------------------------------
39 |
40 | # If your documentation needs a minimal Sphinx version, state it here.
41 | #
42 | # needs_sphinx = '1.0'
43 |
44 | # Add any Sphinx extension module names here, as strings. They can be
45 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
46 | # ones.
47 | extensions = ['sphinx.ext.autodoc',
48 | 'sphinx.ext.doctest',
49 | 'sphinx.ext.intersphinx',
50 | 'sphinx.ext.mathjax',
51 | 'nbsphinx',
52 | 'show_on_colaboratory',]
53 |
54 | # Add any paths that contain templates here, relative to this directory.
55 | templates_path = ['_templates']
56 |
57 | # The suffix(es) of source filenames.
58 | # You can specify multiple suffix as a list of string:
59 | #
60 | # source_suffix = ['.rst', '.md']
61 | source_suffix = '.rst'
62 |
63 | # The master toctree document.
64 | master_doc = 'index'
65 |
66 | # General information about the project.
67 | project = 'Chainer Colab Notebook'
68 | copyright = '2018, Chainer User Group'
69 | author = 'Chainer User Group'
70 |
71 | # The version info for the project you're documenting, acts as replacement for
72 | # |version| and |release|, also used in various other places throughout the
73 | # built documents.
74 | #
75 | # The short X.Y version.
76 | version = '0.0'
77 | # The full version, including alpha/beta/rc tags.
78 | release = '0.0'
79 |
80 | # The language for content autogenerated by Sphinx. Refer to documentation
81 | # for a list of supported languages.
82 | #
83 | # This is also used if you do content translation via gettext catalogs.
84 | # Usually you set "language" from the command line for these cases.
85 | language = None
86 |
87 | # List of patterns, relative to source directory, that match files and
88 | # directories to ignore when looking for source files.
89 | # This patterns also effect to html_static_path and html_extra_path
90 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '**.ipynb_checkpoints']
91 |
92 | # The name of the Pygments (syntax highlighting) style to use.
93 | pygments_style = 'sphinx'
94 |
95 | # If true, `todo` and `todoList` produce output, else they produce nothing.
96 | todo_include_todos = False
97 |
98 |
99 | # -- Options for HTML output ----------------------------------------------
100 |
101 | # The theme to use for HTML and HTML Help pages. See the documentation for
102 | # a list of builtin themes.
103 | #
104 | html_theme = 'sphinx_rtd_theme'
105 |
106 | # Theme options are theme-specific and customize the look and feel of a theme
107 | # further. For a list of options available for each theme, see the
108 | # documentation.
109 | #
110 | # html_theme_options = {}
111 |
112 | # Add any paths that contain custom static files (such as style sheets) here,
113 | # relative to this directory. They are copied after the builtin static files,
114 | # so a file named "default.css" will overwrite the builtin "default.css".
115 | html_static_path = ['_static']
116 |
117 |
118 | # -- Options for HTMLHelp output ------------------------------------------
119 |
120 | # Output file base name for HTML help builder.
121 | htmlhelp_basename = 'ChainerColabNotebookdoc'
122 |
123 |
124 | # -- Options for LaTeX output ---------------------------------------------
125 |
126 | latex_elements = {
127 | # The paper size ('letterpaper' or 'a4paper').
128 | #
129 | # 'papersize': 'letterpaper',
130 |
131 | # The font size ('10pt', '11pt' or '12pt').
132 | #
133 | # 'pointsize': '10pt',
134 |
135 | # Additional stuff for the LaTeX preamble.
136 | #
137 | # 'preamble': '',
138 |
139 | # Latex figure (float) alignment
140 | #
141 | # 'figure_align': 'htbp',
142 | }
143 |
144 | # Grouping the document tree into LaTeX files. List of tuples
145 | # (source start file, target name, title,
146 | # author, documentclass [howto, manual, or own class]).
147 | latex_documents = [
148 | (master_doc, 'ChainerColabNotebook.tex', 'Chainer Colab Notebook Documentation',
149 | 'Chainer User Group', 'manual'),
150 | ]
151 |
152 |
153 | # -- Options for manual page output ---------------------------------------
154 |
155 | # One entry per manual page. List of tuples
156 | # (source start file, name, description, authors, manual section).
157 | man_pages = [
158 | (master_doc, 'chainercolabnotebook', 'Chainer Colab Notebook Documentation',
159 | [author], 1)
160 | ]
161 |
162 |
163 | # -- Options for Texinfo output -------------------------------------------
164 |
165 | # Grouping the document tree into Texinfo files. List of tuples
166 | # (source start file, target name, title, author,
167 | # dir menu entry, description, category)
168 | texinfo_documents = [
169 | (master_doc, 'ChainerColabNotebook', 'Chainer Colab Notebook Documentation',
170 | author, 'ChainerColabNotebook', 'One line description of project.',
171 | 'Miscellaneous'),
172 | ]
173 |
174 |
175 |
176 |
177 | # Example configuration for intersphinx: refer to the Python standard library.
178 | intersphinx_mapping = {'https://docs.python.org/': None}
179 |
--------------------------------------------------------------------------------
/docs/source_ja/index.rst:
--------------------------------------------------------------------------------
1 | Chainer Colab Notebooks : A Practical Deep Learning Notebook Collection
2 | =========================================================================
3 |
4 | Click the "Show on Colaboratory" link on each page to
5 | run the notebook on Colaboratory right away.
6 |
7 | .. toctree::
8 | :maxdepth: 2
9 | :caption: Hands-on
10 |
11 | begginers_hands_on
12 | other_hands_on
13 |
14 | .. toctree::
15 | :maxdepth: 2
16 | :caption: Examples
17 |
18 | official_examples
19 | other_examples
20 |
21 | Other Languages
22 | ================
23 |
24 | - `English `_
25 |
26 | Links
27 | ======
28 |
29 | - `GitHub Issues `_
30 |
--------------------------------------------------------------------------------
/docs/source_ja/official_examples.rst:
--------------------------------------------------------------------------------
1 | ../source/official_examples.rst
--------------------------------------------------------------------------------
/docs/source_ja/other_examples.rst:
--------------------------------------------------------------------------------
1 | ../source/other_examples.rst
--------------------------------------------------------------------------------
/docs/source_ja/other_hands_on.rst:
--------------------------------------------------------------------------------
1 | ../source/other_hands_on.rst
--------------------------------------------------------------------------------
/example/chainer/OfficialTutorial/02_1_How_to_Write_a_New_Network.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "02-1-How-to-Write-a-New-Network.ipynb",
7 | "version": "0.3.2",
8 | "views": {},
9 | "default_view": {},
10 | "provenance": [],
11 | "collapsed_sections": [],
12 | "toc_visible": true
13 | },
14 | "kernelspec": {
15 | "name": "python3",
16 | "display_name": "Python 3"
17 | },
18 | "accelerator": "GPU"
19 | },
20 | "cells": [
21 | {
22 | "metadata": {
23 | "id": "0IerUqWPN2YL",
24 | "colab_type": "text"
25 | },
26 | "cell_type": "markdown",
27 | "source": [
28 | "# How to Write a New Network\n",
29 | "\n",
30 | "## Precondition\n",
31 | "\n",
32 | "Install Chainer and Cupy."
33 | ]
34 | },
35 | {
36 | "metadata": {
37 | "id": "kizqNcZNNuYL",
38 | "colab_type": "code",
39 | "colab": {
40 | "autoexec": {
41 | "startup": false,
42 | "wait_interval": 0
43 | },
44 | "output_extras": [
45 | {
46 | "item_id": 21
47 | },
48 | {
49 | "item_id": 45
50 | }
51 | ],
52 | "base_uri": "https://localhost:8080/",
53 | "height": 972
54 | },
55 | "outputId": "de43cbeb-c415-4748-8dad-3f653a67f431",
56 | "executionInfo": {
57 | "status": "ok",
58 | "timestamp": 1520062255406,
59 | "user_tz": -540,
60 | "elapsed": 40721,
61 | "user": {
62 | "displayName": "Yasuki IKEUCHI",
63 | "photoUrl": "//lh6.googleusercontent.com/-QrqZkF-x2us/AAAAAAAAAAI/AAAAAAAAAAA/PzOMhw6mH3o/s50-c-k-no/photo.jpg",
64 | "userId": "110487681503898314699"
65 | }
66 | }
67 | },
68 | "cell_type": "code",
69 | "source": [
70 | "# Install Chainer and CuPy!\n",
71 | "\n",
72 | "!curl https://colab.chainer.org/install | sh -"
73 | ],
74 | "execution_count": 1,
75 | "outputs": [
76 | {
77 | "output_type": "stream",
78 | "text": [
79 | "Reading package lists... Done\n",
80 | "Building dependency tree \n",
81 | "Reading state information... Done\n",
82 | "The following NEW packages will be installed:\n",
83 | " libcusparse8.0 libnvrtc8.0 libnvtoolsext1\n",
84 | "0 upgraded, 3 newly installed, 0 to remove and 1 not upgraded.\n",
85 | "Need to get 28.9 MB of archives.\n",
86 | "After this operation, 71.6 MB of additional disk space will be used.\n",
87 | "Get:1 http://archive.ubuntu.com/ubuntu artful/multiverse amd64 libcusparse8.0 amd64 8.0.61-1 [22.6 MB]\n",
88 | "Get:2 http://archive.ubuntu.com/ubuntu artful/multiverse amd64 libnvrtc8.0 amd64 8.0.61-1 [6,225 kB]\n",
89 | "Get:3 http://archive.ubuntu.com/ubuntu artful/multiverse amd64 libnvtoolsext1 amd64 8.0.61-1 [32.2 kB]\n",
90 | "Fetched 28.9 MB in 1s (16.2 MB/s)\n",
91 | "\n",
92 | "\u001b7\u001b[0;23r\u001b8\u001b[1ASelecting previously unselected package libcusparse8.0:amd64.\n",
93 | "(Reading database ... 16669 files and directories currently installed.)\n",
94 | "Preparing to unpack .../libcusparse8.0_8.0.61-1_amd64.deb ...\n",
95 | "\u001b7\u001b[24;0f\u001b[42m\u001b[30mProgress: [ 0%]\u001b[49m\u001b[39m [..........................................................] \u001b8\u001b7\u001b[24;0f\u001b[42m\u001b[30mProgress: [ 6%]\u001b[49m\u001b[39m [###.......................................................] \u001b8Unpacking libcusparse8.0:amd64 (8.0.61-1) ...\n",
96 | "\u001b7\u001b[24;0f\u001b[42m\u001b[30mProgress: [ 12%]\u001b[49m\u001b[39m [#######...................................................] \u001b8\u001b7\u001b[24;0f\u001b[42m\u001b[30mProgress: [ 18%]\u001b[49m\u001b[39m [##########................................................] \u001b8Selecting previously unselected package libnvrtc8.0:amd64.\n",
97 | "Preparing to unpack .../libnvrtc8.0_8.0.61-1_amd64.deb ...\n",
98 | "\u001b7\u001b[24;0f\u001b[42m\u001b[30mProgress: [ 25%]\u001b[49m\u001b[39m [##############............................................] \u001b8Unpacking libnvrtc8.0:amd64 (8.0.61-1) ...\n",
99 | "\u001b7\u001b[24;0f\u001b[42m\u001b[30mProgress: [ 31%]\u001b[49m\u001b[39m [##################........................................] \u001b8\u001b7\u001b[24;0f\u001b[42m\u001b[30mProgress: [ 37%]\u001b[49m\u001b[39m [#####################.....................................] \u001b8Selecting previously unselected package libnvtoolsext1:amd64.\n",
100 | "Preparing to unpack .../libnvtoolsext1_8.0.61-1_amd64.deb ...\n",
101 | "\u001b7\u001b[24;0f\u001b[42m\u001b[30mProgress: [ 43%]\u001b[49m\u001b[39m [#########################.................................] \u001b8Unpacking libnvtoolsext1:amd64 (8.0.61-1) ...\n",
102 | "\u001b7\u001b[24;0f\u001b[42m\u001b[30mProgress: [ 50%]\u001b[49m\u001b[39m [#############################.............................] \u001b8\u001b7\u001b[24;0f\u001b[42m\u001b[30mProgress: [ 56%]\u001b[49m\u001b[39m [################################..........................] \u001b8Setting up libnvtoolsext1:amd64 (8.0.61-1) ...\n",
103 | "\u001b7\u001b[24;0f\u001b[42m\u001b[30mProgress: [ 62%]\u001b[49m\u001b[39m [####################################......................] \u001b8\u001b7\u001b[24;0f\u001b[42m\u001b[30mProgress: [ 68%]\u001b[49m\u001b[39m [#######################################...................] \u001b8Setting up libcusparse8.0:amd64 (8.0.61-1) ...\n",
104 | "\u001b7\u001b[24;0f\u001b[42m\u001b[30mProgress: [ 75%]\u001b[49m\u001b[39m [###########################################...............] \u001b8\u001b7\u001b[24;0f\u001b[42m\u001b[30mProgress: [ 81%]\u001b[49m\u001b[39m [###############################################...........] \u001b8Setting up libnvrtc8.0:amd64 (8.0.61-1) ...\n",
105 | "\u001b7\u001b[24;0f\u001b[42m\u001b[30mProgress: [ 87%]\u001b[49m\u001b[39m [##################################################........] \u001b8\u001b7\u001b[24;0f\u001b[42m\u001b[30mProgress: [ 93%]\u001b[49m\u001b[39m [######################################################....] \u001b8Processing triggers for libc-bin (2.26-0ubuntu2.1) ...\n",
106 | "\n",
107 | "\u001b7\u001b[0;24r\u001b8\u001b[1A\u001b[JCollecting cupy-cuda80==4.0.0b3 from https://github.com/kmaehashi/chainer-colab/releases/download/2018-02-06/cupy_cuda80-4.0.0b3-cp36-cp36m-linux_x86_64.whl\n",
108 | " Downloading https://github.com/kmaehashi/chainer-colab/releases/download/2018-02-06/cupy_cuda80-4.0.0b3-cp36-cp36m-linux_x86_64.whl (205.2MB)\n",
109 | "\u001b[K 13% |████▍ | 28.3MB 36.2MB/s eta 0:00:05"
110 | ],
111 | "name": "stdout"
112 | },
113 | {
114 | "output_type": "stream",
115 | "text": [
116 | "\u001b[K 100% |████████████████████████████████| 205.2MB 7.1kB/s \n",
117 | "\u001b[?25hRequirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.6/dist-packages (from cupy-cuda80==4.0.0b3)\n",
118 | "Requirement already satisfied: numpy>=1.9.0 in /usr/local/lib/python3.6/dist-packages (from cupy-cuda80==4.0.0b3)\n",
119 | "Collecting fastrlock>=0.3 (from cupy-cuda80==4.0.0b3)\n",
120 | " Downloading fastrlock-0.3-cp36-cp36m-manylinux1_x86_64.whl (77kB)\n",
121 | "\u001b[K 100% |████████████████████████████████| 81kB 2.3MB/s \n",
122 | "\u001b[?25hInstalling collected packages: fastrlock, cupy-cuda80\n",
123 | "Successfully installed cupy-cuda80-4.0.0b3 fastrlock-0.3\n",
124 | "Collecting chainer==4.0.0b3\n",
125 | " Downloading chainer-4.0.0b3.tar.gz (366kB)\n",
126 | "\u001b[K 100% |████████████████████████████████| 368kB 2.3MB/s \n",
127 | "\u001b[?25hCollecting filelock (from chainer==4.0.0b3)\n",
128 | " Downloading filelock-3.0.4.tar.gz\n",
129 | "Requirement already satisfied: numpy>=1.9.0 in /usr/local/lib/python3.6/dist-packages (from chainer==4.0.0b3)\n",
130 | "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from chainer==4.0.0b3)\n",
131 | "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.6/dist-packages (from chainer==4.0.0b3)\n",
132 | "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from protobuf>=3.0.0->chainer==4.0.0b3)\n",
133 | "Building wheels for collected packages: chainer, filelock\n",
134 | " Running setup.py bdist_wheel for chainer ... \u001b[?25l-\b \b\\\b \b|\b \b/\b \bdone\n",
135 | "\u001b[?25h Stored in directory: /content/.cache/pip/wheels/ce/20/9f/4f7d6978c1a5f88bf2bb18d429998b85cf75cfe96315c7631b\n",
136 | " Running setup.py bdist_wheel for filelock ... \u001b[?25l-\b \bdone\n",
137 | "\u001b[?25h Stored in directory: /content/.cache/pip/wheels/5f/5e/8a/9f1eb481ffbfff95d5f550570c1dbeff3c1785c8383c12c62b\n",
138 | "Successfully built chainer filelock\n",
139 | "Installing collected packages: filelock, chainer\n",
140 | "Successfully installed chainer-4.0.0b3 filelock-3.0.4\n"
141 | ],
142 | "name": "stdout"
143 | }
144 | ]
145 | },
146 | {
147 | "metadata": {
148 | "id": "NgmVF6QhOLSK",
149 | "colab_type": "code",
150 | "colab": {
151 | "autoexec": {
152 | "startup": false,
153 | "wait_interval": 0
154 | },
155 | "output_extras": [
156 | {
157 | "item_id": 1
158 | }
159 | ],
160 | "base_uri": "https://localhost:8080/",
161 | "height": 71
162 | },
163 | "outputId": "3decdb4c-9734-43cd-8194-84432f907e40",
164 | "executionInfo": {
165 | "status": "ok",
166 | "timestamp": 1520062260652,
167 | "user_tz": -540,
168 | "elapsed": 5228,
169 | "user": {
170 | "displayName": "Yasuki IKEUCHI",
171 | "photoUrl": "//lh6.googleusercontent.com/-QrqZkF-x2us/AAAAAAAAAAI/AAAAAAAAAAA/PzOMhw6mH3o/s50-c-k-no/photo.jpg",
172 | "userId": "110487681503898314699"
173 | }
174 | }
175 | },
176 | "cell_type": "code",
177 | "source": [
178 | "import numpy as np\n",
179 | "import chainer\n",
180 | "from chainer import cuda, Function, gradient_check, report, training, utils, Variable\n",
181 | "from chainer import datasets, iterators, optimizers, serializers\n",
182 | "from chainer import Link, Chain, ChainList\n",
183 | "import chainer.functions as F\n",
184 | "import chainer.links as L\n",
185 | "from chainer.training import extensions"
186 | ],
187 | "execution_count": 2,
188 | "outputs": [
189 | {
190 | "output_type": "stream",
191 | "text": [
192 | "/usr/local/lib/python3.6/dist-packages/cupy/core/fusion.py:659: FutureWarning: cupy.core.fusion is experimental. The interface can change in the future.\n",
193 | " util.experimental('cupy.core.fusion')\n"
194 | ],
195 | "name": "stderr"
196 | }
197 | ]
198 | },
199 | {
200 | "metadata": {
201 | "id": "hYjsrZEJFpKE",
202 | "colab_type": "text"
203 | },
204 | "cell_type": "markdown",
205 | "source": [
206 | "## Convolutional Network for Visual Recognition Tasks\n",
207 | "\n",
208 | "In this section, you will learn how to write\n",
209 | "\n",
210 | "* A small convolutional network with a model class that is inherited from :class:`chainer.Chain`,\n",
211 | "* A large convolutional network that has several building block networks with :class:`chainer.ChainList`.\n",
212 | "\n",
213 | "After reading this section, you will be able to:\n",
214 | "\n",
215 | "* Write your own original convolutional network in Chainer\n",
216 | "\n",
217 | "A convolutional network (ConvNet) is mainly comprised of convolutional layers.\n",
218 | "This type of network is commonly used for various visual recognition tasks,\n",
219 | "e.g., classifying hand-written digits or natural images into given object\n",
220 | "classes, detecting objects from an image, and labeling all pixels of an image\n",
221 | "with the object classes (semantic segmentation), and so on.\n",
222 | "\n",
223 | "In such tasks, a typical ConvNet takes a set of images whose shape is\n",
224 | ":math:`(N, C, H, W)`, where\n",
225 | "\n",
226 | "- `N` denotes the number of images in a mini-batch,\n",
227 | "- `C` denotes the number of channels of those images,\n",
228 | "- `H` and `W` denote the height and width of those images,\n",
229 | "\n",
230 | "respectively. Then, it typically outputs a fixed-sized vector as membership\n",
231 | "probabilities over the target object classes. It also can output a set of\n",
232 | "feature maps that have the corresponding size to the input image for a pixel\n",
233 | "labeling task, etc.\n"
234 | ]
235 | },
236 | {
237 | "metadata": {
238 | "id": "ltUfGA-JPGZg",
239 | "colab_type": "text"
240 | },
241 | "cell_type": "markdown",
242 | "source": [
243 | "Note\n",
244 | "\n",
245 | "> The below example code assumes that some packages are already imported.\n",
246 | "> Please see the details here: `basic`."
247 | ]
248 | },
249 | {
250 | "metadata": {
251 | "id": "KSgZ2XqzPRll",
252 | "colab_type": "text"
253 | },
254 | "cell_type": "markdown",
255 | "source": [
256 | "### LeNet5\n",
257 | "\n",
258 | "Here, let's start by defining LeNet5 [LeCun98] in Chainer.\n",
259 | "This is a ConvNet model that has 5 layers comprised of 3 convolutional layers\n",
260 | "and 2 fully-connected layers. This was proposed to classify hand-written\n",
261 | "digit images in 1998. In Chainer, the model can be written as follows:"
262 | ]
263 | },
264 | {
265 | "metadata": {
266 | "id": "04ZUqAdNFq9M",
267 | "colab_type": "code",
268 | "colab": {
269 | "autoexec": {
270 | "startup": false,
271 | "wait_interval": 0
272 | }
273 | }
274 | },
275 | "cell_type": "code",
276 | "source": [
277 | "class LeNet5(Chain):\n",
278 | " def __init__(self):\n",
279 | " super(LeNet5, self).__init__()\n",
280 | " with self.init_scope():\n",
281 | " self.conv1 = L.Convolution2D(\n",
282 | " in_channels=1, out_channels=6, ksize=5, stride=1)\n",
283 | " self.conv2 = L.Convolution2D(\n",
284 | " in_channels=6, out_channels=16, ksize=5, stride=1)\n",
285 | " self.conv3 = L.Convolution2D(\n",
286 | " in_channels=16, out_channels=120, ksize=4, stride=1)\n",
287 | " self.fc4 = L.Linear(None, 84)\n",
288 | " self.fc5 = L.Linear(84, 10)\n",
289 | "\n",
290 | " def __call__(self, x):\n",
291 | " h = F.sigmoid(self.conv1(x))\n",
292 | " h = F.max_pooling_2d(h, 2, 2)\n",
293 | " h = F.sigmoid(self.conv2(h))\n",
294 | " h = F.max_pooling_2d(h, 2, 2)\n",
295 | " h = F.sigmoid(self.conv3(h))\n",
296 | " h = F.sigmoid(self.fc4(h))\n",
297 | " if chainer.config.train:\n",
298 | " return self.fc5(h)\n",
299 | " return F.softmax(self.fc5(h))"
300 | ],
301 | "execution_count": 0,
302 | "outputs": []
303 | },
304 | {
305 | "metadata": {
306 | "id": "0zcZTI-7Ob8T",
307 | "colab_type": "text"
308 | },
309 | "cell_type": "markdown",
310 | "source": [
311 | "A typical way to write your network is creating a new class inherited from\n",
312 | "`chainer.Chain` class. When defining your model in this way, typically,\n",
313 | "all the layers which have trainable parameters are registered to the model\n",
314 | "by assigning the objects of `chainer.Link` as an attribute.\n",
315 | "\n",
316 | "The model class is instantiated before the forward and backward computations.\n",
317 | "To give input images and label vectors simply by calling the model object\n",
318 | "like a function, `__call__` is usually defined in the model class.\n",
319 | "\n",
320 | "This method performs the forward computation of the model. Chainer uses\n",
321 | "the powerful autograd system for any computational graphs written with\n",
322 | "`chainer.FunctionNode`\\ s and `chainer.Link`\\ s (actually a\n",
323 | "`chainer.Link` calls a corresponding `chainer.FunctionNode`\n",
324 | "inside of it), so that you don't need to explicitly write the code for backward\n",
325 | "computations in the model. Just prepare the data, then give it to the model.\n",
326 | "\n",
327 | "The way this works is the resulting output `chainer.Variable` from the\n",
328 | "forward computation has a `chainer.Variable.backward` method to perform\n",
329 | "autograd. In the above model, `__call__` has a ``if`` statement at the\n",
330 | "end to switch its behavior by the Chainer's running mode, i.e., training mode or\n",
331 | "not. Chainer presents the running mode as a global variable ``chainer.config.train``.\n",
332 | "\n",
333 | "When it's in training mode, `__call__` returns the output value of the\n",
334 | "last layer as is to compute the loss later on, otherwise it returns a\n",
335 | "prediction result by calculating `chainer.functions.softmax`."
336 | ]
337 | },
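{
"metadata": {
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"The snippet below is a minimal sketch of that autograd flow (the random data and\n",
"batch size are arbitrary): calling `backward` on the loss `Variable` fills in the\n",
"`grad` attribute of every registered parameter."
]
},
{
"metadata": {
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"model = LeNet5()\n",
"x = np.random.rand(4, 1, 28, 28).astype(np.float32)\n",
"t = np.random.randint(0, 10, size=(4,)).astype(np.int32)\n",
"\n",
"y = model(x)                          # forward pass (training mode by default)\n",
"loss = F.softmax_cross_entropy(y, t)  # scalar loss Variable\n",
"\n",
"model.cleargrads()  # initialize all gradients to zero\n",
"loss.backward()     # autograd: fills W.grad / b.grad of every link\n",
"print(model.conv1.W.grad.shape)  # (6, 1, 5, 5)"
],
"execution_count": 0,
"outputs": []
},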
338 | {
339 | "metadata": {
340 | "id": "zaRmPHh_PaJI",
341 | "colab_type": "text"
342 | },
343 | "cell_type": "markdown",
344 | "source": [
345 | "Note:\n",
346 | "\n",
347 | "> In Chainer v1, if a function or link behaved differently in\n",
348 | "> training and other modes, it was common that it held an attribute\n",
349 | "> that represented its running mode or was provided with the mode\n",
350 | "> from outside as an argument. In Chainer v2, it is recommended to use\n",
351 | "> the global configuration `chainer.config.train` to switch the running mode.\n"
352 | ]
353 | },
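{
"metadata": {
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"For example (a small sketch of the v2 style), you can temporarily switch to test\n",
"mode with the `chainer.using_config` context manager instead of passing a mode\n",
"flag around:"
]
},
{
"metadata": {
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"model = LeNet5()\n",
"x = np.random.rand(1, 1, 28, 28).astype(np.float32)\n",
"\n",
"print(chainer.config.train)  # True by default\n",
"with chainer.using_config('train', False):\n",
"    y = model(x)  # runs the F.softmax branch of __call__\n",
"print(float(F.sum(y).data))  # the 10 probabilities sum to ~1.0"
],
"execution_count": 0,
"outputs": []
},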
354 | {
355 | "metadata": {
356 | "id": "20wNAeRUPoQi",
357 | "colab_type": "text"
358 | },
359 | "cell_type": "markdown",
360 | "source": [
361 | "If you don't want to write ``conv1`` and the other layers more than once, you\n",
362 | "can also write the model like in this way:"
363 | ]
364 | },
365 | {
366 | "metadata": {
367 | "id": "uIBiqO44PfMW",
368 | "colab_type": "code",
369 | "colab": {
370 | "autoexec": {
371 | "startup": false,
372 | "wait_interval": 0
373 | }
374 | }
375 | },
376 | "cell_type": "code",
377 | "source": [
378 | " class LeNet5(Chain):\n",
379 | " def __init__(self):\n",
380 | " super(LeNet5, self).__init__()\n",
381 | " net = [('conv1', L.Convolution2D(1, 6, 5, 1))]\n",
382 | " net += [('_sigm1', F.Sigmoid())]\n",
383 | " net += [('_mpool1', F.MaxPooling2D(2, 2))]\n",
384 | " net += [('conv2', L.Convolution2D(6, 16, 5, 1))]\n",
385 | " net += [('_sigm2', F.Sigmoid())]\n",
386 | " net += [('_mpool2', F.MaxPooling2D(2, 2))]\n",
387 | " net += [('conv3', L.Convolution2D(16, 120, 4, 1))]\n",
388 | " net += [('_sigm3', F.Sigmoid())]\n",
389 | " net += [('_mpool3', F.MaxPooling2D(2, 2))]\n",
390 | " net += [('fc4', L.Linear(None, 84))]\n",
391 | " net += [('_sigm4', F.Sigmoid())]\n",
392 | " net += [('fc5', L.Linear(84, 10))]\n",
393 | " net += [('_sigm5', F.Sigmoid())]\n",
394 | " with self.init_scope():\n",
395 | " for n in net:\n",
396 | " if not n[0].startswith('_'):\n",
397 | " setattr(self, n[0], n[1])\n",
398 | " self.forward = net\n",
399 | "\n",
400 | " def __call__(self, x):\n",
401 | " for n, f in self.forward:\n",
402 | " if not n.startswith('_'):\n",
403 | " x = getattr(self, n)(x)\n",
404 | " else:\n",
405 | " x = f(x)\n",
406 | " if chainer.config.train:\n",
407 | " return x\n",
408 | " return F.softmax(x)"
409 | ],
410 | "execution_count": 0,
411 | "outputs": []
412 | },
413 | {
414 | "metadata": {
415 | "id": "coslyHOtQGxm",
416 | "colab_type": "text"
417 | },
418 | "cell_type": "markdown",
419 | "source": [
420 | "This code creates a list of all `chainer.Link`\\ s and\n",
421 | "`chainer.FunctionNode`\\ s after calling its superclass's constructor.\n",
422 | "\n",
423 | "Then the elements of the list are registered to this model as\n",
424 | "trainable layers when the name of an element doesn't start with ``_``\n",
425 | "character. This operation can be freely replaced with many other ways because\n",
426 | "those names are just designed to select `chainer.Link`\\ s only from the\n",
427 | "list ``net`` easily. `chainer.FunctionNode` doesn't have any trainable\n",
428 | "parameters, so that we can't register it to the model, but we want to use\n",
429 | "`chainer.FunctionNode`\\ s for constructing a forward path. The list\n",
430 | "``net`` is stored as an attribute `forward` to refer it in\n",
431 | "`__call__`. In `__call__`, it retrieves all layers in the network\n",
432 | "from `self.forward` sequentially regardless of what types of object (\n",
433 | "`chainer.Link` or `chainer.FunctionNode`) it is, and gives the\n",
434 | "input variable or the intermediate output from the previous layer to the\n",
435 | "current layer. The last part of the `__call__` to switch its behavior\n",
436 | "by the training/inference mode is the same as the former way."
437 | ]
438 | },
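{
"metadata": {
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"A quick way to see the effect of this registration (a small sketch): only the\n",
"entries whose names do not start with ``_`` become attributes, and thus\n",
"trainable links, of the model:"
]
},
{
"metadata": {
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"model = LeNet5()\n",
"print(hasattr(model, 'conv1'))   # True: registered via setattr inside init_scope\n",
"print(hasattr(model, '_sigm1'))  # False: functions carry no parameters\n",
"print([name for name, _ in model.namedlinks(skipself=True)])"
],
"execution_count": 0,
"outputs": []
},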
439 | {
440 | "metadata": {
441 | "id": "3OtpUJ_cQP1Y",
442 | "colab_type": "text"
443 | },
444 | "cell_type": "markdown",
445 | "source": [
446 | "### Ways to calculate loss\n",
447 | "\n",
448 | "When you train the model with label vector ``t``, the loss should be calculated\n",
449 | "using the output from the model. There also are several ways to calculate the\n",
450 | "loss:"
451 | ]
452 | },
453 | {
454 | "metadata": {
455 | "id": "CCziSWwtQLY7",
456 | "colab_type": "code",
457 | "colab": {
458 | "autoexec": {
459 | "startup": false,
460 | "wait_interval": 0
461 | }
462 | }
463 | },
464 | "cell_type": "code",
465 | "source": [
466 | "def do():\n",
467 | " model = LeNet5()\n",
468 | "\n",
469 | " # Input data and label\n",
470 | " x = np.random.rand(32, 1, 28, 28).astype(np.float32)\n",
471 | " t = np.random.randint(0, 10, size=(32,)).astype(np.int32)\n",
472 | "\n",
473 | " # Forward computation\n",
474 | " y = model(x)\n",
475 | "\n",
476 | " # Loss calculation\n",
477 | " loss = F.softmax_cross_entropy(y, t)"
478 | ],
479 | "execution_count": 0,
480 | "outputs": []
481 | },
482 | {
483 | "metadata": {
484 | "id": "pAuoNPUjbmFL",
485 | "colab_type": "text"
486 | },
487 | "cell_type": "markdown",
488 | "source": [
489 | "This is a primitive way to calculate a loss value from the output of the model.\n",
490 | "\n",
491 | "On the other hand, the loss computation can be included in the model itself by\n",
492 | "wrapping the model object (`chainer.Chain` or\n",
493 | "`chainer.ChainList` object) with a class inherited from\n",
494 | "`chainer.Chain`. The outer `chainer.Chain` should take the\n",
495 | "model defined above and register it with `chainer.Chain.init_scope`.\n",
496 | "\n",
497 | "`chainer.Chain` is actually\n",
498 | "inherited from `chainer.Link`, so that `chainer.Chain` itself\n",
499 | "can also be registered as a trainable `chainer.Link` to another\n",
500 | "`chainer.Chain`. Actually, `chainer.links.Classifier` class to\n",
501 | "wrap the model and add the loss computation to the model already exists.\n",
502 | "\n",
503 | "Actually, there is already a `chainer.links.Classifier` class that can\n",
504 | "be used to wrap the model and include the loss computation as well.\n",
505 | "\n",
506 | "It can be used like this\n"
507 | ]
508 | },
509 | {
510 | "metadata": {
511 | "id": "5AMnm8vJQVgy",
512 | "colab_type": "code",
513 | "colab": {
514 | "autoexec": {
515 | "startup": false,
516 | "wait_interval": 0
517 | }
518 | }
519 | },
520 | "cell_type": "code",
521 | "source": [
522 | "def do():\n",
523 | " model = L.Classifier(LeNet5())\n",
524 | "\n",
525 | " # Foward & Loss calculation\n",
526 | " loss = model(x, t)"
527 | ],
528 | "execution_count": 0,
529 | "outputs": []
530 | },
531 | {
532 | "metadata": {
533 | "id": "M7_DiDKzb9PH",
534 | "colab_type": "text"
535 | },
536 | "cell_type": "markdown",
537 | "source": [
538 | "This class takes a model object as an input argument and registers it to\n",
539 | "a ``predictor`` property as a trained parameter. As shown above, the returned\n",
540 | "object can then be called like a function in which we pass ``x`` and ``t`` as\n",
541 | "the input arguments and the resulting loss value (which we recall is a\n",
542 | "`chainer.Variable`) is returned.\n",
543 | "\n",
544 | "See the detailed implementation of `chainer.links.Classifier` from\n",
545 | "hereclass`chainer.links.Classifier` and check the implementation by looking\n",
546 | "at the source.\n",
547 | "\n",
548 | "From the above examples, we can see that Chainer provides the flexibility to\n",
549 | "write our original network in many different ways. Such flexibility intends to\n",
550 | "make it intuitive for users to design new and complex models.\n",
551 | "\n"
552 | ]
553 | },
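{
"metadata": {
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"A minimal sketch of such a wrapper, essentially what `chainer.links.Classifier`\n",
"does internally (the class name `MyClassifier` is our own illustration, not part\n",
"of Chainer):"
]
},
{
"metadata": {
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"class MyClassifier(Chain):\n",
"    def __init__(self, predictor):\n",
"        super(MyClassifier, self).__init__()\n",
"        with self.init_scope():\n",
"            self.predictor = predictor  # registered as a trainable sub-link\n",
"\n",
"    def __call__(self, x, t):\n",
"        y = self.predictor(x)\n",
"        loss = F.softmax_cross_entropy(y, t)\n",
"        report({'loss': loss, 'accuracy': F.accuracy(y, t)}, self)\n",
"        return loss\n",
"\n",
"model = MyClassifier(LeNet5())"
],
"execution_count": 0,
"outputs": []
},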
554 | {
555 | "metadata": {
556 | "id": "WOurFBsBcAWx",
557 | "colab_type": "text"
558 | },
559 | "cell_type": "markdown",
560 | "source": [
561 | "### VGG16\n",
562 | "\n",
563 | "Next, let's write some larger models in Chainer. When you write a large network\n",
564 | "consisting of several building block networks, :class:`chainer.ChainList` is\n",
565 | "useful. First, let's see how to write a VGG16 [Simonyan14] model.\n"
566 | ]
567 | },
568 | {
569 | "metadata": {
570 | "id": "T_D2bGV9byTj",
571 | "colab_type": "code",
572 | "colab": {
573 | "autoexec": {
574 | "startup": false,
575 | "wait_interval": 0
576 | }
577 | }
578 | },
579 | "cell_type": "code",
580 | "source": [
581 | "class VGG16(chainer.ChainList):\n",
582 | " def __init__(self):\n",
583 | " super(VGG16, self).__init__(\n",
584 | " VGGBlock(64),\n",
585 | " VGGBlock(128),\n",
586 | " VGGBlock(256, 3),\n",
587 | " VGGBlock(512, 3),\n",
588 | " VGGBlock(512, 3, True))\n",
589 | "\n",
590 | " def __call__(self, x):\n",
591 | " for f in self.children():\n",
592 | " x = f(x)\n",
593 | " if chainer.config.train:\n",
594 | " return x\n",
595 | " return F.softmax(x)\n",
596 | "\n",
597 | "\n",
598 | "class VGGBlock(chainer.Chain):\n",
599 | " def __init__(self, n_channels, n_convs=2, fc=False):\n",
600 | " w = chainer.initializers.HeNormal()\n",
601 | " super(VGGBlock, self).__init__()\n",
602 | " with self.init_scope():\n",
603 | " self.conv1 = L.Convolution2D(None, n_channels, 3, 1, 1, initialW=w)\n",
604 | " self.conv2 = L.Convolution2D(\n",
605 | " n_channels, n_channels, 3, 1, 1, initialW=w)\n",
606 | " if n_convs == 3:\n",
607 | " self.conv3 = L.Convolution2D(\n",
608 | " n_channels, n_channels, 3, 1, 1, initialW=w)\n",
609 | " if fc:\n",
610 | " self.fc4 = L.Linear(None, 4096, initialW=w)\n",
611 | " self.fc5 = L.Linear(4096, 4096, initialW=w)\n",
612 | " self.fc6 = L.Linear(4096, 1000, initialW=w)\n",
613 | "\n",
614 | " self.n_convs = n_convs\n",
615 | " self.fc = fc\n",
616 | "\n",
617 | " def __call__(self, x):\n",
618 | " h = F.relu(self.conv1(x))\n",
619 | " h = F.relu(self.conv2(h))\n",
620 | " if self.n_convs == 3:\n",
621 | " h = F.relu(self.conv3(h))\n",
622 | " h = F.max_pooling_2d(h, 2, 2)\n",
623 | " if self.fc:\n",
624 | " h = F.dropout(F.relu(self.fc4(h)))\n",
625 | " h = F.dropout(F.relu(self.fc5(h)))\n",
626 | " h = self.fc6(h)\n",
627 | " return h"
628 | ],
629 | "execution_count": 0,
630 | "outputs": []
631 | },
632 | {
633 | "metadata": {
634 | "id": "fCP9FOHQdAV6",
635 | "colab_type": "text"
636 | },
637 | "cell_type": "markdown",
638 | "source": [
639 | "That's it. VGG16 is a model which won the 1st place in\n",
640 | "[classification + localization task at ILSVRC 2014](http://www.image-net.org/challenges/LSVRC/2014/results#clsloc,\n",
641 | "and since then, has become one of the standard models for many different tasks\n",
642 | "as a pre-trained model.\n",
643 | "\n",
644 | "This has 16-layers, so it's called \"VGG-16\", but we can\n",
645 | "write this model without writing all layers independently. Since this model\n",
646 | "consists of several building blocks that have the same architecture, we can\n",
647 | "build the whole network by re-using the building block definition.\n",
648 | "\n",
649 | "Each part of the network is consisted of 2 or 3 convolutional layers and activation\n",
650 | "function (`chainer.functions.relu`) following them, and\n",
651 | "`chainer.functions.max_pooling_2d` operations. This block is written as\n",
652 | "`VGGBlock` in the above example code. And the whole network just calls\n",
653 | "this block one by one in sequential manner.\n"
654 | ]
655 | },
656 | {
657 | "metadata": {
658 | "id": "oOyKzj-7c7zD",
659 | "colab_type": "code",
660 | "colab": {
661 | "autoexec": {
662 | "startup": false,
663 | "wait_interval": 0
664 | }
665 | }
666 | },
667 | "cell_type": "code",
668 | "source": [
669 | ""
670 | ],
671 | "execution_count": 0,
672 | "outputs": []
673 | },
674 | {
675 | "metadata": {
676 | "id": "TuwSx9lXdh0P",
677 | "colab_type": "text"
678 | },
679 | "cell_type": "markdown",
680 | "source": [
681 | "### ResNet152\n",
682 | "\n",
683 | "\n",
684 | "How about ResNet? ResNet [He16] came in the following year's ILSVRC. It is a\n",
685 | "much deeper model than VGG16, having up to 152 layers. This sounds super\n",
686 | "laborious to build, but it can be implemented in almost same manner as VGG16.\n",
687 | "\n",
688 | "In the other words, it's easy. One possible way to write ResNet-152 is:\n"
689 | ]
690 | },
691 | {
692 | "metadata": {
693 | "id": "QbPfVL_ydqA4",
694 | "colab_type": "code",
695 | "colab": {
696 | "autoexec": {
697 | "startup": false,
698 | "wait_interval": 0
699 | }
700 | }
701 | },
702 | "cell_type": "code",
703 | "source": [
704 | "class ResNet152(chainer.Chain):\n",
705 | " def __init__(self, n_blocks=[3, 8, 36, 3]):\n",
706 | " w = chainer.initializers.HeNormal()\n",
707 | " super(ResNet152, self).__init__()\n",
708 | " with self.init_scope():\n",
709 | " self.conv1 = L.Convolution2D(None, 64, 7, 2, 3, initialW=w, nobias=True)\n",
710 | " self.bn1 = L.BatchNormalization(64)\n",
711 | " self.res2 = ResBlock(n_blocks[0], 64, 64, 256, 1)\n",
712 | " self.res3 = ResBlock(n_blocks[1], 256, 128, 512)\n",
713 | " self.res4 = ResBlock(n_blocks[2], 512, 256, 1024)\n",
714 | " self.res5 = ResBlock(n_blocks[3], 1024, 512, 2048)\n",
715 | " self.fc6 = L.Linear(2048, 1000)\n",
716 | "\n",
717 | " def __call__(self, x):\n",
718 | " h = self.bn1(self.conv1(x))\n",
719 | " h = F.max_pooling_2d(F.relu(h), 2, 2)\n",
720 | " h = self.res2(h)\n",
721 | " h = self.res3(h)\n",
722 | " h = self.res4(h)\n",
723 | " h = self.res5(h)\n",
724 | " h = F.average_pooling_2d(h, h.shape[2:], stride=1)\n",
725 | " h = self.fc6(h)\n",
726 | " if chainer.config.train:\n",
727 | " return h\n",
728 | " return F.softmax(h)\n",
729 | "\n",
730 | "\n",
731 | "class ResBlock(chainer.ChainList):\n",
732 | " def __init__(self, n_layers, n_in, n_mid, n_out, stride=2):\n",
733 | " super(ResBlock, self).__init__()\n",
734 | " self.add_link(BottleNeck(n_in, n_mid, n_out, stride, True))\n",
735 | " for _ in range(n_layers - 1):\n",
736 | " self.add_link(BottleNeck(n_out, n_mid, n_out))\n",
737 | "\n",
738 | " def __call__(self, x):\n",
739 | " for f in self.children():\n",
740 | " x = f(x)\n",
741 | " return x\n",
742 | "\n",
743 | "\n",
744 | "class BottleNeck(chainer.Chain):\n",
745 | " def __init__(self, n_in, n_mid, n_out, stride=1, proj=False):\n",
746 | " w = chainer.initializers.HeNormal()\n",
747 | " super(BottleNeck, self).__init__()\n",
748 | " with self.init_scope():\n",
749 | " self.conv1x1a = L.Convolution2D(\n",
750 | " n_in, n_mid, 1, stride, 0, initialW=w, nobias=True)\n",
751 | " self.conv3x3b = L.Convolution2D(\n",
752 | " n_mid, n_mid, 3, 1, 1, initialW=w, nobias=True)\n",
753 | " self.conv1x1c = L.Convolution2D(\n",
754 | " n_mid, n_out, 1, 1, 0, initialW=w, nobias=True)\n",
755 | " self.bn_a = L.BatchNormalization(n_mid)\n",
756 | " self.bn_b = L.BatchNormalization(n_mid)\n",
757 | " self.bn_c = L.BatchNormalization(n_out)\n",
758 | " if proj:\n",
759 | " self.conv1x1r = L.Convolution2D(\n",
760 | " n_in, n_out, 1, stride, 0, initialW=w, nobias=True)\n",
761 | " self.bn_r = L.BatchNormalization(n_out)\n",
762 | " self.proj = proj\n",
763 | "\n",
764 | " def __call__(self, x):\n",
765 | " h = F.relu(self.bn_a(self.conv1x1a(x)))\n",
766 | " h = F.relu(self.bn_b(self.conv3x3b(h)))\n",
767 | " h = self.bn_c(self.conv1x1c(h))\n",
768 | " if self.proj:\n",
769 | " x = self.bn_r(self.conv1x1r(x))\n",
770 | " return F.relu(h + x)"
771 | ],
772 | "execution_count": 0,
773 | "outputs": []
774 | },
775 | {
776 | "metadata": {
777 | "id": "isqRzDSedxsh",
778 | "colab_type": "text"
779 | },
780 | "cell_type": "markdown",
781 | "source": [
782 | "In the `BottleNeck` class, depending on the value of the proj argument\n",
783 | "supplied to the initializer, it will conditionally compute a convolutional\n",
784 | "layer ``conv1x1r`` which will extend the number of channels of the input ``x``\n",
785 | "to be equal to the number of channels of the output of ``conv1x1c``, and\n",
786 | "followed by a batch normalization layer before the final ReLU layer.\n",
787 | "\n",
788 | "Writing the building block in this way improves the re-usability of a class.\n",
789 | "It switches not only the behavior in `__class__` by flags but also the\n",
790 | "parameter registration. In this case, when `proj` is ``False``, the\n",
791 | "`BottleNeck` doesn't have `conv1x1r` and `bn_r` layers, so the memory\n",
792 | "usage would be efficient compared to the case when it registers both anyway and\n",
793 | "just ignore them if `proj` is ``False``.\n",
794 | "\n",
795 | "Using nested `chainer.Chain` s and `chainer.ChainList` for\n",
796 | "sequential part enables us to write complex and very deep models easily.\n",
797 | "\n"
798 | ]
799 | },
800 | {
801 | "metadata": {
802 | "id": "-l9qikeyd6Dm",
803 | "colab_type": "text"
804 | },
805 | "cell_type": "markdown",
806 | "source": [
807 | "### Use Pre-trained Models\n",
808 | "\n",
809 | "Various ways to write your models were described above. It turns out that\n",
810 | "VGG16 and ResNet are very useful as general feature extractors for many kinds\n",
811 | "of tasks, including but not limited to image classification. So, Chainer\n",
812 | "provides you with the pre-trained VGG16 and ResNet-50/101/152 models with a\n",
813 | "simple API. You can use these models as follows:\n"
814 | ]
815 | },
816 | {
817 | "metadata": {
818 | "id": "qNy9pa3PduRO",
819 | "colab_type": "code",
820 | "colab": {
821 | "autoexec": {
822 | "startup": false,
823 | "wait_interval": 0
824 | },
825 | "output_extras": [
826 | {
827 | "item_id": 1
828 | },
829 | {
830 | "item_id": 2
831 | }
832 | ],
833 | "base_uri": "https://localhost:8080/",
834 | "height": 51
835 | },
836 | "outputId": "bf39d310-0376-4b66-af09-81e79dcfe39f",
837 | "executionInfo": {
838 | "status": "ok",
839 | "timestamp": 1520062360752,
840 | "user_tz": -540,
841 | "elapsed": 90387,
842 | "user": {
843 | "displayName": "Yasuki IKEUCHI",
844 | "photoUrl": "//lh6.googleusercontent.com/-QrqZkF-x2us/AAAAAAAAAAI/AAAAAAAAAAA/PzOMhw6mH3o/s50-c-k-no/photo.jpg",
845 | "userId": "110487681503898314699"
846 | }
847 | }
848 | },
849 | "cell_type": "code",
850 | "source": [
851 | "from chainer.links import VGG16Layers\n",
852 | "model = VGG16Layers()"
853 | ],
854 | "execution_count": 9,
855 | "outputs": [
856 | {
857 | "output_type": "stream",
858 | "text": [
859 | "Downloading from http://www.robots.ox.ac.uk/%7Evgg/software/very_deep/caffe/VGG_ILSVRC_16_layers.caffemodel...\n"
860 | ],
861 | "name": "stderr"
862 | },
863 | {
864 | "output_type": "stream",
865 | "text": [
866 | "Now loading caffemodel (usually it may take few minutes)\n"
867 | ],
868 | "name": "stdout"
869 | }
870 | ]
871 | },
872 | {
873 | "metadata": {
874 | "id": "mgkF7PjbeNg0",
875 | "colab_type": "text"
876 | },
877 | "cell_type": "markdown",
878 | "source": [
879 | "When `chainer.links.VGG16Layers` is instantiated, the pre-trained\n",
880 | "parameters are automatically downloaded from the author's server. So you can\n",
881 | "immediately start to use VGG16 with pre-trained weight as a good image feature\n",
882 | "extractor. See the details of this model here:\n",
883 | "`chainer.links.VGG16Layers`.\n",
884 | "\n",
885 | "In the case of ResNet models, there are three variations differing in the number\n",
886 | "of layers. We have `chainer.links.ResNet50Layers`,\n",
887 | "`chainer.links.ResNet101Layers`, and `chainer.links.ResNet152Layers` models\n",
888 | "with easy parameter loading feature. ResNet's pre-trained parameters are not\n",
889 | "available for direct downloading, so you need to download the weight from the\n",
890 | "author's web page first, and then place it into the dir\n",
891 | "``$CHAINER_DATSET_ROOT/pfnet/chainer/models`` or your favorite place. Once\n",
892 | "the preparation is finished, the usage is the same as VGG16:\n"
893 | ]
894 | },
895 | {
896 | "metadata": {
897 | "id": "XX-M8jirrjv6",
898 | "colab_type": "text"
899 | },
900 | "cell_type": "markdown",
901 | "source": [
902 | "**To use on Colaboratory, you need to upload ResNet's pre-trained parameters to your Google drive.**\n",
903 | "\n",
904 | "1. Please download pre-trained model name `ResNet-152-model.caffemodel` to your local. You can find the link from https://github.com/KaimingHe/deep-residual-networks#models\n",
905 | "2. Please upload `ResNet-152-model.caffemodel` to **root of your Google drive**.\n",
906 | "3. Please run below code\n",
907 | "4. Please input verification code (It is required at first time)\n",
908 | "\n",
909 | "The below code shows 1) authentication, 2) list your root of Google drive and find `ResNet-152-model.caffemodel`, 3) download. For more information, please refer to [PyDrive documentation](https://googledrive.github.io/PyDrive/docs/build/html/index.html)"
910 | ]
911 | },
912 | {
913 | "metadata": {
914 | "id": "pDAiXqZremwV",
915 | "colab_type": "code",
916 | "colab": {
917 | "autoexec": {
918 | "startup": false,
919 | "wait_interval": 0
920 | }
921 | }
922 | },
923 | "cell_type": "code",
924 | "source": [
925 | "!pip install -U -q PyDrive\n",
926 | "\n",
927 | "import os\n",
928 | "\n",
929 | "PATH= os.path.expanduser('.chainer/dataset/pfnet/chainer/models')\n",
930 | "FILE = 'ResNet-152-model.caffemodel'\n",
931 | "\n",
932 | "from pydrive.auth import GoogleAuth\n",
933 | "from pydrive.drive import GoogleDrive\n",
934 | "from google.colab import auth\n",
935 | "from oauth2client.client import GoogleCredentials\n",
936 | "\n",
937 | "# 1. Authenticate and create the PyDrive client.\n",
938 | "auth.authenticate_user()\n",
939 | "gauth = GoogleAuth()\n",
940 | "gauth.credentials = GoogleCredentials.get_application_default()\n",
941 | "drive = GoogleDrive(gauth)\n",
942 | "\n",
943 | "# PyDrive reference:\n",
944 | "# https://googledrive.github.io/PyDrive/docs/build/html/index.html\n",
945 | "\n",
946 | "\n",
947 | "file_list = drive.ListFile({'q': \"'root' in parents and trashed=false\"}).GetList()\n",
948 | "id = None\n",
949 | "for file in file_list:\n",
950 | " if file['title'] == FILE:\n",
951 | " id = file['id']\n",
952 | " break\n",
953 | "\n",
954 | "if id is None:\n",
955 | " print(\"{} is not Found\".format(FILE))\n",
956 | "else:\n",
957 | " file = drive.CreateFile({'id': id})\n",
958 | " file.GetContentFile(os.path.join(PATH, FILE))"
959 | ],
960 | "execution_count": 0,
961 | "outputs": []
962 | },
963 | {
964 | "metadata": {
965 | "id": "PxCr-8t1eBfm",
966 | "colab_type": "code",
967 | "colab": {
968 | "autoexec": {
969 | "startup": false,
970 | "wait_interval": 0
971 | },
972 | "output_extras": [
973 | {
974 | "item_id": 1
975 | }
976 | ],
977 | "base_uri": "https://localhost:8080/",
978 | "height": 34
979 | },
980 | "outputId": "ed4b55af-f44c-4c6f-a4d8-68b2c9515de3",
981 | "executionInfo": {
982 | "status": "ok",
983 | "timestamp": 1520062453955,
984 | "user_tz": -540,
985 | "elapsed": 11191,
986 | "user": {
987 | "displayName": "Yasuki IKEUCHI",
988 | "photoUrl": "//lh6.googleusercontent.com/-QrqZkF-x2us/AAAAAAAAAAI/AAAAAAAAAAA/PzOMhw6mH3o/s50-c-k-no/photo.jpg",
989 | "userId": "110487681503898314699"
990 | }
991 | }
992 | },
993 | "cell_type": "code",
994 | "source": [
995 | "from chainer.links import ResNet152Layers\n",
996 | "\n",
997 | "model = ResNet152Layers()"
998 | ],
999 | "execution_count": 11,
1000 | "outputs": [
1001 | {
1002 | "output_type": "stream",
1003 | "text": [
1004 | "Now loading caffemodel (usually it may take few minutes)\n"
1005 | ],
1006 | "name": "stdout"
1007 | }
1008 | ]
1009 | },
1010 | {
1011 | "metadata": {
1012 | "id": "oZ1ugwsueR-Z",
1013 | "colab_type": "text"
1014 | },
1015 | "cell_type": "markdown",
1016 | "source": [
1017 | "\n",
1018 | "Please see the details of usage and how to prepare the pre-trained weights for\n",
1019 | "ResNet here: `chainer.links.ResNet50Layers`\n",
1020 | "\n",
1021 | "\n",
1022 | "### References\n",
1023 | "\n",
1024 | "* [LeCun98] Yann LeCun, Léon Bottou, Yoshua Bengio, and Patrick Haffner.\n",
1025 | " Gradient-based learning applied to document recognition. Proceedings of the\n",
1026 | " IEEE, 86(11), 2278–2324, 1998.\n",
1027 | "* [Simonyan14] Simonyan, K. and Zisserman, A., Very Deep Convolutional\n",
1028 | " Networks for Large-Scale Image Recognition. arXiv preprint arXiv:1409.1556,\n",
1029 | " 2014.\n",
1030 | "* [He16] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. Deep Residual\n",
1031 | " Learning for Image Recognition. The IEEE Conference on Computer Vision and\n",
1032 | " Pattern Recognition (CVPR), pp. 770-778, 2016.\n",
1033 | " "
1034 | ]
1035 | }
1036 | ]
1037 | }
1038 |
--------------------------------------------------------------------------------
/example/cupy/prml/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 ctgk
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/hands_on_en/chainerrl/README.md:
--------------------------------------------------------------------------------
1 | ChainerRL Quickstart guide
2 | -----
3 |
4 | This quickstart guide is forked from [chainer/chainerrl/example/quickstart/quickstart.ipynb](https://github.com/chainer/chainerrl/blob/23f201b556f3010316c996d7847efe841c321ea7/examples/quickstart/quickstart.ipynb).
5 |
6 | License is [here](https://github.com/chainer/chainerrl/blob/23f201b556f3010316c996d7847efe841c321ea7/LICENSE).
7 |
--------------------------------------------------------------------------------
/hands_on_ja/chainer/begginers_hands_on/11_Write_the_training_loop.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "1-Write-the-training-loop_ja.ipynb",
7 | "version": "0.3.2",
8 | "views": {},
9 | "default_view": {},
10 | "provenance": [],
11 | "collapsed_sections": []
12 | },
13 | "kernelspec": {
14 | "display_name": "Python [default]",
15 | "language": "python",
16 | "name": "python3"
17 | },
18 | "accelerator": "GPU"
19 | },
20 | "cells": [
21 | {
22 | "metadata": {
23 | "id": "6wYcO_JXa7kj",
24 | "colab_type": "text"
25 | },
26 | "cell_type": "markdown",
27 | "source": [
28 | "# 学習ループを書いてみよう\n",
29 | "\n",
30 | "ここでは、\n",
31 | "\n",
32 | "1. データセットからデータを取り出す\n",
33 | "2. モデルに入力する\n",
34 | "3. Optimizerを使ってモデルのパラメータを更新して学習を行うループを回す\n",
35 | "\n",
36 | "ことをやってみます。このノートから得られるものは、`Trainer`を使わない学習ループの書き方です。"
37 | ]
38 | },
39 | {
40 | "metadata": {
41 | "id": "1ZN45x4ebGge",
42 | "colab_type": "code",
43 | "colab": {
44 | "autoexec": {
45 | "startup": false,
46 | "wait_interval": 0
47 | },
48 | "output_extras": [
49 | {
50 | "item_id": 9
51 | }
52 | ],
53 | "base_uri": "https://localhost:8080/",
54 | "height": 332
55 | },
56 | "outputId": "650465ac-e6a5-42f1-c85c-4810e518d021",
57 | "executionInfo": {
58 | "status": "ok",
59 | "timestamp": 1518687218770,
60 | "user_tz": -540,
61 | "elapsed": 8182,
62 | "user": {
63 | "displayName": "keisuke umezawa",
64 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128",
65 | "userId": "103766488840000528829"
66 | }
67 | }
68 | },
69 | "cell_type": "code",
70 | "source": [
71 | "# Install Chainer and CuPy!\n",
72 | "\n",
73 | "!curl https://colab.chainer.org/install | sh -"
74 | ],
75 | "execution_count": 1,
76 | "outputs": [
77 | {
78 | "output_type": "stream",
79 | "text": [
80 | "Reading package lists... Done\n",
81 | "Building dependency tree \n",
82 | "Reading state information... Done\n",
83 | "libcusparse8.0 is already the newest version (8.0.61-1).\n",
84 | "libnvrtc8.0 is already the newest version (8.0.61-1).\n",
85 | "libnvtoolsext1 is already the newest version (8.0.61-1).\n",
86 | "0 upgraded, 0 newly installed, 0 to remove and 1 not upgraded.\n",
87 | "Requirement already satisfied: cupy-cuda80==4.0.0b3 from https://github.com/kmaehashi/chainer-colab/releases/download/2018-02-06/cupy_cuda80-4.0.0b3-cp36-cp36m-linux_x86_64.whl in /usr/local/lib/python3.6/dist-packages\n",
88 | "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.6/dist-packages (from cupy-cuda80==4.0.0b3)\n",
89 | "Requirement already satisfied: numpy>=1.9.0 in /usr/local/lib/python3.6/dist-packages (from cupy-cuda80==4.0.0b3)\n",
90 | "Requirement already satisfied: fastrlock>=0.3 in /usr/local/lib/python3.6/dist-packages (from cupy-cuda80==4.0.0b3)\n",
91 | "Requirement already satisfied: chainer==4.0.0b3 in /usr/local/lib/python3.6/dist-packages\n",
92 | "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from chainer==4.0.0b3)\n",
93 | "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.6/dist-packages (from chainer==4.0.0b3)\n",
94 | "Requirement already satisfied: numpy>=1.9.0 in /usr/local/lib/python3.6/dist-packages (from chainer==4.0.0b3)\n",
95 | "Requirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from chainer==4.0.0b3)\n",
96 | "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from protobuf>=3.0.0->chainer==4.0.0b3)\n"
97 | ],
98 | "name": "stdout"
99 | }
100 | ]
101 | },
102 | {
103 | "metadata": {
104 | "id": "cx-pv0Oda7kk",
105 | "colab_type": "text"
106 | },
107 | "cell_type": "markdown",
108 | "source": [
109 | "## 1. データセットの準備\n",
110 | "\n",
111 | "ここでは、Chainerが用意しているMNISTデータセットを使うための便利なメソッドを利用します。これを使うと、データのダウンロードから、一つ一つのデータを取り出せるようにするところまでが隠蔽されます。"
112 | ]
113 | },
114 | {
115 | "metadata": {
116 | "id": "sTEqFAsua7kl",
117 | "colab_type": "code",
118 | "slideshow": {
119 | "slide_type": "-"
120 | },
121 | "colab": {
122 | "autoexec": {
123 | "startup": false,
124 | "wait_interval": 0
125 | },
126 | "output_extras": [
127 | {
128 | "item_id": 1
129 | },
130 | {
131 | "item_id": 2
132 | },
133 | {
134 | "item_id": 3
135 | }
136 | ],
137 | "base_uri": "https://localhost:8080/",
138 | "height": 337
139 | },
140 | "outputId": "51b59c0c-c138-4cb4-95c3-b5438d4d30ef",
141 | "executionInfo": {
142 | "status": "ok",
143 | "timestamp": 1518687220478,
144 | "user_tz": -540,
145 | "elapsed": 1687,
146 | "user": {
147 | "displayName": "keisuke umezawa",
148 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128",
149 | "userId": "103766488840000528829"
150 | }
151 | }
152 | },
153 | "cell_type": "code",
154 | "source": [
155 | "from chainer.datasets import mnist\n",
156 | "\n",
157 | "# データセットがダウンロード済みでなければ、ダウンロードも行う\n",
158 | "train, test = mnist.get_mnist(withlabel=True, ndim=1)\n",
159 | "\n",
160 | "# matplotlibを使ったグラフ描画結果がnotebook内に表示されるようにします。\n",
161 | "%matplotlib inline\n",
162 | "import matplotlib.pyplot as plt\n",
163 | "\n",
164 | "# データの例示\n",
165 | "x, t = train[0]\n",
166 | "plt.imshow(x.reshape(28, 28), cmap='gray')\n",
167 | "plt.show()\n",
168 | "print('label:', t)"
169 | ],
170 | "execution_count": 2,
171 | "outputs": [
172 | {
173 | "output_type": "stream",
174 | "text": [
175 | "/usr/local/lib/python3.6/dist-packages/cupy/core/fusion.py:659: FutureWarning: cupy.core.fusion is experimental. The interface can change in the future.\n",
176 | " util.experimental('cupy.core.fusion')\n"
177 | ],
178 | "name": "stderr"
179 | },
180 | {
181 | "output_type": "display_data",
182 | "data": {
183 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPoAAAD4CAYAAADFJPs2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAD9hJREFUeJzt3XuMVGWax/FvN4asEBVxssKwYwju\n5HG12pBh1cF4gVWGwegqlwkhXogSmWxkYrIZE0f9Q0hwzCC6QdgJk9n1toHgLQM4o4KA8ocJKyqm\ne5g8O04mJkJPUIaWm6JA7R9d3dNV9Hmr6vSpqgPv75N0rHOeOqceq/tHnXPeqnrbisUiInJma291\nAyLSeAq6SAQUdJEIKOgiEVDQRWJQLBYb/gMUB/50dnYWK9fl5Ue9qbfTta9QBtvSDq+Z2VPA90sP\ncr+7v5d037a2trIHKRaLtLW1pXrcRlNv6ai3+mXdV7FYTNxZqkN3M7se+K67TwYWACtS9iYiTZD2\nHP0G4DcA7v4H4HwzOzezrkQkU2el3G4M8P6A5c9K6w4OdufOzk4KhULZujy/I0+9paPe6tesvtIG\nvVLwRKOjo6NsOa/nTKDe0lJv9WvAOXpiLe2h+156X8H7fBvoTrkvEWmwtEHfBMwBMLPvAXvd/VBm\nXYlIplIF3d3fBd43s3fpveJ+X6ZdiUimUo+j1/UgGkfPhHpLJ6+95X4cXUROLwq6SAQUdJEIKOgi\nEVDQRSKgoItEQEEXiYCCLhIBBV0kAgq6SAQUdJEIKOgiEVDQRSKgoItEQEEXiYCCLhIBBV0kAgq6\nSAQUdJEIKOgiEVDQRSKgoItEQEEXiYCCLhIBBV0kAgq6SAQUdJEIKOgiEVDQRSJwVqsbkMYYNmxY\nsH7eeedl/pijR4/uv71o0aLE+40YMSK4HzML1u+7LzxL9xNPPHHKujVr1gAwb9684LZfffVVsP74\n448H64sXLw7WWyVV0M1sCvAS8PvSqk53/0lWTYlItobyiv6Ou8/JrBMRaRido4tEoK1YLNa9UenQ\n/T+Bj4HRwGJ335x0/66urmKhUEjbo4jUpi2xkDLo44BrgBeBCcA24B/d/etBH6StrexBisUibW2J\nPbXUmdJbsy/G7d+/nwsuuKB/OU8X4+bNm8fatWv7b4c082Jc1n9rxWIxcWepztHdfQ+wrrT4JzP7\nCzAO+HOa/YlIY6U6Rzez283sp6XbY4ALgT1ZNiYi2Ul71X0DsMbMbgWGA/+WdNges4suuihYHz58\neLB+9dVXn7Lurrvu6r99zTXXJG47atSo4L5nz54drKfx2WefZbKfTz/9NFhfsWJFsD5z5sxT1s2d\nOxeAQ4cOBbf96KOPgvV33nknWM+rtIfuh4BbMu5FRBpEw2siEVDQRSKgoItEQEEXiYCCLhKBVO+M\nq/tBztB3xk2cODFY37p1a7Be77vT2tvbOXnyZF3bNEs9vVW73z333BOsHz58uOa+AF599VVmzZoF\nQHd3d/C+Bw4cCNbdva7HDmnmO+P0ii4SAQVdJAIKukgEFHSRCCjoIhFQ0EUioKCLREDj6BXq6W3g\n1xsPZseOHcH6hAkTau4LmjuOXq33np6esuUZM2bw+uuv9y9PnTo1cduvvw5/ojnrb7/J69+bxtFF\nJFMKukgEFHSRCCjoIhFQ0EUioKCLREBBF4mAxtErZNnbbbfdFqzffPPNwfqHH35Ytrxy5cqyGVCq\nfe1xyK5du4L16667Llg/cuRI2XLl83bZZZclbnv//fcH971w4cJgvV55/XvTOLqIZEpBF4mAgi4S\nAQVdJAIKukgEFHSRCCjoIhHQOHqFZvZ27rnnBuuVU/yePHmS9va//du8evXqxG0XLFgQ3Pcdd9wR\nrK9duzZYr6Tfaf2aOY5e07TJZlYA1gNPuftKM/sO8AIwDOgG7nT3Y1k0KyLZq3robmYjgaeBLQNW\nLwFWufu1wMdAeGoNEWmpWs7RjwE3AXsHrJsCbCjd3gjcmG1bIpKlqofu7n4cOG5mA1ePHHCovg8Y\nG9pHZ2cnhUKhbF0zrg2klefesvrOuDVr1gypPpg8P2957a1ZfdV0jl5F1asJHR0dZct5vTgCuhjX\nRxfjGq8BF+MSa2mH1w6b2dml2+MoP6wXkZxJG/S3gNml27OBN7JpR0Qaoeqhu5lNApYD44FvzGwO\ncDvwrJn9GPgEeK6RTZ6pDh48WPc2Aw/Pvvjii9SPfe+99wbr69atC9bzOk+7DK6Wi3Hv03uVvdK0\nzLsRkYbQW2BFIqCgi0RAQReJgIIuEgEFXSQC+phqhdOpt5EjRybed+PGjcF9XX/99cH6jBkzgvVN\nmzYFe8uTvPamr3sWkUwp6CIRUNBFIqCgi0RAQReJgIIuEgEFXSQCGkevcKb0dvHFFwfrH3zwQbDe\n09MTrG/btq1sef78+Tz33N8+rbxz587EbVetWhXcd9Z/k3n9nWocXUQypaCLREBBF4mAgi4SAQVd\nJAIKukgEFHSRCGgcvUIsvc2cOTNYf+aZZ4L1c845p2y5vb295q+Afuihh4L1559/Pljv7u6u6XH6\n5PV3qnF0EcmUgi4SAQVdJAIKukgEFHSRCCjoIhFQ0EUioHH0CuqtV6FQCNaffPLJsuVp06axefPm\n/uUbbrgh9WOvXr06WF+6dGmwvmfPnrLlvP5OmzmOXnXaZAAzKwDrgafcfaWZPQtMAvaX7rLM3X87\n1EZFpDGqBt3MRgJPA1sqSj9z99ca0pWIZKqWc/RjwE3A3gb3IiINUvM5upk9Cnw+4NB9DDAc2Acs\ncvfPk7bt6uoqVjvnE5EhG9o5+iBeAPa7+y4zexB4FFiUdOeOjo6y5bxeHAH11kcX4xqvARfjEmup\ngu7uA8/XNwC/TLMfEWmOVOPoZvaKmU0oLU4BujLrSEQyV/Uc3cwmAcuB8cA3wB56r8I/CBwFDgN3\nu/u+xAfROHom8tTbqFGjypYPHDjA+eef3798yy23JG5b7bPu1f4ft27dGqxPmzatbDlPz9tAuRpH\nd/f36X3VrvTKEHoSkSbSW2BFIqCgi0RAQReJgIIuEgEFXSQC+phqBfWWTj29HTt2LFg/66zwYNDx\n48eD9enTp5ctb9u2jalTpwLw9ttvV2+wSfR1zyKSKQVdJAIKukgEFHSRCCjoIhFQ0EUioKCLRCDt\nN8zIGe7yyy8P1ufMmXPKuiVLlvTfvuKKKxK3rTZOXs3u3buD9e3bt9e0LiZ6RReJgIIuEgEFXSQC\nCrpIBBR0kQgo6CIRUNBFIqBx9DOUmQXrixYlTqwDwKxZs4L1MWPGnLLu4Ycfrt5YDU6cOBGsd3d3\nB+snT56saV1M9IouEgEFXSQCCrpIBBR0kQgo6CIRUNBFIqCgi0RA4+g5NthY9cB18+bNS9y22jj5\n+PHjU/c1VDt37gzWly5dGqxv2
LAhy3aiUFPQzewXwLWl+/8ceA94ARgGdAN3unv4W/lFpGWqHrqb\n2VSg4O6TgR8C/wEsAVa5+7XAx8A9De1SRIaklnP07cCPSrd7gJHAFKDv+GkjcGPmnYlIZuqae83M\nFtJ7CD/d3f++tO5i4AV3vzppu66urmKhUBhqryISljj3Ws0X48zsVmAB8APgj7XsvE9HR0fZ8pky\nWWCjVV6M6+7uZuzYsf3LeboY197eXvMHR5p9MS5Pv9OBGjDJYmKtpuE1M5sOPAzMcPcvgMNmdnap\nPA7YO9QmRaRxqr6im9l5wDLgRnf/a2n1W8Bs4H9K/32jYR2exi688MJg/dJLLw3WV65cecq6LVu2\n9N++5JJL0jWWgR07dpQtT548uWzdsmXLErddv359cN+xf6S0EWo5dJ8LfAt4ccBnnOcDvzazHwOf\nAM81pj0RyULVoLv7r4BfDVKaln07ItIIegusSAQUdJEIKOgiEVDQRSKgoItEoK63wKZ+kLa2sgfJ\n6zuV4NTeRo8enXjf1atXB/c1ceLEYH3ChAl19VbPu8+qeffdd4P15cuXB+tvvvlm2fLRo0cZMWJE\n//KXX36ZvrmM5fXvrQHvjEvcmV7RRSKgoItEQEEXiYCCLhIBBV0kAgq6SAQUdJEInPFf93zVVVcF\n6w888MAp615++eX+21deeWXituPGjUvfWAaOHj2aWFuxYkVw28ceeyxYP3LkSN395GnsXMrpFV0k\nAgq6SAQUdJEIKOgiEVDQRSKgoItEQEEXicAZP44+c+bMuuvVtqnV7t27g/XXXnstWD9+/HjZ8iOP\nPFI2/h36zHhPT08NHUos9IouEgEFXSQCCrpIBBR0kQgo6CIRUNBFIqCgi0Sgpu91N7NfANfSO+7+\nc+BfgUnA/tJdlrn7bxMf5DT+Xvc8UW/p5LW3Zn6ve9U3zJjZVKDg7pPN7ALgQ2Ar8DN3D7/jQ0Ry\noZZ3xm0H/rd0uwcYCQxrWEcikrm6pmQys4X0HsKfAMYAw4F9wCJ3/zxpu66urmKhUBhiqyJSReKh\ne81BN7NbgYeAHwD/DOx3911m9iDwD+6+KPFBdI6eCfWWTl57y9U5OoCZTQceBn7o7l8AWwaUNwC/\nHFKHItJQVYfXzOw8YBlws7v/tbTuFTPrmwp0CtDVsA5FZMhqeUWfC3wLeNHM+tY9A6wzs6PAYeDu\nxrQnIlnQ/OgV1Fs66q1+mh9dRDKloItEQEEXiYCCLhIBBV0kAgq6SAQUdJEIKOgiEVDQRSKgoItE\nQEEXiYCCLhIBBV0kAgq6SASa8jFVEWktvaKLREBBF4mAgi4SAQVdJAIKukgEFHSRCCjoIhGoaaaW\nLJnZU8D3gSJwv7u/1+weBmNmU4CXgN+XVnW6+09a1xGYWQFYDzzl7ivN7DvAC/ROctkN3Onux3LS\n27PUMZV2g3urnOb7PXLwvA11+vGhaGrQzex64LulKZj/CfhvYHIze6jiHXef0+omAMxsJPA05dNf\nLQFWuftLZvYYcA8tmA4roTfIwVTaCdN8b6HFz1urpx9v9qH7DcBvANz9D8D5ZnZuk3s4XRwDbgL2\nDlg3hd657gA2Ajc2uac+g/WWF9uBH5Vu903zPYXWP2+D9dW06cebfeg+Bnh/wPJnpXUHm9xHkkvN\nbAMwGljs7ptb1Yi7HweOD5gGC2DkgEPOfcDYpjdGYm8Ai8zs36lhKu0G9nYCOFJaXAD8Dpje6uct\noa8TNOk5a/XFuDzNk/NHYDFwKzAf+C8zG97aloLy9NxB7znwg+7+L8Au4NFWNlOa5nsBUDmdd0uf\nt4q+mvacNfsVfS+9r+B9vk3vxZGWc/c9wLrS4p/M7C/AOODPrevqFIfN7Gx3/5Le3nJz6OzuuZlK\nu3KabzPLxfPWyunHm/2KvgmYA2Bm3wP2uvuhJvcwKDO73cx+Wro9BrgQ2NPark7xFjC7dHs28EYL\neymTl6m0B5vmmxw8b62efrzpH1M1s8eB64CTwH3u/lFTG0hgZucAa4BRwHB6z9F/18J+JgHLgfHA\nN/T+o3M78Czwd8AnwN3u/k1OensaeBDon0rb3fe1oLeF9B4C/9+A1fOBX9PC5y2hr2foPYRv+HOm\nz6OLRKDVF+NEpAkUdJEIKOgiEVDQRSKgoItEQEEXiYCCLhKB/wcGHQ6X7PrItwAAAABJRU5ErkJg\ngg==\n",
184 | "text/plain": [
185 | ""
186 | ]
187 | },
188 | "metadata": {
189 | "tags": []
190 | }
191 | },
192 | {
193 | "output_type": "stream",
194 | "text": [
195 | "label: 5\n"
196 | ],
197 | "name": "stdout"
198 | }
199 | ]
200 | },
201 | {
202 | "metadata": {
203 | "id": "R3_POVA1a7kp",
204 | "colab_type": "text"
205 | },
206 | "cell_type": "markdown",
207 | "source": [
208 | "## 2. Iteratorの作成\n",
209 | "\n",
210 | "データセットから決まった数のデータを取り出し、それらを束ねてミニバッチを作成して返してくれる`Iterator`を作成しましょう。これをこの後の学習ループの中で使用していきます。イテレータは、`next()`メソッドで新しいミニバッチを返してくれます。内部ではデータセットを何周なめたか(`epoch`)、現在のイテレーションが新しいepochの最初のイテレーションか、などを管理するプロパティ(`is_new_epoch`)などを持っています。"
211 | ]
212 | },
213 | {
214 | "metadata": {
215 | "id": "FsdG_PaKa7kq",
216 | "colab_type": "code",
217 | "colab": {
218 | "autoexec": {
219 | "startup": false,
220 | "wait_interval": 0
221 | }
222 | }
223 | },
224 | "cell_type": "code",
225 | "source": [
226 | "from chainer import iterators\n",
227 | "\n",
228 | "batchsize = 128\n",
229 | "\n",
230 | "train_iter = iterators.SerialIterator(train, batchsize)\n",
231 | "test_iter = iterators.SerialIterator(test, batchsize,\n",
232 | " repeat=False, shuffle=False)"
233 | ],
234 | "execution_count": 0,
235 | "outputs": []
236 | },
237 | {
238 | "metadata": {
239 | "id": "-bDSS7S1a7ks",
240 | "colab_type": "text"
241 | },
242 | "cell_type": "markdown",
243 | "source": [
244 | "### Iteratorについて\n",
245 | "\n",
246 | "- Chainerがいくつか用意しているIteratorの一種である`SerialIterator`は、データセットの中のデータを順番に取り出してくる最もシンプルなIteratorです。\n",
247 | "- 引数にデータセットオブジェクトと、バッチサイズを取ります。\n",
248 | "- また、このとき渡したデータセットから、何周も何周もデータを繰り返し読み出す必要がある場合は`repeat`引数を`True`とし、1周が終わったらそれ以上データを取り出したくない場合はこれを`False`とします。デフォルトでは、`True`になっています。\n",
249 | "- `shuffle`引数に`True`を渡すと、データセットから取り出されてくるデータの順番をエポックごとにランダムに変更します。\n",
250 | "\n",
251 | "ここで、`batchsize = 128`としているので、ここで作成した訓練データ用の`Iterator`である`train_iter`およびテストデータ用の`Iterator`である`test_iter`は、それぞれ128枚の数字画像データを一括りにして返す`Iterator`ということになります。"
252 | ]
253 | },
254 | {
255 | "metadata": {
256 | "id": "F4ycO_cca7ks",
257 | "colab_type": "text"
258 | },
259 | "cell_type": "markdown",
260 | "source": [
261 | "## 3. モデルの定義\n",
262 | "\n",
263 | "ここでは、シンプルな三層パーセプトロンを定義します。これは全結合層のみからなるネットワークです。中間層のユニット数は適当に100とし、出力は10クラスなので10とします。ここで用いるMNISTデータセットは10種のラベルを持つためです。では、モデルを定義するために必要な`Link`, `Function`, そして`Chain`について、簡単にここで説明を行います。\n",
264 | "\n",
265 | "### LinkとFunction\n",
266 | "\n",
267 | "- Chainerでは、ニューラルネットワークの各層を、`Link`と`Function`に区別します。\n",
268 | "- **`Link`は、パラメータを持つ関数です。**\n",
269 | "- **`Function`は、パラメータを持たない関数です。**\n",
270 | "- これらを組み合わせてモデルを記述します。\n",
271 | "- パラメータを持つ層は、`chainer.links`モジュール以下にたくさん用意されています。\n",
272 | "- パラメータを持たない層は、`chainer.functions`モジュール以下にたくさん用意されています。\n",
273 | "- これらを簡単に使うために、\n",
274 | " ```\n",
275 | " import chainer.links as L\n",
276 | " import chainer.functions as F\n",
277 | " ```\n",
278 | " と別名を与えて、`L.Convolution2D(...)`や`F.relu(...)`のように用いる慣習があります。\n",
279 | "\n",
280 | "### Chain\n",
281 | "\n",
282 | "- Chainは、パラメータを持つ層=**`Link`をまとめておくためのクラス**です。\n",
283 | "- パラメータを持つということは、基本的にモデルの学習の際にそれらを更新していく必要があるということです(例外はあります)。\n",
284 | "- そこで、学習中に`Optimizer`が更新すべき全てのパラメータを簡単に取得できるように、`Chain`で一箇所にまとめておきます。\n",
285 | "\n",
286 | "### Chainを継承して定義されるモデル\n",
287 | "\n",
288 | "- モデルは`Chain`クラスを継承したクラスとして定義されることが多いです。\n",
289 | "- その場合、モデルを表すクラスのコンストラクタで、親クラスのコンストラクタにキーワード引数の形で登録したい層の名前と、オブジェクトを渡しておくと、自動的に`Optimizer`から見つけられる形で保持しておいてくれます。\n",
290 | "- これは、別の場所で`add_link`メソッドを使っても行うことができます。 \n",
291 | "- また、関数呼び出しのようにしてモデルに`()`アクセサでデータを渡せるように、`__call__`メソッドを定義して、その中にforward処理を記述すると便利です。\n",
292 | "\n",
293 | "### GPUで実行するには\n",
294 | "\n",
295 | "- `Chain`クラスは`to_gpu`メソッドを持ち、この引数にGPU IDを指定すると、指定したGPU IDのメモリ上にモデルの全パラメータを転送します。\n",
296 | "- これはモデル内部でのforward/backward計算をその指定したGPU上で行うために必要になります。\n",
297 | "- これを行わない場合、それらの処理はCPU上で行われます。\n",
298 | "\n",
299 | "---\n",
300 | "\n",
301 | "それでは実際にモデルの定義を行い、オブジェクトを作って、GPUに送信してみましょう。"
302 | ]
303 | },
304 | {
305 | "metadata": {
306 | "id": "a7COhg2Ia7kt",
307 | "colab_type": "code",
308 | "colab": {
309 | "autoexec": {
310 | "startup": false,
311 | "wait_interval": 0
312 | }
313 | }
314 | },
315 | "cell_type": "code",
316 | "source": [
317 | "import chainer\n",
318 | "import chainer.links as L\n",
319 | "import chainer.functions as F\n",
320 | "\n",
321 | "class MLP(chainer.Chain):\n",
322 | "\n",
323 | " def __init__(self, n_mid_units=100, n_out=10):\n",
324 | " # パラメータを持つ層の登録\n",
325 | " super(MLP, self).__init__()\n",
326 | " with self.init_scope():\n",
327 | " self.l1=L.Linear(None, n_mid_units)\n",
328 | " self.l2=L.Linear(None, n_mid_units)\n",
329 | " self.l3=L.Linear(None, n_out)\n",
330 | "\n",
331 | "\n",
332 | " def __call__(self, x):\n",
333 | " # データを受け取った際のforward計算を書く\n",
334 | " h1 = F.relu(self.l1(x))\n",
335 | " h2 = F.relu(self.l2(h1))\n",
336 | " return self.l3(h2)\n",
337 | "\n",
338 | "gpu_id = 0 # change to -1 if not using GPU\n",
339 | "\n",
340 | "model = MLP()\n",
341 | "if gpu_id >= 0:\n",
342 | " model.to_gpu(gpu_id)"
343 | ],
344 | "execution_count": 0,
345 | "outputs": []
346 | },
347 | {
348 | "metadata": {
349 | "id": "rLe3zkWQa7kv",
350 | "colab_type": "text"
351 | },
352 | "cell_type": "markdown",
353 | "source": [
354 | "### NOTE\n",
355 | "\n",
356 | "ここで、`L.Linear`クラスは全結合層を意味します。コンストラクタの第一引数に`None`を渡すと、実行時に、データがその層に入力された瞬間、必要な数の入力側ユニット数を自動的に計算し、`(n_input)` $\\times$ `n_mid_units`の大きさの行列を作成し、パラメータとして保持します。これは後々、畳み込み層を全結合層の前に配置する際などに便利な機能です。\n",
357 | "\n",
358 | "前述のように、`Link`はパラメータを持つので、そのパラメータの値にアクセスすることができます。例えば、上のモデル`MLP`は`l1`という名前の全結合相が登録されています。この全結合相は`W`と`b`という2つのパラメータを持ちます。これらは外からアクセスすることができます。例えば`b`へアクセスするには、以下のようにします。"
359 | ]
360 | },
361 | {
362 | "metadata": {
363 | "id": "A8yORmH1a7kw",
364 | "colab_type": "code",
365 | "colab": {
366 | "autoexec": {
367 | "startup": false,
368 | "wait_interval": 0
369 | },
370 | "output_extras": [
371 | {
372 | "item_id": 1
373 | }
374 | ],
375 | "base_uri": "https://localhost:8080/",
376 | "height": 121
377 | },
378 | "outputId": "bc072adc-53a6-456d-8c6f-b9c717f8c52d",
379 | "executionInfo": {
380 | "status": "ok",
381 | "timestamp": 1518682251950,
382 | "user_tz": -540,
383 | "elapsed": 730,
384 | "user": {
385 | "displayName": "keisuke umezawa",
386 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128",
387 | "userId": "103766488840000528829"
388 | }
389 | }
390 | },
391 | "cell_type": "code",
392 | "source": [
393 | "print('1つ目の全結合相のバイアスパラメータの形は、', model.l1.b.shape)\n",
394 | "print('初期化直後のその値は、', model.l1.b.data)"
395 | ],
396 | "execution_count": 16,
397 | "outputs": [
398 | {
399 | "output_type": "stream",
400 | "text": [
401 | "1つ目の全結合相のバイアスパラメータの形は、 (100,)\n",
402 | "初期化直後のその値は、 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
403 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
404 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
405 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
406 | " 0. 0. 0. 0.]\n"
407 | ],
408 | "name": "stdout"
409 | }
410 | ]
411 | },
412 | {
413 | "metadata": {
414 | "id": "VWVaCPLJa7ky",
415 | "colab_type": "text"
416 | },
417 | "cell_type": "markdown",
418 | "source": [
419 | "## 4. 最適化手法の選択\n",
420 | "\n",
421 | "Chainerは多くの最適化手法を提供しています。`chainer.optimizers`モジュール以下にそれらはあります。ここでは最もシンプルな勾配降下法の手法である`optimizers.SGD`を用います。Optimizerのオブジェクトには、`setup`メソッドを使ってモデル(`Chain`オブジェクト)を渡します。こうすることでOptimizerは、自身が更新すべきモデル内のパラメータを自動的にたどってくれます。\n",
422 | "\n",
423 | "他にもいろいろな最適化手法が手軽に試せるので、色々と試してみて結果の変化を見てみてください。例えば、下の`chainer.optimizers.SGD`のうち`SGD`の部分を`MomentumSGD`, `RMSprop`, `Adam`などに変えて、結果の違いを見てみましょう。"
424 | ]
425 | },
426 | {
427 | "metadata": {
428 | "id": "N0hTSAL4a7ky",
429 | "colab_type": "code",
430 | "colab": {
431 | "autoexec": {
432 | "startup": false,
433 | "wait_interval": 0
434 | }
435 | }
436 | },
437 | "cell_type": "code",
438 | "source": [
439 | "from chainer import optimizers\n",
440 | "\n",
441 | "optimizer = optimizers.SGD(lr=0.01)\n",
442 | "optimizer.setup(model)"
443 | ],
444 | "execution_count": 0,
445 | "outputs": []
446 | },
447 | {
448 | "metadata": {
449 | "id": "4vx6vKLBa7k0",
450 | "colab_type": "text"
451 | },
452 | "cell_type": "markdown",
453 | "source": [
454 | "### NOTE\n",
455 | "\n",
456 | "今回はSGDのコンストラクタの`lr`という引数に $0.01$ を与えました。この値は学習率として知られ、モデルをうまく訓練して良いパフォーマンスを発揮させるために調整する必要がある重要な**ハイパーパラメータ**として知られています。"
457 | ]
458 | },
459 | {
460 | "metadata": {
461 | "id": "DjSmOmvRa7k1",
462 | "colab_type": "text"
463 | },
464 | "cell_type": "markdown",
465 | "source": [
466 | "## 5. 学習ループ\n",
467 | "\n",
468 | "いよいよ学習ループです。今回は分類問題なので、`softmax_cross_entropy`というロス関数を使って最小化すべきロスの値を計算します。\n",
469 | "\n",
470 | "Chainerでは、`Function`や`Link`を使ってモデルのforward計算を行い、結果と正解ラベルを`Function`の一種でありスカラ値を返すロス関数に渡してやり、ロスの計算を行うと、それは他の`Link`や`Function`と同じく、`Variable`オブジェクトを返します。`Variable`オブジェクトはこれまでの計算過程をあとから逆向きに辿り返すための参照を保持しているため、`Variable.backward`メソッドを呼ぶだけで、自動的にそこからこれまでの計算過程を遡って、途中で施された計算に用いられたパラメータの勾配を計算してくれます。\n",
471 | "\n",
472 | "つまり、学習ループの1回の中で行うのは、以下の4項目です。\n",
473 | "\n",
474 | "1. モデルにデータを渡して出力`y`を得る\n",
475 | "2. `y`と正解ラベル`t`を使って、最小化すべきロスの値を`softmax_cross_entropy`関数で計算する\n",
476 | "3. `softmax_cross_entropy`関数の出力`Variable`の`backward`メソッドを呼んで、モデル内部のパラメータに`grad`プロパティ(これがパラメータ更新に使われる勾配)を持たせる\n",
477 | "4. Optimizerの`update`メソッドを呼び、3.で計算した`grad`を使って全パラメータを更新する\n",
478 | "\n",
479 | "以上です。では実際に訓練ループを書いていきます。"
480 | ]
481 | },
482 | {
483 | "metadata": {
484 | "id": "BHcWKhRPa7k2",
485 | "colab_type": "code",
486 | "colab": {
487 | "autoexec": {
488 | "startup": false,
489 | "wait_interval": 0
490 | },
491 | "output_extras": [
492 | {
493 | "item_id": 17
494 | }
495 | ],
496 | "base_uri": "https://localhost:8080/",
497 | "height": 190
498 | },
499 | "outputId": "2b9ce02e-4fe2-4c1d-d18e-a7aeecd6503f",
500 | "executionInfo": {
501 | "status": "ok",
502 | "timestamp": 1518682307149,
503 | "user_tz": -540,
504 | "elapsed": 54334,
505 | "user": {
506 | "displayName": "keisuke umezawa",
507 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128",
508 | "userId": "103766488840000528829"
509 | }
510 | }
511 | },
512 | "cell_type": "code",
513 | "source": [
514 | "import numpy as np\n",
515 | "from chainer.dataset import concat_examples\n",
516 | "from chainer.cuda import to_cpu\n",
517 | "\n",
518 | "max_epoch = 10\n",
519 | "\n",
520 | "while train_iter.epoch < max_epoch:\n",
521 | " \n",
522 | " # ---------- 学習の1イテレーション ----------\n",
523 | " train_batch = train_iter.next()\n",
524 | " x, t = concat_examples(train_batch, gpu_id)\n",
525 | " \n",
526 | " # 予測値の計算\n",
527 | " y = model(x)\n",
528 | "\n",
529 | " # ロスの計算\n",
530 | " loss = F.softmax_cross_entropy(y, t)\n",
531 | "\n",
532 | " # 勾配の計算\n",
533 | " model.cleargrads()\n",
534 | " loss.backward()\n",
535 | "\n",
536 | " # パラメータの更新\n",
537 | " optimizer.update()\n",
538 | " # --------------- ここまで ----------------\n",
539 | "\n",
540 | " # 1エポック終了ごとにValidationデータに対する予測精度を測って、\n",
541 | " # モデルの汎化性能が向上していることをチェックしよう\n",
542 | " if train_iter.is_new_epoch: # 1 epochが終わったら\n",
543 | "\n",
544 | " # ロスの表示\n",
545 | " print('epoch:{:02d} train_loss:{:.04f} '.format(\n",
546 | " train_iter.epoch, float(to_cpu(loss.data))), end='')\n",
547 | "\n",
548 | " test_losses = []\n",
549 | " test_accuracies = []\n",
550 | " for test_batch in test_iter:\n",
551 | " test_batch = test_iter.next()\n",
552 | " x_test, t_test = concat_examples(test_batch, gpu_id)\n",
553 | "\n",
554 | " # テストデータをforward\n",
555 | " y_test = model(x_test)\n",
556 | "\n",
557 | " # ロスを計算\n",
558 | " loss_test = F.softmax_cross_entropy(y_test, t_test)\n",
559 | " test_losses.append(to_cpu(loss_test.data))\n",
560 | "\n",
561 | " # 精度を計算\n",
562 | " accuracy = F.accuracy(y_test, t_test)\n",
563 | " accuracy.to_cpu()\n",
564 | " test_accuracies.append(accuracy.data)\n",
565 | "\n",
566 | " test_iter.reset()\n",
567 | "\n",
568 | " print('val_loss:{:.04f} val_accuracy:{:.04f}'.format(\n",
569 | " np.mean(test_losses), np.mean(test_accuracies)))"
570 | ],
571 | "execution_count": 18,
572 | "outputs": [
573 | {
574 | "output_type": "stream",
575 | "text": [
576 | "epoch:01 train_loss:0.7972 val_loss:0.7641 val_accuracy:0.8286\n",
577 | "epoch:02 train_loss:0.4790 val_loss:0.4431 val_accuracy:0.8841\n",
578 | "epoch:03 train_loss:0.3785 val_loss:0.3625 val_accuracy:0.8993\n",
579 | "epoch:04 train_loss:0.2973 val_loss:0.3270 val_accuracy:0.9079\n",
580 | "epoch:05 train_loss:0.2120 val_loss:0.3032 val_accuracy:0.9143\n",
581 | "epoch:06 train_loss:0.2209 val_loss:0.2875 val_accuracy:0.9176\n",
582 | "epoch:07 train_loss:0.3127 val_loss:0.2739 val_accuracy:0.9202\n",
583 | "epoch:08 train_loss:0.2458 val_loss:0.2620 val_accuracy:0.9252\n",
584 | "epoch:09 train_loss:0.2618 val_loss:0.2513 val_accuracy:0.9283\n",
585 | "epoch:10 train_loss:0.2291 val_loss:0.2431 val_accuracy:0.9295\n"
586 | ],
587 | "name": "stdout"
588 | }
589 | ]
590 | },
591 | {
592 | "metadata": {
593 | "id": "UCodn6XSa7k4",
594 | "colab_type": "text"
595 | },
596 | "cell_type": "markdown",
597 | "source": [
598 | "## 6. 学習済みモデルを保存する\n",
599 | "\n",
600 | "Chainerには2つのシリアライズ機能が用意されています。一つはHDF5形式でモデルを保存するもので、もう一つはNumPyのNPZ形式でモデルを保存するものです。今回は、追加ライブラリのインストールが必要なHDF5ではなく、NumPy標準機能で提供されているシリアライズ機能を利用したNPZ形式でのモデルの保存を行います。"
601 | ]
602 | },
603 | {
604 | "metadata": {
605 | "id": "KLTb41DYa7k5",
606 | "colab_type": "code",
607 | "colab": {
608 | "autoexec": {
609 | "startup": false,
610 | "wait_interval": 0
611 | },
612 | "output_extras": [
613 | {
614 | "item_id": 1
615 | }
616 | ],
617 | "base_uri": "https://localhost:8080/",
618 | "height": 34
619 | },
620 | "outputId": "300f21c8-f019-4edd-e701-c3461d5c8750",
621 | "executionInfo": {
622 | "status": "ok",
623 | "timestamp": 1518682307995,
624 | "user_tz": -540,
625 | "elapsed": 831,
626 | "user": {
627 | "displayName": "keisuke umezawa",
628 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128",
629 | "userId": "103766488840000528829"
630 | }
631 | }
632 | },
633 | "cell_type": "code",
634 | "source": [
635 | "from chainer import serializers\n",
636 | "\n",
637 | "serializers.save_npz('my_mnist.model', model)\n",
638 | "\n",
639 | "# ちゃんと保存されていることを確認\n",
640 | "%ls -la my_mnist.model"
641 | ],
642 | "execution_count": 19,
643 | "outputs": [
644 | {
645 | "output_type": "stream",
646 | "text": [
647 | "-rw-r--r-- 1 root root 333851 Feb 15 08:11 my_mnist.model\r\n"
648 | ],
649 | "name": "stdout"
650 | }
651 | ]
652 | },
653 | {
654 | "metadata": {
655 | "id": "xdskF19ua7k7",
656 | "colab_type": "text"
657 | },
658 | "cell_type": "markdown",
659 | "source": [
660 | "## 7. 保存したモデルを読み込んで推論する\n",
661 | "\n",
662 | "今しがた保存したNPZファイルを読み込んで、テストデータに対して予測を計算させてみます。NPZファイルにはパラメータが保存されているので、forward計算のロジックを持つモデルのオブジェクトをまず作成し、そのパラメータを先程保存したNPZが持つ値で上書きすることで学習直後のモデルの状態を復元します。"
663 | ]
664 | },
665 | {
666 | "metadata": {
667 | "id": "D0p6AcOMa7k8",
668 | "colab_type": "code",
669 | "colab": {
670 | "autoexec": {
671 | "startup": false,
672 | "wait_interval": 0
673 | }
674 | }
675 | },
676 | "cell_type": "code",
677 | "source": [
678 | "# まず同じモデルのオブジェクトを作る\n",
679 | "infer_model = MLP()\n",
680 | "\n",
681 | "# そのオブジェクトに保存済みパラメータをロードする\n",
682 | "serializers.load_npz('my_mnist.model', infer_model)\n",
683 | "\n",
684 | "# GPU上で計算させるために、モデルをGPUに送る\n",
685 | "if gpu_id >= 0:\n",
686 | " infer_model.to_gpu(gpu_id)"
687 | ],
688 | "execution_count": 0,
689 | "outputs": []
690 | },
691 | {
692 | "metadata": {
693 | "id": "t8AtSuBSa7k-",
694 | "colab_type": "code",
695 | "colab": {
696 | "autoexec": {
697 | "startup": false,
698 | "wait_interval": 0
699 | },
700 | "output_extras": [
701 | {
702 | "item_id": 1
703 | },
704 | {
705 | "item_id": 2
706 | }
707 | ],
708 | "base_uri": "https://localhost:8080/",
709 | "height": 282
710 | },
711 | "outputId": "b76e4029-0e89-4f05-a788-9f7246008dd4",
712 | "executionInfo": {
713 | "status": "ok",
714 | "timestamp": 1518682309207,
715 | "user_tz": -540,
716 | "elapsed": 598,
717 | "user": {
718 | "displayName": "keisuke umezawa",
719 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128",
720 | "userId": "103766488840000528829"
721 | }
722 | }
723 | },
724 | "cell_type": "code",
725 | "source": [
726 | "# テストデータ\n",
727 | "x, t = test[0]\n",
728 | "plt.imshow(x.reshape(28, 28), cmap='gray')\n",
729 | "plt.show()\n",
730 | "print('label:', t)"
731 | ],
732 | "execution_count": 21,
733 | "outputs": [
734 | {
735 | "output_type": "display_data",
736 | "data": {
737 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPoAAAD4CAYAAADFJPs2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAADppJREFUeJzt3X2oXPWdx/H3bFQa4rarVhubVYOm\nfLFMgpoVmjXZXjdpdeO6/pGIf6iIii6LSkEMGPuHDxC7KOriA4J0txa1YKKg0QaxiYv5w0g1rHqv\n6M9Gi6hRosa2xq4xcWf/uJP0znjnzNxz5yn+3i8Izvn9zsOXc/l4nubMr1Kr1ZD09fZXgy5AUu8Z\ndCkDBl3KgEGXMmDQpRzUarWe/wNqE/+Njo7WmtuG5Z+1WduBWldRBitlH69FxB3AD+ob+UlK6YVW\n81YqlYaN1Go1KpVKqe32mrWVY21T1+26arVay5WVOnWPiB8C30spLQIuBe4sWZukPih7jb4UeAwg\npfQacFhEfLNrVUnqqoNKLjcb2Dph+sN6258mm3l0dJRqtdrQNszfyLO2cqxt6vpVV9mgNyu80Jg/\nf37D9LBeM4G1lWVtU9eDa/SWfWVP3bczfgTf57vA+yXXJanHygb9aWAlQEScAmxPKX3ataokdVWp\noKeUngO2RsRzjN9xv6KrVUnqqtLP0ae0EZ+jd4W1lTOstQ39c3RJBxaDLmXAoEsZMOhSBgy6lAGD\nLmXAoEsZMOhSBgy6lAGDLmXAoEsZMOhSBgy6lAGDLmXAoEsZMOhSBgy6lAGDLmXAoEsZMOhSBgy6\nlAGDLmXAoEsZMOhSBgy6lAGDLmXAoEsZMOhSBgy6lAGDLmXgoDILRcQIsA54td40mlK6qltFSequ\nUkGvezaltLJrlUjqGU/dpQxM54j+/YhYDxwO3JhS+k2rGUdHR6lWqw1ttVptGpvuLWsrx9qmrl91\nVcpsKCLmAIuBtcDxwH8D81JKX0y6kUqlYSO1Wo1KpTL1avvA2sqxtqnrdl21Wq3lykoFvVlE/BY4\nL6X0+0k3YtC7wtrKGdba+hn0UtfoEXF+RFxT/zwb+A7wXrnyJPVa2Wv09cCvIuIc4BDg31qdtksa\nvK6curfdiKfuXWFt5QxrbUN/6i7pwGLQpQwYdCkDBl3KgEGXMjCdr8BmYeXK1u/tXHbZZYXLbt++\nvbD/888/L+x/6KGHvtK2ePHi/Z8/+OCDlstu27atcN3Ki0d0KQMGXcqAQZcyYNClDBh0KQMGXcqA\nQZcy4NtrTZpre+utt1rOO3fu3D5U9BeVSqXhp4c+/fTTlvO++uqrLft6YdGiRWzZsqWv22zl3Xff\nbZg+99xzWbduHQC33HJL4bIvvvhiz+pq5ttrkrrKoEsZMOhSBgy6lAGDLmXAoEsZMOhSBnyO3qS5\ntqVLl7acd8GCBYXreu211wr7TzzxxML+U045pWH6ggsu4MEHH9w/PTIy0nLZOXPmFK77nXfeKew/\n5phjCvubNT/jL7J3797C/g8//LCw/+ijj+64Lmis7fbbby+c95prrpnSuqfD5+iSusqgSxkw6FIG\nDLqUAYMuZcCgSxkw6FIGfI7e5ECq7bDDDms570knnVS4rq1btxb2n3rqqVOqbePGjSxbtqyjedv9\nnv0bb7xR2N/u+wmHH354w/TE5+hXXHFF4bL33ntvYX839fM5ekcDOEREFXgcuCOldHdEHAM8AMwA\n3gcuTCnt7kaxkrqv7al7RMwC7gI2TWi+CbgnpbQE2AZc0pvyJHVDJ9fou4HlwMTxhUaA9fXPTwCd\nnbNJGoiOr9Ej4gbgo/qp+46U0lH19hOAB1JKf99q2bGxsVq1Wu1GvZJam941etmV7zN//vyG6QPp\nhtcw8WbcOG/GtV5fK2Ufr+2KiJn1z3NoPK2XNGTKBn0jsKL+eQXwVHfKkdQLba/RI2IhcBswF9gD\nvAecD9wPfAN4G7g4pbSn5UZ8jt4VudS2YsWKwv61a9cW9o+NjTVML1iwgFdeeQWA008/vXDZnTt3\ndlBhdwzVc/SU0lbG77I3+9E0apLUR34FVsqAQZcyYNClDBh0KQMGXcqAr6k2sbZyplLbUUcdVdg/\nOjo6reVXrlzZMP3II4/sb3v00Uc7qLA//LlnSV1l0KUMGHQpAwZdyoBBlzJg0KUMGHQpA934hRlp\nStr9ysuRRx5Z2P/JJ58U9qeUOmrLiUd0KQMGXcqAQZcyYNClDBh0KQMGXcqAQZcy4PvoTaytnOba\nTjvttJbzPvPMM4XrOvjggwv7R0ZGCvs3b95cWNuw8H10SV1l0KUMGHQpAwZdyoBBlzJg0KUMGHQp\nA76Prp5Yvnx5y752z8k3bdpU2L9ly5ZSNeWso6BHRBV4HLgjpXR3RNwPLAQ+rs9ya0rp170pUdJ0\ntQ16RMwC7gKa/ze7OqX0ZE+qktRVnVyj7waWA9t7XIukHun4u+4RcQPw0YRT99nAIcAO4MqU0ket\nlh0bG6tVq9XpVyupSMvvupe9GfcA8HFK6aWIuBa4Abiy1czz589vmB7WlwzA2spqrm3NmjUt5129\nenXhutrdjCu60QewZ8+ewtqGRQ9eamnZVyroKaWJf4n1wL1l1iOpP0o9R4+IRyPi+PrkCDDWtYok\ndV0nd90XArcBc4E9EbGS8bvwD0fEn4FdwMW9LFLDZ+bMmYVtZ555Zstlv/jii8J1X3/99YX9zafm\naq9t0FNKWxk/ajcbnhHlJRXyK7BSBgy6lAGDLmXAoEsZMOhSBnxNVaWsWrWqsO3kk09uuexTTz1V\nuO7nnnuufGGalEd0KQMGXcqAQZcyYNClDBh0KQMGXcqAQZcy4LDJTaxt3FlnnVXY/9hjjzVMH3TQ\nQezdu3f/9GeffdZy2aJXWAGef/75Dirs3LD+TR02WVJXGXQpAwZdyoBBlzJg0KUMGHQpAwZdyoDv\no2fqiCOOKOy/8847C/tnzJhR2LZhw4aWy3b7Obna84guZcCgSxkw6FIGDLqUAYMuZcCgSxkw6FIG\nfB+9ydeltsmec0/U7ln2woULC/vffPPNhul58+axbdu2/dNF75w3L9trw/o37ef76B19YSYibgGW\n1Of/GfAC8AAwA3gfuDCltHv6pUrqhban7hFxOlBNKS0CzgT+A7gJuCeltATYBlzS0yolTUsn1+ib\ngXPrn/8AzAJGgPX1tieAZV2vTFLXtD11Tyl9Cez7AbBLgQ3AGRNO1XcARxetY3R0lGq12tDWj3sD\nZVlbe/PmzStsm3i9PgyGZb8161ddHb/UEhHnMB70HwO/m9DV9m7C/PnzG6aH9eYIfH1q82bcXwzr\n37QHN+Na9nX0eC0izgB+CvxTSumPwK6ImFnvngNsn26Rknqn7RE9Ir4F3AosSyntrDdvBFYAD9b/\nWzwOrvruhBNOKOxvd8Ru5+qrr
26YXr9+fUNbv4/aKtbJqft5wLeBtRGxr+0i4OcR8a/A28Ave1Oe\npG7o5GbcfcB9k3T9qPvlSOoFvwIrZcCgSxkw6FIGDLqUAYMuZcCfez6AHXfccS37nn766Wmte9Wq\nVYX9Tz75ZEdtGg4e0aUMGHQpAwZdyoBBlzJg0KUMGHQpAwZdyoDP0Q9gl19+ecu+Y489dlrrfvbZ\nZwv7J/s1k2H9uSZ5RJeyYNClDBh0KQMGXcqAQZcyYNClDBh0KQM+Rx9iixcvLmy76qqr+lmODmAe\n0aUMGHQpAwZdyoBBlzJg0KUMGHQpAwZdykBHz9Ej4hZgSX3+nwH/AiwEPq7PcmtK6dc9qTBjS5Ys\nKWw79NBDS6+73fjlu3btKr1uDZ+2QY+I04FqSmlRRBwB/A/wDLA6peQv9ksHgE6O6JuB39Y//wGY\nBczoWUWSuq4ylZ//iYjLGT+F/xKYDRwC7ACuTCl91Gq5sbGxWrVanWapktqotOzoNOgRcQ5wHfBj\n4O+Aj1NKL0XEtcDfppSubLmRSqVhI7VajUqlZU0DNUy1rV69umH65ptv5rrrrts/vWbNmtLrbneN\nfvbZZxf2v/766w3Tw7Tfmg1rbd2uq1artVxZpzfjzgB+CpyZUvojsGlC93rg3mlVKKmn2j5ei4hv\nAbcC/5xS2llvezQijq/PMgKM9axCSdPWyRH9PODbwNqI2Nf2C+DhiPgzsAu4uDflqayXX365sH/p\n0qWF/Tt37uxmORqwtkFPKd0H3DdJ1y+7X46kXvCbcVIGDLqUAYMuZcCgSxkw6FIGDLqUgSl91730\nRvwKbFdYWznDWls/vwLrEV3KgEGXMmDQpQwYdCkDBl3KgEGXMmDQpQz05Tm6pMHyiC5lwKBLGTDo\nUgYMupQBgy5lwKBLGTDoUgY6GqmlmyLiDuAHQA34SUrphX7XMJmIGAHWAa/Wm0ZTSlcNriKIiCrw\nOHBHSunuiDgGeIDxQS7fBy5MKe0ektruZ0iG0p5kmO8XGIL9Nsjhx/sa9Ij4IfC9+hDMJwL/BSzq\nZw1tPJtSWjnoIgAiYhZwF43DX90E3JNSWhcRNwOXMIDhsFrUBkMwlHaLYb43MeD9Nujhx/t96r4U\neAwgpfQacFhEfLPPNRwodgPLge0T2kYYH+sO4AlgWZ9r2mey2obFZuDc+ud9w3yPMPj9NlldfRt+\nvN+n7rOBrROmP6y3/anPdbTy/YhYDxwO3JhS+s2gCkkp7QX2ThgGC2DWhFPOHcDRfS+MlrUBXBkR\nV9PBUNo9rO1L4LP65KXABuCMQe+3FnV9SZ/22aBvxg3TD3n9DrgROAe4CPjPiDhksCUVGqZ9B+PX\nwNemlP4ReAm4YZDF1If5vhRoHs57oPutqa6+7bN+H9G3M34E3+e7jN8cGbiU0nvAw/XJNyPiA2AO\n8PvBVfUVuyJiZkrpfxmvbWhOnVNKQzOUdvMw3xExFPttkMOP9/uI/jSwEiAiTgG2p5Q+7XMNk4qI\n8yPimvrn2cB3gPcGW9VXbARW1D+vAJ4aYC0NhmUo7cmG+WYI9tughx/v+2uqEfHvwD8A/wdckVIq\nHt+3TyLir4FfAX8DHML4NfqGAdazELgNmAvsYfx/OucD9wPfAN4GLk4p7RmS2u4CrgX2D6WdUtox\ngNouZ/wU+I0JzRcBP2eA+61FXb9g/BS+5/vM99GlDAz6ZpykPjDoUgYMupQBgy5lwKBLGTDoUgYM\nupSB/wclp6FycBcGXAAAAABJRU5ErkJggg==\n",
738 | "text/plain": [
739 | ""
740 | ]
741 | },
742 | "metadata": {
743 | "tags": []
744 | }
745 | },
746 | {
747 | "output_type": "stream",
748 | "text": [
749 | "label: 7\n"
750 | ],
751 | "name": "stdout"
752 | }
753 | ]
754 | },
755 | {
756 | "metadata": {
757 | "id": "1VBhEIALa7lE",
758 | "colab_type": "code",
759 | "colab": {
760 | "autoexec": {
761 | "startup": false,
762 | "wait_interval": 0
763 | },
764 | "output_extras": [
765 | {
766 | "item_id": 1
767 | }
768 | ],
769 | "base_uri": "https://localhost:8080/",
770 | "height": 52
771 | },
772 | "outputId": "8931b210-7559-46ad-edcc-c4e91a5cdd52",
773 | "executionInfo": {
774 | "status": "ok",
775 | "timestamp": 1518682309876,
776 | "user_tz": -540,
777 | "elapsed": 641,
778 | "user": {
779 | "displayName": "keisuke umezawa",
780 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128",
781 | "userId": "103766488840000528829"
782 | }
783 | }
784 | },
785 | "cell_type": "code",
786 | "source": [
787 | "from chainer.cuda import to_gpu\n",
788 | "\n",
789 | "# ミニバッチの形にする(ここではサイズ1のミニバッチにするが、\n",
790 | "# 複数まとめてサイズnのミニバッチにしてまとめて推論することもできる)\n",
791 | "print(x.shape, end=' -> ')\n",
792 | "x = x[None, ...]\n",
793 | "print(x.shape)\n",
794 | "\n",
795 | "# GPU上で計算させるため、データもGPU上に送る\n",
796 | "if gpu_id >= 0:\n",
797 | " x = to_gpu(x, 0)\n",
798 | "\n",
799 | "# モデルのforward関数に渡す\n",
800 | "y = infer_model(x)\n",
801 | "\n",
802 | "# Variable形式で出てくるので中身を取り出す\n",
803 | "y = y.data\n",
804 | "\n",
805 | "# 結果をCPUに送る\n",
806 | "y = to_cpu(y)\n",
807 | "\n",
808 | "# 最大値のインデックスを見る\n",
809 | "pred_label = y.argmax(axis=1)\n",
810 | "\n",
811 | "print('predicted label:', pred_label[0])"
812 | ],
813 | "execution_count": 22,
814 | "outputs": [
815 | {
816 | "output_type": "stream",
817 | "text": [
818 | "(784,) -> (1, 784)\n",
819 | "predicted label: 7\n"
820 | ],
821 | "name": "stdout"
822 | }
823 | ]
824 | }
825 | ]
826 | }
827 |
--------------------------------------------------------------------------------
/hands_on_ja/chainerrl/README.md:
--------------------------------------------------------------------------------
1 | ChainerRL Quickstart guide
2 | -----
3 |
4 | This quickstart guide is forked from [chainer/chainerrl/example/quickstart/quickstart.ipynb](https://github.com/chainer/chainerrl/blob/23f201b556f3010316c996d7847efe841c321ea7/examples/quickstart/quickstart.ipynb).
5 |
6 | License is [here](https://github.com/chainer/chainerrl/blob/23f201b556f3010316c996d7847efe841c321ea7/LICENSE).
7 |
--------------------------------------------------------------------------------
/official_example_en/word2vec.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "word2vec_en.ipynb",
7 | "version": "0.3.2",
8 | "provenance": [],
9 | "collapsed_sections": []
10 | },
11 | "kernelspec": {
12 | "display_name": "Python 3",
13 | "language": "python",
14 | "name": "python3"
15 | },
16 | "accelerator": "GPU"
17 | },
18 | "cells": [
19 | {
20 | "metadata": {
21 | "id": "JR5-Ja21Ojj9",
22 | "colab_type": "text"
23 | },
24 | "cell_type": "markdown",
25 | "source": [
26 | "# Word2Vec: Obtain word embeddings\n",
27 | "\n",
28 | "## 0. Introduction\n",
29 | "\n",
30 | "**Word2vec** is the tool for generating the distributed representation of words, which is proposed by Mikolov et al[[1]](#1). When the tool assigns a real-valued vector to each word, the closer the meanings of the words, the greater similarity the vectors will indicate.\n",
31 | "\n",
32 | "**Distributed representation** means assigning a real-valued vector for each word and representing the word by the vector. When representing a word by distributed representation, we call the vector **word embeddings**. In this notebook, we aim at explaining how to get the word embeddings from Penn Tree Bank dataset.\n",
33 | "\n",
34 | "Let's think about what the meaning of word is. Since we are human, so we can understand that the words \"animal\" and \"dog\" are deeply related each other. But what information will Word2vec use to learn the vectors for words? The words \"animal\" and \"dog\" should have similar vectors, but the words \"food\" and \"dog\" should be far from each other. How to know the features of those words automatically?"
35 | ]
36 | },
37 | {
38 | "metadata": {
39 | "id": "8n5aX8oAOjj_",
40 | "colab_type": "text"
41 | },
42 | "cell_type": "markdown",
43 | "source": [
44 | "## 1. Basic Idea\n",
45 | "\n",
46 | "Word2vec learns the similarity of word meanings from simple information. It learns the representation of words from sentences. The core idea is based on the assumption that the meaning of a word is affected by the words around it. This idea follows **distributional hypothesis**[[2]](#2).\n",
47 | "\n",
48 | "The word we focus on to learn its representation is called **\"center word\"**, and the words around it are called **\"context words\"**. Depending on the window size `C` determines the number of context words which is considered.\n",
49 | "\n",
50 | "Here, let's see the algorithm by using an example sentence: \"**The cute cat jumps over the lazy dog.**\"\n",
51 | "\n",
52 | "- All of the following figures consider \"cat\" as the center word.\n",
53 | "- According to the window size `C`, you can see that the number of context words is changed.\n",
54 | "\n",
55 | ""
56 | ]
57 | },
58 | {
59 | "metadata": {
60 | "id": "CUHlpGTJOjj_",
61 | "colab_type": "text"
62 | },
63 | "cell_type": "markdown",
64 | "source": [
65 | "## 2. Main Algorithm\n",
66 | "\n",
67 | "Word2vec, the tool for creating the word embeddings, is actually built with two models, which are called **Skip-gram** and **CBoW**.\n",
68 | "\n",
69 | "To explain the models with the figures below, we will use the following symbols.\n",
70 | "\n",
71 | "* $|\\mathcal{V}|$ : The size of vocabulary |\n",
72 | "* $D$ : The size of embedding vector |\n",
73 | "* ${\\bf v}_t$ : A one-hot center word vector |\n",
74 | "* $V_{\\pm C}$ : A set of $C$ context vectors around ${\\bf v}_t$, namely, $\\{{\\bf v}_{t+c}\\}_{c=-C}^C \\backslash {\\bf v}_t$ |\n",
75 | "* ${\\bf l}_H$ : An embedding vector of an input word vector |\n",
76 | "* ${\\bf l}_O$ : An output vector of the network |\n",
77 | "* ${\\bf W}_H$ : The embedding matrix for inputs |\n",
78 | "* ${\\bf W}_O$ : The embedding matrix for outputs |\n",
79 | "\n",
80 | "**Note**\n",
81 | "\n",
82 | "Using **negative sampling** or **hierarchical softmax** for the loss function is very common, however, in this notebook, we will use the **softmax over all words** and skip the other variants for the sake of simplicity."
83 | ]
84 | },
85 | {
86 | "metadata": {
87 | "id": "AFx1gZ8KOjkA",
88 | "colab_type": "text"
89 | },
90 | "cell_type": "markdown",
91 | "source": [
92 | "### 2.1 Skip-gram\n",
93 | "\n",
94 | "This model learns to predict context words $V_{t \\pm C}$ when a center word ${\\bf v}_t$ is given. In the model, each row of the embedding matrix for input $W_H$ becomes a word embedding of each word.\n",
95 | "\n",
96 | "When you input a center word ${\\bf v}_t$ into the network, you can predict one of context words $\\hat{\\bf v}_{t+i} \\in V_{t \\pm C}$ as follows:\n",
97 | "\n",
98 | "1. Calculate an embedding vector of the input center word vector: ${\\bf l}_H = {\\bf W}_H {\\bf v}_t$\n",
99 | "2. Calculate an output vector of the embedding vector: ${\\bf l}_O = {\\bf W}_O {\\bf l}_H$\n",
100 | "3. Calculate a probability vector of a context word: $\\hat{\\bf v}_{t+i} = \\text{softmax}({\\bf l}_O)$\n",
101 | "\n",
102 | "Each element of the $|\\mathcal{V}|$-dimensional vector $\\hat{\\bf v}_{t+i}$ is a probability that a word in the vocabulary turns out to be a context word at position $i$. So, the probability $p({\\bf v}_{t+i} \\mid {\\bf v}_t)$ can be estimated by a dot product of the one-hot vector ${\\bf v}_{t+i}$ which represents the actual word at the position $i$ and the output vector $\\hat{\\bf v}_{t+i}$.\n",
103 | "\n",
104 | "$p({\\bf v}_{t+i} \\mid {\\bf v}_t) = {\\bf v}_{t+i}^T \\hat{\\bf v}_{t+i}$\n",
105 | "\n",
106 | "The loss function for all the context words $V_{t \\pm C}$ given a center word ${\\bf v}_t$ is defined as following:\n",
107 | "\n",
108 | "$\n",
109 | "\\begin{eqnarray}\n",
110 | "L(V_{t \\pm C} | {\\bf v}_t; {\\bf W}_H, {\\bf W}_O)\n",
111 | "&=& \\sum_{V_{t \\pm C}} -\\log\\left(p({\\bf v}_{t+i} \\mid {\\bf v}_t)\\right) \\\\\n",
112 | "&=& \\sum_{V_{t \\pm C}} -\\log({\\bf v}_{t+i}^T \\hat{\\bf v}_{t+i})\n",
113 | "\\end{eqnarray}\n",
114 | "$"
115 | ]
116 | },
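117 | {
118 | "metadata": {
119 | "id": "sg-numpy-sketch",
120 | "colab_type": "text"
121 | },
122 | "cell_type": "markdown",
123 | "source": [
124 | "To make the three steps above concrete, here is a minimal NumPy sketch of the skip-gram forward pass and loss for a single (center, context) pair. It is only an illustration: the matrices are randomly initialized, the word IDs are arbitrary, and the embeddings are stored one row per word (as in `L.EmbedID` used later in this notebook), so the matrix products are written with the vector on the left.\n",
125 | "\n",
126 | "```python\n",
127 | "import numpy as np\n",
128 | "\n",
129 | "V, D = 10, 2                       # vocabulary size, embedding size\n",
130 | "rng = np.random.RandomState(0)\n",
131 | "W_H = rng.uniform(-0.5, 0.5, (V, D))   # input embedding matrix (a row per word)\n",
132 | "W_O = rng.uniform(-0.5, 0.5, (V, D))   # output embedding matrix\n",
133 | "\n",
134 | "def softmax(z):\n",
135 | "    e = np.exp(z - z.max())\n",
136 | "    return e / e.sum()\n",
137 | "\n",
138 | "v_t = np.eye(V)[2]                 # one-hot center word vector\n",
139 | "l_H = v_t @ W_H                    # 1. embedding of the center word\n",
140 | "l_O = l_H @ W_O.T                  # 2. output scores over the vocabulary\n",
141 | "v_hat = softmax(l_O)               # 3. probability of each word being a context word\n",
142 | "\n",
143 | "v_ctx = np.eye(V)[0]               # one-hot vector of the actual context word\n",
144 | "loss = -np.log(v_ctx @ v_hat)      # negative log-likelihood for this pair\n",
145 | "print(float(loss))\n",
146 | "```"
147 | ]
148 | },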
117 | {
118 | "metadata": {
119 | "id": "TAmvso8yOjkA",
120 | "colab_type": "text"
121 | },
122 | "cell_type": "markdown",
123 | "source": [
124 | "### 2.2 Continuous Bag of Words (CBoW)\n",
125 | "\n",
126 | "This model learns to predict the center word ${\\bf v}_t$ when context words $V_{t \\pm C}$ is given.\n",
127 | "\n",
128 | "When you give a set of context words $V_{t \\pm C}$ to the network, you can estimate the probability of the center word $\\hat{v}_t$ as follows:\n",
129 | "\n",
130 | "1. Calculate a mean embedding vector over all context words: ${\\bf l}_H = \\frac{1}{2C} \\sum_{V_{t \\pm C}} {\\bf W}_H {\\bf v}_{t+i}$\n",
131 | "2. Calculate an output vector: ${\\bf l}_O = {\\bf W}_O {\\bf l}_H$\n",
132 | "3. Calculate an probability vector: $\\hat{\\bf v}_t = \\text{softmax}({\\bf l}_O)$\n",
133 | "\n",
134 | "Each element of $\\hat{\\bf v}_t$ is a probability that a word in the vocabulary is considered as the center word. So, the prediction $p({\\bf v}_t \\mid V_{t \\pm C})$ can be calculated by ${\\bf v}_t^T \\hat{\\bf v}_t$, where ${\\bf v}_t$ denots the one-hot vector of the actual center word vector in the sentence from the dataset.\n",
135 | "\n",
136 | "The loss function for the center word prediction is defined as follows:\n",
137 | "\n",
138 | "$\n",
139 | "\\begin{eqnarray}\n",
140 | "L({\\bf v}_t|V_{t \\pm C}; W_H, W_O)\n",
141 | "&=& -\\log(p({\\bf v}_t|V_{t \\pm C})) \\\\\n",
142 | "&=& -\\log({\\bf v}_t^T \\hat{\\bf v}_t)\n",
143 | "\\end{eqnarray}\n",
144 | "$"
145 | ]
146 | },
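147 | {
148 | "metadata": {
149 | "id": "cbow-numpy-sketch",
150 | "colab_type": "text"
151 | },
152 | "cell_type": "markdown",
153 | "source": [
154 | "Analogously, here is a minimal NumPy sketch of the CBoW forward pass, again purely illustrative (random matrices, arbitrary word IDs, one embedding row per word). The only structural difference from skip-gram is that the context embeddings are averaged before the output layer.\n",
155 | "\n",
156 | "```python\n",
157 | "import numpy as np\n",
158 | "\n",
159 | "V, D = 10, 2\n",
160 | "rng = np.random.RandomState(0)\n",
161 | "W_H = rng.uniform(-0.5, 0.5, (V, D))   # input embedding matrix\n",
162 | "W_O = rng.uniform(-0.5, 0.5, (V, D))   # output embedding matrix\n",
163 | "\n",
164 | "def softmax(z):\n",
165 | "    e = np.exp(z - z.max())\n",
166 | "    return e / e.sum()\n",
167 | "\n",
168 | "contexts = [np.eye(V)[1], np.eye(V)[3]]             # one-hot context words (C = 1)\n",
169 | "l_H = np.mean([v @ W_H for v in contexts], axis=0)  # 1. mean context embedding\n",
170 | "v_hat = softmax(l_H @ W_O.T)                        # 2.-3. probabilities over center words\n",
171 | "loss = -np.log(np.eye(V)[2] @ v_hat)                # the actual center word has ID 2\n",
172 | "print(float(loss))\n",
173 | "```"
174 | ]
175 | },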
147 | {
148 | "metadata": {
149 | "id": "YZKrh07hOjkB",
150 | "colab_type": "text"
151 | },
152 | "cell_type": "markdown",
153 | "source": [
154 | "## 3. Details of skip-gram\n",
155 | "\n",
156 | "In this notebook, we mainly explain skip-gram model because\n",
157 | "\n",
158 | "1. It is easier to understand the algorithm than CBoW.\n",
159 | "2. Even if the number of words increases, the accuracy is largely maintained. So, it is more scalable."
160 | ]
161 | },
162 | {
163 | "metadata": {
164 | "id": "QMspiJZzOjkB",
165 | "colab_type": "text"
166 | },
167 | "cell_type": "markdown",
168 | "source": [
169 | "So, let's think about a concrete example of calculating skip-gram under this setup:\n",
170 | "\n",
171 | "* The size of vocabulary $|\\mathcal{V}|$ is 10.\n",
172 | "* The size of embedding vector $D$ is 2.\n",
173 | "* Center word is \"dog\".\n",
174 | "* Context word is \"animal\".\n",
175 | "\n",
176 | "Since there should be more than one context words, repeat the following process for each context word.\n",
177 | "\n",
178 | "1. The one-hot vector of \"dog\" is `[0 0 1 0 0 0 0 0 0 0]` and you input it as the center word.\n",
179 | "2. The third row of embedding matrix ${\\bf W}_H$ is used for the word embedding of \"dog\" ${\\bf l}_H$.\n",
180 | "3. Then multiply ${\\bf W}_O$ with ${\\bf l}_H$ to obtain the output vector ${\\bf l}_O$\n",
181 | "4. Give ${\\bf l}_O$ to the softmax function to make it a predicted probability vector $\\hat{\\bf v}_{t+c}$ for a context word at the position $c$.\n",
182 | "5. Calculate the error between $\\hat{\\bf v}_{t+c}$ and the one-hot vector of \"animal\"; `[1 0 0 0 0 0 0 0 0 0 0]`.\n",
183 | "6. Propagate the error back to the network to update the parameters.\n",
184 | "\n",
185 | ""
186 | ]
187 | },
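188 | {
189 | "metadata": {
190 | "id": "onehot-lookup-check",
191 | "colab_type": "text"
192 | },
193 | "cell_type": "markdown",
194 | "source": [
195 | "The lookup in step 2 is just a row selection, which is easy to verify with NumPy (illustrative values; word ID 2 stands for \"dog\"):\n",
196 | "\n",
197 | "```python\n",
198 | "import numpy as np\n",
199 | "\n",
200 | "V, D = 10, 2\n",
201 | "rng = np.random.RandomState(0)\n",
202 | "W_H = rng.uniform(-0.5, 0.5, (V, D))   # one embedding per row\n",
203 | "\n",
204 | "dog = np.zeros(V)\n",
205 | "dog[2] = 1.0                           # one-hot vector of \"dog\" (word ID 2)\n",
206 | "l_H = dog @ W_H                        # multiplying by a one-hot vector...\n",
207 | "assert np.allclose(l_H, W_H[2])        # ...just selects the third row\n",
208 | "```"
209 | ]
210 | },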
188 | {
189 | "metadata": {
190 | "id": "IQ19qx2UOjkB",
191 | "colab_type": "text"
192 | },
193 | "cell_type": "markdown",
194 | "source": [
195 | "## 4. Implementation of skip-gram in Chainer\n",
196 | "\n",
197 | "There is an example of Word2vec in the official repository of Chainer, so we will explain how to implement skip-gram based on this: [chainer/examples/word2vec](https://github.com/chainer/chainer/tree/master/examples/word2vec)"
198 | ]
199 | },
200 | {
201 | "metadata": {
202 | "id": "-fxZ_DM3O7Ea",
203 | "colab_type": "text"
204 | },
205 | "cell_type": "markdown",
206 | "source": [
207 | "First, we execute the following cell and install “Chainer” and its GPU back end “CuPy”. If the “runtime type” of Colaboratory is GPU, you can run Chainer with GPU as a backend."
208 | ]
209 | },
210 | {
211 | "metadata": {
212 | "id": "9xOsXDFRO54W",
213 | "colab_type": "code",
214 | "colab": {
215 | "base_uri": "https://localhost:8080/",
216 | "height": 133
217 | },
218 | "outputId": "ae9cad40-8a15-4b24-917c-5d4b239c1b6a"
219 | },
220 | "cell_type": "code",
221 | "source": [
222 | "!curl https://colab.chainer.org/install | sh -"
223 | ],
224 | "execution_count": 1,
225 | "outputs": [
226 | {
227 | "output_type": "stream",
228 | "text": [
229 | "Reading package lists... Done\n",
230 | "Building dependency tree \n",
231 | "Reading state information... Done\n",
232 | "libcusparse8.0 is already the newest version (8.0.61-1).\n",
233 | "libnvrtc8.0 is already the newest version (8.0.61-1).\n",
234 | "libnvtoolsext1 is already the newest version (8.0.61-1).\n",
235 | "0 upgraded, 0 newly installed, 0 to remove and 0 not upgraded.\n"
236 | ],
237 | "name": "stdout"
238 | }
239 | ]
240 | },
241 | {
242 | "metadata": {
243 | "id": "G2O-8RkiOjkC",
244 | "colab_type": "text"
245 | },
246 | "cell_type": "markdown",
247 | "source": [
248 | "### 4.1 Preparation\n",
249 | "\n",
250 | "First, let's import necessary packages:"
251 | ]
252 | },
253 | {
254 | "metadata": {
255 | "id": "OvL9Gk08OjkD",
256 | "colab_type": "code",
257 | "colab": {}
258 | },
259 | "cell_type": "code",
260 | "source": [
261 | "import argparse\n",
262 | "import collections\n",
263 | "\n",
264 | "import numpy as np\n",
265 | "import six\n",
266 | "\n",
267 | "import chainer\n",
268 | "from chainer import cuda\n",
269 | "import chainer.functions as F\n",
270 | "import chainer.initializers as I\n",
271 | "import chainer.links as L\n",
272 | "import chainer.optimizers as O\n",
273 | "from chainer import reporter\n",
274 | "from chainer import training\n",
275 | "from chainer.training import extensions"
276 | ],
277 | "execution_count": 0,
278 | "outputs": []
279 | },
280 | {
281 | "metadata": {
282 | "id": "AXuiONJbOjkG",
283 | "colab_type": "text"
284 | },
285 | "cell_type": "markdown",
286 | "source": [
287 | "### 4.2 Define a skip-gram model\n",
288 | "\n",
289 | "Next, let's define a network for skip-gram."
290 | ]
291 | },
292 | {
293 | "metadata": {
294 | "id": "jLNEBp7gOjkG",
295 | "colab_type": "code",
296 | "colab": {}
297 | },
298 | "cell_type": "code",
299 | "source": [
300 | "class SkipGram(chainer.Chain):\n",
301 | "\n",
302 | " def __init__(self, n_vocab, n_units):\n",
303 | " super().__init__()\n",
304 | " with self.init_scope():\n",
305 | " self.embed = L.EmbedID(\n",
306 | " n_vocab, n_units, initialW=I.Uniform(1. / n_units))\n",
307 | " self.out = L.Linear(n_units, n_vocab, initialW=0)\n",
308 | "\n",
309 | " def __call__(self, x, context):\n",
310 | " e = self.embed(context)\n",
311 | " shape = e.shape\n",
312 | " x = F.broadcast_to(x[:, None], (shape[0], shape[1]))\n",
313 | " e = F.reshape(e, (shape[0] * shape[1], shape[2]))\n",
314 | " x = F.reshape(x, (shape[0] * shape[1],))\n",
315 | " center_predictions = self.out(e)\n",
316 | " loss = F.softmax_cross_entropy(center_predictions, x)\n",
317 | " reporter.report({'loss': loss}, self)\n",
318 | " return loss"
319 | ],
320 | "execution_count": 0,
321 | "outputs": []
322 | },
323 | {
324 | "metadata": {
325 | "id": "xL8OWlpZOjkI",
326 | "colab_type": "text"
327 | },
328 | "cell_type": "markdown",
329 | "source": [
330 | "**Note**\n",
331 | "\n",
332 | "- The weight matrix `self.embed.W` is the embbeding matrix for input vector `x`.\n",
333 | "- `__call__` takes the word ID of a center word `x` and word IDs of context words `contexts` as inputs, and outputs the error calculated by the loss function `softmax_cross_entropy`.\n",
334 | "- Note that the initial shape of `x` and `contexts` are `(batch_size,)` and `(batch_size, n_context)`, respectively.\n",
335 | "- The `batch_size` means the size of mini-batch, and `n_context` means the number of context words.\n",
336 | "\n",
337 | "First, we obtain the embedding vectors of `contexts` by `e = self.embed(contexts)`. \n",
338 | "\n",
339 | "Then `F.broadcast_to(x[:, None], (shape[0], shape[1]))` performs broadcasting of `x` (`(batch_size,)`) to `(batch_size, n_context)` by copying the same value `n_context` time to fill the second axis, and then the broadcasted `x` is reshaped into 1-D vector `(batchsize * n_context,)` while `e` is reshaped to `(batch_size * n_context, n_units)`.\n",
340 | "\n",
341 | "In skip-gram model, predicting a context word from the center word is the same as predicting the center word from a context word because the center word is always a context word when considering the context word as a center word. So, we create `batch_size * n_context` center word predictions by applying `self.out` linear layer to the embedding vectors of context words. Then, calculate softmax cross entropy between the broadcasted center word ID `x` and the predictions."
342 | ]
343 | },
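344 | {
345 | "metadata": {
346 | "id": "reshape-walkthrough",
347 | "colab_type": "text"
348 | },
349 | "cell_type": "markdown",
350 | "source": [
351 | "Here is a small NumPy-only walkthrough of that reshaping, using dummy word IDs with `batch_size = 2` and `n_context = 3`. Each center word ID ends up paired with each of its context word IDs:\n",
352 | "\n",
353 | "```python\n",
354 | "import numpy as np\n",
355 | "\n",
356 | "x = np.array([5, 8])                       # center word IDs, shape (batch_size,)\n",
357 | "context = np.array([[1, 2, 3],             # context word IDs,\n",
358 | "                    [4, 6, 7]])            # shape (batch_size, n_context)\n",
359 | "\n",
360 | "b, n = context.shape\n",
361 | "x_b = np.broadcast_to(x[:, None], (b, n))  # copy each center ID n_context times\n",
362 | "pairs = list(zip(x_b.reshape(-1).tolist(), context.reshape(-1).tolist()))\n",
363 | "print(pairs)  # [(5, 1), (5, 2), (5, 3), (8, 4), (8, 6), (8, 7)]\n",
364 | "```"
365 | ]
366 | },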
344 | {
345 | "metadata": {
346 | "id": "dFAB4sObOjkJ",
347 | "colab_type": "text"
348 | },
349 | "cell_type": "markdown",
350 | "source": [
351 | "### 4.3 Prepare dataset and iterator\n",
352 | "\n",
353 | "Let's retrieve the Penn Tree Bank (PTB) dataset by using Chainer's dataset utility `get_ptb_words()` method."
354 | ]
355 | },
356 | {
357 | "metadata": {
358 | "id": "pqUBJ1r2OjkJ",
359 | "colab_type": "code",
360 | "colab": {}
361 | },
362 | "cell_type": "code",
363 | "source": [
364 | "train, val, _ = chainer.datasets.get_ptb_words()\n",
365 | "n_vocab = max(train) + 1 # The minimum word ID is 0"
366 | ],
367 | "execution_count": 0,
368 | "outputs": []
369 | },
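370 | {
371 | "metadata": {
372 | "id": "ptb-sanity-check",
373 | "colab_type": "text"
374 | },
375 | "cell_type": "markdown",
376 | "source": [
377 | "As an optional sanity check, `train` is a one-dimensional array of word IDs, and the ID-to-word mapping can be recovered with `get_ptb_words_vocabulary()` (assuming Python 3 dict methods):\n",
378 | "\n",
379 | "```python\n",
380 | "vocab = chainer.datasets.get_ptb_words_vocabulary()    # word -> ID\n",
381 | "index2word = {wid: word for word, wid in vocab.items()}\n",
382 | "print(train[:8])                                       # the first eight word IDs\n",
383 | "print(' '.join(index2word[int(i)] for i in train[:8])) # ...and the words they encode\n",
384 | "```"
385 | ]
386 | },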
370 | {
371 | "metadata": {
372 | "id": "Yf2sqARoOjkL",
373 | "colab_type": "text"
374 | },
375 | "cell_type": "markdown",
376 | "source": [
377 | "Then define an iterator to make mini-batches that contain a set of center words with their context words."
378 | ]
379 | },
380 | {
381 | "metadata": {
382 | "id": "jwYDyNyXOjkM",
383 | "colab_type": "code",
384 | "colab": {}
385 | },
386 | "cell_type": "code",
387 | "source": [
388 | "class WindowIterator(chainer.dataset.Iterator):\n",
389 | "\n",
390 | " def __init__(self, dataset, window, batch_size, repeat=True):\n",
391 | " self.dataset = np.array(dataset, np.int32)\n",
392 | " self.window = window\n",
393 | " self.batch_size = batch_size\n",
394 | " self._repeat = repeat\n",
395 | "\n",
396 | " self.order = np.random.permutation(\n",
397 | " len(dataset) - window * 2).astype(np.int32)\n",
398 | " self.order += window\n",
399 | " self.current_position = 0\n",
400 | " self.epoch = 0\n",
401 | " self.is_new_epoch = False\n",
402 | "\n",
403 | " def __next__(self):\n",
404 | " if not self._repeat and self.epoch > 0:\n",
405 | " raise StopIteration\n",
406 | "\n",
407 | " i = self.current_position\n",
408 | " i_end = i + self.batch_size\n",
409 | " position = self.order[i: i_end]\n",
410 | " w = np.random.randint(self.window - 1) + 1\n",
411 | " offset = np.concatenate([np.arange(-w, 0), np.arange(1, w + 1)])\n",
412 | " pos = position[:, None] + offset[None, :]\n",
413 | " context = self.dataset.take(pos)\n",
414 | " center = self.dataset.take(position)\n",
415 | "\n",
416 | " if i_end >= len(self.order):\n",
417 | " np.random.shuffle(self.order)\n",
418 | " self.epoch += 1\n",
419 | " self.is_new_epoch = True\n",
420 | " self.current_position = 0\n",
421 | " else:\n",
422 | " self.is_new_epoch = False\n",
423 | " self.current_position = i_end\n",
424 | "\n",
425 | " return center, context\n",
426 | "\n",
427 | " @property\n",
428 | " def epoch_detail(self):\n",
429 | " return self.epoch + float(self.current_position) / len(self.order)\n",
430 | "\n",
431 | " def serialize(self, serializer):\n",
432 | " self.current_position = serializer('current_position',\n",
433 | " self.current_position)\n",
434 | " self.epoch = serializer('epoch', self.epoch)\n",
435 | " self.is_new_epoch = serializer('is_new_epoch', self.is_new_epoch)\n",
436 | " if self._order is not None:\n",
437 | " serializer('_order', self._order)\n",
438 | "\n",
439 | "def convert(batch, device):\n",
440 | " center, context = batch\n",
441 | " if device >= 0:\n",
442 | " center = cuda.to_gpu(center)\n",
443 | " context = cuda.to_gpu(context)\n",
444 | " return center, context"
445 | ],
446 | "execution_count": 0,
447 | "outputs": []
448 | },
449 | {
450 | "metadata": {
451 | "id": "3Zq2jxKgOjkO",
452 | "colab_type": "text"
453 | },
454 | "cell_type": "markdown",
455 | "source": [
456 | "- In the constructor, we create an array `self.order` which denotes shuffled indices of `[window, window + 1, ..., len(dataset) - window - 1]` in order to choose a center word randomly from `dataset` in a mini-batch.\n",
457 | "- The iterator definition `__next__` returns `batch_size` sets of center word and context words.\n",
458 | "- The code `self.order[i:i_end]` returns the indices for a set of center words from the random-ordered array `self.order`. The center word IDs `center` at the random indices are retrieved by `self.dataset.take`.\n",
459 | "- `np.concatenate([np.arange(-w, 0), np.arange(1, w + 1)])` creates a set of offsets to retrieve context words from the dataset.\n",
460 | "- The code `position[:, None] + offset[None, :]` generates the indices of context words for each center word index in `position`. The context word IDs `context` are retrieved by `self.dataset.take`."
461 | ]
462 | },
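463 | {
464 | "metadata": {
465 | "id": "window-index-demo",
466 | "colab_type": "text"
467 | },
468 | "cell_type": "markdown",
469 | "source": [
470 | "The index arithmetic in `__next__` is easy to check in isolation. With a sampled window size `w = 2` and two illustrative center positions:\n",
471 | "\n",
472 | "```python\n",
473 | "import numpy as np\n",
474 | "\n",
475 | "position = np.array([10, 20])   # two center word positions in the corpus\n",
476 | "w = 2                           # window size sampled for this mini-batch\n",
477 | "\n",
478 | "offset = np.concatenate([np.arange(-w, 0), np.arange(1, w + 1)])\n",
479 | "print(offset)                   # [-2 -1  1  2]\n",
480 | "\n",
481 | "pos = position[:, None] + offset[None, :]\n",
482 | "print(pos)                      # [[ 8  9 11 12]\n",
483 | "                                #  [18 19 21 22]]\n",
484 | "```"
485 | ]
486 | },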
463 | {
464 | "metadata": {
465 | "id": "IlIoeqeNOjkO",
466 | "colab_type": "text"
467 | },
468 | "cell_type": "markdown",
469 | "source": [
470 | "### 4.4 Prepare model, optimizer, and updater"
471 | ]
472 | },
473 | {
474 | "metadata": {
475 | "id": "OgPecreHOjkP",
476 | "colab_type": "code",
477 | "colab": {}
478 | },
479 | "cell_type": "code",
480 | "source": [
481 | "unit = 100 # number of hidden units\n",
482 | "window = 5\n",
483 | "batchsize = 1000\n",
484 | "gpu = 0\n",
485 | "\n",
486 | "# Instantiate model\n",
487 | "model = SkipGram(n_vocab, unit)\n",
488 | "\n",
489 | "if gpu >= 0:\n",
490 | " model.to_gpu(gpu)\n",
491 | "\n",
492 | "# Create optimizer\n",
493 | "optimizer = O.Adam()\n",
494 | "optimizer.setup(model)\n",
495 | "\n",
496 | "# Create iterators for both train and val datasets\n",
497 | "train_iter = WindowIterator(train, window, batchsize)\n",
498 | "val_iter = WindowIterator(val, window, batchsize, repeat=False)\n",
499 | "\n",
500 | "# Create updater\n",
501 | "updater = training.StandardUpdater(\n",
502 | " train_iter, optimizer, converter=convert, device=gpu)"
503 | ],
504 | "execution_count": 0,
505 | "outputs": []
506 | },
507 | {
508 | "metadata": {
509 | "id": "ccVACTY-OjkS",
510 | "colab_type": "text"
511 | },
512 | "cell_type": "markdown",
513 | "source": [
514 | "### 4.5 Start training"
515 | ]
516 | },
517 | {
518 | "metadata": {
519 | "id": "OVPlZWePOjkS",
520 | "colab_type": "code",
521 | "colab": {
522 | "base_uri": "https://localhost:8080/",
523 | "height": 1700
524 | },
525 | "outputId": "feb79699-0038-4df4-8e99-f480df3caace"
526 | },
527 | "cell_type": "code",
528 | "source": [
529 | "epoch = 100\n",
530 | "\n",
531 | "trainer = training.Trainer(updater, (epoch, 'epoch'), out='word2vec_result')\n",
532 | "trainer.extend(extensions.Evaluator(val_iter, model, converter=convert, device=gpu))\n",
533 | "trainer.extend(extensions.LogReport())\n",
534 | "trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss', 'elapsed_time']))\n",
535 | "trainer.run()"
536 | ],
537 | "execution_count": 7,
538 | "outputs": [
539 | {
540 | "output_type": "stream",
541 | "text": [
542 | "epoch main/loss validation/main/loss elapsed_time\n",
543 | "\u001b[J1 6.87314 6.48688 54.154 \n",
544 | "\u001b[J2 6.44018 6.40645 107.352 \n",
545 | "\u001b[J3 6.35021 6.3558 159.544 \n",
546 | "\u001b[J4 6.28615 6.31679 212.612 \n",
547 | "\u001b[J5 6.23762 6.28779 266.059 \n",
548 | "\u001b[J6 6.19942 6.22658 319.874 \n",
549 | "\u001b[J7 6.15986 6.20715 372.798 \n",
550 | "\u001b[J8 6.13787 6.21461 426.456 \n",
551 | "\u001b[J9 6.10637 6.24927 479.725 \n",
552 | "\u001b[J10 6.08759 6.23192 532.966 \n",
553 | "\u001b[J11 6.06768 6.19332 586.339 \n",
554 | "\u001b[J12 6.04607 6.17291 639.295 \n",
555 | "\u001b[J13 6.0321 6.21226 692.67 \n",
556 | "\u001b[J14 6.02178 6.18489 746.599 \n",
557 | "\u001b[J15 6.00098 6.17341 799.408 \n",
558 | "\u001b[J16 5.99099 6.19581 852.966 \n",
559 | "\u001b[J17 5.97425 6.22275 905.819 \n",
560 | "\u001b[J18 5.95974 6.20495 958.404 \n",
561 | "\u001b[J19 5.96579 6.16532 1012.49 \n",
562 | "\u001b[J20 5.95292 6.21457 1066.24 \n",
563 | "\u001b[J21 5.93696 6.18441 1119.45 \n",
564 | "\u001b[J22 5.91804 6.20695 1171.98 \n",
565 | "\u001b[J23 5.93265 6.15757 1225.99 \n",
566 | "\u001b[J24 5.92238 6.17064 1279.85 \n",
567 | "\u001b[J25 5.9154 6.21545 1334.01 \n",
568 | "\u001b[J26 5.90538 6.1812 1387.68 \n",
569 | "\u001b[J27 5.8807 6.18523 1439.72 \n",
570 | "\u001b[J28 5.89009 6.19992 1492.67 \n",
571 | "\u001b[J29 5.8773 6.24146 1545.48 \n",
572 | "\u001b[J30 5.89217 6.21846 1599.79 \n",
573 | "\u001b[J31 5.88493 6.21654 1653.95 \n",
574 | "\u001b[J32 5.87784 6.18502 1707.45 \n",
575 | "\u001b[J33 5.88031 6.14161 1761.75 \n",
576 | "\u001b[J34 5.86278 6.22893 1815.29 \n",
577 | "\u001b[J35 5.83335 6.18966 1866.56 \n",
578 | "\u001b[J36 5.85978 6.24276 1920.18 \n",
579 | "\u001b[J37 5.85921 6.23888 1974.2 \n",
580 | "\u001b[J38 5.85195 6.19231 2027.92 \n",
581 | "\u001b[J39 5.8396 6.20542 2080.78 \n",
582 | "\u001b[J40 5.83745 6.27583 2133.37 \n",
583 | "\u001b[J41 5.85996 6.23596 2188 \n",
584 | "\u001b[J42 5.85743 6.17438 2242.4 \n",
585 | "\u001b[J43 5.84051 6.25449 2295.84 \n",
586 | "\u001b[J44 5.83023 6.30226 2348.84 \n",
587 | "\u001b[J45 5.84677 6.23473 2403.11 \n",
588 | "\u001b[J46 5.82406 6.27398 2456.11 \n",
589 | "\u001b[J47 5.82827 6.21509 2509.17 \n",
590 | "\u001b[J48 5.8253 6.23009 2562.15 \n",
591 | "\u001b[J49 5.83697 6.2564 2616.35 \n",
592 | "\u001b[J50 5.81998 6.29104 2669.38 \n",
593 | "\u001b[J51 5.82926 6.26068 2723.47 \n",
594 | "\u001b[J52 5.81457 6.30152 2776.36 \n",
595 | "\u001b[J53 5.82587 6.29581 2830.24 \n",
596 | "\u001b[J54 5.80614 6.30994 2882.85 \n",
597 | "\u001b[J55 5.8161 6.23224 2935.73 \n",
598 | "\u001b[J56 5.80867 6.26867 2988.48 \n",
599 | "\u001b[J57 5.79467 6.24508 3040.2 \n",
600 | "\u001b[J58 5.81687 6.24676 3093.57 \n",
601 | "\u001b[J59 5.82064 6.30236 3147.68 \n",
602 | "\u001b[J60 5.80855 6.30184 3200.75 \n",
603 | "\u001b[J61 5.81298 6.25173 3254.06 \n",
604 | "\u001b[J62 5.80753 6.32951 3307.42 \n",
605 | "\u001b[J63 5.82505 6.2472 3361.68 \n",
606 | "\u001b[J64 5.78396 6.28168 3413.14 \n",
607 | "\u001b[J65 5.80209 6.24962 3465.96 \n",
608 | "\u001b[J66 5.80107 6.326 3518.83 \n",
609 | "\u001b[J67 5.83765 6.28848 3574.57 \n",
610 | "\u001b[J68 5.7864 6.3506 3626.88 \n",
611 | "\u001b[J69 5.80329 6.30671 3679.82 \n",
612 | "\u001b[J70 5.80032 6.29277 3732.69 \n",
613 | "\u001b[J71 5.80647 6.30722 3786.21 \n",
614 | "\u001b[J72 5.8176 6.30046 3840.51 \n",
615 | "\u001b[J73 5.79912 6.35945 3893.81 \n",
616 | "\u001b[J74 5.80484 6.32439 3947.35 \n",
617 | "\u001b[J75 5.82065 6.29674 4002.03 \n",
618 | "\u001b[J76 5.80872 6.27921 4056.05 \n",
619 | "\u001b[J77 5.80891 6.28952 4110.1 \n",
620 | "\u001b[J78 5.79121 6.35363 4163.39 \n",
621 | "\u001b[J79 5.79161 6.32894 4216.34 \n",
622 | "\u001b[J80 5.78601 6.3255 4268.95 \n",
623 | "\u001b[J81 5.79062 6.29608 4321.73 \n",
624 | "\u001b[J82 5.7959 6.37235 4375.25 \n",
625 | "\u001b[J83 5.77828 6.31001 4427.44 \n",
626 | "\u001b[J84 5.7879 6.25628 4480.09 \n",
627 | "\u001b[J85 5.79297 6.29321 4533.27 \n",
628 | "\u001b[J86 5.79286 6.2725 4586.44 \n",
629 | "\u001b[J87 5.79388 6.36764 4639.82 \n",
630 | "\u001b[J88 5.79062 6.33841 4692.89 \n",
631 | "\u001b[J89 5.7879 6.31828 4745.68 \n",
632 | "\u001b[J90 5.81015 6.33247 4800.19 \n",
633 | "\u001b[J91 5.78858 6.37569 4853.31 \n",
634 | "\u001b[J92 5.7966 6.35733 4907.27 \n",
635 | "\u001b[J93 5.79814 6.34506 4961.09 \n",
636 | "\u001b[J94 5.81956 6.322 5016.65 \n",
637 | "\u001b[J95 5.81565 6.35974 5071.69 \n",
638 | "\u001b[J96 5.78953 6.37451 5125.02 \n",
639 | "\u001b[J97 5.7993 6.42065 5179.34 \n",
640 | "\u001b[J98 5.79129 6.37995 5232.89 \n",
641 | "\u001b[J99 5.76834 6.36254 5284.7 \n",
642 | "\u001b[J100 5.79829 6.3785 5338.93 \n"
643 | ],
644 | "name": "stdout"
645 | }
646 | ]
647 | },
648 | {
649 | "metadata": {
650 | "id": "YqGB9iD2Tmng",
651 | "colab_type": "code",
652 | "colab": {}
653 | },
654 | "cell_type": "code",
655 | "source": [
656 | "vocab = chainer.datasets.get_ptb_words_vocabulary()\n",
657 | "index2word = {wid: word for word, wid in six.iteritems(vocab)}\n",
658 | "\n",
659 | "# Save the word2vec model\n",
660 | "with open('word2vec.model', 'w') as f:\n",
661 | " f.write('%d %d\\n' % (len(index2word), unit))\n",
662 | " w = cuda.to_cpu(model.embed.W.data)\n",
663 | " for i, wi in enumerate(w):\n",
664 | " v = ' '.join(map(str, wi))\n",
665 | " f.write('%s %s\\n' % (index2word[i], v))"
666 | ],
667 | "execution_count": 0,
668 | "outputs": []
669 | },
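670 | {
671 | "metadata": {
672 | "id": "gensim-load-note",
673 | "colab_type": "text"
674 | },
675 | "cell_type": "markdown",
676 | "source": [
677 | "The file written above follows the plain-text word2vec format: a header line with the vocabulary size and dimensionality, followed by one word and its vector per line. As an optional aside, this means it should also be loadable with third-party tools such as gensim (not required for this notebook, and assuming gensim is installed):\n",
678 | "\n",
679 | "```python\n",
680 | "# Optional: load the saved embeddings with gensim (pip install gensim).\n",
681 | "from gensim.models import KeyedVectors\n",
682 | "\n",
683 | "vectors = KeyedVectors.load_word2vec_format('word2vec.model', binary=False)\n",
684 | "print(vectors.most_similar('apple', topn=5))\n",
685 | "```"
686 | ]
687 | },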
670 | {
671 | "metadata": {
672 | "id": "HgBVYr_b8dS8",
673 | "colab_type": "text"
674 | },
675 | "cell_type": "markdown",
676 | "source": [
677 | "### 4.6 Search the similar words"
678 | ]
679 | },
680 | {
681 | "metadata": {
682 | "id": "7QDwFawQ8daT",
683 | "colab_type": "code",
684 | "colab": {}
685 | },
686 | "cell_type": "code",
687 | "source": [
688 | "import numpy\n",
689 | "import six\n",
690 | "\n",
691 | "n_result = 5 # number of search result to show\n",
692 | "\n",
693 | "\n",
694 | "with open('word2vec.model', 'r') as f:\n",
695 | " ss = f.readline().split()\n",
696 | " n_vocab, n_units = int(ss[0]), int(ss[1])\n",
697 | " word2index = {}\n",
698 | " index2word = {}\n",
699 | " w = numpy.empty((n_vocab, n_units), dtype=numpy.float32)\n",
700 | " for i, line in enumerate(f):\n",
701 | " ss = line.split()\n",
702 | " assert len(ss) == n_units + 1\n",
703 | " word = ss[0]\n",
704 | " word2index[word] = i\n",
705 | " index2word[i] = word\n",
706 | " w[i] = numpy.array([float(s) for s in ss[1:]], dtype=numpy.float32)\n",
707 | "\n",
708 | "\n",
709 | "s = numpy.sqrt((w * w).sum(1))\n",
710 | "w /= s.reshape((s.shape[0], 1)) # normalize"
711 | ],
712 | "execution_count": 0,
713 | "outputs": []
714 | },
715 | {
716 | "metadata": {
717 | "id": "MFel0uXmUfJl",
718 | "colab_type": "code",
719 | "colab": {}
720 | },
721 | "cell_type": "code",
722 | "source": [
723 | "def search(query):\n",
724 | " if query not in word2index:\n",
725 | " print('\"{0}\" is not found'.format(query))\n",
726 | " return\n",
727 | "\n",
728 | " v = w[word2index[query]]\n",
729 | " similarity = w.dot(v)\n",
730 | " print('query: {}'.format(query))\n",
731 | "\n",
732 | " count = 0\n",
733 | " for i in (-similarity).argsort():\n",
734 | " if numpy.isnan(similarity[i]):\n",
735 | " continue\n",
736 | " if index2word[i] == query:\n",
737 | " continue\n",
738 | " print('{0}: {1}'.format(index2word[i], similarity[i]))\n",
739 | " count += 1\n",
740 | " if count == n_result:\n",
741 | " return"
742 | ],
743 | "execution_count": 0,
744 | "outputs": []
745 | },
746 | {
747 | "metadata": {
748 | "id": "v3PrgDLi9pqf",
749 | "colab_type": "text"
750 | },
751 | "cell_type": "markdown",
752 | "source": [
753 | "Search by \"apple\" word."
754 | ]
755 | },
756 | {
757 | "metadata": {
758 | "id": "_JerH5KJ9NFj",
759 | "colab_type": "code",
760 | "colab": {
761 | "base_uri": "https://localhost:8080/",
762 | "height": 116
763 | },
764 | "outputId": "84e097d5-80e8-4a5f-c790-5bbe104d7f2c"
765 | },
766 | "cell_type": "code",
767 | "source": [
768 | "query = \"apple\"\n",
769 | "search(query)"
770 | ],
771 | "execution_count": 23,
772 | "outputs": [
773 | {
774 | "output_type": "stream",
775 | "text": [
776 | "query: apple\n",
777 | "computer: 0.5457335710525513\n",
778 | "compaq: 0.5068206191062927\n",
779 | "microsoft: 0.4654524028301239\n",
780 | "network: 0.42985647916793823\n",
781 | "trotter: 0.42716777324676514\n"
782 | ],
783 | "name": "stdout"
784 | }
785 | ]
786 | },
787 | {
788 | "metadata": {
789 | "id": "JVXz7sbc8diq",
790 | "colab_type": "text"
791 | },
792 | "cell_type": "markdown",
793 | "source": [
794 | "## 5. Reference\n",
795 | "\n",
796 | "* [1] [Mikolov, Tomas; et al. “Efficient Estimation of Word Representations in Vector Space”. arXiv:1301.3781](https://arxiv.org/abs/1301.3781)\n",
797 | "* [2] [Distributional Hypothesis](https://aclweb.org/aclwiki/Distributional_Hypothesis)\n"
798 | ]
799 | },
800 | {
801 | "metadata": {
802 | "id": "HhBJdMTi8jxb",
803 | "colab_type": "code",
804 | "colab": {}
805 | },
806 | "cell_type": "code",
807 | "source": [
808 | ""
809 | ],
810 | "execution_count": 0,
811 | "outputs": []
812 | }
813 | ]
814 | }
815 |
--------------------------------------------------------------------------------
/official_example_ja/word2vec.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "word2vec_ja.ipynb",
7 | "version": "0.3.2",
8 | "provenance": [],
9 | "collapsed_sections": []
10 | },
11 | "kernelspec": {
12 | "display_name": "Python 3",
13 | "language": "python",
14 | "name": "python3"
15 | },
16 | "accelerator": "GPU"
17 | },
18 | "cells": [
19 | {
20 | "metadata": {
21 | "id": "JR5-Ja21Ojj9",
22 | "colab_type": "text"
23 | },
24 | "cell_type": "markdown",
25 | "source": [
26 | "# Word2Vec: Obtain word embeddings\n",
27 | "\n",
28 | "## 0. Introduction\n",
29 | "\n",
30 | "**word2vec**は単語の分散表現を生成するツールで、Mikolov et al[1]によって提案されました。単語の意味が近いほど類似度が大きくなるように、word2vecは各単語に実ベクトルを割り当てます。\n",
31 | "\n",
32 | "ここで、**分散表現**とは各単語に対して実ベクトルを割り当て、そのベクトルで単語を表現することです。分散表現で単語を表現する場合、そのベクトルを**word embeddings(単語埋め込み)** と呼びます。このNotebookでは、Penn Tree Bankのデータセットからword embeddingsを獲得する方法を説明します。\n",
33 | "\n",
34 | "さて、そもそも単語の意味とはなんでしょうか。人であれば「動物」と「犬」という単語が似ているというのはなんとなく分かります。しかし、word2vecは何の情報を元に、「動物」と「犬」は似ているとか、「食べ物」と「犬」は似ていないといった意味の類似度を学習すれば良いのでしょうか。"
35 | ]
36 | },
37 | {
38 | "metadata": {
39 | "id": "8n5aX8oAOjj_",
40 | "colab_type": "text"
41 | },
42 | "cell_type": "markdown",
43 | "source": [
44 | "## 1. 基本的なアイデア\n",
45 | "\n",
46 | "word2vecは単語の意味の類似度を単純な情報から学習します。それは文章における単語の並びです、つまりある単語の意味は、その単語の周囲の単語で決まるというアイデアです。 このアイデアは**distributional hypothesis(分布仮設)**[2]に基づいています。\n",
47 | "\n",
48 | "学習対象の単語を**center word**、その周囲の単語を**context words**と呼びます。ウィンドウサイズ`C`に応じてcontex wordの数は変わります。\n",
49 | "\n",
50 | "例として、**The cute cat jumps over the lazy dog.**という文で説明を行います。\n",
51 | "以下の図は全てcenter wordをcatとした場合のものです。\n",
52 | "ウィンドウサイズ`C`に応じて、catを学習する際に使用するcontex wordが変わることがわかると思います。\n",
53 | "\n",
54 | ""
55 | ]
56 | },
57 | {
58 | "metadata": {
59 | "id": "CUHlpGTJOjj_",
60 | "colab_type": "text"
61 | },
62 | "cell_type": "markdown",
63 | "source": [
64 | "## 2. 主なアルゴリズム\n",
65 | "\n",
66 | "word2vecと呼ばれる手法は実は**Skip-gram**と**CBoW**という2つの手法の総称です。\n",
67 | "\n",
68 | "To explain the models with the figures below, we will use the following symbols.\n",
69 | "\n",
70 | "* $|\\mathcal{V}|$ : ボキャブラリ数\n",
71 | "* $D$ : 埋め込みベクトルのサイズ\n",
72 | "* ${\\bf v}_t$ : center wordのone-hotベクトル\n",
73 | "* $V_{\\pm C}$ : ${\\bf v}_t$の周囲のcontext wordのone-hotベクトルの集合、つまり$\\{{\\bf v}_{t+c}\\}_{c=-C}^C \\backslash {\\bf v}_t$\n",
74 | "* ${\\bf l}_H$ : 入力単語に対する埋め込みベクトル\n",
75 | "* ${\\bf l}_O$ : ネットワークの出力ベクトル\n",
76 | "* ${\\bf W}_H$ : 入力に対する埋め込み行列\n",
77 | "* ${\\bf W}_O$ : 出力に対する埋め込み行列\n",
78 | "\n",
79 | "**Note**\n",
80 | "\n",
81 | "**negative sampling**や**hierarchical softmax**をロス関数に使うことが一般的だが、**すべての単語に対するsoftmax関数**を使い、説明を簡略化するため他の説明は省略します。"
82 | ]
83 | },
84 | {
85 | "metadata": {
86 | "id": "AFx1gZ8KOjkA",
87 | "colab_type": "text"
88 | },
89 | "cell_type": "markdown",
90 | "source": [
91 | "### 2.1 Skip-gram\n",
92 | "\n",
93 | "このモデルは、 center wordが与えられたときにその周囲のcontext words $V_{t \\pm C}$を予測するように学習します。この時、入力に対する埋め込み行列$W_H$の各行が各単語の分散表現になります。\n",
94 | "\n",
95 | "center word ${\\bf v}_t$をネットワークに入力したとき、以下のようにしてcontext words $\\hat{\\bf v}_{t+i} \\in V_{t \\pm C}$を予測することができます\n",
96 | "\n",
97 | "1. 入力されたcenter wordに対する埋め込みベクトルを計算する: ${\\bf l}_H = {\\bf W}_H {\\bf v}_t$\n",
98 | "2. 埋め込みベクトルを使って出力ベクトルを計算する: ${\\bf l}_O = {\\bf W}_O {\\bf l}_H$\n",
99 | "3. context wordの確率ベクトルを計算する: $\\hat{\\bf v}_{t+i} = \\text{softmax}({\\bf l}_O)$\n",
100 | "\n",
101 | "$|\\mathcal{V}|$次元のベクトル$\\hat{\\bf v}_{t+i}$の各要素は、各単語がcontext wordである確率です。そのため、確率$p({\\bf v}_{t+i} \\mid {\\bf v}_t)$は、context wordのone-hotベクトル${\\bf v}_{t+i}$と確率ベクトル$\\hat{\\bf v}_{t+i}$の内積で計算することができます。\n",
102 | "\n",
103 | "\\begin{eqnarray}\n",
104 | "p({\\bf v}_{t+i} \\mid {\\bf v}_t) = {\\bf v}_{t+i}^T \\hat{\\bf v}_{t+i}\n",
105 | "\\end{eqnarray}\n",
106 | "\n",
107 | "そして、center word ${\\bf v}_t$に対するすべてのcontext word$V_{t \\pm C}$のロス関数は以下で計算することができます。\n",
108 | "\n",
109 | "\n",
110 | "\\begin{eqnarray}\n",
111 | "L(V_{t \\pm C} | {\\bf v}_t; {\\bf W}_H, {\\bf W}_O)\n",
112 | "&=& \\sum_{V_{t \\pm C}} -\\log\\left(p({\\bf v}_{t+i} \\mid {\\bf v}_t)\\right) \\\\\n",
113 | "&=& \\sum_{V_{t \\pm C}} -\\log({\\bf v}_{t+i}^T \\hat{\\bf v}_{t+i})\n",
114 | "\\end{eqnarray}"
115 | ]
116 | },
117 | {
118 | "metadata": {
119 | "id": "TAmvso8yOjkA",
120 | "colab_type": "text"
121 | },
122 | "cell_type": "markdown",
123 | "source": [
124 | "### 2.2 Continuous Bag of Words (CBoW)\n",
125 | "\n",
126 | "このモデルは、context word $V_{t \\pm C}$ が与えられたときにcenter word ${\\bf v}_t$を予測するように学習します。\n",
127 | "\n",
128 | "context words $V_{t \\pm C}$をネットワークに与えたとき、以下のようにcenter word $\\hat{v}_t$の確率を計算することができます。\n",
129 | "\n",
130 | "1. すべてのcontext wordに対する埋め込みベクトルの平均を計算します: ${\\bf l}_H = \\frac{1}{2C} \\sum_{V_{t \\pm C}} {\\bf W}_H {\\bf v}_{t+i}$\n",
131 | "2. 埋め込みベクトルを使って出力ベクトルを計算します: ${\\bf l}_O = {\\bf W}_O {\\bf l}_H$\n",
132 | "3. center wordの確率ベクトルを計算する: $\\hat{\\bf v}_t = \\text{softmax}({\\bf l}_O)$\n",
133 | "\n",
134 | "$|\\mathcal{V}|$次元のベクトル$\\hat{\\bf v}_t$の各要素は、各単語がcenter wordである確率です。そのため、確率$p({\\bf v}_t \\mid V_{t \\pm C})$は、center wordのone-hotベクトル${\\bf v}_{t}$と確率ベクトル$\\hat{\\bf v}_{t}$の内積で計算することができます。\n",
135 | "\n",
136 | "\\begin{eqnarray}\n",
137 | "p({\\bf v}_{t} \\mid V_{t \\pm C}) = {\\bf v}_{t}^T \\hat{\\bf v}_{t}\n",
138 | "\\end{eqnarray}\n",
139 | "\n",
140 | "The loss function for the center word prediction is defined as follows:\n",
141 | "\n",
142 | "そして、context word$V_{t \\pm C}$対するcenter word ${\\bf v}_t$のロス関数は以下で計算することができます。\n",
143 | "\n",
144 | "\n",
145 | "\\begin{eqnarray}\n",
146 | "L({\\bf v}_t|V_{t \\pm C}; W_H, W_O)\n",
147 | "&=& -\\log(p({\\bf v}_t|V_{t \\pm C})) \\\\\n",
148 | "&=& -\\log({\\bf v}_t^T \\hat{\\bf v}_t)\n",
149 | "\\end{eqnarray}"
150 | ]
151 | },
152 | {
153 | "metadata": {
154 | "id": "YZKrh07hOjkB",
155 | "colab_type": "text"
156 | },
157 | "cell_type": "markdown",
158 | "source": [
159 | "## 3. Skip-gramの詳細\n",
160 | "\n",
161 | "本チュートリアルでは、以下の観点からSkip-gramをメインで扱います。\n",
162 | "\n",
163 | "1. 学習アルゴリズムがCBoWに比べて理解しやすい\n",
164 | "2. 単語数が増えても精度が落ちにくく、スケールしやすい"
165 | ]
166 | },
167 | {
168 | "metadata": {
169 | "id": "QMspiJZzOjkB",
170 | "colab_type": "text"
171 | },
172 | "cell_type": "markdown",
173 | "source": [
174 | "skip-gramのアルゴリズムを理解するために、以下の設定で具体的な例から考えてみましょう:\n",
175 | "\n",
176 | "* ボキャブラリ数 $|\\mathcal{V}|$ は10。\n",
177 | "* 埋め込みベクトルのサイズ$D$は2。\n",
178 | "* Center wordはdog。\n",
179 | "* Context wordはanimal。\n",
180 | "\n",
181 | "そして、以下の工程をcontext word数回繰り返します。\n",
182 | "\n",
183 | "1. dogのone-hotベクトルは`[0 0 1 0 0 0 0 0 0 0]`で、 これをモデルに入力する。\n",
184 | "2. このとき、埋め込み行列${\\bf W}_H$の3番目の行${\\bf l}_H$がdogの埋め込みベクトルとなる。\n",
185 | "3. そして、出力ベクトル${\\bf l}_O$を計算するため、${\\bf W}_O$と${\\bf l}_H$の積を計算する。\n",
186 | "4. $c$の位置にあるcontext wordの確率ベクトル$\\hat{\\bf v}_{t+c}$を予測するため${\\bf l}_O$をsoftmax関数に入力する。\n",
187 | "5. $\\hat{\\bf v}_{t+c}$と animalのone-hotベクトル`[1 0 0 0 0 0 0 0 0 0 0]`の誤差を計算する。\n",
188 | "6. 誤差を伝播させてネットワークのパラメータを更新する。\n",
189 | "\n",
190 | ""
191 | ]
192 | },
193 | {
194 | "metadata": {
195 | "id": "IQ19qx2UOjkB",
196 | "colab_type": "text"
197 | },
198 | "cell_type": "markdown",
199 | "source": [
200 | "## 4. Chainerによるskip-gram実装方法\n",
201 | "\n",
202 | "GitHubレポジトリ上のexamples内にword2vecに関するコードがあるので、それに基づいて説明をしていきます。[chainer/examples/word2vec](https://github.com/chainer/chainer/tree/master/examples/word2vec)"
203 | ]
204 | },
205 | {
206 | "metadata": {
207 | "id": "-fxZ_DM3O7Ea",
208 | "colab_type": "text"
209 | },
210 | "cell_type": "markdown",
211 | "source": [
212 | "まずは、以下のセルを実行して、ChainerとそのGPUバックエンドであるCuPyをインストールします。Colaboratoryの「ランタイムのタイプ」がGPUであれば、GPUをバックエンドとしてChainerを動かすことができます。"
213 | ]
214 | },
215 | {
216 | "metadata": {
217 | "id": "9xOsXDFRO54W",
218 | "colab_type": "code",
219 | "colab": {
220 | "base_uri": "https://localhost:8080/",
221 | "height": 133
222 | },
223 | "outputId": "ae9cad40-8a15-4b24-917c-5d4b239c1b6a"
224 | },
225 | "cell_type": "code",
226 | "source": [
227 | "!curl https://colab.chainer.org/install | sh -"
228 | ],
229 | "execution_count": 0,
230 | "outputs": [
231 | {
232 | "output_type": "stream",
233 | "text": [
234 | "Reading package lists... Done\n",
235 | "Building dependency tree \n",
236 | "Reading state information... Done\n",
237 | "libcusparse8.0 is already the newest version (8.0.61-1).\n",
238 | "libnvrtc8.0 is already the newest version (8.0.61-1).\n",
239 | "libnvtoolsext1 is already the newest version (8.0.61-1).\n",
240 | "0 upgraded, 0 newly installed, 0 to remove and 0 not upgraded.\n"
241 | ],
242 | "name": "stdout"
243 | }
244 | ]
245 | },
246 | {
247 | "metadata": {
248 | "id": "G2O-8RkiOjkC",
249 | "colab_type": "text"
250 | },
251 | "cell_type": "markdown",
252 | "source": [
253 | "### 4.1 準備\n",
254 | "\n",
255 | "必要なパッケージを`import`しましょう。"
256 | ]
257 | },
258 | {
259 | "metadata": {
260 | "id": "OvL9Gk08OjkD",
261 | "colab_type": "code",
262 | "colab": {}
263 | },
264 | "cell_type": "code",
265 | "source": [
266 | "import argparse\n",
267 | "import collections\n",
268 | "\n",
269 | "import numpy as np\n",
270 | "import six\n",
271 | "\n",
272 | "import chainer\n",
273 | "from chainer import cuda\n",
274 | "import chainer.functions as F\n",
275 | "import chainer.initializers as I\n",
276 | "import chainer.links as L\n",
277 | "import chainer.optimizers as O\n",
278 | "from chainer import reporter\n",
279 | "from chainer import training\n",
280 | "from chainer.training import extensions"
281 | ],
282 | "execution_count": 0,
283 | "outputs": []
284 | },
285 | {
286 | "metadata": {
287 | "id": "AXuiONJbOjkG",
288 | "colab_type": "text"
289 | },
290 | "cell_type": "markdown",
291 | "source": [
292 | "### 4.2 skip-gramモデルの定義\n",
293 | "\n",
294 | "次にskip-gramのネットワーク構造を定義しましょう。"
295 | ]
296 | },
297 | {
298 | "metadata": {
299 | "id": "jLNEBp7gOjkG",
300 | "colab_type": "code",
301 | "colab": {}
302 | },
303 | "cell_type": "code",
304 | "source": [
305 | "class SkipGram(chainer.Chain):\n",
306 | "\n",
307 | " def __init__(self, n_vocab, n_units):\n",
308 | " super().__init__()\n",
309 | " with self.init_scope():\n",
310 | " self.embed = L.EmbedID(\n",
311 | " n_vocab, n_units, initialW=I.Uniform(1. / n_units))\n",
312 | " self.out = L.Linear(n_units, n_vocab, initialW=0)\n",
313 | "\n",
314 | " def __call__(self, x, context):\n",
315 | " e = self.embed(context)\n",
316 | " shape = e.shape\n",
317 | " x = F.broadcast_to(x[:, None], (shape[0], shape[1]))\n",
318 | " e = F.reshape(e, (shape[0] * shape[1], shape[2]))\n",
319 | " x = F.reshape(x, (shape[0] * shape[1],))\n",
320 | " center_predictions = self.out(e)\n",
321 | " loss = F.softmax_cross_entropy(center_predictions, x)\n",
322 | " reporter.report({'loss': loss}, self)\n",
323 | " return loss"
324 | ],
325 | "execution_count": 0,
326 | "outputs": []
327 | },
328 | {
329 | "metadata": {
330 | "id": "xL8OWlpZOjkI",
331 | "colab_type": "text"
332 | },
333 | "cell_type": "markdown",
334 | "source": [
335 | "**Note**\n",
336 | "\n",
337 | "- 重み行列`self.embed.W`は入力`x`に対する埋め込み行列です。\n",
338 | "- `__call__`は center wordの単語ID `x`とcontext wordの単語ID `contexts`を入力として取ります。そして、ロス関数`softmax_cross_entropy`で計算された誤差を出力します。\n",
339 | "- 注意してもらいたいのが、 `x`と`contexts`の形がそれぞれ`(batch_size,)`と`(batch_size, n_context)`になっていることです。\n",
340 | "- `batch_size`はミニバッチサイズを意味し、 `n_context`はcontext word数を意味します。\n",
341 | "\n",
342 | "まず、`e = self.embed(contexts)`で`contexts`に対応する分散表現を取得しています。\n",
343 | "\n",
344 | "そして、 `F.broadcast_to(x[:, None], (shape[0], shape[1]))`とすることで、`x`(`(batch_size,)`) を`(batch_size, n_context)`の形にブロードキャストします。このとき、 列方向に`n_context`回だけ同じ値がコピーされます。そして、ブロードキャストされた`x`は1次元ベクトルにreshapeされ、`(batchsize * n_context,)`になります。一方で、`e`は`(batch_size * n_context, n_units)`の形にreshapeされます。\n",
345 | "\n",
346 | "注意してもらいたいのが、skip-gramの場合、center wordとcontext wordは1対1で対応するため、center wordとcontext wordを入れ替えてモデル化しても問題がないです。そのため、上記ではcenter wordとcontext wordを入れ替えて学習させているように見えますが、問題はありません。なぜこのようなことをするかと言うと、CBoWモデルとコードの整合性が取りやすいからです。"
347 | ]
348 | },
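349 | {
350 | "metadata": {
351 | "id": "pair-symmetry-check",
352 | "colab_type": "text"
353 | },
354 | "cell_type": "markdown",
355 | "source": [
356 | "That symmetry can be checked on a toy corpus: with a symmetric window, the set of (center, context) position pairs equals its own transpose, so every pair is also trained in the opposite direction (a small illustrative check that ignores the boundary clipping done by the iterator below):\n",
357 | "\n",
358 | "```python\n",
359 | "words = 'the cute cat jumps over the lazy dog'.split()\n",
360 | "C = 2  # window size\n",
361 | "\n",
362 | "pairs = {(t, t + c) for t in range(len(words))\n",
363 | "         for c in range(-C, C + 1)\n",
364 | "         if c != 0 and 0 <= t + c < len(words)}\n",
365 | "\n",
366 | "# Every (center, context) pair also occurs as (context, center).\n",
367 | "assert pairs == {(j, i) for (i, j) in pairs}\n",
368 | "print(len(pairs), 'pairs')\n",
369 | "```"
370 | ]
371 | },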
349 | {
350 | "metadata": {
351 | "id": "dFAB4sObOjkJ",
352 | "colab_type": "text"
353 | },
354 | "cell_type": "markdown",
355 | "source": [
356 | "### 4.3 datasetとiteratorの準備\n",
357 | "\n",
358 | "Chainer'が用意するユーティリティ関数`get_ptb_words()`を使って、Penn Tree Bank (PTB)のデータセットをダウンロードしましょう。"
359 | ]
360 | },
361 | {
362 | "metadata": {
363 | "id": "pqUBJ1r2OjkJ",
364 | "colab_type": "code",
365 | "colab": {}
366 | },
367 | "cell_type": "code",
368 | "source": [
369 | "train, val, _ = chainer.datasets.get_ptb_words()\n",
370 | "n_vocab = max(train) + 1 # The minimum word ID is 0"
371 | ],
372 | "execution_count": 0,
373 | "outputs": []
374 | },
375 | {
376 | "metadata": {
377 | "id": "Yf2sqARoOjkL",
378 | "colab_type": "text"
379 | },
380 | "cell_type": "markdown",
381 | "source": [
382 | "center wordと、そのcontext wordを含むミニバッチを生成するIteratorを定義しましょう。"
383 | ]
384 | },
385 | {
386 | "metadata": {
387 | "id": "jwYDyNyXOjkM",
388 | "colab_type": "code",
389 | "colab": {}
390 | },
391 | "cell_type": "code",
392 | "source": [
393 | "class WindowIterator(chainer.dataset.Iterator):\n",
394 | "\n",
395 | " def __init__(self, dataset, window, batch_size, repeat=True):\n",
396 | " self.dataset = np.array(dataset, np.int32)\n",
397 | " self.window = window\n",
398 | " self.batch_size = batch_size\n",
399 | " self._repeat = repeat\n",
400 | "\n",
401 | " self.order = np.random.permutation(\n",
402 | " len(dataset) - window * 2).astype(np.int32)\n",
403 | " self.order += window\n",
404 | " self.current_position = 0\n",
405 | " self.epoch = 0\n",
406 | " self.is_new_epoch = False\n",
407 | "\n",
408 | " def __next__(self):\n",
409 | " if not self._repeat and self.epoch > 0:\n",
410 | " raise StopIteration\n",
411 | "\n",
412 | " i = self.current_position\n",
413 | " i_end = i + self.batch_size\n",
414 | " position = self.order[i: i_end]\n",
415 | " w = np.random.randint(self.window - 1) + 1\n",
416 | " offset = np.concatenate([np.arange(-w, 0), np.arange(1, w + 1)])\n",
417 | " pos = position[:, None] + offset[None, :]\n",
418 | " context = self.dataset.take(pos)\n",
419 | " center = self.dataset.take(position)\n",
420 | "\n",
421 | " if i_end >= len(self.order):\n",
422 | " np.random.shuffle(self.order)\n",
423 | " self.epoch += 1\n",
424 | " self.is_new_epoch = True\n",
425 | " self.current_position = 0\n",
426 | " else:\n",
427 | " self.is_new_epoch = False\n",
428 | " self.current_position = i_end\n",
429 | "\n",
430 | " return center, context\n",
431 | "\n",
432 | " @property\n",
433 | " def epoch_detail(self):\n",
434 | " return self.epoch + float(self.current_position) / len(self.order)\n",
435 | "\n",
436 | " def serialize(self, serializer):\n",
437 | " self.current_position = serializer('current_position',\n",
438 | " self.current_position)\n",
439 | " self.epoch = serializer('epoch', self.epoch)\n",
440 | " self.is_new_epoch = serializer('is_new_epoch', self.is_new_epoch)\n",
441 | " if self._order is not None:\n",
442 | " serializer('_order', self._order)\n",
443 | "\n",
444 | "def convert(batch, device):\n",
445 | " center, context = batch\n",
446 | " if device >= 0:\n",
447 | " center = cuda.to_gpu(center)\n",
448 | " context = cuda.to_gpu(context)\n",
449 | " return center, context"
450 | ],
451 | "execution_count": 0,
452 | "outputs": []
453 | },
454 | {
455 | "metadata": {
456 | "id": "3Zq2jxKgOjkO",
457 | "colab_type": "text"
458 | },
459 | "cell_type": "markdown",
460 | "source": [
461 | "- コンストラクタの中で、文書中の単語の位置をシャッフルしたリスト`self.order`を作成しています。文書からランダムに単語を選択し学習するようにするためです。ウィンドウサイズ分だけ最初と最後を切り取った単語の位置がシャッフルされて入っています。\n",
462 | "- イテレータの定義`__next__`は、コンストラクタのパラメータに従ってミニバッチサイズ個のcenter word `center`とcontext word `context`を返します。\n",
463 | "- `self.order[i:i_end]`で、単語の位置をシャッフルしたリスト`self.order`から`batch_size`分のcenter wordのインデックス`position`を生成します。(`position`は後で`self.dataset.take`によってcenter word `center`に変換されます。)\n",
464 | "- `np.concatenate([np.arange(-w, 0), np.arange(1, w + 1)])`で、ウインドウを表現するオフセット`offset`を作成しています。\n",
465 | "- `position[:, None] + offset[None, :]`によって、それぞれのcenter wordに対するcontext word のインデックス`pos`を生成します。`pos`は後で`self.dataset.take`によってcontext word `context`に変換されます。"
466 | ]
467 | },
468 | {
469 | "metadata": {
470 | "id": "IlIoeqeNOjkO",
471 | "colab_type": "text"
472 | },
473 | "cell_type": "markdown",
474 | "source": [
475 | "### 4.4 model, optimizer, updaterの準備"
476 | ]
477 | },
478 | {
479 | "metadata": {
480 | "id": "OgPecreHOjkP",
481 | "colab_type": "code",
482 | "colab": {}
483 | },
484 | "cell_type": "code",
485 | "source": [
486 | "unit = 100 # number of hidden units\n",
487 | "window = 5\n",
488 | "batchsize = 1000\n",
489 | "gpu = 0\n",
490 | "\n",
491 | "# Instantiate model\n",
492 | "model = SkipGram(n_vocab, unit)\n",
493 | "\n",
494 | "if gpu >= 0:\n",
495 | " model.to_gpu(gpu)\n",
496 | "\n",
497 | "# Create optimizer\n",
498 | "optimizer = O.Adam()\n",
499 | "optimizer.setup(model)\n",
500 | "\n",
501 | "# Create iterators for both train and val datasets\n",
502 | "train_iter = WindowIterator(train, window, batchsize)\n",
503 | "val_iter = WindowIterator(val, window, batchsize, repeat=False)\n",
504 | "\n",
505 | "# Create updater\n",
506 | "updater = training.StandardUpdater(\n",
507 | " train_iter, optimizer, converter=convert, device=gpu)"
508 | ],
509 | "execution_count": 0,
510 | "outputs": []
511 | },
512 | {
513 | "metadata": {
514 | "id": "ccVACTY-OjkS",
515 | "colab_type": "text"
516 | },
517 | "cell_type": "markdown",
518 | "source": [
519 | "### 4.5 trainingの開始"
520 | ]
521 | },
522 | {
523 | "metadata": {
524 | "id": "OVPlZWePOjkS",
525 | "colab_type": "code",
526 | "colab": {
527 | "base_uri": "https://localhost:8080/",
528 | "height": 1700
529 | },
530 | "outputId": "feb79699-0038-4df4-8e99-f480df3caace"
531 | },
532 | "cell_type": "code",
533 | "source": [
534 | "epoch = 100\n",
535 | "\n",
536 | "trainer = training.Trainer(updater, (epoch, 'epoch'), out='word2vec_result')\n",
537 | "trainer.extend(extensions.Evaluator(val_iter, model, converter=convert, device=gpu))\n",
538 | "trainer.extend(extensions.LogReport())\n",
539 | "trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss', 'elapsed_time']))\n",
540 | "trainer.run()"
541 | ],
542 | "execution_count": 0,
543 | "outputs": [
544 | {
545 | "output_type": "stream",
546 | "text": [
547 | "epoch main/loss validation/main/loss elapsed_time\n",
548 | "\u001b[J1 6.87314 6.48688 54.154 \n",
549 | "\u001b[J2 6.44018 6.40645 107.352 \n",
550 | "\u001b[J3 6.35021 6.3558 159.544 \n",
551 | "\u001b[J4 6.28615 6.31679 212.612 \n",
552 | "\u001b[J5 6.23762 6.28779 266.059 \n",
553 | "\u001b[J6 6.19942 6.22658 319.874 \n",
554 | "\u001b[J7 6.15986 6.20715 372.798 \n",
555 | "\u001b[J8 6.13787 6.21461 426.456 \n",
556 | "\u001b[J9 6.10637 6.24927 479.725 \n",
557 | "\u001b[J10 6.08759 6.23192 532.966 \n",
558 | "\u001b[J11 6.06768 6.19332 586.339 \n",
559 | "\u001b[J12 6.04607 6.17291 639.295 \n",
560 | "\u001b[J13 6.0321 6.21226 692.67 \n",
561 | "\u001b[J14 6.02178 6.18489 746.599 \n",
562 | "\u001b[J15 6.00098 6.17341 799.408 \n",
563 | "\u001b[J16 5.99099 6.19581 852.966 \n",
564 | "\u001b[J17 5.97425 6.22275 905.819 \n",
565 | "\u001b[J18 5.95974 6.20495 958.404 \n",
566 | "\u001b[J19 5.96579 6.16532 1012.49 \n",
567 | "\u001b[J20 5.95292 6.21457 1066.24 \n",
568 | "\u001b[J21 5.93696 6.18441 1119.45 \n",
569 | "\u001b[J22 5.91804 6.20695 1171.98 \n",
570 | "\u001b[J23 5.93265 6.15757 1225.99 \n",
571 | "\u001b[J24 5.92238 6.17064 1279.85 \n",
572 | "\u001b[J25 5.9154 6.21545 1334.01 \n",
573 | "\u001b[J26 5.90538 6.1812 1387.68 \n",
574 | "\u001b[J27 5.8807 6.18523 1439.72 \n",
575 | "\u001b[J28 5.89009 6.19992 1492.67 \n",
576 | "\u001b[J29 5.8773 6.24146 1545.48 \n",
577 | "\u001b[J30 5.89217 6.21846 1599.79 \n",
578 | "\u001b[J31 5.88493 6.21654 1653.95 \n",
579 | "\u001b[J32 5.87784 6.18502 1707.45 \n",
580 | "\u001b[J33 5.88031 6.14161 1761.75 \n",
581 | "\u001b[J34 5.86278 6.22893 1815.29 \n",
582 | "\u001b[J35 5.83335 6.18966 1866.56 \n",
583 | "\u001b[J36 5.85978 6.24276 1920.18 \n",
584 | "\u001b[J37 5.85921 6.23888 1974.2 \n",
585 | "\u001b[J38 5.85195 6.19231 2027.92 \n",
586 | "\u001b[J39 5.8396 6.20542 2080.78 \n",
587 | "\u001b[J40 5.83745 6.27583 2133.37 \n",
588 | "\u001b[J41 5.85996 6.23596 2188 \n",
589 | "\u001b[J42 5.85743 6.17438 2242.4 \n",
590 | "\u001b[J43 5.84051 6.25449 2295.84 \n",
591 | "\u001b[J44 5.83023 6.30226 2348.84 \n",
592 | "\u001b[J45 5.84677 6.23473 2403.11 \n",
593 | "\u001b[J46 5.82406 6.27398 2456.11 \n",
594 | "\u001b[J47 5.82827 6.21509 2509.17 \n",
595 | "\u001b[J48 5.8253 6.23009 2562.15 \n",
596 | "\u001b[J49 5.83697 6.2564 2616.35 \n",
597 | "\u001b[J50 5.81998 6.29104 2669.38 \n",
598 | "\u001b[J51 5.82926 6.26068 2723.47 \n",
599 | "\u001b[J52 5.81457 6.30152 2776.36 \n",
600 | "\u001b[J53 5.82587 6.29581 2830.24 \n",
601 | "\u001b[J54 5.80614 6.30994 2882.85 \n",
602 | "\u001b[J55 5.8161 6.23224 2935.73 \n",
603 | "\u001b[J56 5.80867 6.26867 2988.48 \n",
604 | "\u001b[J57 5.79467 6.24508 3040.2 \n",
605 | "\u001b[J58 5.81687 6.24676 3093.57 \n",
606 | "\u001b[J59 5.82064 6.30236 3147.68 \n",
607 | "\u001b[J60 5.80855 6.30184 3200.75 \n",
608 | "\u001b[J61 5.81298 6.25173 3254.06 \n",
609 | "\u001b[J62 5.80753 6.32951 3307.42 \n",
610 | "\u001b[J63 5.82505 6.2472 3361.68 \n",
611 | "\u001b[J64 5.78396 6.28168 3413.14 \n",
612 | "\u001b[J65 5.80209 6.24962 3465.96 \n",
613 | "\u001b[J66 5.80107 6.326 3518.83 \n",
614 | "\u001b[J67 5.83765 6.28848 3574.57 \n",
615 | "\u001b[J68 5.7864 6.3506 3626.88 \n",
616 | "\u001b[J69 5.80329 6.30671 3679.82 \n",
617 | "\u001b[J70 5.80032 6.29277 3732.69 \n",
618 | "\u001b[J71 5.80647 6.30722 3786.21 \n",
619 | "\u001b[J72 5.8176 6.30046 3840.51 \n",
620 | "\u001b[J73 5.79912 6.35945 3893.81 \n",
621 | "\u001b[J74 5.80484 6.32439 3947.35 \n",
622 | "\u001b[J75 5.82065 6.29674 4002.03 \n",
623 | "\u001b[J76 5.80872 6.27921 4056.05 \n",
624 | "\u001b[J77 5.80891 6.28952 4110.1 \n",
625 | "\u001b[J78 5.79121 6.35363 4163.39 \n",
626 | "\u001b[J79 5.79161 6.32894 4216.34 \n",
627 | "\u001b[J80 5.78601 6.3255 4268.95 \n",
628 | "\u001b[J81 5.79062 6.29608 4321.73 \n",
629 | "\u001b[J82 5.7959 6.37235 4375.25 \n",
630 | "\u001b[J83 5.77828 6.31001 4427.44 \n",
631 | "\u001b[J84 5.7879 6.25628 4480.09 \n",
632 | "\u001b[J85 5.79297 6.29321 4533.27 \n",
633 | "\u001b[J86 5.79286 6.2725 4586.44 \n",
634 | "\u001b[J87 5.79388 6.36764 4639.82 \n",
635 | "\u001b[J88 5.79062 6.33841 4692.89 \n",
636 | "\u001b[J89 5.7879 6.31828 4745.68 \n",
637 | "\u001b[J90 5.81015 6.33247 4800.19 \n",
638 | "\u001b[J91 5.78858 6.37569 4853.31 \n",
639 | "\u001b[J92 5.7966 6.35733 4907.27 \n",
640 | "\u001b[J93 5.79814 6.34506 4961.09 \n",
641 | "\u001b[J94 5.81956 6.322 5016.65 \n",
642 | "\u001b[J95 5.81565 6.35974 5071.69 \n",
643 | "\u001b[J96 5.78953 6.37451 5125.02 \n",
644 | "\u001b[J97 5.7993 6.42065 5179.34 \n",
645 | "\u001b[J98 5.79129 6.37995 5232.89 \n",
646 | "\u001b[J99 5.76834 6.36254 5284.7 \n",
647 | "\u001b[J100 5.79829 6.3785 5338.93 \n"
648 | ],
649 | "name": "stdout"
650 | }
651 | ]
652 | },
653 | {
654 | "metadata": {
655 | "id": "YqGB9iD2Tmng",
656 | "colab_type": "code",
657 | "colab": {}
658 | },
659 | "cell_type": "code",
660 | "source": [
661 | "vocab = chainer.datasets.get_ptb_words_vocabulary()\n",
662 | "index2word = {wid: word for word, wid in six.iteritems(vocab)}\n",
663 | "\n",
664 | "# Save the word2vec model\n",
665 | "with open('word2vec.model', 'w') as f:\n",
666 | " f.write('%d %d\\n' % (len(index2word), unit))\n",
667 | " w = cuda.to_cpu(model.embed.W.data)\n",
668 | " for i, wi in enumerate(w):\n",
669 | " v = ' '.join(map(str, wi))\n",
670 | " f.write('%s %s\\n' % (index2word[i], v))"
671 | ],
672 | "execution_count": 0,
673 | "outputs": []
674 | },
675 | {
676 | "metadata": {
677 | "id": "HgBVYr_b8dS8",
678 | "colab_type": "text"
679 | },
680 | "cell_type": "markdown",
681 | "source": [
682 | "### 4.6 似た単語の検索"
683 | ]
684 | },
685 | {
686 | "metadata": {
687 | "id": "7QDwFawQ8daT",
688 | "colab_type": "code",
689 | "colab": {}
690 | },
691 | "cell_type": "code",
692 | "source": [
693 | "import numpy\n",
694 | "import six\n",
695 | "\n",
696 | "n_result = 5 # number of search result to show\n",
697 | "\n",
698 | "\n",
699 | "with open('word2vec.model', 'r') as f:\n",
700 | " ss = f.readline().split()\n",
701 | " n_vocab, n_units = int(ss[0]), int(ss[1])\n",
702 | " word2index = {}\n",
703 | " index2word = {}\n",
704 | " w = numpy.empty((n_vocab, n_units), dtype=numpy.float32)\n",
705 | " for i, line in enumerate(f):\n",
706 | " ss = line.split()\n",
707 | " assert len(ss) == n_units + 1\n",
708 | " word = ss[0]\n",
709 | " word2index[word] = i\n",
710 | " index2word[i] = word\n",
711 | " w[i] = numpy.array([float(s) for s in ss[1:]], dtype=numpy.float32)\n",
712 | "\n",
713 | "\n",
714 | "s = numpy.sqrt((w * w).sum(1))\n",
715 | "w /= s.reshape((s.shape[0], 1)) # normalize"
716 | ],
717 | "execution_count": 0,
718 | "outputs": []
719 | },
720 | {
721 | "metadata": {
722 | "id": "MFel0uXmUfJl",
723 | "colab_type": "code",
724 | "colab": {}
725 | },
726 | "cell_type": "code",
727 | "source": [
728 | "def search(query):\n",
729 | " if query not in word2index:\n",
730 | " print('\"{0}\" is not found'.format(query))\n",
731 | " return\n",
732 | "\n",
733 | " v = w[word2index[query]]\n",
734 | " similarity = w.dot(v)\n",
735 | " print('query: {}'.format(query))\n",
736 | "\n",
737 | " count = 0\n",
738 | " for i in (-similarity).argsort():\n",
739 | " if numpy.isnan(similarity[i]):\n",
740 | " continue\n",
741 | " if index2word[i] == query:\n",
742 | " continue\n",
743 | " print('{0}: {1}'.format(index2word[i], similarity[i]))\n",
744 | " count += 1\n",
745 | " if count == n_result:\n",
746 | " return"
747 | ],
748 | "execution_count": 0,
749 | "outputs": []
750 | },
751 | {
752 | "metadata": {
753 | "id": "v3PrgDLi9pqf",
754 | "colab_type": "text"
755 | },
756 | "cell_type": "markdown",
757 | "source": [
758 | "appleで検索してみましょう。"
759 | ]
760 | },
761 | {
762 | "metadata": {
763 | "id": "_JerH5KJ9NFj",
764 | "colab_type": "code",
765 | "colab": {
766 | "base_uri": "https://localhost:8080/",
767 | "height": 116
768 | },
769 | "outputId": "84e097d5-80e8-4a5f-c790-5bbe104d7f2c"
770 | },
771 | "cell_type": "code",
772 | "source": [
773 | "query = \"apple\"\n",
774 | "search(query)"
775 | ],
776 | "execution_count": 0,
777 | "outputs": [
778 | {
779 | "output_type": "stream",
780 | "text": [
781 | "query: apple\n",
782 | "computer: 0.5457335710525513\n",
783 | "compaq: 0.5068206191062927\n",
784 | "microsoft: 0.4654524028301239\n",
785 | "network: 0.42985647916793823\n",
786 | "trotter: 0.42716777324676514\n"
787 | ],
788 | "name": "stdout"
789 | }
790 | ]
791 | },
792 | {
793 | "metadata": {
794 | "id": "JVXz7sbc8diq",
795 | "colab_type": "text"
796 | },
797 | "cell_type": "markdown",
798 | "source": [
799 | "## 5. Reference\n",
800 | "\n",
801 | "* [1] [Mikolov, Tomas; et al. “Efficient Estimation of Word Representations in Vector Space”. arXiv:1301.3781](https://arxiv.org/abs/1301.3781)\n",
802 | "* [2] [Distributional Hypothesis](https://aclweb.org/aclwiki/Distributional_Hypothesis)\n"
803 | ]
804 | },
805 | {
806 | "metadata": {
807 | "id": "HhBJdMTi8jxb",
808 | "colab_type": "code",
809 | "colab": {}
810 | },
811 | "cell_type": "code",
812 | "source": [
813 | ""
814 | ],
815 | "execution_count": 0,
816 | "outputs": []
817 | }
818 | ]
819 | }
820 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx
2 | sphinx-rtd-theme
3 | nbsphinx
4 | CommonMark==0.5.4
5 | recommonmark==0.4.0
6 |
--------------------------------------------------------------------------------