├── .gitignore
├── Makefile
├── autobuild.bat
├── make.bat
├── make_latex.bat
├── make_singlehtml.bat
├── readme.md
└── source
├── .vscode
└── settings.json
├── _static
├── code
│ ├── en
│ │ ├── basic
│ │ │ ├── eager
│ │ │ │ ├── 1plus1.py
│ │ │ │ ├── grad.py
│ │ │ │ └── regression.py
│ │ │ ├── example
│ │ │ │ ├── numpy_manual_grad.py
│ │ │ │ ├── python.py
│ │ │ │ ├── tensorflow_autograd.py
│ │ │ │ ├── tensorflow_eager.py
│ │ │ │ ├── tensorflow_eager_autograd.py
│ │ │ │ └── tensorflow_manual_grad.py
│ │ │ └── graph
│ │ │ │ ├── 1plus1.py
│ │ │ │ ├── AmatmulB.py
│ │ │ │ ├── aplusb.py
│ │ │ │ ├── grad.py
│ │ │ │ ├── variable.py
│ │ │ │ └── variable_with_initializer.py
│ │ ├── extended
│ │ │ ├── autograph
│ │ │ │ └── main.py
│ │ │ ├── gpu
│ │ │ │ └── main.py
│ │ │ ├── save_and_restore
│ │ │ │ ├── MNIST-data
│ │ │ │ │ ├── t10k-images-idx3-ubyte.gz
│ │ │ │ │ ├── t10k-labels-idx1-ubyte.gz
│ │ │ │ │ ├── train-images-idx3-ubyte.gz
│ │ │ │ │ └── train-labels-idx1-ubyte.gz
│ │ │ │ ├── mnist.py
│ │ │ │ ├── save
│ │ │ │ │ ├── checkpoint
│ │ │ │ │ ├── model.ckpt-10.data-00000-of-00001
│ │ │ │ │ ├── model.ckpt-10.index
│ │ │ │ │ ├── model.ckpt-6.data-00000-of-00001
│ │ │ │ │ ├── model.ckpt-6.index
│ │ │ │ │ ├── model.ckpt-7.data-00000-of-00001
│ │ │ │ │ ├── model.ckpt-7.index
│ │ │ │ │ ├── model.ckpt-8.data-00000-of-00001
│ │ │ │ │ ├── model.ckpt-8.index
│ │ │ │ │ ├── model.ckpt-9.data-00000-of-00001
│ │ │ │ │ └── model.ckpt-9.index
│ │ │ │ └── variables.weight
│ │ │ └── tensorboard
│ │ │ │ ├── MNIST-data
│ │ │ │ ├── t10k-images-idx3-ubyte.gz
│ │ │ │ ├── t10k-labels-idx1-ubyte.gz
│ │ │ │ ├── train-images-idx3-ubyte.gz
│ │ │ │ └── train-labels-idx1-ubyte.gz
│ │ │ │ ├── mnist.py
│ │ │ │ └── tensorboard
│ │ │ │ └── events.out.tfevents.1531645031.SNOWKYLIN-WORKS.v2
│ │ ├── model
│ │ │ ├── cnn
│ │ │ │ └── cnn.py
│ │ │ ├── custom_layer
│ │ │ │ └── linear.py
│ │ │ ├── linear
│ │ │ │ └── linear.py
│ │ │ ├── mlp
│ │ │ │ ├── MNIST-data
│ │ │ │ │ ├── t10k-images-idx3-ubyte.gz
│ │ │ │ │ ├── t10k-labels-idx1-ubyte.gz
│ │ │ │ │ ├── train-images-idx3-ubyte.gz
│ │ │ │ │ └── train-labels-idx1-ubyte.gz
│ │ │ │ ├── main.py
│ │ │ │ ├── mlp.py
│ │ │ │ └── utils.py
│ │ │ ├── rl
│ │ │ │ └── rl.py
│ │ │ └── rnn
│ │ │ │ ├── nietzsche.txt
│ │ │ │ └── rnn.py
│ │ └── test
│ │ │ └── test.py
│ └── zh
│ │ ├── basic
│ │ ├── eager
│ │ │ ├── 1plus1.py
│ │ │ ├── grad.py
│ │ │ └── regression.py
│ │ ├── example
│ │ │ ├── numpy_manual_grad.py
│ │ │ ├── python.py
│ │ │ ├── tensorflow_autograd.py
│ │ │ ├── tensorflow_eager.py
│ │ │ ├── tensorflow_eager_autograd.py
│ │ │ └── tensorflow_manual_grad.py
│ │ └── graph
│ │ │ ├── 1plus1.py
│ │ │ ├── AmatmulB.py
│ │ │ ├── aplusb.py
│ │ │ ├── grad.py
│ │ │ ├── variable.py
│ │ │ └── variable_with_initializer.py
│ │ ├── extended
│ │ ├── gpu
│ │ │ └── main.py
│ │ ├── save_and_restore
│ │ │ ├── MNIST-data
│ │ │ │ ├── t10k-images-idx3-ubyte.gz
│ │ │ │ ├── t10k-labels-idx1-ubyte.gz
│ │ │ │ ├── train-images-idx3-ubyte.gz
│ │ │ │ └── train-labels-idx1-ubyte.gz
│ │ │ ├── mnist.py
│ │ │ ├── save
│ │ │ │ ├── checkpoint
│ │ │ │ ├── model.ckpt-10.data-00000-of-00001
│ │ │ │ ├── model.ckpt-10.index
│ │ │ │ ├── model.ckpt-6.data-00000-of-00001
│ │ │ │ ├── model.ckpt-6.index
│ │ │ │ ├── model.ckpt-7.data-00000-of-00001
│ │ │ │ ├── model.ckpt-7.index
│ │ │ │ ├── model.ckpt-8.data-00000-of-00001
│ │ │ │ ├── model.ckpt-8.index
│ │ │ │ ├── model.ckpt-9.data-00000-of-00001
│ │ │ │ └── model.ckpt-9.index
│ │ │ └── variables.weight
│ │ └── tensorboard
│ │ │ ├── MNIST-data
│ │ │ ├── t10k-images-idx3-ubyte.gz
│ │ │ ├── t10k-labels-idx1-ubyte.gz
│ │ │ ├── train-images-idx3-ubyte.gz
│ │ │ └── train-labels-idx1-ubyte.gz
│ │ │ ├── mnist.py
│ │ │ └── tensorboard
│ │ │ └── events.out.tfevents.1531645031.SNOWKYLIN-WORKS.v2
│ │ ├── model
│ │ ├── cnn
│ │ │ └── cnn.py
│ │ ├── custom_layer
│ │ │ └── linear.py
│ │ ├── linear
│ │ │ └── linear.py
│ │ ├── mlp
│ │ │ ├── MNIST-data
│ │ │ │ ├── t10k-images-idx3-ubyte.gz
│ │ │ │ ├── t10k-labels-idx1-ubyte.gz
│ │ │ │ ├── train-images-idx3-ubyte.gz
│ │ │ │ └── train-labels-idx1-ubyte.gz
│ │ │ ├── main.py
│ │ │ ├── mlp.py
│ │ │ └── utils.py
│ │ ├── rl
│ │ │ └── rl.py
│ │ └── rnn
│ │ │ ├── nietzsche.txt
│ │ │ └── rnn.py
│ │ └── test
│ │ └── test.py
└── image
│ ├── extended
│ └── tensorboard.png
│ ├── figure.vsdx
│ └── model
│ ├── cartpole.gif
│ ├── cartpole.png
│ ├── cnn.pdf
│ ├── cnn.png
│ ├── mnist_0-9.png
│ ├── rnn.jpg
│ └── rnn_single.jpg
├── conf.py
├── conf_en.py
├── en
├── basic.rst
├── extended.rst
├── installation.rst
├── models.rst
├── preface.rst
└── static.rst
├── index.rst
└── zh
├── basic.rst
├── extended.rst
├── installation.rst
├── models.rst
├── preface.rst
└── static.rst
/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | book
3 | build
4 | reference
5 | teaching
6 | workspace.code-workspace
7 | __pycache__/
8 | source/.vscode
9 | source/_future
10 | source/_static/code/.idea
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = python -msphinx
7 | SPHINXPROJ = TensorFlow
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/autobuild.bat:
--------------------------------------------------------------------------------
1 | sphinx-autobuild --open-browser source build\html
--------------------------------------------------------------------------------
/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=python -msphinx
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | set SPHINXPROJ=TensorFlow
13 |
14 | if "%1" == "" goto help
15 |
16 | %SPHINXBUILD% >NUL 2>NUL
17 | if errorlevel 9009 (
18 | echo.
19 | echo.The Sphinx module was not found. Make sure you have Sphinx installed,
20 | echo.then set the SPHINXBUILD environment variable to point to the full
21 | echo.path of the 'sphinx-build' executable. Alternatively you may add the
22 | echo.Sphinx directory to PATH.
23 | echo.
24 | echo.If you don't have Sphinx installed, grab it from
25 | echo.http://sphinx-doc.org/
26 | exit /b 1
27 | )
28 |
29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
30 | goto end
31 |
32 | :help
33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
34 |
35 | :end
36 | popd
37 |
--------------------------------------------------------------------------------
/make_latex.bat:
--------------------------------------------------------------------------------
1 | make latex
--------------------------------------------------------------------------------
/make_singlehtml.bat:
--------------------------------------------------------------------------------
1 | make singlehtml
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # 简单粗暴TensorFlow | A Concise Handbook of TensorFlow
2 |
3 | *基于Eager Execution | Based on Eager Execution*
4 |
5 | **重要:此版本(基于 TensorFlow 1.X)不再更新,基于 TensorFlow 2 的新版见 https://github.com/snowkylin/tensorflow-handbook 和 https://tf.wiki | Important: this version (based on TensorFlow 1.X) is no longer updated; for the new version based on TensorFlow 2, see https://github.com/snowkylin/tensorflow-handbook and https://tf.wiki**
6 |
7 | 在线阅读 | Read online : https://v1.tf.wiki
8 |
9 | 备用地址 | Alternative URL:https://snowkylin.github.io/TensorFlow-cn/
10 |
11 | 作者 | Author: Xihan Li (snowkylin)
12 |
13 | 英文版译者 | Translators of English version: Zida Jin, Ming, Ji-An Li, Xihan Li
14 |
15 | 本手册是一篇精简的TensorFlow入门指导,基于TensorFlow的Eager Execution(动态图)模式,力图让具备一定机器学习及Python基础的开发者们快速上手TensorFlow。
16 |
17 | This handbook is a concise introduction to TensorFlow based on the Eager Execution (dynamic graph) mode, aiming to help developers with basic machine learning and Python knowledge get started with TensorFlow quickly.
18 |
19 | PDF下载 | PDF download :
20 |
21 | - (中文版 | Chinese): https://www.tensorflowers.cn/t/6230
22 | - (英文版 | English): https://github.com/snowkylin/TensorFlow-cn/releases
23 |
24 | 在线答疑区 | Online Q&A area :
25 |
26 | - (中文 | Chinese): https://www.tensorflowers.cn/b/48
27 | - (英文 | English): https://github.com/snowkylin/TensorFlow-cn/issues
28 |
--------------------------------------------------------------------------------
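As a minimal illustration of the Eager Execution (dynamic graph) mode the handbook is built on (a sketch, assuming TensorFlow 1.8 or later, where `tf.enable_eager_execution()` is available):

    import tensorflow as tf
    tf.enable_eager_execution()  # switch TensorFlow 1.x to eager (dynamic graph) mode; call once at program startup

    a = tf.constant(1)
    b = tf.constant(1)
    print(a + b)  # runs immediately and prints a concrete value; no tf.Session()/sess.run() as in graph mode

The per-chapter sample code under source/_static/code below follows this style.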
/source/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "restructuredtext.confPath": "c:\\Users\\xihan\\Desktop\\TensorFlow-cn\\source",
3 | "restructuredtext.builtDocumentationPath" : "c:\\Users\\xihan\\Desktop\\TensorFlow-cn\\build\\html",
4 | "restructuredtext.updateOnTextChanged" : "true",
5 | "restructuredtext.sphinxBuildPath" : "C:\\ProgramData\\Anaconda3\\Scripts\\sphinx-build.exe"
6 | }
--------------------------------------------------------------------------------
/source/_static/code/en/basic/eager/1plus1.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | tf.enable_eager_execution()
3 |
4 | a = tf.constant(1)
5 | b = tf.constant(1)
6 | c = tf.add(a, b) # The expression c = a + b is equivalent.
7 |
8 | print(c)
9 |
10 | A = tf.constant([[1, 2], [3, 4]])
11 | B = tf.constant([[5, 6], [7, 8]])
12 | C = tf.matmul(A, B)
13 |
14 | print(C)
15 |
--------------------------------------------------------------------------------
/source/_static/code/en/basic/eager/grad.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | tf.enable_eager_execution()
3 |
4 | x = tf.get_variable('x', shape=[1], initializer=tf.constant_initializer(3.))
5 | with tf.GradientTape() as tape: # All steps in the context of tf.GradientTape() are recorded for differentiation.
6 | y = tf.square(x)
7 | y_grad = tape.gradient(y, x) # Differentiate y with respect to x.
8 | print([y.numpy(), y_grad.numpy()])
9 |
10 | X = tf.constant([[1., 2.], [3., 4.]])
11 | y = tf.constant([[1.], [2.]])
12 | w = tf.get_variable('w', shape=[2, 1], initializer=tf.constant_initializer([[1.], [2.]]))
13 | b = tf.get_variable('b', shape=[1], initializer=tf.constant_initializer([1.]))
14 | with tf.GradientTape() as tape:
15 | L = 0.5 * tf.reduce_sum(tf.square(tf.matmul(X, w) + b - y))
16 | w_grad, b_grad = tape.gradient(L, [w, b]) # Differentiate L with respect to w and b.
17 | print([L.numpy(), w_grad.numpy(), b_grad.numpy()])
--------------------------------------------------------------------------------
/source/_static/code/en/basic/eager/regression.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | tf.enable_eager_execution()
3 |
4 | X = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
5 | y = tf.constant([[10.0], [20.0]])
6 |
7 | w = tf.get_variable('w', shape=[3, 1], initializer=tf.zeros_initializer())
8 | b = tf.get_variable('b', shape=[1], initializer=tf.zeros_initializer())
9 | variables = [w, b]
10 |
11 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
12 |
13 | for i in range(100):
14 | with tf.GradientTape() as tape:
15 | y_pred = tf.matmul(X, w) + b
16 | loss = tf.reduce_mean(tf.square(y_pred - y))
17 | grads = tape.gradient(loss, variables)
18 | optimizer.apply_gradients(grads_and_vars=zip(grads, variables))
19 | print(variables)
20 |
--------------------------------------------------------------------------------
/source/_static/code/en/basic/example/numpy_manual_grad.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | X_raw = np.array([2013, 2014, 2015, 2016, 2017], dtype=np.float32)
4 | y_raw = np.array([12000, 14000, 15000, 16500, 17500], dtype=np.float32)
5 |
6 | X = (X_raw - X_raw.min()) / (X_raw.max() - X_raw.min())
7 | y = (y_raw - y_raw.min()) / (y_raw.max() - y_raw.min())
8 |
9 | a, b = 0, 0
10 |
11 | num_epoch = 10000
12 | learning_rate = 1e-3
13 | for e in range(num_epoch):
14 | # Calculate the gradient of the loss function with respect to arguments (model parameters) manually.
15 | y_pred = a * X + b
16 | grad_a, grad_b = (y_pred - y).dot(X), (y_pred - y).sum()
17 |
18 | # Update parameters.
19 | a, b = a - learning_rate * grad_a, b - learning_rate * grad_b
20 |
21 | print(a, b)
--------------------------------------------------------------------------------
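The manual gradient expressions above follow from the squared-error loss; a short derivation, assuming the implicitly used loss L = \frac{1}{2}\sum_i (a x_i + b - y_i)^2:

    \frac{\partial L}{\partial a} = \sum_i (a x_i + b - y_i)\,x_i, \qquad
    \frac{\partial L}{\partial b} = \sum_i (a x_i + b - y_i)

which is exactly what `grad_a, grad_b = (y_pred - y).dot(X), (y_pred - y).sum()` computes.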
/source/_static/code/en/basic/example/python.py:
--------------------------------------------------------------------------------
1 | a = 0
2 | b = 0
3 |
4 | def f(x):
5 | y_pred = a * x + b
6 | return y_pred
7 |
8 | def loss(x, y):
9 | l = (a * x + b - y) ** 2
10 | return l
11 |
12 | def gradient_loss(x, y):
13 | g_a = 2 * (a * x + b - y) * x
14 | g_b = 2 * (a * x + b - y)
15 | return g_a, g_b
16 |
17 | X_raw = [2013, 2014, 2015, 2016, 2017]
18 | Y_raw = [12000, 14000, 15000, 16500, 17500]
19 | x_pred_raw = 2018
20 | X = [(x - min(X_raw)) / (max(X_raw) - min(X_raw)) for x in X_raw]
21 | Y = [(y - min(Y_raw)) / (max(Y_raw) - min(Y_raw)) for y in Y_raw]
22 |
23 | num_epoch = 10000
24 | learning_rate = 1e-3
25 | for e in range(num_epoch):
26 | for i in range(len(X)):
27 | x, y = X[i], Y[i]
28 | g_a, g_b = gradient_loss(x, y)
29 | a = a - learning_rate * g_a
30 | b = b - learning_rate * g_b
31 | print(a, b)
32 | for i in range(len(X)):
33 | x, y = X[i], Y[i]
34 | print(f(x), y)
35 | x_pred = (x_pred_raw - min(X_raw)) / (max(X_raw) - min(X_raw))
36 | y_pred = f(x_pred)
37 | y_pred_raw = y_pred * (max(Y_raw) - min(Y_raw)) + min(Y_raw)
38 | print(x_pred_raw, y_pred_raw)
--------------------------------------------------------------------------------
/source/_static/code/en/basic/example/tensorflow_autograd.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | X_raw = np.array([2013, 2014, 2015, 2016, 2017])
4 | y_raw = np.array([12000, 14000, 15000, 16500, 17500])
5 |
6 | X = (X_raw - X_raw.min()) / (X_raw.max() - X_raw.min())
7 | y = (y_raw - y_raw.min()) / (y_raw.max() - y_raw.min())
8 |
9 | import tensorflow as tf
10 |
11 | learning_rate_ = tf.placeholder(dtype=tf.float32)
12 | X_ = tf.placeholder(dtype=tf.float32, shape=[5])
13 | y_ = tf.placeholder(dtype=tf.float32, shape=[5])
14 | a = tf.get_variable('a', dtype=tf.float32, shape=[], initializer=tf.zeros_initializer)
15 | b = tf.get_variable('b', dtype=tf.float32, shape=[], initializer=tf.zeros_initializer)
16 |
17 | y_pred = a * X_ + b
18 | loss = tf.constant(0.5) * tf.reduce_sum(tf.square(y_pred - y_))
19 |
20 | # Back propagation: calculate and update the gradients of the variables (model parameters) with TensorFlow's GradientDescentOptimizer
21 | train_op = tf.train.GradientDescentOptimizer(learning_rate=learning_rate_).minimize(loss)
22 |
23 | num_epoch = 10000
24 | learning_rate = 1e-3
25 | with tf.Session() as sess:
26 | tf.global_variables_initializer().run()
27 | for e in range(num_epoch):
28 | sess.run(train_op, feed_dict={X_: X, y_: y, learning_rate_: learning_rate})
29 | print(sess.run([a, b]))
30 |
--------------------------------------------------------------------------------
/source/_static/code/en/basic/example/tensorflow_eager.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorflow.contrib.eager as tfe
3 | tfe.enable_eager_execution()
4 | import numpy as np
5 |
6 | X_raw = np.array([2013, 2014, 2015, 2016, 2017], dtype=np.float32)
7 | y_raw = np.array([12000, 14000, 15000, 16500, 17500], dtype=np.float32)
8 |
9 | X = (X_raw - X_raw.min()) / (X_raw.max() - X_raw.min())
10 | y = (y_raw - y_raw.min()) / (y_raw.max() - y_raw.min())
11 |
12 | X = tf.constant(X)
13 | y = tf.constant(y)
14 |
15 | a = tfe.Variable(0., name='a')
16 | b = tfe.Variable(0., name='b')
17 |
18 | num_epoch = 10000
19 | learning_rate = 1e-3
20 | for e in range(num_epoch):
21 | # Forward propagation
22 | y_pred = a * X + b
23 | loss = 0.5 * tf.reduce_sum(tf.square(y_pred - y)) # loss = 0.5 * np.sum(np.square(a * X + b - y))
24 |
25 |     # Back propagation: calculate the gradients of the variables (model parameters) manually
26 | grad_a = tf.reduce_sum((y_pred - y) * X)
27 | grad_b = tf.reduce_sum(y_pred - y)
28 |
29 | # Update parameters
30 | a, b = a - learning_rate * grad_a, b - learning_rate * grad_b
31 |
32 | print(a, b)
33 |
--------------------------------------------------------------------------------
/source/_static/code/en/basic/example/tensorflow_eager_autograd.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | import tensorflow.contrib.eager as tfe
4 | tf.enable_eager_execution()
5 |
6 | X_raw = np.array([2013, 2014, 2015, 2016, 2017], dtype=np.float32)
7 | y_raw = np.array([12000, 14000, 15000, 16500, 17500], dtype=np.float32)
8 |
9 | X = (X_raw - X_raw.min()) / (X_raw.max() - X_raw.min())
10 | y = (y_raw - y_raw.min()) / (y_raw.max() - y_raw.min())
11 |
12 | X = tf.constant(X)
13 | y = tf.constant(y)
14 |
15 | a = tf.get_variable('a', dtype=tf.float32, shape=[], initializer=tf.zeros_initializer)
16 | b = tf.get_variable('b', dtype=tf.float32, shape=[], initializer=tf.zeros_initializer)
17 | variables = [a, b]
18 |
19 | num_epoch = 10000
20 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-3)
21 | for e in range(num_epoch):
22 | # Use tf.GradientTape() to record the gradient info of the loss function
23 | with tf.GradientTape() as tape:
24 | y_pred = a * X + b
25 | loss = 0.5 * tf.reduce_sum(tf.square(y_pred - y))
26 |     # TensorFlow calculates the gradients of the loss function with respect to each argument (model parameter) automatically.
27 | grads = tape.gradient(loss, variables)
28 | # TensorFlow updates parameters automatically based on gradients.
29 | optimizer.apply_gradients(grads_and_vars=zip(grads, variables))
30 |
31 | print(a, b)
--------------------------------------------------------------------------------
/source/_static/code/en/basic/example/tensorflow_manual_grad.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | X_raw = np.array([2013, 2014, 2015, 2016, 2017])
4 | y_raw = np.array([12000, 14000, 15000, 16500, 17500])
5 |
6 | X = (X_raw - X_raw.min()) / (X_raw.max() - X_raw.min())
7 | y = (y_raw - y_raw.min()) / (y_raw.max() - y_raw.min())
8 |
9 | import tensorflow as tf
10 |
11 | # Define the data flow graph
12 | learning_rate_ = tf.placeholder(dtype=tf.float32)
13 | X_ = tf.placeholder(dtype=tf.float32, shape=[5])
14 | y_ = tf.placeholder(dtype=tf.float32, shape=[5])
15 | a = tf.get_variable('a', dtype=tf.float32, shape=[], initializer=tf.zeros_initializer)
16 | b = tf.get_variable('b', dtype=tf.float32, shape=[], initializer=tf.zeros_initializer)
17 |
18 | y_pred = a * X_ + b
19 | loss = tf.constant(0.5) * tf.reduce_sum(tf.square(y_pred - y_))
20 |
21 | # Back propagation: calculate the gradients of the variables (model parameters) manually
22 | grad_a = tf.reduce_sum((y_pred - y_) * X_)
23 | grad_b = tf.reduce_sum(y_pred - y_)
24 |
25 | # Gradient descent, update parameters manually
26 | new_a = a - learning_rate_ * grad_a
27 | new_b = b - learning_rate_ * grad_b
28 | update_a = tf.assign(a, new_a)
29 | update_b = tf.assign(b, new_b)
30 |
31 | train_op = [update_a, update_b]
32 | # End of the definition of the data flow graph
33 | # Note that up to this point we haven't performed any actual computation; we have only defined a data flow graph
34 |
35 | num_epoch = 10000
36 | learning_rate = 1e-3
37 | with tf.Session() as sess:
38 | # Initialize variables a and b
39 | tf.global_variables_initializer().run()
40 |     # Feed data into the data flow graph defined above to compute and update the variables
41 | for e in range(num_epoch):
42 | sess.run(train_op, feed_dict={X_: X, y_: y, learning_rate_: learning_rate})
43 | print(sess.run([a, b]))
44 |
--------------------------------------------------------------------------------
/source/_static/code/en/basic/graph/1plus1.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | # Define a "Computation Graph"
4 | a = tf.constant(1)  # Define a constant Tensor
5 | b = tf.constant(1)
6 | c = a + b  # Equivalent to c = tf.add(a, b); c is a new Tensor created by the add Operation on Tensor a and Tensor b
7 |
8 | sess = tf.Session()  # Initialize a Session
9 | c_ = sess.run(c)  # The run() method of a Session performs the actual computation of the nodes (Tensors) in the Computation Graph
10 | print(c_)
11 |
--------------------------------------------------------------------------------
/source/_static/code/en/basic/graph/AmatmulB.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | A = tf.ones(shape=[2, 3])  # tf.ones(shape) defines an all-one matrix with the given shape
4 | B = tf.ones(shape=[3, 2])
5 | C = tf.matmul(A, B)
6 |
7 | sess = tf.Session()
8 | C_ = sess.run(C)
9 | print(C_)
10 |
--------------------------------------------------------------------------------
/source/_static/code/en/basic/graph/aplusb.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | a = tf.placeholder(dtype=tf.int32) # Define a placeholder Tensor
4 | b = tf.placeholder(dtype=tf.int32)
5 | c = a + b
6 |
7 | a_ = int(input("a = "))  # Read an integer from the terminal and store it in a_
8 | b_ = int(input("b = "))
9 |
10 | sess = tf.Session()
11 | c_ = sess.run(c, feed_dict={a: a_, b: b_})  # feed_dict supplies the values of the Tensors needed to compute c
12 | print("a + b = %d" % c_)
13 |
--------------------------------------------------------------------------------
/source/_static/code/en/basic/graph/grad.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | x = tf.Variable(initial_value=1.)
4 | y = tf.square(x) # y = x ^ 2
5 | y_grad = tf.gradients(y, x)
6 |
7 | sess = tf.Session()
8 | sess.run(tf.global_variables_initializer())
9 | y_, y_grad_ = sess.run([y, y_grad])
10 | print([y_, y_grad_])
--------------------------------------------------------------------------------
/source/_static/code/en/basic/graph/variable.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | a = tf.get_variable(name='a', shape=[])
4 | initializer = tf.assign(a, 0)  # tf.assign(x, y) returns an operation that assigns the value of Tensor y to Tensor x
5 | a_plus_1 = a + 1 # Equal to a + tf.constant(1)
6 | plus_one_op = tf.assign(a, a_plus_1)
7 |
8 | sess = tf.Session()
9 | sess.run(initializer)
10 | for i in range(5):
11 |     sess.run(plus_one_op)  # Apply the plus-one operation to a
12 |     a_ = sess.run(a)  # Get the value of a and store the result in a_
13 | print(a_)
14 |
--------------------------------------------------------------------------------
/source/_static/code/en/basic/graph/variable_with_initializer.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | a = tf.get_variable(name='a', shape=[], initializer=tf.zeros_initializer)  # Specify the initializer as an all-zero initializer
4 | a_plus_1 = a + 1
5 | plus_one_op = tf.assign(a, a_plus_1)
6 |
7 | sess = tf.Session()
8 | sess.run(tf.global_variables_initializer())  # Initialize all the variables
9 | for i in range(5):
10 | sess.run(plus_one_op)
11 | a_ = sess.run(a)
12 | print(a_)
13 |
--------------------------------------------------------------------------------
/source/_static/code/en/extended/autograph/main.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from tensorflow.contrib import autograph
4 |
5 |
6 | class RNN(tf.keras.Model):
7 | def __init__(self, num_chars, batch_size, num_units):
8 | super().__init__()
9 | self.num_chars = num_chars
10 | self.num_units = num_units
11 |         self.batch_size = batch_size
12 | self.cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=num_units)
13 | self.dense = tf.keras.layers.Dense(units=self.num_chars)
14 |
15 | @autograph.convert()
16 | def call(self, inputs, seq_length):
17 | # batch_size, seq_length = tf.shape(inputs)
18 | inputs = tf.one_hot(inputs, depth=self.num_chars) # [batch_size, seq_length, num_chars]
19 | state = self.cell.zero_state(batch_size=self.batch_size, dtype=tf.float32)
20 | output = tf.zeros(shape=[self.batch_size, self.num_units], dtype=tf.float32)
21 | for t in range(seq_length):
22 | output, state = self.cell(inputs[:, t, :], state)
23 | output = self.dense(output)
24 | return output
25 |
26 | def predict(self, inputs, seq_length, temperature=1.):
27 | logits = self(inputs, seq_length)
28 | prob = tf.nn.softmax(logits / temperature)
29 | return prob
30 |
31 |
32 | class DataLoader():
33 | def __init__(self):
34 | path = tf.keras.utils.get_file('nietzsche.txt',
35 | origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
36 | with open(path, encoding='utf-8') as f:
37 | self.raw_text = f.read().lower()
38 | self.chars = sorted(list(set(self.raw_text)))
39 | self.char_indices = dict((c, i) for i, c in enumerate(self.chars))
40 | self.indices_char = dict((i, c) for i, c in enumerate(self.chars))
41 | self.text = [self.char_indices[c] for c in self.raw_text]
42 |
43 | def get_batch(self, seq_length, batch_size):
44 | seq = []
45 | next_char = []
46 | for i in range(batch_size):
47 | index = np.random.randint(0, len(self.text) - seq_length)
48 | seq.append(self.text[index:index+seq_length])
49 | next_char.append(self.text[index+seq_length])
50 | return np.array(seq), np.array(next_char) # [num_batch, seq_length], [num_batch]
51 |
52 |
53 | if __name__ == '__main__':
54 | num_batches = 100
55 | seq_length = 40
56 | batch_size = 50
57 | rnn_size = 256
58 | learning_rate = 1e-3
59 |
60 | data_loader = DataLoader()
61 |     model = RNN(len(data_loader.chars), batch_size, rnn_size)
62 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
63 | X_placeholder = tf.placeholder(name='X', shape=[None, seq_length], dtype=tf.int32)
64 | y_placeholder = tf.placeholder(name='y', shape=[None], dtype=tf.int32)
65 | seq_length_placeholder = tf.placeholder(name='seq_length', shape=None, dtype=tf.int32)
66 |     y_logit_pred = model(X_placeholder, seq_length_placeholder)
67 | loss = tf.losses.sparse_softmax_cross_entropy(labels=y_placeholder, logits=y_logit_pred)
68 | train_op = optimizer.minimize(loss)
69 | with tf.Session() as sess:
70 | sess.run(tf.global_variables_initializer())
71 | for batch_index in range(num_batches):
72 | X, y = data_loader.get_batch(seq_length, batch_size)
73 | _, train_loss = sess.run([train_op, loss], feed_dict={X_placeholder: X, y_placeholder: y, seq_length_placeholder: seq_length})
74 | print("batch %d: loss %f" % (batch_index, train_loss))
75 |
76 |         prob = model.predict(X_placeholder, seq_length_placeholder)
77 | X_, _ = data_loader.get_batch(seq_length, 1)
78 | for diversity in [0.2, 0.5, 1.0, 1.2]:
79 | X = X_
80 | print("diversity %f:" % diversity)
81 | for t in range(400):
82 | test_prob = sess.run(prob, feed_dict={X_placeholder: X})
83 | y_pred = np.array([np.random.choice(len(data_loader.chars), p=test_prob[i, :])
84 | for i in range(batch_size)])
85 | print(data_loader.indices_char[y_pred[0]], end='', flush=True)
86 | X = np.concatenate([X[:, 1:], np.expand_dims(y_pred, axis=1)], axis=-1)
--------------------------------------------------------------------------------
/source/_static/code/en/extended/gpu/main.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import os
3 |
4 | config = tf.ConfigProto()
5 | # config.gpu_options.allow_growth = True
6 | config.gpu_options.per_process_gpu_memory_fraction = 0.4
7 | tf.enable_eager_execution(config=config)
8 |
9 | A = tf.constant([[1, 2], [3, 4]])
10 | B = tf.constant([[5, 6], [7, 8]])
11 | C = tf.matmul(A, B)
12 |
13 | print(C)
14 |
15 | os.system('pause')
16 |
17 |
--------------------------------------------------------------------------------
/source/_static/code/en/extended/save_and_restore/MNIST-data/t10k-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/extended/save_and_restore/MNIST-data/t10k-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/en/extended/save_and_restore/MNIST-data/t10k-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/extended/save_and_restore/MNIST-data/t10k-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/en/extended/save_and_restore/MNIST-data/train-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/extended/save_and_restore/MNIST-data/train-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/en/extended/save_and_restore/MNIST-data/train-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/extended/save_and_restore/MNIST-data/train-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/en/extended/save_and_restore/mnist.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from en.model.mlp.mlp import MLP
4 | from en.model.mlp.utils import DataLoader
5 |
6 | tf.enable_eager_execution()
7 | mode = 'test'
8 | num_batches = 1000
9 | batch_size = 50
10 | learning_rate = 0.001
11 | data_loader = DataLoader()
12 |
13 |
14 | def train():
15 | model = MLP()
16 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
17 | checkpoint = tf.train.Checkpoint(myAwesomeModel=model) # instantiate a Checkpoint, set `model` as object to be saved
18 | for batch_index in range(num_batches):
19 | X, y = data_loader.get_batch(batch_size)
20 | with tf.GradientTape() as tape:
21 | y_logit_pred = model(tf.convert_to_tensor(X))
22 | loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_logit_pred)
23 | print("batch %d: loss %f" % (batch_index, loss.numpy()))
24 | grads = tape.gradient(loss, model.variables)
25 | optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
26 | if (batch_index + 1) % 100 == 0: # save every 100 batches
27 | checkpoint.save('./save/model.ckpt') # save model to .ckpt file
28 |
29 |
30 | def test():
31 | model_to_be_restored = MLP()
32 | checkpoint = tf.train.Checkpoint(myAwesomeModel=model_to_be_restored) # instantiate a Checkpoint, set newly initialized model `model_to_be_restored` to be the object to be restored
33 | checkpoint.restore(tf.train.latest_checkpoint('./save')) # restore parameters of model from file
34 | num_eval_samples = np.shape(data_loader.eval_labels)[0]
35 | y_pred = model_to_be_restored.predict(tf.constant(data_loader.eval_data)).numpy()
36 | print("test accuracy: %f" % (sum(y_pred == data_loader.eval_labels) / num_eval_samples))
37 |
38 |
39 | if __name__ == '__main__':
40 | if mode == 'train':
41 | train()
42 | if mode == 'test':
43 | test()
44 |
--------------------------------------------------------------------------------
/source/_static/code/en/extended/save_and_restore/save/checkpoint:
--------------------------------------------------------------------------------
1 | model_checkpoint_path: "model.ckpt-10"
2 | all_model_checkpoint_paths: "model.ckpt-6"
3 | all_model_checkpoint_paths: "model.ckpt-7"
4 | all_model_checkpoint_paths: "model.ckpt-8"
5 | all_model_checkpoint_paths: "model.ckpt-9"
6 | all_model_checkpoint_paths: "model.ckpt-10"
7 |
--------------------------------------------------------------------------------
/source/_static/code/en/extended/save_and_restore/save/model.ckpt-10.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/extended/save_and_restore/save/model.ckpt-10.data-00000-of-00001
--------------------------------------------------------------------------------
/source/_static/code/en/extended/save_and_restore/save/model.ckpt-10.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/extended/save_and_restore/save/model.ckpt-10.index
--------------------------------------------------------------------------------
/source/_static/code/en/extended/save_and_restore/save/model.ckpt-6.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/extended/save_and_restore/save/model.ckpt-6.data-00000-of-00001
--------------------------------------------------------------------------------
/source/_static/code/en/extended/save_and_restore/save/model.ckpt-6.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/extended/save_and_restore/save/model.ckpt-6.index
--------------------------------------------------------------------------------
/source/_static/code/en/extended/save_and_restore/save/model.ckpt-7.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/extended/save_and_restore/save/model.ckpt-7.data-00000-of-00001
--------------------------------------------------------------------------------
/source/_static/code/en/extended/save_and_restore/save/model.ckpt-7.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/extended/save_and_restore/save/model.ckpt-7.index
--------------------------------------------------------------------------------
/source/_static/code/en/extended/save_and_restore/save/model.ckpt-8.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/extended/save_and_restore/save/model.ckpt-8.data-00000-of-00001
--------------------------------------------------------------------------------
/source/_static/code/en/extended/save_and_restore/save/model.ckpt-8.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/extended/save_and_restore/save/model.ckpt-8.index
--------------------------------------------------------------------------------
/source/_static/code/en/extended/save_and_restore/save/model.ckpt-9.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/extended/save_and_restore/save/model.ckpt-9.data-00000-of-00001
--------------------------------------------------------------------------------
/source/_static/code/en/extended/save_and_restore/save/model.ckpt-9.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/extended/save_and_restore/save/model.ckpt-9.index
--------------------------------------------------------------------------------
/source/_static/code/en/extended/save_and_restore/variables.weight:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/extended/save_and_restore/variables.weight
--------------------------------------------------------------------------------
/source/_static/code/en/extended/tensorboard/MNIST-data/t10k-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/extended/tensorboard/MNIST-data/t10k-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/en/extended/tensorboard/MNIST-data/t10k-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/extended/tensorboard/MNIST-data/t10k-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/en/extended/tensorboard/MNIST-data/train-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/extended/tensorboard/MNIST-data/train-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/en/extended/tensorboard/MNIST-data/train-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/extended/tensorboard/MNIST-data/train-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/en/extended/tensorboard/mnist.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from en.model.mlp.mlp import MLP
4 | from en.model.mlp.utils import DataLoader
5 |
6 | tf.enable_eager_execution()
7 | num_batches = 10000
8 | batch_size = 50
9 | learning_rate = 0.001
10 | model = MLP()
11 | data_loader = DataLoader()
12 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
13 | summary_writer = tf.contrib.summary.create_file_writer('./tensorboard') # instantiate a logger
14 | with summary_writer.as_default(), tf.contrib.summary.always_record_summaries():
15 | for batch_index in range(num_batches):
16 | X, y = data_loader.get_batch(batch_size)
17 | with tf.GradientTape() as tape:
18 | y_logit_pred = model(tf.convert_to_tensor(X))
19 | loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_logit_pred)
20 | print("batch %d: loss %f" % (batch_index, loss.numpy()))
21 | tf.contrib.summary.scalar("loss", loss, step=batch_index) # log current loss
22 | grads = tape.gradient(loss, model.variables)
23 | optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
24 |
--------------------------------------------------------------------------------
/source/_static/code/en/extended/tensorboard/tensorboard/events.out.tfevents.1531645031.SNOWKYLIN-WORKS.v2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/extended/tensorboard/tensorboard/events.out.tfevents.1531645031.SNOWKYLIN-WORKS.v2
--------------------------------------------------------------------------------
/source/_static/code/en/model/cnn/cnn.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 |
4 | class CNN(tf.keras.Model):
5 | def __init__(self):
6 | super().__init__()
7 | self.conv1 = tf.keras.layers.Conv2D(
8 |             filters=32,             # Number of convolution kernels.
9 | kernel_size=[5, 5], # Size of the receptive field.
10 | padding="same", # Padding strategy.
11 | activation=tf.nn.relu # Activation function.
12 | )
13 | self.pool1 = tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2)
14 | self.conv2 = tf.keras.layers.Conv2D(
15 | filters=64,
16 | kernel_size=[5, 5],
17 | padding="same",
18 | activation=tf.nn.relu
19 | )
20 | self.pool2 = tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2)
21 | self.flatten = tf.keras.layers.Reshape(target_shape=(7 * 7 * 64,))
22 | self.dense1 = tf.keras.layers.Dense(units=1024, activation=tf.nn.relu)
23 | self.dense2 = tf.keras.layers.Dense(units=10)
24 |
25 | def call(self, inputs):
26 | inputs = tf.reshape(inputs, [-1, 28, 28, 1])
27 | x = self.conv1(inputs) # [batch_size, 28, 28, 32].
28 | x = self.pool1(x) # [batch_size, 14, 14, 32].
29 | x = self.conv2(x) # [batch_size, 14, 14, 64].
30 | x = self.pool2(x) # [batch_size, 7, 7, 64].
31 | x = self.flatten(x) # [batch_size, 7 * 7 * 64].
32 | x = self.dense1(x) # [batch_size, 1024].
33 | x = self.dense2(x) # [batch_size, 10].
34 | return x
35 |
36 | def predict(self, inputs):
37 | logits = self(inputs)
38 | return tf.argmax(logits, axis=-1)
--------------------------------------------------------------------------------
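A quick check of the shape bookkeeping in the comments above: the input is 28x28, and each 2x2 max-pooling layer with stride 2 halves the spatial dimensions, so 28 -> 14 after pool1 and 14 -> 7 after pool2; the flatten size is therefore 7 x 7 x 64 = 3136, which is what `target_shape=(7 * 7 * 64,)` encodes.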
/source/_static/code/en/model/custom_layer/linear.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 |
4 | eager = True
5 | X = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32)
6 | y = np.array([[10.0], [20.0]], dtype=np.float32)
7 |
8 |
9 | class LinearLayer(tf.keras.layers.Layer):
10 | def __init__(self):
11 | super().__init__()
12 |
13 | def build(self, input_shape): # Here input_shape is a TensorShape.
14 | self.w = self.add_variable(name='w',
15 | shape=[input_shape[-1], 1], initializer=tf.zeros_initializer())
16 | self.b = self.add_variable(name='b',
17 | shape=[1], initializer=tf.zeros_initializer())
18 |
19 | def call(self, X):
20 | y_pred = tf.matmul(X, self.w) + self.b
21 | return y_pred
22 |
23 |
24 | class Linear(tf.keras.Model):
25 | def __init__(self):
26 | super().__init__()
27 | self.layer = LinearLayer()
28 |
29 | def call(self, input):
30 | output = self.layer(input)
31 | return output
32 |
33 |
34 | if eager:
35 | tf.enable_eager_execution()
36 | X = tf.constant(X)
37 | y = tf.constant(y)
38 | model = Linear()
39 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
40 | for i in range(100):
41 | with tf.GradientTape() as tape:
42 | y_pred = model(X)
43 | loss = tf.reduce_mean(tf.square(y_pred - y))
44 | grads = tape.gradient(loss, model.variables)
45 | optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
46 | print(model.variables)
47 | else:
48 | model = Linear()
49 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
50 | X_placeholder = tf.placeholder(name='X', shape=[None, 3], dtype=tf.float32)
51 | y_placeholder = tf.placeholder(name='y', shape=[None, 1], dtype=tf.float32)
52 | y_pred = model(X_placeholder)
53 | loss = tf.reduce_mean(tf.square(y_pred - y_placeholder))
54 | train_op = optimizer.minimize(loss)
55 | with tf.Session() as sess:
56 | sess.run(tf.global_variables_initializer())
57 | for i in range(100):
58 | sess.run(train_op, feed_dict={X_placeholder: X, y_placeholder: y})
59 | print(sess.run(model.variables))
--------------------------------------------------------------------------------
/source/_static/code/en/model/linear/linear.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | tf.enable_eager_execution()
3 |
4 | X = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
5 | y = tf.constant([[10.0], [20.0]])
6 |
7 |
8 | class Linear(tf.keras.Model):
9 | def __init__(self):
10 | super().__init__()
11 | self.dense = tf.keras.layers.Dense(units=1, kernel_initializer=tf.zeros_initializer(),
12 | bias_initializer=tf.zeros_initializer())
13 |
14 | def call(self, input):
15 | output = self.dense(input)
16 | return output
17 |
18 |
19 | # The structure of the following code is similar to that of the previous example.
20 | model = Linear()
21 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
22 | for i in range(100):
23 | with tf.GradientTape() as tape:
24 | y_pred = model(X) # Call the model.
25 | loss = tf.reduce_mean(tf.square(y_pred - y))
26 | grads = tape.gradient(loss, model.variables)
27 | optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
28 | print(model.variables)
29 |
--------------------------------------------------------------------------------
/source/_static/code/en/model/mlp/MNIST-data/t10k-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/model/mlp/MNIST-data/t10k-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/en/model/mlp/MNIST-data/t10k-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/model/mlp/MNIST-data/t10k-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/en/model/mlp/MNIST-data/train-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/model/mlp/MNIST-data/train-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/en/model/mlp/MNIST-data/train-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/en/model/mlp/MNIST-data/train-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/en/model/mlp/main.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from en.model.mlp.mlp import MLP
4 | from en.model.cnn.cnn import CNN
5 |
6 | tf.enable_eager_execution()
7 | model_type = 'CNN'
8 | num_batches = 1000
9 | batch_size = 50
10 | learning_rate = 0.001
11 |
12 |
13 | class DataLoader():
14 | def __init__(self):
15 | mnist = tf.contrib.learn.datasets.load_dataset("mnist")
16 | self.train_data = mnist.train.images # np.array [55000, 784].
17 | self.train_labels = np.asarray(mnist.train.labels, dtype=np.int32) # np.array [55000] of int32.
18 | self.eval_data = mnist.test.images # np.array [10000, 784].
19 | self.eval_labels = np.asarray(mnist.test.labels, dtype=np.int32) # np.array [10000] of int32.
20 |
21 | def get_batch(self, batch_size):
22 | index = np.random.randint(0, np.shape(self.train_data)[0], batch_size)
23 | return self.train_data[index, :], self.train_labels[index]
24 |
25 |
26 | if model_type == 'MLP':
27 | model = MLP()
28 | else:
29 | model = CNN()
30 | data_loader = DataLoader()
31 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
32 | for batch_index in range(num_batches):
33 | X, y = data_loader.get_batch(batch_size)
34 | with tf.GradientTape() as tape:
35 | y_logit_pred = model(tf.convert_to_tensor(X))
36 | loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_logit_pred)
37 | print("batch %d: loss %f" % (batch_index, loss.numpy()))
38 | grads = tape.gradient(loss, model.variables)
39 | optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
40 |
41 | num_eval_samples = np.shape(data_loader.eval_labels)[0]
42 | y_pred = model.predict(data_loader.eval_data).numpy()
43 | print("test accuracy: %f" % (sum(y_pred == data_loader.eval_labels) / num_eval_samples))
44 |
45 | num_correct_pred = 0
46 | for batch_index in range(num_eval_samples // batch_size):
47 | y_pred = model.predict(data_loader.eval_data[batch_index * batch_size: (batch_index + 1) * batch_size]).numpy()
48 | num_correct_pred += sum(y_pred == data_loader.eval_labels[batch_index * batch_size: (batch_index + 1) * batch_size])
49 | print("test accuracy: %f" % (num_correct_pred / np.shape(data_loader.eval_labels)[0]))
50 |
51 |
52 |
--------------------------------------------------------------------------------
/source/_static/code/en/model/mlp/mlp.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 |
4 | class MLP(tf.keras.Model):
5 | def __init__(self):
6 | super().__init__()
7 | self.dense1 = tf.keras.layers.Dense(units=100, activation=tf.nn.relu)
8 | self.dense2 = tf.keras.layers.Dense(units=10)
9 |
10 | def call(self, inputs):
11 | x = self.dense1(inputs)
12 | x = self.dense2(x)
13 | return x
14 |
15 | def predict(self, inputs):
16 | logits = self(inputs)
17 | return tf.argmax(logits, axis=-1)
--------------------------------------------------------------------------------
/source/_static/code/en/model/mlp/utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 |
4 |
5 | class DataLoader():
6 | def __init__(self):
7 | mnist = tf.contrib.learn.datasets.load_dataset("mnist")
8 | self.train_data = mnist.train.images # np.array [55000, 784]
9 | self.train_labels = np.asarray(mnist.train.labels, dtype=np.int32) # np.array [55000] of int32
10 | self.eval_data = mnist.test.images # np.array [10000, 784]
11 | self.eval_labels = np.asarray(mnist.test.labels, dtype=np.int32) # np.array [10000] of int32
12 |
13 | def get_batch(self, batch_size):
14 | index = np.random.randint(0, np.shape(self.train_data)[0], batch_size)
15 | return self.train_data[index, :], self.train_labels[index]
--------------------------------------------------------------------------------
/source/_static/code/en/model/rl/rl.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import gym
4 | import random
5 | from collections import deque
6 |
7 | tf.enable_eager_execution()
8 | num_episodes = 500
9 | num_exploration_episodes = 100
10 | max_len_episode = 1000
11 | batch_size = 32
12 | learning_rate = 1e-3
13 | gamma = 1.
14 | initial_epsilon = 1.
15 | final_epsilon = 0.01
16 |
17 |
18 | # The Q-network is used to fit the Q function and resembles the aforementioned multilayer perceptron. It takes a state as input and outputs the Q-value of each action (2-dimensional for CartPole).
19 | class QNetwork(tf.keras.Model):
20 | def __init__(self):
21 | super().__init__()
22 | self.dense1 = tf.keras.layers.Dense(units=24, activation=tf.nn.relu)
23 | self.dense2 = tf.keras.layers.Dense(units=24, activation=tf.nn.relu)
24 | self.dense3 = tf.keras.layers.Dense(units=2)
25 |
26 | def call(self, inputs):
27 | x = self.dense1(inputs)
28 | x = self.dense2(x)
29 | x = self.dense3(x)
30 | return x
31 |
32 | def predict(self, inputs):
33 | q_values = self(inputs)
34 | return tf.argmax(q_values, axis=-1)
35 |
36 |
37 | env = gym.make('CartPole-v1') # Instantiate a game environment. The parameter is its name.
38 | model = QNetwork()
39 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
40 | replay_buffer = deque(maxlen=10000)
41 | epsilon = initial_epsilon
42 | for episode_id in range(num_episodes):
43 | state = env.reset() # Initialize the environment and get its initial state.
44 | epsilon = max(
45 | initial_epsilon * (num_exploration_episodes - episode_id) / num_exploration_episodes,
46 | final_epsilon)
47 | for t in range(max_len_episode):
48 | env.render() # Render the current frame.
49 | if random.random() < epsilon: # Epsilon-greedy exploration strategy.
50 |             action = env.action_space.sample()  # Choose a random action with probability epsilon.
51 | else:
52 | action = model.predict(
53 | tf.constant(np.expand_dims(state, axis=0), dtype=tf.float32)).numpy()
54 | action = action[0]
55 |         next_state, reward, done, info = env.step(action)  # Let the environment execute the action and return the next state, the reward, whether the game is over, and extra information.
56 | reward = -10. if done else reward # Give a large negative reward if the game is over.
57 |         replay_buffer.append((state, action, reward, next_state, 1 if done else 0))  # Put the (state, action, reward, next_state, done) tuple into the experience replay pool.
58 | state = next_state
59 |
60 | if done: # Exit this round and enter the next episode if the game is over.
61 | print("episode %d, epsilon %f, score %d" % (episode_id, epsilon, t))
62 | break
63 |
64 | if len(replay_buffer) >= batch_size:
65 |             # Randomly sample a batch of tuples from the experience replay pool and convert each field to a NumPy array.
66 | batch_state, batch_action, batch_reward, batch_next_state, batch_done = zip(
67 | *random.sample(replay_buffer, batch_size))
68 | batch_state, batch_reward, batch_next_state, batch_done = \
69 | [np.array(a, dtype=np.float32) for a in [batch_state, batch_reward, batch_next_state, batch_done]]
70 | batch_action = np.array(batch_action, dtype=np.int32)
71 |
72 | q_value = model(tf.constant(batch_next_state, dtype=tf.float32))
73 | y = batch_reward + (gamma * tf.reduce_max(q_value, axis=1)) * (1 - batch_done) # Calculate y according to the method in the paper.
74 | with tf.GradientTape() as tape:
75 | loss = tf.losses.mean_squared_error( # Minimize the distance between y and Q-value.
76 | labels=y,
77 | predictions=tf.reduce_sum(model(tf.constant(batch_state)) *
78 | tf.one_hot(batch_action, depth=2), axis=1)
79 | )
80 | grads = tape.gradient(loss, model.variables)
81 | optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables)) # Calculate the gradient and update parameters.
--------------------------------------------------------------------------------
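Written out, the target `y` computed in rl.py above is the standard Q-learning target with a terminal mask (d_j is the done flag); "the paper" in the comment presumably refers to the DQN paper (Mnih et al.):

    y_j = r_j + \gamma\,(1 - d_j)\,\max_{a'} Q(s_{j+1}, a')

so the bootstrapped term is dropped for terminal transitions, matching `batch_reward + (gamma * tf.reduce_max(q_value, axis=1)) * (1 - batch_done)`.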
/source/_static/code/en/model/rnn/rnn.py:
--------------------------------------------------------------------------------
1 | # reference: https://github.com/keras-team/keras/blob/master/examples/lstm_text_generation.py
2 |
3 | import tensorflow as tf
4 | import numpy as np
5 |
6 |
7 | class RNN(tf.keras.Model):
8 | def __init__(self, num_chars):
9 | super().__init__()
10 | self.num_chars = num_chars
11 | self.cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=256)
12 | self.dense = tf.keras.layers.Dense(units=self.num_chars)
13 |
14 | def call(self, inputs):
15 | batch_size, seq_length = tf.shape(inputs)
16 | inputs = tf.one_hot(inputs, depth=self.num_chars) # [batch_size, seq_length, num_chars]
17 | state = self.cell.zero_state(batch_size=batch_size, dtype=tf.float32)
18 | for t in range(seq_length.numpy()):
19 | output, state = self.cell(inputs[:, t, :], state)
20 | output = self.dense(output)
21 | return output
22 |
23 | def predict(self, inputs, temperature=1.):
24 | batch_size, _ = tf.shape(inputs)
25 | logits = self(inputs)
26 | prob = tf.nn.softmax(logits / temperature).numpy()
27 | return np.array([np.random.choice(self.num_chars, p=prob[i, :])
28 | for i in range(batch_size.numpy())])
29 |
30 |
31 | class DataLoader():
32 | def __init__(self):
33 | path = tf.keras.utils.get_file('nietzsche.txt',
34 | origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
35 | with open(path, encoding='utf-8') as f:
36 | self.raw_text = f.read().lower()
37 | self.chars = sorted(list(set(self.raw_text)))
38 | self.char_indices = dict((c, i) for i, c in enumerate(self.chars))
39 | self.indices_char = dict((i, c) for i, c in enumerate(self.chars))
40 | self.text = [self.char_indices[c] for c in self.raw_text]
41 |
42 | def get_batch(self, seq_length, batch_size):
43 | seq = []
44 | next_char = []
45 | for i in range(batch_size):
46 | index = np.random.randint(0, len(self.text) - seq_length)
47 | seq.append(self.text[index:index+seq_length])
48 | next_char.append(self.text[index+seq_length])
49 | return np.array(seq), np.array(next_char) # [num_batch, seq_length], [num_batch]
50 |
51 |
52 | if __name__ == '__main__':
53 | tf.enable_eager_execution()
54 | num_batches = 10000
55 | seq_length = 40
56 | batch_size = 50
57 | learning_rate = 1e-3
58 |
59 | data_loader = DataLoader()
60 | model = RNN(len(data_loader.chars))
61 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
62 | for batch_index in range(num_batches):
63 | X, y = data_loader.get_batch(seq_length, batch_size)
64 | with tf.GradientTape() as tape:
65 | y_logit_pred = model(X)
66 | loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_logit_pred)
67 | print("batch %d: loss %f" % (batch_index, loss.numpy()))
68 | grads = tape.gradient(loss, model.variables)
69 | optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
70 |
71 | X_, _ = data_loader.get_batch(seq_length, 1)
72 | for diversity in [0.2, 0.5, 1.0, 1.2]:
73 | X = X_
74 | print("diversity %f:" % diversity)
75 | for t in range(400):
76 | y_pred = model.predict(X, diversity)
77 | print(data_loader.indices_char[y_pred[0]], end='', flush=True)
78 | X = np.concatenate([X[:, 1:], np.expand_dims(y_pred, axis=1)], axis=-1)
--------------------------------------------------------------------------------
/source/_static/code/en/test/test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import random
3 | import gym
4 | import numpy as np
5 | from collections import deque
6 | from tensorflow.contrib.keras.api.keras.models import Sequential
7 | from tensorflow.contrib.keras.api.keras.layers import Dense
8 | from tensorflow.contrib.keras.api.keras.optimizers import Adam
9 |
10 | EPISODES = 1000
11 |
12 |
13 | class DQNAgent:
14 | def __init__(self, state_size, action_size):
15 | self.state_size = state_size
16 | self.action_size = action_size
17 | self.memory = deque(maxlen=2000)
18 | self.gamma = 0.95 # discount rate
19 | self.epsilon = 1.0 # exploration rate
20 | self.epsilon_min = 0.01
21 | self.epsilon_decay = 0.995
22 | self.learning_rate = 0.001
23 | self.model = self._build_model()
24 |
25 | def _build_model(self):
26 | # Neural Net for Deep-Q learning Model
27 | model = Sequential()
28 | model.add(Dense(24, input_dim=self.state_size, activation='relu'))
29 | model.add(Dense(24, activation='relu'))
30 | model.add(Dense(self.action_size, activation='linear'))
31 | model.compile(loss='mse',
32 | optimizer=Adam(lr=self.learning_rate))
33 | return model
34 |
35 | def remember(self, state, action, reward, next_state, done):
36 | self.memory.append((state, action, reward, next_state, done))
37 |
38 | def act(self, state):
39 | if np.random.rand() <= self.epsilon:
40 | return random.randrange(self.action_size)
41 | act_values = self.model.predict(state)
42 | return np.argmax(act_values[0]) # returns action
43 |
44 | def replay(self, batch_size):
45 | minibatch = random.sample(self.memory, batch_size)
46 | for state, action, reward, next_state, done in minibatch:
47 | target = reward
48 | if not done:
49 | target = (reward + self.gamma *
50 | np.amax(self.model.predict(next_state)[0]))
51 | target_f = self.model.predict(state)
52 | target_f[0][action] = target
53 | self.model.fit(state, target_f, epochs=1, verbose=0)
54 | if self.epsilon > self.epsilon_min:
55 | self.epsilon *= self.epsilon_decay
56 |
57 | def load(self, name):
58 | self.model.load_weights(name)
59 |
60 | def save(self, name):
61 | self.model.save_weights(name)
62 |
63 |
64 | if __name__ == "__main__":
65 | env = gym.make('CartPole-v1')
66 | state_size = env.observation_space.shape[0]
67 | action_size = env.action_space.n
68 | agent = DQNAgent(state_size, action_size)
69 | # agent.load("./save/cartpole-dqn.h5")
70 | done = False
71 | batch_size = 32
72 |
73 | for e in range(EPISODES):
74 | state = env.reset()
75 | state = np.reshape(state, [1, state_size])
76 | for time in range(500):
77 | # env.render()
78 | action = agent.act(state)
79 | next_state, reward, done, _ = env.step(action)
80 | reward = reward if not done else -10
81 | next_state = np.reshape(next_state, [1, state_size])
82 | agent.remember(state, action, reward, next_state, done)
83 | state = next_state
84 | if done:
85 | print("episode: {}/{}, score: {}, e: {:.2}"
86 | .format(e, EPISODES, time, agent.epsilon))
87 | break
88 | if len(agent.memory) > batch_size:
89 | agent.replay(batch_size)
90 | # if e % 10 == 0:
91 | # agent.save("./save/cartpole-dqn.h5")
--------------------------------------------------------------------------------
/source/_static/code/zh/basic/eager/1plus1.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | tf.enable_eager_execution()
3 |
4 | a = tf.constant(1)
5 | b = tf.constant(1)
6 | c = tf.add(a, b) # You can also write c = a + b directly; the two are equivalent
7 |
8 | print(c)
9 |
10 | A = tf.constant([[1, 2], [3, 4]])
11 | B = tf.constant([[5, 6], [7, 8]])
12 | C = tf.matmul(A, B)
13 |
14 | print(C)
15 |
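16 | # Expected output: c is the scalar tensor 2, and C is the matrix product [[19, 22], [43, 50]]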
--------------------------------------------------------------------------------
/source/_static/code/zh/basic/eager/grad.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | tf.enable_eager_execution()
3 |
4 | x = tf.get_variable('x', shape=[1], initializer=tf.constant_initializer(3.))
5 | with tf.GradientTape() as tape: # All computation steps inside the tf.GradientTape() context are recorded for differentiation
6 | y = tf.square(x)
7 | y_grad = tape.gradient(y, x) # Compute the derivative of y with respect to x
8 | print([y.numpy(), y_grad.numpy()])
9 |
10 | X = tf.constant([[1., 2.], [3., 4.]])
11 | y = tf.constant([[1.], [2.]])
12 | w = tf.get_variable('w', shape=[2, 1], initializer=tf.constant_initializer([[1.], [2.]]))
13 | b = tf.get_variable('b', shape=[1], initializer=tf.constant_initializer([1.]))
14 | with tf.GradientTape() as tape:
15 | L = 0.5 * tf.reduce_sum(tf.square(tf.matmul(X, w) + b - y))
16 | w_grad, b_grad = tape.gradient(L, [w, b]) # Compute the partial derivatives of L(w, b) with respect to w and b
17 | print([L.numpy(), w_grad.numpy(), b_grad.numpy()])
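18 | # Expected results: y = 9.0 with dy/dx = 6.0; L = 62.5 with w_grad = [[35.], [50.]] and b_grad = [15.]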
--------------------------------------------------------------------------------
/source/_static/code/zh/basic/eager/regression.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | tf.enable_eager_execution()
3 |
4 | X = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
5 | y = tf.constant([[10.0], [20.0]])
6 |
7 | w = tf.get_variable('w', shape=[3, 1], initializer=tf.zeros_initializer())
8 | b = tf.get_variable('b', shape=[1], initializer=tf.zeros_initializer())
9 | variables = [w, b]
10 |
11 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
12 |
13 | for i in range(100):
14 | with tf.GradientTape() as tape:
15 | y_pred = tf.matmul(X, w) + b
16 | loss = tf.reduce_mean(tf.square(y_pred - y))
17 | grads = tape.gradient(loss, variables)
18 | optimizer.apply_gradients(grads_and_vars=zip(grads, variables))
19 | print(variables)
20 |
--------------------------------------------------------------------------------
/source/_static/code/zh/basic/example/numpy_manual_grad.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | X_raw = np.array([2013, 2014, 2015, 2016, 2017], dtype=np.float32)
4 | y_raw = np.array([12000, 14000, 15000, 16500, 17500], dtype=np.float32)
5 |
6 | X = (X_raw - X_raw.min()) / (X_raw.max() - X_raw.min())
7 | y = (y_raw - y_raw.min()) / (y_raw.max() - y_raw.min())
8 |
9 | a, b = 0, 0
10 |
11 | num_epoch = 10000
12 | learning_rate = 1e-3
13 | for e in range(num_epoch):
14 | # Manually compute the gradient of the loss function with respect to the independent variables (model parameters)
15 | y_pred = a * X + b
16 | grad_a, grad_b = (y_pred - y).dot(X), (y_pred - y).sum()
17 |
18 | # Update the parameters
19 | a, b = a - learning_rate * grad_a, b - learning_rate * grad_b
20 |
21 | print(a, b)
--------------------------------------------------------------------------------
/source/_static/code/zh/basic/example/python.py:
--------------------------------------------------------------------------------
1 | a = 0
2 | b = 0
3 |
4 | def f(x):
5 | y_pred = a * x + b
6 | return y_pred
7 |
8 | def loss(x, y):
9 | l = (a * x + b - y) ** 2
10 | return l
11 |
12 | def gradient_loss(x, y):
13 | g_a = 2 * (a * x + b - y) * x
14 | g_b = 2 * (a * x + b - y)
15 | return g_a, g_b
16 |
17 | X_raw = [2013, 2014, 2015, 2016, 2017]
18 | Y_raw = [12000, 14000, 15000, 16500, 17500]
19 | x_pred_raw = 2018
20 | X = [(x - min(X_raw)) / (max(X_raw) - min(X_raw)) for x in X_raw]
21 | Y = [(y - min(Y_raw)) / (max(Y_raw) - min(Y_raw)) for y in Y_raw]
22 |
23 | num_epoch = 10000
24 | learning_rate = 1e-3
25 | for e in range(num_epoch):
26 | for i in range(len(X)):
27 | x, y = X[i], Y[i]
28 | g_a, g_b = gradient_loss(x, y)
29 | a = a - learning_rate * g_a
30 | b = b - learning_rate * g_b
31 | print(a, b)
32 | for i in range(len(X)):
33 | x, y = X[i], Y[i]
34 | print(f(x), y)
35 | x_pred = (x_pred_raw - min(X_raw)) / (max(X_raw) - min(X_raw))
36 | y_pred = f(x_pred)
37 | y_pred_raw = y_pred * (max(Y_raw) - min(Y_raw)) + min(Y_raw)
38 | print(x_pred_raw, y_pred_raw)
--------------------------------------------------------------------------------
/source/_static/code/zh/basic/example/tensorflow_autograd.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | X_raw = np.array([2013, 2014, 2015, 2016, 2017])
4 | y_raw = np.array([12000, 14000, 15000, 16500, 17500])
5 |
6 | X = (X_raw - X_raw.min()) / (X_raw.max() - X_raw.min())
7 | y = (y_raw - y_raw.min()) / (y_raw.max() - y_raw.min())
8 |
9 | import tensorflow as tf
10 |
11 | learning_rate_ = tf.placeholder(dtype=tf.float32)
12 | X_ = tf.placeholder(dtype=tf.float32, shape=[5])
13 | y_ = tf.placeholder(dtype=tf.float32, shape=[5])
14 | a = tf.get_variable('a', dtype=tf.float32, shape=[], initializer=tf.zeros_initializer)
15 | b = tf.get_variable('b', dtype=tf.float32, shape=[], initializer=tf.zeros_initializer)
16 |
17 | y_pred = a * X_ + b
18 | loss = tf.constant(0.5) * tf.reduce_sum(tf.square(y_pred - y_))
19 |
20 | # Back propagation: use TensorFlow's gradient descent optimizer to automatically compute the gradients and update the variables (model parameters)
21 | train_op = tf.train.GradientDescentOptimizer(learning_rate=learning_rate_).minimize(loss)
22 |
23 | num_epoch = 10000
24 | learning_rate = 1e-3
25 | with tf.Session() as sess:
26 | tf.global_variables_initializer().run()
27 | for e in range(num_epoch):
28 | sess.run(train_op, feed_dict={X_: X, y_: y, learning_rate_: learning_rate})
29 | print(sess.run([a, b]))
--------------------------------------------------------------------------------
/source/_static/code/zh/basic/example/tensorflow_eager.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorflow.contrib.eager as tfe
3 | tfe.enable_eager_execution()
4 | import numpy as np
5 |
6 | X_raw = np.array([2013, 2014, 2015, 2016, 2017], dtype=np.float32)
7 | y_raw = np.array([12000, 14000, 15000, 16500, 17500], dtype=np.float32)
8 |
9 | X = (X_raw - X_raw.min()) / (X_raw.max() - X_raw.min())
10 | y = (y_raw - y_raw.min()) / (y_raw.max() - y_raw.min())
11 |
12 | X = tf.constant(X)
13 | y = tf.constant(y)
14 |
15 | a = tfe.Variable(0., name='a')
16 | b = tfe.Variable(0., name='b')
17 |
18 | num_epoch = 10000
19 | learning_rate = 1e-3
20 | for e in range(num_epoch):
21 | # Forward propagation
22 | y_pred = a * X + b
23 | loss = 0.5 * tf.reduce_sum(tf.square(y_pred - y)) # loss = 0.5 * np.sum(np.square(a * X + b - y))
24 |
25 | # Back propagation: manually compute the gradients of the variables (model parameters)
26 | grad_a = tf.reduce_sum((y_pred - y) * X)
27 | grad_b = tf.reduce_sum(y_pred - y)
28 |
29 | # Update the parameters
30 | a, b = a - learning_rate * grad_a, b - learning_rate * grad_b
31 |
32 | print(a, b)
--------------------------------------------------------------------------------
/source/_static/code/zh/basic/example/tensorflow_eager_autograd.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | import tensorflow.contrib.eager as tfe
4 | tf.enable_eager_execution()
5 |
6 | X_raw = np.array([2013, 2014, 2015, 2016, 2017], dtype=np.float32)
7 | y_raw = np.array([12000, 14000, 15000, 16500, 17500], dtype=np.float32)
8 |
9 | X = (X_raw - X_raw.min()) / (X_raw.max() - X_raw.min())
10 | y = (y_raw - y_raw.min()) / (y_raw.max() - y_raw.min())
11 |
12 | X = tf.constant(X)
13 | y = tf.constant(y)
14 |
15 | a = tf.get_variable('a', dtype=tf.float32, shape=[], initializer=tf.zeros_initializer)
16 | b = tf.get_variable('b', dtype=tf.float32, shape=[], initializer=tf.zeros_initializer)
17 | variables = [a, b]
18 |
19 | num_epoch = 10000
20 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-3)
21 | for e in range(num_epoch):
22 | # Use tf.GradientTape() to record the gradient information of the loss function
23 | with tf.GradientTape() as tape:
24 | y_pred = a * X + b
25 | loss = 0.5 * tf.reduce_sum(tf.square(y_pred - y))
26 | # TensorFlow automatically computes the gradients of the loss with respect to the independent variables (model parameters)
27 | grads = tape.gradient(loss, variables)
28 | # TensorFlow automatically updates the parameters according to the gradients
29 | optimizer.apply_gradients(grads_and_vars=zip(grads, variables))
30 |
31 | print(a, b)
--------------------------------------------------------------------------------
/source/_static/code/zh/basic/example/tensorflow_manual_grad.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | X_raw = np.array([2013, 2014, 2015, 2016, 2017])
4 | y_raw = np.array([12000, 14000, 15000, 16500, 17500])
5 |
6 | X = (X_raw - X_raw.min()) / (X_raw.max() - X_raw.min())
7 | y = (y_raw - y_raw.min()) / (y_raw.max() - y_raw.min())
8 |
9 | import tensorflow as tf
10 |
11 | # Define the dataflow graph
12 | learning_rate_ = tf.placeholder(dtype=tf.float32)
13 | X_ = tf.placeholder(dtype=tf.float32, shape=[5])
14 | y_ = tf.placeholder(dtype=tf.float32, shape=[5])
15 | a = tf.get_variable('a', dtype=tf.float32, shape=[], initializer=tf.zeros_initializer)
16 | b = tf.get_variable('b', dtype=tf.float32, shape=[], initializer=tf.zeros_initializer)
17 |
18 | y_pred = a * X_ + b
19 | loss = tf.constant(0.5) * tf.reduce_sum(tf.square(y_pred - y_))
20 |
21 | # Back propagation: manually compute the gradients of the variables (model parameters)
22 | grad_a = tf.reduce_sum((y_pred - y_) * X_)
23 | grad_b = tf.reduce_sum(y_pred - y_)
24 |
25 | # Gradient descent: manually update the parameters
26 | new_a = a - learning_rate_ * grad_a
27 | new_b = b - learning_rate_ * grad_b
28 | update_a = tf.assign(a, new_a)
29 | update_b = tf.assign(b, new_b)
30 |
31 | train_op = [update_a, update_b]
32 | # The dataflow graph definition ends here
33 | # Note that up to this point we have not performed any actual numerical computation; we have only defined a graph
34 |
35 | num_epoch = 10000
36 | learning_rate = 1e-3
37 | with tf.Session() as sess:
38 | # Initialize the variables a and b
39 | tf.global_variables_initializer().run()
40 | # Repeatedly feed the data into the dataflow graph built above to run the computation and update the variables
41 | for e in range(num_epoch):
42 | sess.run(train_op, feed_dict={X_: X, y_: y, learning_rate_: learning_rate})
43 | print(sess.run([a, b]))
--------------------------------------------------------------------------------
/source/_static/code/zh/basic/graph/1plus1.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | # Define a "computational graph"
4 | a = tf.constant(1) # Define a constant Tensor
5 | b = tf.constant(1)
6 | c = a + b # Equivalent to c = tf.add(a, b); c is the new tensor formed from tensors a and b by the Add operation
7 |
8 | sess = tf.Session() # Instantiate a Session
9 | c_ = sess.run(c) # Use the session's run() method to actually compute the nodes (tensors) in the graph
10 | print(c_)
--------------------------------------------------------------------------------
/source/_static/code/zh/basic/graph/AmatmulB.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | A = tf.ones(shape=[2, 3]) # tf.ones(shape) defines an all-ones matrix with the given shape
4 | B = tf.ones(shape=[3, 2])
5 | C = tf.matmul(A, B)
6 |
7 | sess = tf.Session()
8 | C_ = sess.run(C)
9 | print(C_)
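10 | # Expected output: a 2x2 matrix with every entry equal to 3 (each entry is the dot product of a length-3 row of ones with a column of ones)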
--------------------------------------------------------------------------------
/source/_static/code/zh/basic/graph/aplusb.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | a = tf.placeholder(dtype=tf.int32) # Define a placeholder Tensor
4 | b = tf.placeholder(dtype=tf.int32)
5 | c = a + b
6 |
7 | a_ = int(input("a = ")) # Read an integer from the terminal into the variable a_ (int() is needed because input() returns a string in Python 3)
8 | b_ = int(input("b = "))
9 |
10 | sess = tf.Session()
11 | c_ = sess.run(c, feed_dict={a: a_, b: b_}) # The feed_dict argument supplies the values of the tensors needed to compute c
12 | print("a + b = %d" % c_)
--------------------------------------------------------------------------------
/source/_static/code/zh/basic/graph/grad.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | x = tf.Variable(initial_value=1.)
4 | y = tf.square(x) # y = x ^ 2
5 | y_grad = tf.gradients(y, x)
6 |
7 | sess = tf.Session()
8 | sess.run(tf.global_variables_initializer())
9 | y_, y_grad_ = sess.run([y, y_grad])
10 | print([y_, y_grad_])
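11 | # Expected output: [1.0, [2.0]] -- y = x^2 is 1 and dy/dx = 2x is 2 at x = 1 (tf.gradients returns a list)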
--------------------------------------------------------------------------------
/source/_static/code/zh/basic/graph/variable.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | a = tf.get_variable(name='a', shape=[])
4 | initializer = tf.assign(a, 0) # tf.assign(x, y) returns an operation that assigns the value of tensor y to variable x
5 | a_plus_1 = a + 1 # Equivalent to a + tf.constant(1)
6 | plus_one_op = tf.assign(a, a_plus_1)
7 |
8 | sess = tf.Session()
9 | sess.run(initializer)
10 | for i in range(5):
11 | sess.run(plus_one_op) # Run the plus-one operation on the variable a
12 | a_ = sess.run(a) # Fetch the value of the variable a into a_
13 | print(a_)
14 |
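15 | # Expected output: 1.0, 2.0, 3.0, 4.0, 5.0 printed on separate lines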
--------------------------------------------------------------------------------
/source/_static/code/zh/basic/graph/variable_with_initializer.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | a = tf.get_variable(name='a', shape=[], initializer=tf.zeros_initializer) # Specify the all-zeros initializer
4 | a_plus_1 = a + 1
5 | plus_one_op = tf.assign(a, a_plus_1)
6 |
7 | sess = tf.Session()
8 | sess.run(tf.global_variables_initializer()) # Initialize all variables
9 | for i in range(5):
10 | sess.run(plus_one_op)
11 | a_ = sess.run(a)
12 | print(a_)
13 |
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/gpu/main.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import os
3 |
4 | config = tf.ConfigProto()
5 | # config.gpu_options.allow_growth = True # Option 1: allocate GPU memory on demand
6 | config.gpu_options.per_process_gpu_memory_fraction = 0.4 # Option 2: limit each process to at most 40% of GPU memory
7 | tf.enable_eager_execution(config=config)
8 |
9 | A = tf.constant([[1, 2], [3, 4]])
10 | B = tf.constant([[5, 6], [7, 8]])
11 | C = tf.matmul(A, B)
12 |
13 | print(C)
14 |
15 | os.system('pause')
16 |
17 |
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/save_and_restore/MNIST-data/t10k-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/extended/save_and_restore/MNIST-data/t10k-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/save_and_restore/MNIST-data/t10k-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/extended/save_and_restore/MNIST-data/t10k-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/save_and_restore/MNIST-data/train-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/extended/save_and_restore/MNIST-data/train-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/save_and_restore/MNIST-data/train-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/extended/save_and_restore/MNIST-data/train-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/save_and_restore/mnist.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from zh.model.mlp.mlp import MLP
4 | from zh.model.mlp.utils import DataLoader
5 |
6 | tf.enable_eager_execution()
7 | mode = 'test'
8 | num_batches = 1000
9 | batch_size = 50
10 | learning_rate = 0.001
11 | data_loader = DataLoader()
12 |
13 |
14 | def train():
15 | model = MLP()
16 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
17 | checkpoint = tf.train.Checkpoint(myAwesomeModel=model) # Instantiate a Checkpoint whose object to be saved is model
18 | for batch_index in range(num_batches):
19 | X, y = data_loader.get_batch(batch_size)
20 | with tf.GradientTape() as tape:
21 | y_logit_pred = model(tf.convert_to_tensor(X))
22 | loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_logit_pred)
23 | print("batch %d: loss %f" % (batch_index, loss.numpy()))
24 | grads = tape.gradient(loss, model.variables)
25 | optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
26 | if (batch_index + 1) % 100 == 0: # Save once every 100 batches
27 | checkpoint.save('./save/model.ckpt') # Save model parameters to file
28 |
29 |
30 | def test():
31 | model_to_be_restored = MLP()
32 | checkpoint = tf.train.Checkpoint(myAwesomeModel=model_to_be_restored) # Instantiate a Checkpoint whose object to be restored is the newly created model model_to_be_restored
33 | checkpoint.restore(tf.train.latest_checkpoint('./save')) # Restore model parameters from file
34 | num_eval_samples = np.shape(data_loader.eval_labels)[0]
35 | y_pred = model_to_be_restored.predict(tf.constant(data_loader.eval_data)).numpy()
36 | print("test accuracy: %f" % (sum(y_pred == data_loader.eval_labels) / num_eval_samples))
37 |
38 |
39 | if __name__ == '__main__':
40 | if mode == 'train':
41 | train()
42 | if mode == 'test':
43 | test()
44 |
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/save_and_restore/save/checkpoint:
--------------------------------------------------------------------------------
1 | model_checkpoint_path: "model.ckpt-10"
2 | all_model_checkpoint_paths: "model.ckpt-6"
3 | all_model_checkpoint_paths: "model.ckpt-7"
4 | all_model_checkpoint_paths: "model.ckpt-8"
5 | all_model_checkpoint_paths: "model.ckpt-9"
6 | all_model_checkpoint_paths: "model.ckpt-10"
7 |
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/save_and_restore/save/model.ckpt-10.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/extended/save_and_restore/save/model.ckpt-10.data-00000-of-00001
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/save_and_restore/save/model.ckpt-10.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/extended/save_and_restore/save/model.ckpt-10.index
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/save_and_restore/save/model.ckpt-6.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/extended/save_and_restore/save/model.ckpt-6.data-00000-of-00001
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/save_and_restore/save/model.ckpt-6.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/extended/save_and_restore/save/model.ckpt-6.index
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/save_and_restore/save/model.ckpt-7.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/extended/save_and_restore/save/model.ckpt-7.data-00000-of-00001
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/save_and_restore/save/model.ckpt-7.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/extended/save_and_restore/save/model.ckpt-7.index
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/save_and_restore/save/model.ckpt-8.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/extended/save_and_restore/save/model.ckpt-8.data-00000-of-00001
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/save_and_restore/save/model.ckpt-8.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/extended/save_and_restore/save/model.ckpt-8.index
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/save_and_restore/save/model.ckpt-9.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/extended/save_and_restore/save/model.ckpt-9.data-00000-of-00001
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/save_and_restore/save/model.ckpt-9.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/extended/save_and_restore/save/model.ckpt-9.index
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/save_and_restore/variables.weight:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/extended/save_and_restore/variables.weight
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/tensorboard/MNIST-data/t10k-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/extended/tensorboard/MNIST-data/t10k-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/tensorboard/MNIST-data/t10k-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/extended/tensorboard/MNIST-data/t10k-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/tensorboard/MNIST-data/train-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/extended/tensorboard/MNIST-data/train-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/tensorboard/MNIST-data/train-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/extended/tensorboard/MNIST-data/train-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/tensorboard/mnist.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from zh.model.mlp.mlp import MLP
4 | from zh.model.mlp.utils import DataLoader
5 |
6 | tf.enable_eager_execution()
7 | num_batches = 10000
8 | batch_size = 50
9 | learning_rate = 0.001
10 | model = MLP()
11 | data_loader = DataLoader()
12 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
13 | summary_writer = tf.contrib.summary.create_file_writer('./tensorboard') # Instantiate the summary writer
14 | with summary_writer.as_default(), tf.contrib.summary.always_record_summaries():
15 | for batch_index in range(num_batches):
16 | X, y = data_loader.get_batch(batch_size)
17 | with tf.GradientTape() as tape:
18 | y_logit_pred = model(tf.convert_to_tensor(X))
19 | loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_logit_pred)
20 | print("batch %d: loss %f" % (batch_index, loss.numpy()))
21 | tf.contrib.summary.scalar("loss", loss, step=batch_index) # Log the current loss
22 | grads = tape.gradient(loss, model.variables)
23 | optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
24 |
--------------------------------------------------------------------------------
/source/_static/code/zh/extended/tensorboard/tensorboard/events.out.tfevents.1531645031.SNOWKYLIN-WORKS.v2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/extended/tensorboard/tensorboard/events.out.tfevents.1531645031.SNOWKYLIN-WORKS.v2
--------------------------------------------------------------------------------
/source/_static/code/zh/model/cnn/cnn.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 |
4 | class CNN(tf.keras.Model):
5 | def __init__(self):
6 | super().__init__()
7 | self.conv1 = tf.keras.layers.Conv2D(
8 | filters=32, # Number of convolution kernels
9 | kernel_size=[5, 5], # Size of the receptive field (kernel)
10 | padding="same", # Padding strategy
11 | activation=tf.nn.relu # Activation function
12 | )
13 | self.pool1 = tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2)
14 | self.conv2 = tf.keras.layers.Conv2D(
15 | filters=64,
16 | kernel_size=[5, 5],
17 | padding="same",
18 | activation=tf.nn.relu
19 | )
20 | self.pool2 = tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2)
21 | self.flatten = tf.keras.layers.Reshape(target_shape=(7 * 7 * 64,))
22 | self.dense1 = tf.keras.layers.Dense(units=1024, activation=tf.nn.relu)
23 | self.dense2 = tf.keras.layers.Dense(units=10)
24 |
25 | def call(self, inputs):
26 | inputs = tf.reshape(inputs, [-1, 28, 28, 1])
27 | x = self.conv1(inputs) # [batch_size, 28, 28, 32]
28 | x = self.pool1(x) # [batch_size, 14, 14, 32]
29 | x = self.conv2(x) # [batch_size, 14, 14, 64]
30 | x = self.pool2(x) # [batch_size, 7, 7, 64]
31 | x = self.flatten(x) # [batch_size, 7 * 7 * 64]
32 | x = self.dense1(x) # [batch_size, 1024]
33 | x = self.dense2(x) # [batch_size, 10]
34 | return x
35 |
36 | def predict(self, inputs):
37 | logits = self(inputs)
38 | return tf.argmax(logits, axis=-1)
--------------------------------------------------------------------------------
/source/_static/code/zh/model/custom_layer/linear.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 |
4 | eager = True
5 | X = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32)
6 | y = np.array([[10.0], [20.0]], dtype=np.float32)
7 |
8 |
9 | class LinearLayer(tf.keras.layers.Layer):
10 | def __init__(self):
11 | super().__init__()
12 |
13 | def build(self, input_shape): # here input_shape is a TensorShape
14 | self.w = self.add_variable(name='w',
15 | shape=[input_shape[-1], 1], initializer=tf.zeros_initializer())
16 | self.b = self.add_variable(name='b',
17 | shape=[1], initializer=tf.zeros_initializer())
18 |
19 | def call(self, X):
20 | y_pred = tf.matmul(X, self.w) + self.b
21 | return y_pred
22 |
23 |
24 | class Linear(tf.keras.Model):
25 | def __init__(self):
26 | super().__init__()
27 | self.layer = LinearLayer()
28 |
29 | def call(self, input):
30 | output = self.layer(input)
31 | return output
32 |
33 |
34 | if eager:
35 | tf.enable_eager_execution()
36 | X = tf.constant(X)
37 | y = tf.constant(y)
38 | model = Linear()
39 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
40 | for i in range(100):
41 | with tf.GradientTape() as tape:
42 | y_pred = model(X)
43 | loss = tf.reduce_mean(tf.square(y_pred - y))
44 | grads = tape.gradient(loss, model.variables)
45 | optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
46 | print(model.variables)
47 | else:
48 | model = Linear()
49 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
50 | X_placeholder = tf.placeholder(name='X', shape=[None, 3], dtype=tf.float32)
51 | y_placeholder = tf.placeholder(name='y', shape=[None, 1], dtype=tf.float32)
52 | y_pred = model(X_placeholder)
53 | loss = tf.reduce_mean(tf.square(y_pred - y_placeholder))
54 | train_op = optimizer.minimize(loss)
55 | with tf.Session() as sess:
56 | sess.run(tf.global_variables_initializer())
57 | for i in range(100):
58 | sess.run(train_op, feed_dict={X_placeholder: X, y_placeholder: y})
59 | print(sess.run(model.variables))
--------------------------------------------------------------------------------
/source/_static/code/zh/model/linear/linear.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | tf.enable_eager_execution()
3 |
4 | X = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
5 | y = tf.constant([[10.0], [20.0]])
6 |
7 |
8 | class Linear(tf.keras.Model):
9 | def __init__(self):
10 | super().__init__()
11 | self.dense = tf.keras.layers.Dense(units=1, kernel_initializer=tf.zeros_initializer(),
12 | bias_initializer=tf.zeros_initializer())
13 |
14 | def call(self, input):
15 | output = self.dense(input)
16 | return output
17 |
18 |
19 | # The following code has the same structure as in the previous section
20 | model = Linear()
21 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
22 | for i in range(100):
23 | with tf.GradientTape() as tape:
24 | y_pred = model(X) # Call the model
25 | loss = tf.reduce_mean(tf.square(y_pred - y))
26 | grads = tape.gradient(loss, model.variables)
27 | optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
28 | print(model.variables)
29 |
--------------------------------------------------------------------------------
/source/_static/code/zh/model/mlp/MNIST-data/t10k-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/model/mlp/MNIST-data/t10k-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/zh/model/mlp/MNIST-data/t10k-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/model/mlp/MNIST-data/t10k-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/zh/model/mlp/MNIST-data/train-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/model/mlp/MNIST-data/train-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/zh/model/mlp/MNIST-data/train-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/code/zh/model/mlp/MNIST-data/train-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/source/_static/code/zh/model/mlp/main.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from zh.model.mlp.mlp import MLP
4 | from zh.model.cnn.cnn import CNN
5 |
6 | tf.enable_eager_execution()
7 | model_type = 'CNN'
8 | num_batches = 1000
9 | batch_size = 50
10 | learning_rate = 0.001
11 |
12 |
13 | class DataLoader():
14 | def __init__(self):
15 | mnist = tf.contrib.learn.datasets.load_dataset("mnist")
16 | self.train_data = mnist.train.images # np.array [55000, 784]
17 | self.train_labels = np.asarray(mnist.train.labels, dtype=np.int32) # np.array [55000] of int32
18 | self.eval_data = mnist.test.images # np.array [10000, 784]
19 | self.eval_labels = np.asarray(mnist.test.labels, dtype=np.int32) # np.array [10000] of int32
20 |
21 | def get_batch(self, batch_size):
22 | index = np.random.randint(0, np.shape(self.train_data)[0], batch_size)
23 | return self.train_data[index, :], self.train_labels[index]
24 |
25 |
26 | if model_type == 'MLP':
27 | model = MLP()
28 | else:
29 | model = CNN()
30 | data_loader = DataLoader()
31 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
32 | for batch_index in range(num_batches):
33 | X, y = data_loader.get_batch(batch_size)
34 | with tf.GradientTape() as tape:
35 | y_logit_pred = model(tf.convert_to_tensor(X))
36 | loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_logit_pred)
37 | print("batch %d: loss %f" % (batch_index, loss.numpy()))
38 | grads = tape.gradient(loss, model.variables)
39 | optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
40 |
41 | num_eval_samples = np.shape(data_loader.eval_labels)[0]
42 | y_pred = model.predict(data_loader.eval_data).numpy()
43 | print("test accuracy: %f" % (sum(y_pred == data_loader.eval_labels) / num_eval_samples))
44 |
45 | num_correct_pred = 0
46 | for batch_index in range(num_eval_samples // batch_size):
47 | y_pred = model.predict(data_loader.eval_data[batch_index * batch_size: (batch_index + 1) * batch_size]).numpy()
48 | num_correct_pred += sum(y_pred == data_loader.eval_labels[batch_index * batch_size: (batch_index + 1) * batch_size])
49 | print("test accuracy: %f" % (num_correct_pred / np.shape(data_loader.eval_labels)[0]))
50 |
51 |
52 |
--------------------------------------------------------------------------------
/source/_static/code/zh/model/mlp/mlp.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 |
4 | class MLP(tf.keras.Model):
5 | def __init__(self):
6 | super().__init__()
7 | self.dense1 = tf.keras.layers.Dense(units=100, activation=tf.nn.relu)
8 | self.dense2 = tf.keras.layers.Dense(units=10)
9 |
10 | def call(self, inputs):
11 | x = self.dense1(inputs)
12 | x = self.dense2(x)
13 | return x
14 |
15 | def predict(self, inputs):
16 | logits = self(inputs)
17 | return tf.argmax(logits, axis=-1)
--------------------------------------------------------------------------------
/source/_static/code/zh/model/mlp/utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 |
4 |
5 | class DataLoader():
6 | def __init__(self):
7 | mnist = tf.contrib.learn.datasets.load_dataset("mnist")
8 | self.train_data = mnist.train.images # np.array [55000, 784]
9 | self.train_labels = np.asarray(mnist.train.labels, dtype=np.int32) # np.array [55000] of int32
10 | self.eval_data = mnist.test.images # np.array [10000, 784]
11 | self.eval_labels = np.asarray(mnist.test.labels, dtype=np.int32) # np.array [10000] of int32
12 |
13 | def get_batch(self, batch_size):
14 | index = np.random.randint(0, np.shape(self.train_data)[0], batch_size)
15 | return self.train_data[index, :], self.train_labels[index]
--------------------------------------------------------------------------------
/source/_static/code/zh/model/rl/rl.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import gym
4 | import random
5 | from collections import deque
6 |
7 | tf.enable_eager_execution()
8 | num_episodes = 500
9 | num_exploration_episodes = 100
10 | max_len_episode = 1000
11 | batch_size = 32
12 | learning_rate = 1e-3
13 | gamma = 1.
14 | initial_epsilon = 1.
15 | final_epsilon = 0.01
16 |
17 |
18 | # The Q-network fits the Q function and is similar to the multilayer perceptron of the previous section. It takes a state as input and outputs the Q-value of each action (2-dimensional for CartPole).
19 | class QNetwork(tf.keras.Model):
20 | def __init__(self):
21 | super().__init__()
22 | self.dense1 = tf.keras.layers.Dense(units=24, activation=tf.nn.relu)
23 | self.dense2 = tf.keras.layers.Dense(units=24, activation=tf.nn.relu)
24 | self.dense3 = tf.keras.layers.Dense(units=2)
25 |
26 | def call(self, inputs):
27 | x = self.dense1(inputs)
28 | x = self.dense2(x)
29 | x = self.dense3(x)
30 | return x
31 |
32 | def predict(self, inputs):
33 | q_values = self(inputs)
34 | return tf.argmax(q_values, axis=-1)
35 |
36 |
37 | env = gym.make('CartPole-v1') # Instantiate a game environment; the argument is the game's name
38 | model = QNetwork()
39 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
40 | replay_buffer = deque(maxlen=10000)
41 | epsilon = initial_epsilon
42 | for episode_id in range(num_episodes):
43 | state = env.reset() # Initialize the environment and obtain its initial state
44 | epsilon = max(
45 | initial_epsilon * (num_exploration_episodes - episode_id) / num_exploration_episodes,
46 | final_epsilon)
47 | for t in range(max_len_episode):
48 | env.render() # Render the current frame to the screen
49 | if random.random() < epsilon: # Epsilon-greedy exploration strategy
50 | action = env.action_space.sample() # Choose a random action with probability epsilon
51 | else:
52 | action = model.predict(
53 | tf.constant(np.expand_dims(state, axis=0), dtype=tf.float32)).numpy()
54 | action = action[0]
55 | next_state, reward, done, info = env.step(action) # Let the environment execute the action; get the next state, the reward for the action, whether the game is over, and extra information
56 | reward = -10. if done else reward # Give a large negative reward if the game is over
57 | replay_buffer.append((state, action, reward, next_state, 1 if done else 0)) # Put the (state, action, reward, next_state) tuple, plus a done flag, into the experience replay buffer
58 | state = next_state
59 |
60 | if done: # Exit this round and move on to the next episode if the game is over
61 | print("episode %d, epsilon %f, score %d" % (episode_id, epsilon, t))
62 | break
63 |
64 | if len(replay_buffer) >= batch_size:
65 | # Randomly sample a batch of tuples from the experience replay buffer and convert each field to a NumPy array
66 | batch_state, batch_action, batch_reward, batch_next_state, batch_done = zip(
67 | *random.sample(replay_buffer, batch_size))
68 | batch_state, batch_reward, batch_next_state, batch_done = \
69 | [np.array(a, dtype=np.float32) for a in [batch_state, batch_reward, batch_next_state, batch_done]]
70 | batch_action = np.array(batch_action, dtype=np.int32)
71 |
72 | q_value = model(tf.constant(batch_next_state, dtype=tf.float32))
73 | y = batch_reward + (gamma * tf.reduce_max(q_value, axis=1)) * (1 - batch_done) # Compute the target value y as in the DQN paper
74 | with tf.GradientTape() as tape:
75 | loss = tf.losses.mean_squared_error( # Minimize the distance between y and the Q-value
76 | labels=y,
77 | predictions=tf.reduce_sum(model(tf.constant(batch_state)) *
78 | tf.one_hot(batch_action, depth=2), axis=1)
79 | )
80 | grads = tape.gradient(loss, model.variables)
81 | optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables)) # Compute the gradients and update the parameters
--------------------------------------------------------------------------------
/source/_static/code/zh/model/rnn/rnn.py:
--------------------------------------------------------------------------------
1 | # reference: https://github.com/keras-team/keras/blob/master/examples/lstm_text_generation.py
2 |
3 | import tensorflow as tf
4 | import numpy as np
5 |
6 |
7 | class RNN(tf.keras.Model):
8 | def __init__(self, num_chars):
9 | super().__init__()
10 | self.num_chars = num_chars
11 | self.cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=256)
12 | self.dense = tf.keras.layers.Dense(units=self.num_chars)
13 |
14 | def call(self, inputs):
15 | batch_size, seq_length = tf.shape(inputs)
16 | inputs = tf.one_hot(inputs, depth=self.num_chars) # [batch_size, seq_length, num_chars]
17 | state = self.cell.zero_state(batch_size=batch_size, dtype=tf.float32)
18 | for t in range(seq_length.numpy()):
19 | output, state = self.cell(inputs[:, t, :], state)
20 | output = self.dense(output)
21 | return output
22 |
23 | def predict(self, inputs, temperature=1.):
24 | batch_size, _ = tf.shape(inputs)
25 | logits = self(inputs)
26 | prob = tf.nn.softmax(logits / temperature).numpy()
27 | return np.array([np.random.choice(self.num_chars, p=prob[i, :])
28 | for i in range(batch_size.numpy())])
29 |
30 |
31 | class DataLoader():
32 | def __init__(self):
33 | path = tf.keras.utils.get_file('nietzsche.txt',
34 | origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
35 | with open(path, encoding='utf-8') as f:
36 | self.raw_text = f.read().lower()
37 | self.chars = sorted(list(set(self.raw_text)))
38 | self.char_indices = dict((c, i) for i, c in enumerate(self.chars))
39 | self.indices_char = dict((i, c) for i, c in enumerate(self.chars))
40 | self.text = [self.char_indices[c] for c in self.raw_text]
41 |
42 | def get_batch(self, seq_length, batch_size):
43 | seq = []
44 | next_char = []
45 | for i in range(batch_size):
46 | index = np.random.randint(0, len(self.text) - seq_length)
47 | seq.append(self.text[index:index+seq_length])
48 | next_char.append(self.text[index+seq_length])
49 | return np.array(seq), np.array(next_char) # [num_batch, seq_length], [num_batch]
50 |
51 |
52 | if __name__ == '__main__':
53 | tf.enable_eager_execution()
54 | num_batches = 10000
55 | seq_length = 40
56 | batch_size = 50
57 | learning_rate = 1e-3
58 |
59 | data_loader = DataLoader()
60 | model = RNN(len(data_loader.chars))
61 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
62 | for batch_index in range(num_batches):
63 | X, y = data_loader.get_batch(seq_length, batch_size)
64 | with tf.GradientTape() as tape:
65 | y_logit_pred = model(X)
66 | loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_logit_pred)
67 | print("batch %d: loss %f" % (batch_index, loss.numpy()))
68 | grads = tape.gradient(loss, model.variables)
69 | optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
70 |
71 | X_, _ = data_loader.get_batch(seq_length, 1)
72 | for diversity in [0.2, 0.5, 1.0, 1.2]:
73 | X = X_
74 | print("diversity %f:" % diversity)
75 | for t in range(400):
76 | y_pred = model.predict(X, diversity)
77 | print(data_loader.indices_char[y_pred[0]], end='', flush=True)
78 | X = np.concatenate([X[:, 1:], np.expand_dims(y_pred, axis=1)], axis=-1)
--------------------------------------------------------------------------------
/source/_static/code/zh/test/test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import random
3 | import gym
4 | import numpy as np
5 | from collections import deque
6 | from tensorflow.contrib.keras.api.keras.models import Sequential
7 | from tensorflow.contrib.keras.api.keras.layers import Dense
8 | from tensorflow.contrib.keras.api.keras.optimizers import Adam
9 |
10 | EPISODES = 1000
11 |
12 |
13 | class DQNAgent:
14 | def __init__(self, state_size, action_size):
15 | self.state_size = state_size
16 | self.action_size = action_size
17 | self.memory = deque(maxlen=2000)
18 | self.gamma = 0.95 # discount rate
19 | self.epsilon = 1.0 # exploration rate
20 | self.epsilon_min = 0.01
21 | self.epsilon_decay = 0.995
22 | self.learning_rate = 0.001
23 | self.model = self._build_model()
24 |
25 | def _build_model(self):
26 | # Neural Net for Deep-Q learning Model
27 | model = Sequential()
28 | model.add(Dense(24, input_dim=self.state_size, activation='relu'))
29 | model.add(Dense(24, activation='relu'))
30 | model.add(Dense(self.action_size, activation='linear'))
31 | model.compile(loss='mse',
32 | optimizer=Adam(lr=self.learning_rate))
33 | return model
34 |
35 | def remember(self, state, action, reward, next_state, done):
36 | self.memory.append((state, action, reward, next_state, done))
37 |
38 | def act(self, state):
39 | if np.random.rand() <= self.epsilon:
40 | return random.randrange(self.action_size)
41 | act_values = self.model.predict(state)
42 | return np.argmax(act_values[0]) # returns action
43 |
44 | def replay(self, batch_size):
45 | minibatch = random.sample(self.memory, batch_size)
46 | for state, action, reward, next_state, done in minibatch:
47 | target = reward
48 | if not done:
49 | target = (reward + self.gamma *
50 | np.amax(self.model.predict(next_state)[0]))
51 | target_f = self.model.predict(state)
52 | target_f[0][action] = target
53 | self.model.fit(state, target_f, epochs=1, verbose=0)
54 | if self.epsilon > self.epsilon_min:
55 | self.epsilon *= self.epsilon_decay
56 |
57 | def load(self, name):
58 | self.model.load_weights(name)
59 |
60 | def save(self, name):
61 | self.model.save_weights(name)
62 |
63 |
64 | if __name__ == "__main__":
65 | env = gym.make('CartPole-v1')
66 | state_size = env.observation_space.shape[0]
67 | action_size = env.action_space.n
68 | agent = DQNAgent(state_size, action_size)
69 | # agent.load("./save/cartpole-dqn.h5")
70 | done = False
71 | batch_size = 32
72 |
73 | for e in range(EPISODES):
74 | state = env.reset()
75 | state = np.reshape(state, [1, state_size])
76 | for time in range(500):
77 | # env.render()
78 | action = agent.act(state)
79 | next_state, reward, done, _ = env.step(action)
80 | reward = reward if not done else -10
81 | next_state = np.reshape(next_state, [1, state_size])
82 | agent.remember(state, action, reward, next_state, done)
83 | state = next_state
84 | if done:
85 | print("episode: {}/{}, score: {}, e: {:.2}"
86 | .format(e, EPISODES, time, agent.epsilon))
87 | break
88 | if len(agent.memory) > batch_size:
89 | agent.replay(batch_size)
90 | # if e % 10 == 0:
91 | # agent.save("./save/cartpole-dqn.h5")
--------------------------------------------------------------------------------
/source/_static/image/extended/tensorboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/image/extended/tensorboard.png
--------------------------------------------------------------------------------
/source/_static/image/figure.vsdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/image/figure.vsdx
--------------------------------------------------------------------------------
/source/_static/image/model/cartpole.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/image/model/cartpole.gif
--------------------------------------------------------------------------------
/source/_static/image/model/cartpole.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/image/model/cartpole.png
--------------------------------------------------------------------------------
/source/_static/image/model/cnn.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/image/model/cnn.pdf
--------------------------------------------------------------------------------
/source/_static/image/model/cnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/image/model/cnn.png
--------------------------------------------------------------------------------
/source/_static/image/model/mnist_0-9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/image/model/mnist_0-9.png
--------------------------------------------------------------------------------
/source/_static/image/model/rnn.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/image/model/rnn.jpg
--------------------------------------------------------------------------------
/source/_static/image/model/rnn_single.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snowkylin/TensorFlow-cn/77350fe96841e98f589fe11476cefdf9e5ab5611/source/_static/image/model/rnn_single.jpg
--------------------------------------------------------------------------------
/source/conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # 简单粗暴TensorFlow documentation build configuration file, created by
5 | # sphinx-quickstart on Sat Jan 20 00:48:15 2018.
6 | #
7 | # This file is execfile()d with the current directory set to its
8 | # containing dir.
9 | #
10 | # Note that not all possible configuration values are present in this
11 | # autogenerated file.
12 | #
13 | # All configuration values have a default; values that are commented out
14 | # serve to show the default.
15 |
16 | # If extensions (or modules to document with autodoc) are in another directory,
17 | # add these directories to sys.path here. If the directory is relative to the
18 | # documentation root, use os.path.abspath to make it absolute, like shown here.
19 | #
20 | # import os
21 | # import sys
22 | # sys.path.insert(0, os.path.abspath('.'))
23 |
24 |
25 | # -- General configuration ------------------------------------------------
26 |
27 | # If your documentation needs a minimal Sphinx version, state it here.
28 | #
29 | # needs_sphinx = '1.0'
30 |
31 | # Add any Sphinx extension module names here, as strings. They can be
32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
33 | # ones.
34 | extensions = [ 'sphinx.ext.imgmath', 'sphinx.ext.intersphinx' ]
35 |
36 | # Add any paths that contain templates here, relative to this directory.
37 | templates_path = ['_templates']
38 |
39 | # The suffix(es) of source filenames.
40 | # You can specify multiple suffix as a list of string:
41 | #
42 | # source_suffix = ['.rst', '.md']
43 | source_suffix = '.rst'
44 |
45 | # The master toctree document.
46 | master_doc = 'index'
47 |
48 | # General information about the project.
49 | project = '简单粗暴TensorFlow'
50 | copyright = '2018, Xihan Li(雪麒)'
51 | author = 'Xihan Li(雪麒)'
52 |
53 | # The version info for the project you're documenting, acts as replacement for
54 | # |version| and |release|, also used in various other places throughout the
55 | # built documents.
56 | #
57 | # The short X.Y version.
58 | version = '0.3'
59 | # The full version, including alpha/beta/rc tags.
60 | release = '0.3 beta'
61 |
62 | # The language for content autogenerated by Sphinx. Refer to documentation
63 | # for a list of supported languages.
64 | #
65 | # This is also used if you do content translation via gettext catalogs.
66 | # Usually you set "language" from the command line for these cases.
67 | language = 'zh_CN'
68 |
69 | # List of patterns, relative to source directory, that match files and
70 | # directories to ignore when looking for source files.
71 | # This patterns also effect to html_static_path and html_extra_path
72 | exclude_patterns = []
73 |
74 | # The name of the Pygments (syntax highlighting) style to use.
75 | pygments_style = 'sphinx'
76 |
77 | # If true, `todo` and `todoList` produce output, else they produce nothing.
78 | todo_include_todos = False
79 |
80 |
81 | # -- Options for HTML output ----------------------------------------------
82 |
83 | # The theme to use for HTML and HTML Help pages. See the documentation for
84 | # a list of builtin themes.
85 | #
86 | html_theme = 'sphinx_rtd_theme'
87 |
88 | # Theme options are theme-specific and customize the look and feel of a theme
89 | # further. For a list of options available for each theme, see the
90 | # documentation.
91 | #
92 | # html_theme_options = {}
93 |
94 | # Add any paths that contain custom static files (such as style sheets) here,
95 | # relative to this directory. They are copied after the builtin static files,
96 | # so a file named "default.css" will overwrite the builtin "default.css".
97 | html_static_path = ['_static']
98 |
99 | # Custom sidebar templates, must be a dictionary that maps document names
100 | # to template names.
101 | #
102 | # This is required for the alabaster theme
103 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
104 | # html_sidebars = {
105 | # '**': [
106 | # 'about.html',
107 | # 'navigation.html',
108 | # 'relations.html', # needs 'show_related': True theme option to display
109 | # 'searchbox.html',
110 | # 'donate.html',
111 | # ]
112 | # }
113 |
114 |
115 | # -- Options for HTMLHelp output ------------------------------------------
116 |
117 | # Output file base name for HTML help builder.
118 | htmlhelp_basename = 'TensorFlowdoc'
119 |
120 |
121 | # -- Options for LaTeX output ---------------------------------------------
122 |
123 | latex_elements = {
124 | # The paper size ('letterpaper' or 'a4paper').
125 | #
126 | # 'papersize': 'letterpaper',
127 |
128 | # The font size ('10pt', '11pt' or '12pt').
129 | #
130 | # 'pointsize': '10pt',
131 |
132 | # Additional stuff for the LaTeX preamble.
133 | #
134 | 'preamble': r'\usepackage{ctex}',
135 |
136 | # Latex figure (float) alignment
137 | #
138 | # 'figure_align': 'htbp',
139 |
140 | # Remove blank pages
141 | 'classoptions': ',openany,oneside'
142 | }
143 |
144 | # Grouping the document tree into LaTeX files. List of tuples
145 | # (source start file, target name, title,
146 | # author, documentclass [howto, manual, or own class]).
147 | latex_documents = [
148 | (master_doc, 'TensorFlow-cn.tex', '简单粗暴TensorFlow',
149 | author, 'manual'),
150 | ]
151 |
152 |
153 | # -- Options for manual page output ---------------------------------------
154 |
155 | # One entry per manual page. List of tuples
156 | # (source start file, name, description, authors, manual section).
157 | man_pages = [
158 | (master_doc, 'tensorflow', '简单粗暴TensorFlow',
159 | [author], 1)
160 | ]
161 |
162 |
163 | # -- Options for Texinfo output -------------------------------------------
164 |
165 | # Grouping the document tree into Texinfo files. List of tuples
166 | # (source start file, target name, title, author,
167 | # dir menu entry, description, category)
168 | texinfo_documents = [
169 | (master_doc, 'TensorFlow', '简单粗暴TensorFlow',
170 | author, 'TensorFlow', '简单粗暴TensorFlow',
171 | 'Miscellaneous'),
172 | ]
173 |
174 |
175 |
176 |
--------------------------------------------------------------------------------
/source/conf_en.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # 简单粗暴TensorFlow documentation build configuration file, created by
5 | # sphinx-quickstart on Sat Jan 20 00:48:15 2018.
6 | #
7 | # This file is execfile()d with the current directory set to its
8 | # containing dir.
9 | #
10 | # Note that not all possible configuration values are present in this
11 | # autogenerated file.
12 | #
13 | # All configuration values have a default; values that are commented out
14 | # serve to show the default.
15 |
16 | # If extensions (or modules to document with autodoc) are in another directory,
17 | # add these directories to sys.path here. If the directory is relative to the
18 | # documentation root, use os.path.abspath to make it absolute, like shown here.
19 | #
20 | # import os
21 | # import sys
22 | # sys.path.insert(0, os.path.abspath('.'))
23 |
24 |
25 | # -- General configuration ------------------------------------------------
26 |
27 | # If your documentation needs a minimal Sphinx version, state it here.
28 | #
29 | # needs_sphinx = '1.0'
30 |
31 | # Add any Sphinx extension module names here, as strings. They can be
32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
33 | # ones.
34 | extensions = [ 'sphinx.ext.imgmath', 'sphinx.ext.intersphinx' ]
35 |
36 | # Add any paths that contain templates here, relative to this directory.
37 | templates_path = ['_templates']
38 |
39 | # The suffix(es) of source filenames.
40 | # You can specify multiple suffix as a list of string:
41 | #
42 | # source_suffix = ['.rst', '.md']
43 | source_suffix = '.rst'
44 |
45 | # The master toctree document.
46 | master_doc = 'index'
47 |
48 | # General information about the project.
49 | # project = '简单粗暴TensorFlow'
50 | project = 'A Concise Handbook of TensorFlow'
51 | copyright = '2018, Xihan Li(雪麒)'
52 | author = 'Xihan Li(雪麒)'
53 |
54 | # The version info for the project you're documenting, acts as replacement for
55 | # |version| and |release|, also used in various other places throughout the
56 | # built documents.
57 | #
58 | # The short X.Y version.
59 | version = '0.3'
60 | # The full version, including alpha/beta/rc tags.
61 | release = '0.3 beta'
62 |
63 | # The language for content autogenerated by Sphinx. Refer to documentation
64 | # for a list of supported languages.
65 | #
66 | # This is also used if you do content translation via gettext catalogs.
67 | # Usually you set "language" from the command line for these cases.
68 | # language = 'zh_CN'
69 | language = 'en_US'
70 |
71 | # List of patterns, relative to source directory, that match files and
72 | # directories to ignore when looking for source files.
73 | # This patterns also effect to html_static_path and html_extra_path
74 | exclude_patterns = []
75 |
76 | # The name of the Pygments (syntax highlighting) style to use.
77 | pygments_style = 'sphinx'
78 |
79 | # If true, `todo` and `todoList` produce output, else they produce nothing.
80 | todo_include_todos = False
81 |
82 |
83 | # -- Options for HTML output ----------------------------------------------
84 |
85 | # The theme to use for HTML and HTML Help pages. See the documentation for
86 | # a list of builtin themes.
87 | #
88 | html_theme = 'sphinx_rtd_theme'
89 |
90 | # Theme options are theme-specific and customize the look and feel of a theme
91 | # further. For a list of options available for each theme, see the
92 | # documentation.
93 | #
94 | # html_theme_options = {}
95 |
96 | # Add any paths that contain custom static files (such as style sheets) here,
97 | # relative to this directory. They are copied after the builtin static files,
98 | # so a file named "default.css" will overwrite the builtin "default.css".
99 | html_static_path = ['_static']
100 |
101 | # Custom sidebar templates, must be a dictionary that maps document names
102 | # to template names.
103 | #
104 | # This is required for the alabaster theme
105 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
106 | # html_sidebars = {
107 | # '**': [
108 | # 'about.html',
109 | # 'navigation.html',
110 | # 'relations.html', # needs 'show_related': True theme option to display
111 | # 'searchbox.html',
112 | # 'donate.html',
113 | # ]
114 | # }
115 |
116 |
117 | # -- Options for HTMLHelp output ------------------------------------------
118 |
119 | # Output file base name for HTML help builder.
120 | htmlhelp_basename = 'TensorFlowdoc'
121 |
122 |
123 | # -- Options for LaTeX output ---------------------------------------------
124 |
125 | latex_elements = {
126 | # The paper size ('letterpaper' or 'a4paper').
127 | #
128 | # 'papersize': 'letterpaper',
129 |
130 | # The font size ('10pt', '11pt' or '12pt').
131 | #
132 | # 'pointsize': '10pt',
133 |
134 | # Additional stuff for the LaTeX preamble.
135 | #
136 | 'preamble': r'\usepackage{ctex}',
137 |
138 | # Latex figure (float) alignment
139 | #
140 | # 'figure_align': 'htbp',
141 |
142 | # Remove blank pages
143 | 'classoptions': ',openany,oneside'
144 | }
145 |
146 | # Grouping the document tree into LaTeX files. List of tuples
147 | # (source start file, target name, title,
148 | # author, documentclass [howto, manual, or own class]).
149 | latex_documents = [
150 | # (master_doc, 'TensorFlow-cn.tex', '简单粗暴TensorFlow',
151 | (master_doc, 'TensorFlow-cn.tex', 'A Concise Handbook of TensorFlow',
152 | author, 'manual'),
153 | ]
154 |
155 |
156 | # -- Options for manual page output ---------------------------------------
157 |
158 | # One entry per manual page. List of tuples
159 | # (source start file, name, description, authors, manual section).
160 | man_pages = [
161 | (master_doc, 'tensorflow', '简单粗暴TensorFlow',
162 | [author], 1)
163 | ]
164 |
165 |
166 | # -- Options for Texinfo output -------------------------------------------
167 |
168 | # Grouping the document tree into Texinfo files. List of tuples
169 | # (source start file, target name, title, author,
170 | # dir menu entry, description, category)
171 | texinfo_documents = [
172 | (master_doc, 'TensorFlow', '简单粗暴TensorFlow',
173 | author, 'TensorFlow', '简单粗暴TensorFlow',
174 | 'Miscellaneous'),
175 | ]
176 |
177 |
178 |
179 |
--------------------------------------------------------------------------------
/source/en/basic.rst:
--------------------------------------------------------------------------------
1 | TensorFlow Basic
2 | ======================
3 |
4 | ..
5 | https://www.datacamp.com/community/tutorials/tensorflow-tutorial
6 |
7 | As the name suggests, TensorFlow is a framework that makes tensors flow. A tensor, like a multidimensional array, is a generalization of the vector (one dimensional) and the matrix (two dimensional), while the flow of tensors is based on the Dataflow Graph, also called the Computation Graph. A typical TensorFlow program consists of the following parts:
8 |
9 | 1. Define a Dataflow Graph (usually called a 'model' in deep learning), which consists of large numbers of variables (called 'trainable parameters');
10 | 2. Repeat the following steps:
11 |
12 | 1. Convert the training data into tensors and input them into the Dataflow Graph for calculation (forward propagation);
13 | #. Evaluate the loss function and compute its partial derivatives for each variable (backward propagation);
14 | #. Use gradient descent or other optimizers to update variables in order to reduce the value of the loss function (i.e. training parameters).
15 |
16 | After enough repetitions of step 2 (and enough time), the loss function will decrease to a very small value, indicating the completion of the model training.
17 |
18 | Before introducing a variety of concepts in TensorFlow such as Tensor, Dataflow Graph, Variable, Optimizer and so on, we first give an example in this handbook so as to provide readers with an intuitive understanding.
19 |
20 | This chapter describes basic operations in TensorFlow.
21 |
22 | Prerequisites:
23 |
24 | * `Basic Python operations `_ (assignment, branch & loop statement, library import)
25 | * `'With' statement in Python `_
26 | * `NumPy `_ , a common library for scientific computation, important for TensorFlow
27 | * `Vectors `_ & `Matrices `_ operations (matrix addition & subtraction, matrix multiplication with vectors & matrices, matrix transpose, etc., Quiz: :math:`\begin{bmatrix} 1 & 2 \\ 3 & 4 \end{bmatrix} \times \begin{bmatrix} 5 & 6 \\ 7 & 8 \end{bmatrix} = ?`)
28 | * `Derivatives of functions `_ , `derivatives of multivariable functions `_ (Quiz: :math:`f(x, y) = x^2 + xy + y^2, \frac{\partial f}{\partial x} = ?, \frac{\partial f}{\partial y} = ?`)
29 | * `Linear regression `_;
30 | * `Gradient descent `_ that searches local minima of a function
31 |
32 | TensorFlow 1+1
33 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^
34 |
35 | TensorFlow can simply be regarded as a library for scientific computation (like NumPy in Python). Here we calculate :math:`1+1` and :math:`\begin{bmatrix} 1 & 2 \\ 3 & 4 \end{bmatrix} \times \begin{bmatrix} 5 & 6 \\ 7 & 8 \end{bmatrix}` as our first example.
36 |
37 | .. literalinclude:: ../_static/code/en/basic/eager/1plus1.py
38 |
39 | Output::
40 |
41 | tf.Tensor(2, shape=(), dtype=int32)
42 | tf.Tensor(
43 | [[19 22]
44 | [43 50]], shape=(2, 2), dtype=int32)
45 |
46 | The code above declares four **tensors** named ``a``, ``b``, ``A`` and ``B``. It also invokes two **operations** ``tf.add()`` and ``tf.matmul()`` which respectively do addition and matrix multiplication on tensors. Operation results are immediately stored in the tensors ``c`` and ``C``. **Shape** and **dtype** are two major attributes of a tensor. Here ``a``, ``b`` and ``c`` are scalars with null shape and int32 dtype, while ``A``, ``B``, ``C`` are 2-by-2 matrices with ``(2, 2)`` shape and int32 dtype.
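
As a quick illustration of these two attributes (a minimal sketch, not one of the handbook's included code files), you can inspect ``shape`` and ``dtype`` directly and convert a tensor to a NumPy array:

.. code-block:: python

    import tensorflow as tf
    tf.enable_eager_execution()

    A = tf.constant([[1, 2], [3, 4]])
    print(A.shape)    # (2, 2)
    print(A.dtype)    # int32
    print(A.numpy())  # the value of A as a NumPy array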
47 |
48 | In machine learning, it's common to differentiate functions. TensorFlow provides us with a powerful **automatic differentiation mechanism**. The following code shows how to use ``tf.GradientTape()`` to get the slope of :math:`y(x) = x^2` at :math:`x = 3`.
49 |
50 | .. literalinclude:: ../_static/code/en/basic/eager/grad.py
51 | :lines: 1-8
52 |
53 | Output::
54 |
55 | [array([9.], dtype=float32), array([6.], dtype=float32)]
56 |
57 | Here ``x`` is a **variable** initialized to 3, declared by ``tf.get_variable()``. Like common tensors, variables also have shape and dtype attributes, but require initialization. We can assign an initializer to ``tf.get_variable()`` by setting the ``initializer`` parameter. Here we use ``tf.constant_initializer(3.)`` to initialize the variable ``x`` to ``3.`` with a float32 dtype [#f0]_. An important difference between variables and common tensors is that, by default, the automatic differentiation mechanism can differentiate a function with respect to variables, but not with respect to ordinary tensors. Therefore variables are usually used as the trainable parameters of machine learning models. ``tf.GradientTape()`` is a recorder for automatic differentiation which records all variables and calculation steps automatically. In the previous example, the variable ``x`` and the calculation step ``y = tf.square(x)`` are recorded automatically, thus the derivative of the tensor ``y`` with respect to ``x`` can be obtained through ``y_grad = tape.gradient(y, x)``.
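
As a side note, an ordinary tensor can also be differentiated if it is explicitly "watched" by the tape. The following is a minimal sketch (not from the handbook's code files) illustrating ``tape.watch()``:

.. code-block:: python

    import tensorflow as tf
    tf.enable_eager_execution()

    x = tf.constant(3.)            # an ordinary tensor, not a variable
    with tf.GradientTape() as tape:
        tape.watch(x)              # ordinary tensors must be watched explicitly
        y = tf.square(x)
    y_grad = tape.gradient(y, x)   # tf.Tensor(6.0, shape=(), dtype=float32)
    print(y_grad)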
58 |
59 | In machine learning, it is more common to differentiate a multivariable function, a vector or a matrix, which is a piece of cake for TensorFlow. The following code shows how to use ``tf.GradientTape()`` to differentiate :math:`L(w, b) = \|Xw + b - y\|^2` with respect to :math:`w` and :math:`b` at :math:`w = (1, 2)^T, b = 1`.
60 |
61 | .. literalinclude:: ../_static/code/en/basic/eager/grad.py
62 | :lines: 10-17
63 |
64 | Output::
65 |
66 | [62.5, array([[35.],
67 | [50.]], dtype=float32), array([15.], dtype=float32)]
68 |
69 | Here the operation ``tf.square()`` squares every element in the input tensor without altering its shape. The operation ``tf.reduce_sum()`` outputs the sum of all elements in the input tensor with a null shape (the dimensions along which to sum can be indicated by the ``axis`` parameter; all elements are summed up if it is not specified). TensorFlow contains a large number of tensor operation APIs, including mathematical operations, tensor shape operations (like ``tf.reshape()``), slicing and concatenation (like ``tf.concat()``), etc. You can check the TensorFlow official API documentation [#f3]_ for further information.
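
For instance, here is a minimal sketch (an assumed example, not from the handbook's code files) showing how the ``axis`` parameter of ``tf.reduce_sum()`` and a simple ``tf.reshape()`` behave:

.. code-block:: python

    import tensorflow as tf
    tf.enable_eager_execution()

    A = tf.constant([[1., 2.], [3., 4.]])
    print(tf.reduce_sum(A))          # 10.0, all elements summed
    print(tf.reduce_sum(A, axis=0))  # [4. 6.], summed along the rows (column sums)
    print(tf.reduce_sum(A, axis=1))  # [3. 7.], summed along the columns (row sums)
    print(tf.reshape(A, [4]))        # [1. 2. 3. 4.], same elements, new shape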
70 |
71 | As we can see from the output, TensorFlow helps us figure out that
72 |
73 | .. math::
74 |
75 | L((1, 2)^T, 1) &= 62.5
76 |
77 | \frac{\partial L(w, b)}{\partial w} |_{w = (1, 2)^T, b = 1} &= \begin{bmatrix} 35 \\ 50\end{bmatrix}
78 |
79 | \frac{\partial L(w, b)}{\partial b} |_{w = (1, 2)^T, b = 1} &= 15
80 |
81 | ..
82 | By combining the automatic differentiation mechanism above with an **optimizer**, we can evaluate the extrema of a function. Here we use linear regression as an example (Evaluating :math:`\min_{w, b} L = (Xw + b - y)^2` essentially, :ref:`The next paragraph ` reveals the principles):
83 |
84 | .. literalinclude:: ../_static/code/en/basic/eager/regression.py
85 |
86 | .. _linear-regression:
87 |
88 | A Basic Example: Linear Regression
89 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
90 |
91 | Let's consider a practical problem. The house prices of a city between 2013 and 2017 are given by the following table:
92 |
93 | ====== ===== ===== ===== ===== =====
94 | Year 2013 2014 2015 2016 2017
95 | Price 12000 14000 15000 16500 17500
96 | ====== ===== ===== ===== ===== =====
97 |
98 | Now we want to do linear regression on the given data, i.e. using the linear model :math:`y = ax + b` to fit the data, where ``a`` and ``b`` are unknown parameters.
99 |
100 | First, we define and normalize the data.
101 |
102 | .. literalinclude:: ../_static/code/en/basic/example/numpy_manual_grad.py
103 | :lines: 1-7
104 |
105 | Then, we use gradient descent to evaluate these two parameters ``a`` and ``b`` in the linear model [#f1]_.
106 |
107 | Recall from the fundamentals of machine learning that, to search for local minima of a multivariable function :math:`f(x)`, we use `gradient descent `_, which takes the following steps:
108 |
109 | * Initialize the argument to :math:`x_0` and have :math:`k=0`
110 | * Iterate the following steps repeatedly until the convergence criterion is met:
111 |
112 | * Find the gradient of the function :math:`f(x)` with respect to the parameter :math:`\nabla f(x_k)`
113 | * Update the parameter :math:`x_{k+1} = x_{k} - \gamma \nabla f(x_k)` where :math:`\gamma` is the learning rate (like the step size of the gradient descent)
114 | * :math:`k \leftarrow k+1`
115 |
116 | Next we focus on how to implement gradient descent in order to solve the linear regression :math:`\min_{a, b} L(a, b) = \sum_{i=1}^n(ax_i + b - y_i)^2`.
117 |
118 | NumPy
119 | -----------------------
120 |
121 | The implementation of machine learning models is not exclusive to TensorFlow. In fact, many common scientific computation tools can solve simple models. Here, we use NumPy, a general library for scientific computation, to implement gradient descent. NumPy supports multidimensional arrays to represent vectors, matrices and higher-dimensional tensors. Meanwhile, it also supports many operations on multidimensional arrays (e.g. ``np.dot()`` computes dot products and ``np.sum()`` adds up all the elements). In this way NumPy is somewhat like MATLAB. In the following code, we evaluate the partial derivatives of the loss function with respect to the parameters ``a`` and ``b`` manually [#f2]_, and then iterate by gradient descent to eventually obtain the values of ``a`` and ``b``.
122 |
123 | .. literalinclude:: ../_static/code/en/basic/example/numpy_manual_grad.py
124 | :lines: 9-
125 |
126 | However, you may have noticed that there are several pain points using common libraries for scientific computation to implement machine learning models:
127 |
128 | - It's often inevitable to differentiate functions manually. Simple ones may be fine, but more complex ones (especially those commonly appearing in deep learning models) are another story. Manual differentiation may be painful, even infeasible, in the latter cases.
129 | - It's also often inevitable to update parameters based on the gradients manually. Manual updating is still easy here because gradient descent is a rather basic method, but it will no longer be easy once we apply a more complex approach to updating parameters (like Adam or Adagrad).
130 |
131 | The advent of TensorFlow eliminates these pain points to a large extent, making it much more convenient to implement machine learning models.
132 |
133 | TensorFlow
134 | --------------------------------------------------------
135 |
136 | The **Eager Execution mode** of TensorFlow [#f4]_ offers operations very similar to the above-mentioned NumPy ones. In addition, it also provides a series of functions critical for deep learning, such as faster computation (with GPU support), automatic differentiation, optimizers, etc. We will show how to do linear regression using TensorFlow. You may notice that its code structure is similar to that of the NumPy version. Here we delegate two important jobs to TensorFlow:
137 |
138 | * Using ``tape.gradient(ys, xs)`` to get the gradients automatically;
139 | * Using ``optimizer.apply_gradients(grads_and_vars)`` to update parameters automatically.
140 |
141 | .. literalinclude:: ../_static/code/en/basic/example/tensorflow_eager_autograd.py
142 | :lines: 12-29
143 |
144 | Here, we use the aforementioned approach to calculate the partial derivatives of the loss function with respect to each parameter, while we also use ``tf.train.GradientDescentOptimizer(learning_rate=1e-3)`` to declare an **optimizer** for gradient descent with a learning rate of 1e-3. The optimizer can help us update parameters based on the result of differentiation in order to minimize a specific loss function by calling its ``apply_gradients()`` interface.
145 |
146 | Note that, for calling ``optimizer.apply_gradients()`` to update model parameters, we need to provide it with the parameter ``grads_and_vars``, i.e. the gradients and the variables to be updated (like ``variables`` in the aforementioned code). To be specific, we have to pass a Python list whose elements are (partial derivative with respect to a variable, that variable) pairs. For instance, ``[(grad_w, w), (grad_b, b)]`` is passed here. By executing ``grads = tape.gradient(loss, variables)`` we get the partial derivatives of the loss function with respect to each variable recorded in ``tape``, i.e. ``grads = [grad_w, grad_b]``. Then we use Python's ``zip()`` to pair up the elements of ``grads = [grad_w, grad_b]`` and ``vars = [w, b]`` so as to build the required argument.
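
The following pure-Python sketch (with hypothetical placeholder names) only illustrates how ``zip()`` builds the required pairs:

.. code-block:: python

    grads = ['grad_w', 'grad_b']         # stands in for tape.gradient(loss, variables)
    variables = ['w', 'b']               # stands in for the list of model variables
    print(list(zip(grads, variables)))   # [('grad_w', 'w'), ('grad_b', 'b')]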
147 |
148 | In practice, we usually build models much more complex than the linear model ``y_pred = tf.matmul(X, w) + b`` here, which can simply be written in a single line. Therefore, we often write a model class and call it by ``y_pred = model(X)`` when needed. :doc:`The following chapter ` elaborates on writing model classes.
149 |
150 | ..
151 | Chapter Summary
152 | ^^^^^^^^^^^^^^^^^^^^^^^
153 |
154 |
155 | .. [#f0] We can add a decimal point after an integer to make it become a floating point number in Python. E.g. ``3.`` represents the floating point number ``3.0``.
156 | .. [#f3] Mainly refer to `Tensor Transformations `_ and `Math `_. Note that the tensor operation API of TensorFlow is very similar to that of NumPy, thus one can get started with TensorFlow rather quickly if one already knows the latter.
157 | .. [#f1] In fact there is an analytic solution for the linear regression. We use gradient descent here just for showing you how TensorFlow works.
158 | .. [#f2] The loss function here is the sum of squared errors :math:`L(a, b) = \frac{1}{2} \sum_{i=1}^5 (ax_i + b - y_i)^2`, whose partial derivatives with respect to ``a`` and ``b`` are :math:`\frac{\partial L}{\partial a} = \sum_{i=1}^5 (ax_i + b - y_i) x_i` and :math:`\frac{\partial L}{\partial b} = \sum_{i=1}^5 (ax_i + b - y_i)`.
159 | .. [#f4] The opposite of Eager Execution is Graph Execution, the mode TensorFlow adopted before Eager Execution was officially released in version 1.8 in March 2018. This handbook is mainly written for Eager Execution, aiming at fast iterative development; however, the basic usage of Graph Execution is also attached in the appendix for reference.
160 |
161 | ..
162 | Tensors (Variables, Constants and Placeholders)
163 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
164 |
165 | Sessions and Computation Graphs
166 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
167 |
168 | Automatic Differentiation and Optimizers
169 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
170 |
171 | Scope of variables
172 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
173 | .. https://tensorflow.google.cn/versions/master/api_docs/python/tf/variable_scope
174 |
175 | Save, Restore and Persistence
176 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
177 |
--------------------------------------------------------------------------------
/source/en/extended.rst:
--------------------------------------------------------------------------------
1 | TensorFlow Extensions
2 | =====================
3 |
4 | This chapter introduces some of the most commonly used TensorFlow extensions. Although these features are not a "must", they make training and using models more convenient.
5 |
6 | Prerequisites:
7 |
8 | * `Python serialization module Pickle `_ (not required)
9 | * `Python special function parameters **kwargs `_ (not required)
10 |
11 | Checkpoint: Saving and Restoring Variables
12 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
13 |
14 | Usually, we hope to save the trained parameters (variables) after the model training is completed, so that the trained model can be obtained directly by loading the model and its parameters whenever it is needed. Perhaps the first thing you think of is to store ``model.variables`` with the Python serialization module ``pickle``. Unfortunately, TensorFlow's variable type ``ResourceVariable`` cannot be serialized this way.
15 |
16 | Fortunately, TensorFlow provides a powerful variable saving and restoring class, `tf.train.Checkpoint `_, which can save and restore all TensorFlow objects that contain checkpointable state with its ``save()`` and ``restore()`` methods. Specifically, ``tf.train.Optimizer`` implementations, ``tf.Variable``, ``tf.keras.Layer`` implementations and ``tf.keras.Model`` implementations can all be saved. Its usage is very simple: we first declare a Checkpoint:
17 |
18 | .. code-block:: python
19 |
20 | checkpoint = tf.train.Checkpoint(model=model)
21 |
22 | Here the initialization parameters passed to ``tf.train.Checkpoint()`` are special: they are ``**kwargs``, i.e. a series of key-value pairs where the keys can be chosen arbitrarily and the values are the objects to be saved. For example, if we want to save a model instance ``model`` that inherits ``tf.keras.Model`` and an optimizer ``optimizer`` that inherits ``tf.train.Optimizer``, we can write:
23 |
24 | .. code-block:: python
25 |
26 | checkpoint = tf.train.Checkpoint(myAwesomeModel=model, myAwesomeOptimizer=optimizer)
27 |
28 | Here ``myAwesomeModel`` is an arbitrary key we choose for the model ``model`` to be saved. Note that we will also use this key when restoring variables.
29 |
30 | Next, when the trained model needs to be saved, use:
31 |
32 | .. code-block:: python
33 |
34 | checkpoint.save(save_path_with_prefix)
35 |
36 | and that's it. ``save_path_with_prefix`` is the save directory plus a file prefix. For example, if you create a folder named "save" in the source directory and call ``checkpoint.save('./save/model.ckpt')`` once, we can find three files in that directory named ``checkpoint``, ``model.ckpt-1.index`` and ``model.ckpt-1.data-00000-of-00001``, which record the variable information. The ``checkpoint.save()`` method can be run multiple times; each run produces an .index file and a .data file, with serial numbers increasing sequentially.
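
For example, the following hypothetical sequence of calls (assuming the ``checkpoint`` declared above and an existing "save" folder) produces files with increasing serial numbers:

.. code-block:: python

    checkpoint.save('./save/model.ckpt')  # writes model.ckpt-1.index and model.ckpt-1.data-...
    checkpoint.save('./save/model.ckpt')  # writes model.ckpt-2.index and model.ckpt-2.data-...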
37 |
38 | When you need to reload previously saved parameters for models elsewhere, you need to instantiate a checkpoint again, while keeping the keys consistent. Then call the restore method of checkpoint. Just like this:
39 |
40 | .. code-block:: python
41 |
42 | model_to_be_restored = MyModel() # The same model of the parameter to be restored
43 | checkpoint = tf.train.Checkpoint(myAwesomeModel=model_to_be_restored) # The key remains as "myAwesomeModel"
44 | checkpoint.restore(save_path_with_prefix_and_index)
45 |
46 | Then the model variables are restored. ``save_path_with_prefix_and_index`` is the directory + prefix + number of the previously saved file. For example, calling ``checkpoint.restore('./save/model.ckpt-1')`` will load the file with the prefix ``model.ckpt`` and sequence number 1 to restore the model.
47 |
48 | When saving multiple files, we often want to load the most recent one. You can use the helper function ``tf.train.latest_checkpoint(save_path)``, which returns the file name of the most recent checkpoint in the directory. For example, if there are 10 saved files from ``model.ckpt-1.index`` to ``model.ckpt-10.index`` in the save directory, ``tf.train.latest_checkpoint('./save')`` returns ``./save/model.ckpt-10``.
49 |
50 | In general, the typical framework for restoring and saving variables is as follows:
51 |
52 | .. code-block:: python
53 |
54 | # train.py - Model training phase
55 |
56 | model = MyModel()
57 | checkpoint = tf.train.Checkpoint(myModel=model) # Instantiate Checkpoint, specify the save object as model (if you need to save the optimizer's parameters, you can also add it)
58 | # Model training code
59 | checkpoint.save('./save/model.ckpt') # Save the parameters to a file after the model is trained, or save it periodically during the training process.
60 |
61 | .. code-block:: python
62 |
63 | # test.py - Model use phase
64 |
65 | model = MyModel()
66 | checkpoint = tf.train.Checkpoint(myModel=model) # Instantiate Checkpoint, specify the recovery object as model
67 | checkpoint.restore(tf.train.latest_checkpoint('./save')) # Restore model parameters from file
68 | # Model usage code
69 |
70 | By the way, ``tf.train.Checkpoint`` is more powerful than ``tf.train.Saver``, which was commonly used in previous versions, because it supports "delayed" restoration of variables under Eager Execution. Specifically, when ``checkpoint.restore()`` is called but the variables in the model have not yet been created, Checkpoint can wait until the variables are created and then restore their values. Under Eager Execution mode, the initialization of each layer in the model and the creation of variables are performed when the model is first called (the advantage is that the shapes of the variables can be determined automatically from the input tensor shape, without manual specification). This means that when the model has just been instantiated, there are actually no variables in it, and using a conventional approach to restore variable values at this point will certainly cause an error. For example, if you save the model parameters by calling the ``save_weights()`` method of ``tf.keras.Model`` in train.py and then call ``load_weights()`` immediately after instantiating the model in test.py, an error will occur; only after calling the model once and then running ``load_weights()`` can you get the correct result. It is obvious that ``tf.train.Checkpoint`` brings us considerable convenience in this case. In addition, ``tf.train.Checkpoint`` also supports the Graph Execution mode.
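
The following is a minimal sketch of this "delayed" restore behavior (``MyModel`` and ``data_X``, a batch of input data, are assumed to be defined elsewhere, e.g. as in the multilayer perceptron example):

.. code-block:: python

    model_to_be_restored = MyModel()                           # no variables have been created yet
    checkpoint = tf.train.Checkpoint(myModel=model_to_be_restored)
    checkpoint.restore(tf.train.latest_checkpoint('./save'))   # restored values are queued for now
    y_pred = model_to_be_restored(data_X)                      # variables are created on this first call
                                                               # and immediately filled with the restored values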
71 |
72 | Finally, an example is provided. The previous chapter's :ref:`multilayer perceptron model ` shows the preservation and loading of model variables:
73 |
74 | .. literalinclude:: ../_static/code/en/extended/save_and_restore/mnist.py
75 |
76 | After the save folder is created in the source directory and the model is trained, the model variables are saved into the save folder every 100 batches. Change line 7 to ``model = 'test'`` and run the code again; the model will be restored directly from the last saved variable values and evaluated on the test set, directly reaching an accuracy of about 95%.
77 |
78 | TensorBoard: Visualization of the Training Process
79 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
80 |
81 | Sometimes you want to see how various parameters change during model training (such as the value of the loss function). Although they can be viewed through the terminal output, this is sometimes not intuitive enough. TensorBoard is a tool that helps us visualize the training process.
82 |
83 | Currently, TensorBoard support in Eager Execution mode still lives in `tf.contrib.summary `_, and there may be more changes in the future, so here we only give a simple example. First, create a folder (such as ./tensorboard) in the source directory to store the TensorBoard record files, and instantiate a logger in the code:
84 |
85 | .. code-block:: python
86 |
87 | summary_writer = tf.contrib.summary.create_file_writer('./tensorboard')
88 |
89 | Next, put the training code inside a "with" statement whose context is ``summary_writer.as_default()`` and ``tf.contrib.summary.always_record_summaries()``, and run ``tf.contrib.summary.scalar(name, tensor, step=batch_index)`` for every parameter that needs to be logged (usually a scalar). The ``step`` parameter here can be set according to your own needs and is commonly set to the batch index of the current training process. The overall framework is as follows:
90 |
91 | .. code-block:: python
92 |
93 | summary_writer = tf.contrib.summary.create_file_writer('./tensorboard')
94 | with summary_writer.as_default(), tf.contrib.summary.always_record_summaries():
95 | # Start model training
96 | for batch_index in range(num_batches):
97 | # Training code, the current loss of batch is put into the variable "loss"
98 | tf.contrib.summary.scalar("loss", loss, step=batch_index)
99 | tf.contrib.summary.scalar("MyScalar", my_scalar, step=batch_index) # You can also add other variables
100 |
101 | Each time you run ``tf.contrib.summary.scalar()``, the logger writes a record to the log file. In addition to the simplest scalar, TensorBoard can also visualize other types of data (such as images, audio, etc.) as described in the `API document `_.
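
For example, an image summary can be written in the same context; the following one-liner is a sketch assuming ``image_batch`` is a ``[batch, height, width, channels]`` tensor available inside the training loop:

.. code-block:: python

    tf.contrib.summary.image("input_images", image_batch, step=batch_index)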
102 |
103 | When we want to visualize the training process, open the terminal in the source directory (and enter the TensorFlow conda environment if necessary), run::
104 |
105 | tensorboard --logdir=./tensorboard
106 |
107 | Then use a browser to visit the URL output by the terminal (usually http://computer_name:6006) to access the TensorBoard interface, as shown below:
108 |
109 | .. figure:: ../_static/image/extended/tensorboard.png
110 | :width: 100%
111 | :align: center
112 |
113 | By default, TensorBoard updates data every 30 seconds. However, you can also manually refresh by clicking the refresh button in the upper right corner.
114 |
115 | When using TensorBoard, please note the following:
116 |
117 | * If you want to retrain, you need to delete the information in the record folder and restart TensorBoard (or create a new record folder and open TensorBoard with the ``--logdir`` parameter set to be the newly created folder);
118 | * The path of the record directory should contain only English characters.
119 |
120 | Finally, we provide an example of the previous chapter's :ref:`multilayer perceptron model ` showing the use of TensorBoard:
121 |
122 | .. literalinclude:: ../_static/code/en/extended/tensorboard/mnist.py
123 |
124 | GPU Usage and Allocation
125 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
126 |
127 | A common scenario is that many students/researchers in a lab or company research group need to use the GPU, but there is only one multi-card machine. In this case, you need to pay attention to how GPU resources are allocated.
128 |
129 | The command ``nvidia-smi`` shows the existing GPUs on the machine and their usage (in Windows, add ``C:\Program Files\NVIDIA Corporation\NVSMI`` to the "Path" environment variable; in Windows 10 you can also view the graphics card information in the Performance tab of the Task Manager).
130 |
131 | Use the environment variable ``CUDA_VISIBLE_DEVICES`` to control the GPU used by the program. Assume that, on a four-card machine, GPUs 0, 1 are in use and GPUs 2, 3 are idle. Then type in the Linux terminal::
132 |
133 | export CUDA_VISIBLE_DEVICES=2,3
134 |
135 | or add this in the code,
136 |
137 | .. code-block:: python
138 |
139 | import os
140 | os.environ['CUDA_VISIBLE_DEVICES'] = "2,3"
141 |
142 | to specify that the program runs only on GPUs 2, 3.
143 |
144 | By default, TensorFlow uses almost all of the available GPU memory to avoid the performance loss caused by memory fragmentation. You can set the strategy TensorFlow uses for GPU memory through the ``tf.ConfigProto`` class: instantiate a ``tf.ConfigProto`` object, set its parameters, and pass it as the ``config`` parameter when running ``tf.enable_eager_execution()``. The following code uses the ``allow_growth`` option so that TensorFlow only allocates memory space when needed:
145 |
146 | .. code-block:: python
147 |
148 | config = tf.ConfigProto()
149 | config.gpu_options.allow_growth = True
150 | tf.enable_eager_execution(config=config)
151 |
152 | The following code sets TensorFlow to consume 40% of GPU memory by the ``per_process_gpu_memory_fraction`` option:
153 |
154 | .. code-block:: python
155 |
156 | config = tf.ConfigProto()
157 | config.gpu_options.per_process_gpu_memory_fraction = 0.4
158 | tf.enable_eager_execution(config=config)
159 |
160 | Under Graph Execution, you can also pass a ``tf.ConfigProto`` instance when instantiating a new session to configure it, as sketched below.
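
.. code-block:: python

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)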
161 |
--------------------------------------------------------------------------------
/source/en/installation.rst:
--------------------------------------------------------------------------------
1 | TensorFlow Installation
2 | =======================
3 |
4 | The most up-to-date installation method can be acquired through the official website (https://tensorflow.google.cn/install). TensorFlow supports multiple programming languages like Python, Java and Go and a variety of operating systems like Windows, OSX and Linux. We prefer Python in this handbook.
5 |
6 | We provide both simple and full installation methods for readers with different requirements.
7 |
8 | Simple Installation
9 | ^^^^^^^^^^^^^^^^^^^^^^
10 | You can follow these steps if you only want to install TensorFlow on a personal computer without a GPU, or if you do not want to spend too much effort configuring the environment:
11 |
12 | - Download and install Python distribution `Anaconda `_ (with Python Ver 3.6).
13 | - Download and install Python IDE `PyCharm `_ (Community version. Students can apply for `licenses of Professional version for free `_).
14 | - Run ``Anaconda Prompt`` in the Start Menu (Windows), enter and execute ``pip install tensorflow``.
15 | - Start PyCharm, create a project with default python interpreter, and create a python file in the project.
16 |
17 | And done.
18 |
19 | Full Installation
20 | ^^^^^^^^^^^^^^^^^^^^
21 | This part includes more details of installation (e.g. building a conda environment) and guidance for the GPU version of TensorFlow environment installation.
22 |
23 | Environment configuration before installation
24 | ------------------------------------------------
25 | Before installing TensorFlow, we need to set up a proper environment with the following steps:
26 |
27 | 1. Check whether your computer has an NVIDIA graphics card. If so, install the GPU version of TensorFlow to take advantage of its powerful computation acceleration [#f1]_; otherwise, just install the CPU version. To be more specific, the CUDA Computing Capability of your graphics card, which you can check on the `NVIDIA official website `_, should not be less than 3.0.
28 | 2. Install the Python environment. Anaconda is recommended. It is an open-source Python distribution that provides a full environment for scientific computation, including common libraries such as NumPy and SciPy; of course, you can choose your own favorite instead. Note that TensorFlow only supports Python 3.X under Windows at the time this handbook is written.
29 |
30 | * You can choose to add the directory of Anaconda into the PATH (though not recommended by the installation wizard). It enables you to call all Anaconda commands under command line or Powershell directly. You can always call them under the Anaconda Prompt started in the Start Menu.
31 |
32 | 3. (For GPU version installation) Install the NVIDIA graphics driver, `CUDA Toolkit `_ and `cuDNN `_. You should note that:
33 |
34 | * We recommend installing in the following order: 1) the latest NVIDIA graphics driver; 2) CUDA (without selecting the bundled driver during installation, since it may be out of date); 3) cuDNN;
35 | * There is a quite simple way to install drivers on Ubuntu: first click "Software & Updates" in "System Settings", then toggle on the "Using NVIDIA binary driver" option in "Additional Drivers" and click "Apply Changes"; the system will install the NVIDIA driver automatically. Otherwise, installing the NVIDIA driver on Linux can be troublesome: you need to disable the built-in Nouveau graphics driver and the Secure Boot function of the motherboard. You can find more detailed guidance `here `_;
36 | * The versions of the CUDA Toolkit and cuDNN must agree with the requirements on the TensorFlow official website, which do not always ask for the latest version.
37 | * You have to copy the downloaded files of cuDNN to the installation directory of CUDA to complete cuDNN installation.
38 |
39 | Install
40 | ----------------
41 |
42 | The following are the steps to install TensorFlow under Anaconda (taking Windows as an example):
43 |
44 | 1. Create a conda environment named ``tensorflow``
45 |
46 | ::
47 |
48 | conda create -n tensorflow python=X.X # Substitute "X.X" with your own Python version, e.g. "3.6".
49 |
50 | 2. Activate the environment
51 |
52 | ::
53 |
54 | activate tensorflow
55 |
56 | 3. Use pip to install TensorFlow
57 |
58 | Install the CPU version
59 | ::
60 |
61 | pip install tensorflow
62 |
63 | Or, install the GPU version
64 | ::
65 |
66 | pip install tensorflow-gpu
67 |
68 | You can also choose to install the Nightly version of TensorFlow if you want. It may include some of the latest features not yet in the official release (e.g. before TensorFlow 1.8, the Eager Execution mode used in this handbook was only supported in the Nightly version), but it may also be less stable. You can do so by running ``pip install tf-nightly`` (CPU version) or ``pip install tf-nightly-gpu`` (GPU version) in a new virtual environment. If you are going to install the GPU version, it may require higher versions of CUDA and cuDNN. Fortunately, different versions of CUDA and cuDNN can coexist.
69 |
70 | If it is slow to install through pip command in China, you may want to try `TensorFlow mirror on TUNA `_.
71 |
72 | First Program
73 | ^^^^^^^^^^^^^^^
74 |
75 | We write a piece of code to verify the installation.
76 |
77 | Enter ``activate tensorflow`` on the command line to enter the previously built conda environment with TensorFlow, then enter ``python`` to enter the Python environment. Input the following code line by line:
78 |
79 | .. code-block:: python
80 |
81 | import tensorflow as tf
82 | tf.enable_eager_execution()
83 |
84 | A = tf.constant([[1, 2], [3, 4]])
85 | B = tf.constant([[5, 6], [7, 8]])
86 | C = tf.matmul(A, B)
87 |
88 | print(C)
89 |
90 | If the output is::
91 |
92 | tf.Tensor(
93 | [[19 22]
94 | [43 50]], shape=(2, 2), dtype=int32)
95 |
96 | then we can conclude that TensorFlow was installed successfully. It's normal for the program to output some prompt messages while running.
97 |
98 | Here we use Python. You can find Python tutorials at https://docs.python.org/3/tutorial/. From now on we assume that readers are familiar with the basics of Python. Relax, Python is easy to handle, and advanced features of Python will barely be involved in TensorFlow. We recommend `PyCharm `_ as your Python IDE. If you are a student with an email address ending in .edu, you can apply for a free license `here `_. If you do not meet that criterion, you can always download the PyCharm Community version, whose main functions do not differ that much from the Professional version.
99 |
100 | .. [#f1] The effect of acceleration is relative to the GPU performance. It won't be satisfactory if you have a high-performance CPU and an entry-level GPU, where the speed-up will only be around 1-2x. However, the speed-up may reach 10x or even higher for specific models if you have a powerful GPU (e.g. NVIDIA GeForce GTX 1080 Ti or the NVIDIA GeForce TITAN series were powerful graphics cards when this handbook was being written). Meanwhile, the speed-up is also influenced by the task itself. The beginner-level models in this handbook do not require much performance; the CPU version is adequate. You can decide whether to purchase a better graphics card for faster training after you master the basics of TensorFlow.
101 |
102 | Upgrade to A New Version
103 | ^^^^^^^^^^^^^^^^^^^^^^^^
104 |
105 | TensorFlow is updated frequently. If you want to upgrade TensorFlow, please enter the conda environment with TensorFlow installed and type the following command
106 |
107 | ::
108 |
109 | pip install tensorflow --upgrade
110 |
111 | If you want to install a specific version of TensorFlow, please type
112 |
113 | ::
114 |
115 | pip install tensorflow==1.8.0 # here 1.8.0 is the specified version
116 |
117 | Upgrading TensorFlow can be risky, and there may be errors when you import TensorFlow after the upgrade. A simpler way is to delete the current conda environment and reinstall it. The following conda commands may be useful::
118 |
119 | conda list # List all packages and versions in the current conda environment
120 | conda env list # List all conda environments
121 | conda create --name new_env_name --clone old_env_name # Backup the current conda environment `old_env_name` to `new_env_name`
122 | conda env remove -n tensorflow # Delete the conda environment named `tensorflow`
123 |
--------------------------------------------------------------------------------
/source/en/preface.rst:
--------------------------------------------------------------------------------
1 | Preface
2 | =========
3 |
4 | On March 30th, 2018, Google held the second TensorFlow Dev Summit in Mountain View, California and announced the official release of TensorFlow version 1.8. I was fortunate to attend the summit with Google's sponsorship, witnessing the release of this milestone new version. The many new features added and supported show the ambition of TensorFlow. Meanwhile, Eager Execution, which had been tested since fall 2017, was finally included officially in this version and became the recommended mode for newcomers to TensorFlow.
5 |
6 | The easiest way to get started with TensorFlow is using Eager Execution.
7 |
8 | —— https://www.tensorflow.org/get_started/
9 |
10 | Before then, the disadvantages of the Graph Execution mode in TensorFlow, such as its high learning threshold, difficulty in debugging, poor flexibility and inability to use Python's native control statements, had long been criticized by developers. Some new deep learning frameworks based on dynamic computational graphs (e.g. PyTorch) have emerged and won their place through their ease of use and development efficiency. These dynamic deep learning frameworks are especially popular in academic research, where fast iterative development of models is required. In fact, I was the only person who used "old-fashioned" TensorFlow in my machine learning laboratory, where I worked with dozens of colleagues. However, until now, most of the Chinese technical books and materials about TensorFlow are still based on the Graph Execution mode, which really discourages beginners (especially those undergraduates who have just finished their machine learning courses) from learning. Therefore, as TensorFlow officially supports Eager Execution, it is necessary to publish a brand new handbook which helps beginners, as well as researchers who need to iterate models rapidly, get started quickly from a new perspective.
11 |
12 | Meanwhile, this handbook has another mission. Most Chinese technical books about TensorFlow focus mainly on deep learning and regard TensorFlow as a mere tool for implementing deep learning models. Admittedly, they are self-contained, but they are not friendly enough for those who already know machine learning and deep learning theories and want to focus on learning TensorFlow itself. In addition, although TensorFlow has its official documentation (https://tensorflow.google.cn/tutorials), its structure is not well organized and it lacks the step-by-step character of a common tutorial, making it more like a technical reference. Therefore, I hope to write a handbook that shows the main features of TensorFlow as a computing framework as completely as possible and makes up for the shortcomings of the official manual, in an effort to help readers who already have certain machine learning/deep learning knowledge and programming skills get started with TensorFlow quickly and solve practical problems during actual programming.
13 |
14 | The main features of this handbook are:
15 |
16 | * This book is mainly based on the most up-to-date Eager Execution mode in TensorFlow for fast iterative development of models. However, the traditional Graph Execution mode is also included, and we will do our best to make the code provided in this book compatible with both modes.
17 | * We position this book mainly as a tutorial and handbook, and arrange the concepts and functions of TensorFlow as the core part, for TensorFlow developers to refer to quickly. Chapters are relatively independent of one another; therefore, it is not necessary to read this book in sequential order. There won't be much theory of deep learning and machine learning in the text; however, some recommendations are still provided for beginners to grasp the related basic knowledge.
18 | * All code is carefully written to be concise and efficient. All models are implemented with ``tf.keras.Model`` and ``tf.keras.layers.Layer``, an approach just proposed by the `TensorFlow official documentation `_ and barely introduced in other technical documentation, which guarantees high reusability. Each project is written in fewer than 100 lines of code so that readers can understand and practice quickly.
19 | * Less is more. We do not aim to cover everything, and there are no large blocks of details.
20 |
21 | The parts marked "*" are optional in this handbook.
22 |
23 | This handbook is tentatively named "A Concise Handbook of TensorFlow" in order to pay tribute to the book "A Concise Handbook of :math:`\text{\LaTeX}`" (https://github.com/wklchris/Note-by-LaTeX) written by my friend and colleague Chris Wu. The latter is a rare Chinese-language material about :math:`\text{\LaTeX}`, and I also learned from it while writing this handbook. This handbook was initially written and used by myself as a prerequisite handout in a deep learning seminar organized by my friend Ji-An Li. My friends' wisdom and selflessness also prompted me to finish this work.
24 |
25 | The English version of this handbook was translated by my friends Zida Jin (Chapters 1-4) and Ming (Chapters 5-6), and revised by Ji-An Li and me. My three friends sacrificed a lot of valuable time to translate and proofread this handbook. Ji-An Li also provided valuable comments on the teaching content and code details of this manual. I would like to express my heartfelt thanks to my friends for their hard work.
26 |
27 | I am grateful to the members of the Google China Developer Relations team and the TensorFlow engineering team for their help in writing this handbook. Among them, Luke Cheng of the Developer Relations team provided inspiration and continuous encouragement throughout the writing of this manual; Rui Li and Pryce Mu of the Developer Relations team provided strong support in promoting this manual; and Tiezhen Wang of the TensorFlow team provided many suggestions on the engineering details of the manual.
28 |
29 | |
30 |
31 | Xihan Li (Snowkylin)
32 |
33 | August 2018 in Yanyuan
34 |
--------------------------------------------------------------------------------
/source/en/static.rst:
--------------------------------------------------------------------------------
1 | Appendix: Static TensorFlow
2 | ======================================
3 |
4 | TensorFlow 1+1
5 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6 |
7 | Essentially, TensorFlow is a symbolic computation framework (based on computational graphs). Here is a "Hello World" example of computing 1+1.
8 |
9 | .. literalinclude:: ../_static/code/en/basic/graph/1plus1.py
10 |
11 | Output::
12 |
13 | 2
14 |
15 | The program above can only compute 1+1. The following program, however, shows how to use TensorFlow to compute the sum of any two numbers through ``tf.placeholder()`` and the ``feed_dict=`` parameter of ``sess.run()``:
16 |
17 | .. literalinclude:: ../_static/code/en/basic/graph/aplusb.py
18 |
19 | Terminal::
20 |
21 | >>> a = 2
22 | >>> b = 3
23 | a + b = 5
24 |
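A minimal sketch of this placeholder-and-``feed_dict`` pattern (the actual included file may differ, e.g. in how the two numbers are read in) looks like this:

.. code-block:: python

    import tensorflow as tf

    a = tf.placeholder(dtype=tf.int32)   # symbolic "input nodes"
    b = tf.placeholder(dtype=tf.int32)
    c = a + b                            # equivalent to tf.add(a, b)

    with tf.Session() as sess:
        print(sess.run(c, feed_dict={a: 2, b: 3}))   # 5
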
25 | A **Variable** is a special type of tensor, which is built using ``tf.get_variable()``. Just like variables in common programming languages, a ``Variable`` should be initialized before being used, and its value can be modified during computation in the computational graph. The following example shows how to create a ``Variable``, initialize its value to 0, and increment it by one.
26 |
27 | .. literalinclude:: ../_static/code/en/basic/graph/variable.py
28 |
29 | Output::
30 |
31 | 1.0
32 | 2.0
33 | 3.0
34 | 4.0
35 | 5.0
36 |
37 | The following code is equivalent to the code shown above. It specifies the initializer upon declaring variables and initializes all variables at once by ``tf.global_variables_initializer()``, which is used more often in practical projects:
38 |
39 | .. literalinclude:: ../_static/code/en/basic/graph/variable_with_initializer.py
40 |
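A minimal sketch combining the two ideas above (an initializer specified at declaration time plus ``tf.global_variables_initializer()``; the handbook's included files may differ in detail):

.. code-block:: python

    import tensorflow as tf

    x = tf.get_variable('x', shape=[], initializer=tf.zeros_initializer())
    x_plus_1 = tf.assign(x, x + 1.)      # an operation that adds 1 to x

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())   # initialize all variables at once
        for _ in range(5):
            print(sess.run(x_plus_1))                  # 1.0 2.0 3.0 4.0 5.0
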
41 | Matrix and tensor calculation is the basic operation in scientific computation (including Machine Learning). The program shown below is to demonstrate how to calculate the product of the two matrices :math:`\begin{bmatrix} 1 & 1 & 1 \\ 1 & 1 & 1 \end{bmatrix}` and :math:`\begin{bmatrix} 1 & 1 \\ 1 & 1 \\ 1 & 1 \end{bmatrix}`:
42 |
43 | .. literalinclude:: ../_static/code/en/basic/graph/AmatmulB.py
44 |
45 | Output::
46 |
47 | [[3. 3.]
48 | [3. 3.]]
49 |
50 | Placeholders and Variables can also be vectors, matrices and even higher-dimensional tensors.
51 |
52 | A Basic Example: Linear Regression
53 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
54 |
55 | Unlike the previous NumPy and Eager Execution examples, TensorFlow's Graph Execution mode uses **symbolic programming** for numerical operations. First, we need to abstract the computational process into a Dataflow Graph, representing the inputs, operations and outputs with symbolic nodes. Then, we continually send data to the input nodes, let the data be calculated and flow along the dataflow graph, and finally reach the specific output nodes we want. The following code shows how to accomplish the same task as the code in the previous section based on TensorFlow's symbolic programming approach, where ``tf.placeholder()`` can be regarded as a kind of "symbolic input node", ``tf.get_variable()`` defines the parameters of the model (a tensor of the Variable type can be assigned using ``tf.assign()``), and ``sess.run(output_node, feed_dict={input_node: data})`` can be thought of as a process that sends data to the input nodes, computes along the dataflow graph, reaches the output node and finally returns its value.
56 |
57 | .. literalinclude:: ../_static/code/en/basic/example/tensorflow_manual_grad.py
58 | :lines: 9-
59 |
60 | In the two examples above, we manually calculated the partial derivatives of the loss function with regard to each parameter. But when both the model and the loss function become very complicated (especially in deep learning models), the workload of manual derivation is unacceptable. TensorFlow provides an **automatic differentiation mechanism** that eliminates the hassle of manually calculating derivatives; its derivation function ``tf.gradients(ys, xs)`` computes the partial derivatives of the loss function with regard to ``a`` and ``b``. Thus, the two lines of code in the previous section for calculating derivatives manually,
61 |
62 | .. literalinclude:: ../_static/code/en/basic/example/tensorflow_manual_grad.py
63 | :lines: 21-23
64 |
65 | could be replaced by
66 |
67 | .. code-block:: python
68 |
69 | grad_a, grad_b = tf.gradients(loss, [a, b])
70 |
71 | and the result won't change.
72 |
73 | Moreover, TensorFlow provides many kinds of **optimizers**, which can compute gradients and apply the updates in a single step. The code in the previous section,
74 |
75 | .. literalinclude:: ../_static/code/en/basic/example/tensorflow_manual_grad.py
76 | :lines: 21-31
77 |
78 | could be replaced by
79 |
80 | .. code-block:: python
81 |
82 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate_)
83 | grad = optimizer.compute_gradients(loss)
84 | train_op = optimizer.apply_gradients(grad)
85 |
86 | Here, we first instantiate a gradient descent optimizer ``tf.train.GradientDescentOptimizer()`` in TensorFlow and set its learning rate. We then use its ``compute_gradients(loss)`` method to compute the gradients of ``loss`` with respect to all variables (parameters). Finally, ``apply_gradients(grad)`` updates the variables (parameters) according to the gradients just computed.
87 |
88 | These three lines of code are equivalent to the following line of code:
89 |
90 | .. code-block:: python
91 |
92 | train_op = tf.train.GradientDescentOptimizer(learning_rate=learning_rate_).minimize(loss)
93 |
94 | The simplified code is as follows:
95 |
96 | .. literalinclude:: ../_static/code/en/basic/example/tensorflow_autograd.py
97 | :lines: 9-29
98 |
--------------------------------------------------------------------------------
/source/index.rst:
--------------------------------------------------------------------------------
1 | .. 简单粗暴TensorFlow documentation master file, created by
2 | sphinx-quickstart on Sat Jan 20 00:48:15 2018.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | ==================================================================
7 | 简单粗暴TensorFlow | A Concise Handbook of TensorFlow
8 | ==================================================================
9 |
10 | *基于Eager Execution | Based on Eager Execution*
11 |
12 | ..
13 | 本文档为未完成版本,内容会随时更改修订,目前请不要扩散。
14 |
15 | This document is unfinished, content will be updated rapidly. Please keep it internal at this time.
16 |
17 | 本手册是一篇精简的TensorFlow入门指导,基于TensorFlow的Eager Execution(动态图)模式,力图让具备一定机器学习及Python基础的开发者们快速上手TensorFlow。
18 |
19 | 友情提醒:如果发现阅读中有难以理解的部分,请检查自己对每章的“前置知识”部分是否有清楚的理解。
20 |
21 | This handbook is a concise introduction to TensorFlow based on Eager Execution mode, trying to help developers get started with TensorFlow quickly with some basic machine learning and Python knowledge.
22 |
23 | Friendly reminder: If you find something difficult to understand in reading, please check the "Prerequisites" part of each chapter.
24 |
25 | +----------------------+-----------------------+
26 | | .. toctree:: | .. toctree:: |
27 | | :maxdepth: 2 | :maxdepth: 2 |
28 | | :caption: 目录 | :caption: Contents |
29 | | | |
30 | | zh/preface | en/preface |
31 | | zh/installation | en/installation |
32 | | zh/basic | en/basic |
33 | | zh/models | en/models |
34 | | zh/extended | en/extended |
35 | | zh/static | en/static |
36 | +----------------------+-----------------------+
37 |
38 | ..
39 | .. toctree::
40 | zh/preface
41 | zh/installation
42 | zh/basic
43 | zh/models
44 | zh/extended
45 | zh/static
46 |
47 | ..
48 | .. toctree::
49 | en/preface
50 | en/installation
51 | en/basic
52 | en/models
53 | en/extended
54 | en/static
55 |
56 | 答疑区
57 |
58 | - (中文)TensorFlow中文社区“简单粗暴TensorFlow”版面: https://www.tensorflowers.cn/b/48 (中文的疑问和建议请来此处,将以中文回答和讨论。欢迎使用中文的开发者们前来TensorFlow中文社区交流讨论)
59 | - (英文)https://github.com/snowkylin/TensorFlow-cn/releases (英文的疑问或建议可在GitHub issue中提出,会以英文回答)
60 |
61 | PDF下载:
62 |
63 | - 中文版:https://www.tensorflowers.cn/t/6230 (同时也有英文版下载)
64 | - 英文版:https://github.com/snowkylin/TensorFlow-cn/releases
65 |
66 | GitHub: https://github.com/snowkylin/TensorFlow-cn
67 |
68 | Q&A area
69 |
70 | - (Chinese) TensorFlow Chinese community "A Concise Handbook of TensorFlow" forum: https://www.tensorflowers.cn/b/48
71 | - (English) https://github.com/snowkylin/TensorFlow-cn/issues
72 |
73 | PDF download:
74 |
75 | - Chinese version: https://www.tensorflowers.cn/t/6230
76 | - English version: https://github.com/snowkylin/TensorFlow-cn/releases
77 |
78 | GitHub: https://github.com/snowkylin/TensorFlow-cn
79 |
80 | ..
81 | preface
82 | introduction
83 | installation
84 | basic
85 | ops
86 | models
87 | --
88 | visualization
89 | debugging
90 | --
91 | distributed
92 | dynamic
93 | code
94 | appendix
95 |
96 | .. only:: html
97 |
98 | Indices and tables
99 | ==================
100 |
101 | * :ref:`genindex`
102 | * :ref:`modindex`
103 | * :ref:`search`
104 |
109 |
--------------------------------------------------------------------------------
/source/zh/basic.rst:
--------------------------------------------------------------------------------
1 | TensorFlow基础
2 | ======================
3 |
4 | ..
5 | https://www.datacamp.com/community/tutorials/tensorflow-tutorial
6 |
7 | TensorFlow,顾名思义,就是Tensor(张量)进行Flow(流动)的过程。所谓张量,即对向量(一维)和矩阵(二维)的一种推广,类似于多维数组。而张量的流动则是基于数据流图(Dataflow Graph,也称计算图Computational Graph)。一个典型的TensorFlow程序由以下几个部分组成:
8 |
9 | 1. 定义一个数据流图(在深度学习中往往称之为“模型”),其中往往包含大量的变量(深度学习中“模型的待训练参数”);
10 | 2. 反复进行以下步骤:
11 |
12 | 1. 将训练数据转换为张量,并送入数据流图进行计算(前向传播);
13 | #. 计算损失函数的值,并对各变量求偏导数(反向传播);
14 | #. 使用梯度下降或其他优化器(Optimizer)对变量进行更新以减小损失函数的值(即“对参数进行训练”)。
15 |
16 | 在步骤2重复足够多的次数(训练足够长的时间)后,损失函数达到较小的值并保持稳定,即完成了模型的训练。
17 |
18 | 在对TensorFlow的具体概念,如张量(Tensor)、数据流图(Dataflow Graph)、变量(Variable)、优化器(Optimizer)等进行具体介绍之前,本手册先举一个具体的例子,以让读者能对TensorFlow的基本运作方式有一个直观的理解。
19 |
20 | 本章介绍TensorFlow的基本操作。
21 |
22 | 前置知识:
23 |
24 | * `Python基本操作 `_ (赋值、分支及循环语句、使用import导入库);
25 | * `Python的With语句 `_ ;
26 | * `NumPy `_ ,Python下常用的科学计算库。TensorFlow与之结合紧密;
27 | * `向量 `_ 和 `矩阵 `_ 运算(矩阵的加减法、矩阵与向量相乘、矩阵与矩阵相乘、矩阵的转置等。测试题::math:`\begin{bmatrix} 1 & 2 \\ 3 & 4 \end{bmatrix} \times \begin{bmatrix} 5 & 6 \\ 7 & 8 \end{bmatrix} = ?`);
28 | * `函数的导数 `_ ,`多元函数求导 `_ (测试题::math:`f(x, y) = x^2 + xy + y^2, \frac{\partial f}{\partial x} = ?, \frac{\partial f}{\partial y} = ?`);
29 | * `线性回归 `_ ;
30 | * `梯度下降方法 `_ 求函数的局部最小值。
31 |
32 | TensorFlow 1+1
33 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^
34 |
35 | 我们可以先简单地将TensorFlow视为一个科学计算库(类似于Python下的NumPy)。这里以计算 :math:`1+1` 和 :math:`\begin{bmatrix} 1 & 2 \\ 3 & 4 \end{bmatrix} \times \begin{bmatrix} 5 & 6 \\ 7 & 8 \end{bmatrix}` 作为Hello World的示例。
36 |
37 | .. literalinclude:: ../_static/code/zh/basic/eager/1plus1.py
38 |
39 | 输出::
40 |
41 | tf.Tensor(2, shape=(), dtype=int32)
42 | tf.Tensor(
43 | [[19 22]
44 | [43 50]], shape=(2, 2), dtype=int32)
45 |
46 | 以上代码声明了 ``a``、``b``、``A``、``B`` 四个 **张量** (Tensor),并使用了 ``tf.add()`` 和 ``tf.matmul()`` 两个 **操作** (Operation)对张量进行了加法和矩阵乘法运算,运算结果即时存储于 ``c``、``C`` 两个张量内。张量的重要属性是其形状(shape)和类型(dtype)。这里 ``a``、``b``、``c`` 是纯量,形状为空,类型为int32;``A``、``B``、``C`` 为2×2的矩阵,形状为 ``(2, 2)``,类型为int32。
47 |
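   | 上述代码的一个等价示意如下(实际代码以上方包含的 ``1plus1.py`` 为准,此处仅作示意):
   |
   | .. code-block:: python
   |
   |     import tensorflow as tf
   |     tf.enable_eager_execution()
   |
   |     a = tf.constant(1)
   |     b = tf.constant(1)
   |     c = tf.add(a, b)    # 也可以直接写 c = a + b
   |
   |     A = tf.constant([[1, 2], [3, 4]])
   |     B = tf.constant([[5, 6], [7, 8]])
   |     C = tf.matmul(A, B)
   |
   |     print(c)
   |     print(C)
   |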
48 | 在机器学习中,我们经常需要计算函数的导数。TensorFlow提供了强大的 **自动求导机制** 来计算导数。以下代码展示了如何使用 ``tf.GradientTape()`` 计算函数 :math:`y(x) = x^2` 在 :math:`x = 3` 时的导数:
49 |
50 | .. literalinclude:: ../_static/code/zh/basic/eager/grad.py
51 | :lines: 1-8
52 |
53 | 输出::
54 |
55 | [array([9.], dtype=float32), array([6.], dtype=float32)]
56 |
57 | 这里 ``x`` 是一个初始化为3的 **变量** (Variable),使用 ``tf.get_variable()`` 声明。与普通张量一样,变量同样具有形状(shape)和类型(dtype)属性,不过使用变量需要有一个初始化过程,可以通过在 ``tf.get_variable()`` 中指定 ``initializer`` 参数来指定所使用的初始化器。这里使用 ``tf.constant_initializer(3.)`` 将变量 ``x`` 初始化为float32类型的 ``3.`` [#f0]_。变量与普通张量的一个重要区别是其默认能够被TensorFlow的自动求导机制所求导,因此往往被用于定义机器学习模型的参数。 ``tf.GradientTape()`` 是一个自动求导的记录器,在其中的变量和计算步骤都会被自动记录。上面的示例中,变量 ``x`` 和计算步骤 ``y = tf.square(x)`` 被自动记录,因此可以通过 ``y_grad = tape.gradient(y, x)`` 求张量 ``y`` 对变量 ``x`` 的导数。
58 |
59 | 在机器学习中,更加常见的是对多元函数求偏导数,以及对向量或矩阵的求导。这些对于TensorFlow也不在话下。以下代码展示了如何使用 ``tf.GradientTape()`` 计算函数 :math:`L(w, b) = \|Xw + b - y\|^2` 在 :math:`w = (1, 2)^T, b = 1` 时分别对 :math:`w, b` 的偏导数。其中 :math:`X = \begin{bmatrix} 1 & 2 \\ 3 & 4 \end{bmatrix}, y = \begin{bmatrix} 1 \\ 2\end{bmatrix}`。
60 |
61 | .. literalinclude:: ../_static/code/zh/basic/eager/grad.py
62 | :lines: 10-17
63 |
64 | 输出::
65 |
66 | [62.5, array([[35.],
67 | [50.]], dtype=float32), array([15.], dtype=float32)]
68 |
69 | 这里, ``tf.square()`` 操作代表对输入张量的每一个元素求平方,不改变张量形状。 ``tf.reduce_sum()`` 操作代表对输入张量的所有元素求和,输出一个形状为空的纯量张量(可以通过 ``axis`` 参数来指定求和的维度,不指定则默认对所有元素求和)。TensorFlow中有大量的张量操作API,包括数学运算、张量形状操作(如 ``tf.reshape()``)、切片和连接(如 ``tf.concat()``)等多种类型,可以通过查阅TensorFlow的官方API文档 [#f3]_ 来进一步了解。
70 |
71 | 从输出可见,TensorFlow帮助我们计算出了
72 |
73 | .. math::
74 |
75 | L((1, 2)^T, 1) &= 62.5
76 |
77 | \frac{\partial L(w, b)}{\partial w} |_{w = (1, 2)^T, b = 1} &= \begin{bmatrix} 35 \\ 50\end{bmatrix}
78 |
79 | \frac{\partial L(w, b)}{\partial b} |_{w = (1, 2)^T, b = 1} &= 15
80 |
81 | ..
82 | 以上的自动求导机制结合 **优化器** ,可以计算函数的极值。这里以线性回归示例(本质是求 :math:`\min_{w, b} L = (Xw + b - y)^2` ,具体原理见 :ref:`后节 ` ):
83 |
84 | .. literalinclude:: ../_static/code/zh/basic/eager/regression.py
85 |
86 | .. _linear-regression:
87 |
88 | 基础示例:线性回归
89 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^
90 |
91 | 考虑一个实际问题,某城市在2013年-2017年的房价如下表所示:
92 |
93 | ====== ===== ===== ===== ===== =====
94 | 年份 2013 2014 2015 2016 2017
95 | 房价 12000 14000 15000 16500 17500
96 | ====== ===== ===== ===== ===== =====
97 |
98 | 现在,我们希望通过对该数据进行线性回归,即使用线性模型 :math:`y = ax + b` 来拟合上述数据,此处 ``a`` 和 ``b`` 是待求的参数。
99 |
100 | 首先,我们定义数据,进行基本的归一化操作。
101 |
102 | .. literalinclude:: ../_static/code/zh/basic/example/numpy_manual_grad.py
103 | :lines: 1-7
104 |
105 | 接下来,我们使用梯度下降方法来求线性模型中两个参数 ``a`` 和 ``b`` 的值 [#f1]_。
106 |
107 | 回顾机器学习的基础知识,对于多元函数 :math:`f(x)` 求局部极小值,`梯度下降 `_ 的过程如下:
108 |
109 | * 初始化自变量为 :math:`x_0` , :math:`k=0`
110 | * 迭代进行下列步骤直到满足收敛条件:
111 |
112 | * 求函数 :math:`f(x)` 关于自变量的梯度 :math:`\nabla f(x_k)`
113 | * 更新自变量: :math:`x_{k+1} = x_{k} - \gamma \nabla f(x_k)` 。这里 :math:`\gamma` 是学习率(也就是梯度下降一次迈出的“步子”大小)
114 | * :math:`k \leftarrow k+1`
115 |
116 | 接下来,我们考虑如何使用程序来实现梯度下降方法,求得线性回归的解 :math:`\min_{a, b} L(a, b) = \sum_{i=1}^n(ax_i + b - y_i)^2` 。
117 |
118 | NumPy
119 | -----------------------
120 |
121 | 机器学习模型的实现并不是TensorFlow的专利。事实上,对于简单的模型,即使使用常规的科学计算库或者工具也可以求解。在这里,我们使用NumPy这一通用的科学计算库来实现梯度下降方法。NumPy提供了多维数组支持,可以表示向量、矩阵以及更高维的张量。同时,也提供了大量支持在多维数组上进行操作的函数(比如下面的 ``np.dot()`` 是求内积, ``np.sum()`` 是求和)。在这方面,NumPy和MATLAB比较类似。在以下代码中,我们手工求损失函数关于参数 ``a`` 和 ``b`` 的偏导数 [#f2]_,并使用梯度下降法反复迭代,最终获得 ``a`` 和 ``b`` 的值。
122 |
123 | .. literalinclude:: ../_static/code/zh/basic/example/numpy_manual_grad.py
124 | :lines: 9-
125 |
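   | 为便于直接在源码中阅读,下面给出与上述描述一致的简化示意(实际代码以上方包含的 ``numpy_manual_grad.py`` 为准,变量名与细节仅作示意):
   |
   | .. code-block:: python
   |
   |     import numpy as np
   |
   |     # 数据与归一化同前文(2013-2017年房价)
   |     X_raw = np.array([2013, 2014, 2015, 2016, 2017], dtype=np.float32)
   |     y_raw = np.array([12000, 14000, 15000, 16500, 17500], dtype=np.float32)
   |     X = (X_raw - X_raw.min()) / (X_raw.max() - X_raw.min())
   |     y = (y_raw - y_raw.min()) / (y_raw.max() - y_raw.min())
   |
   |     a, b = 0., 0.
   |     learning_rate = 1e-3
   |     for e in range(10000):
   |         y_pred = a * X + b
   |         # 手工求损失函数 L = 1/2 * sum((a*x_i + b - y_i)^2) 关于 a、b 的偏导数
   |         grad_a = np.sum((y_pred - y) * X)   # 亦可写作 np.dot(y_pred - y, X)
   |         grad_b = np.sum(y_pred - y)
   |         # 梯度下降:沿负梯度方向更新参数
   |         a, b = a - learning_rate * grad_a, b - learning_rate * grad_b
   |     print(a, b)
   |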
126 | 然而,你或许已经可以注意到,使用常规的科学计算库实现机器学习模型有两个痛点:
127 |
128 | - 经常需要手工求函数关于参数的偏导数。如果是简单的函数或许还好,但一旦函数的形式变得复杂(尤其是深度学习模型),手工求导的过程将变得非常痛苦,甚至不可行。
129 | - 经常需要手工根据求导的结果更新参数。这里使用了最基础的梯度下降方法,因此参数的更新还较为容易。但如果使用更加复杂的参数更新方法(例如Adam或者Adagrad),这个更新过程的编写同样会非常繁杂。
130 |
131 | 而TensorFlow等深度学习框架的出现很大程度上解决了这些痛点,为机器学习模型的实现带来了很大的便利。
132 |
133 | TensorFlow
134 | --------------------------------------------------------
135 |
136 | TensorFlow的 **Eager Execution(动态图)模式** [#f4]_ 与上述NumPy的运行方式十分类似,然而提供了更快速的运算(GPU支持)、自动求导、优化器等一系列对深度学习非常重要的功能。以下展示了如何使用TensorFlow计算线性回归。可以注意到,程序的结构和前述NumPy的实现非常类似。这里,TensorFlow帮助我们做了两件重要的工作:
137 |
138 | * 使用 ``tape.gradient(ys, xs)`` 自动计算梯度;
139 | * 使用 ``optimizer.apply_gradients(grads_and_vars)`` 自动更新模型参数。
140 |
141 | .. literalinclude:: ../_static/code/zh/basic/example/tensorflow_eager_autograd.py
142 | :lines: 12-29
143 |
144 | 在这里,我们使用了前文的方式计算了损失函数关于参数的偏导数。同时,使用 ``tf.train.GradientDescentOptimizer(learning_rate=1e-3)`` 声明了一个梯度下降 **优化器** (Optimizer),其学习率为1e-3。优化器可以帮助我们根据计算出的求导结果更新模型参数,从而最小化某个特定的损失函数,具体使用方式是调用其 ``apply_gradients()`` 方法。
145 |
146 | 注意到这里,更新模型参数的方法 ``optimizer.apply_gradients()`` 需要提供参数 ``grads_and_vars``,即待更新的变量(如上述代码中的 ``variables`` )及损失函数关于这些变量的偏导数(如上述代码中的 ``grads`` )。具体而言,这里需要传入一个Python列表(List),列表中的每个元素是一个(变量的偏导数,变量)对。比如这里是 ``[(grad_w, w), (grad_b, b)]`` 。我们通过 ``grads = tape.gradient(loss, variables)`` 求出tape中记录的 ``loss`` 关于 ``variables = [w, b]`` 中每个变量的偏导数,也就是 ``grads = [grad_w, grad_b]``,再使用Python的 ``zip()`` 函数将 ``grads = [grad_w, grad_b]`` 和 ``vars = [w, b]`` 拼装在一起,就可以组合出所需的参数了。
147 |
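   | 与上述说明对应的两行写法示意如下(接上方包含的代码,仅作示意):
   |
   | .. code-block:: python
   |
   |     grads = tape.gradient(loss, variables)                           # 即 [grad_w, grad_b]
   |     optimizer.apply_gradients(grads_and_vars=zip(grads, variables))  # 即 [(grad_w, w), (grad_b, b)]
   |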
148 | 在实际应用中,我们编写的模型往往比这里一行就能写完的线性模型 ``y_pred = tf.matmul(X, w) + b`` 要复杂得多。所以,我们往往会编写一个模型类,然后在需要调用的时候使用 ``y_pred = model(X)`` 进行调用。关于模型类的编写方式可见 :doc:`下章 `。
149 |
150 | ..
151 | 本章小结
152 | ^^^^^^^^^^^^^^^^^^^^^^^
153 |
154 |
155 | .. [#f0] Python中可以使用整数后加小数点表示将该整数定义为浮点数类型。例如 ``3.`` 代表浮点数 ``3.0``。
156 | .. [#f3] 主要可以参考 `Tensor Transformations `_ 和 `Math `_ 两个页面。可以注意到,TensorFlow的张量操作API在形式上和Python下流行的科学计算库NumPy非常类似,如果对后者有所了解的话可以快速上手。
157 | .. [#f1] 其实线性回归是有解析解的。这里使用梯度下降方法只是为了展示TensorFlow的运作方式。
158 | .. [#f2] 此处的损失函数为均方差 :math:`L(a, b) = \frac{1}{2} \sum_{i=1}^5 (ax_i + b - y_i)^2`。其关于参数 ``a`` 和 ``b`` 的偏导数为 :math:`\frac{\partial L}{\partial a} = \sum_{i=1}^5 (ax_i + b - y_i) x_i`,:math:`\frac{\partial L}{\partial b} = \sum_{i=1}^5 (ax_i + b - y_i)`。
159 | .. [#f4] 与Eager Execution相对的是Graph Execution(静态图)模式,即TensorFlow在2018年3月的1.8版本发布之前所主要使用的模式。本手册以面向快速迭代开发的动态模式为主,但会在附录中介绍静态图模式的基本使用,供需要的读者查阅。
160 |
161 | ..
162 | 张量(变量、常量与占位符)
163 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
164 |
165 | 会话与计算图
166 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
167 |
168 | 自动求导与优化器
169 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
170 |
171 | 变量的范围(Scope)
172 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
173 | .. https://tensorflow.google.cn/versions/master/api_docs/python/tf/variable_scope
174 |
175 | 保存、恢复和持久化
176 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
--------------------------------------------------------------------------------
/source/zh/extended.rst:
--------------------------------------------------------------------------------
1 | TensorFlow扩展
2 | ================
3 |
4 | 本章介绍一些最为常用的TensorFlow扩展功能。虽然这些功能称不上“必须”,但能让模型训练和调用的过程更加方便。
5 |
6 | 前置知识:
7 |
8 | * `Python的序列化模块Pickle `_ (非必须)
9 | * `Python的特殊函数参数**kwargs `_ (非必须)
10 |
11 | Checkpoint:变量的保存与恢复
12 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
13 |
14 | 很多时候,我们希望在模型训练完成后能将训练好的参数(变量)保存起来。在需要使用模型的其他地方载入模型和参数,就能直接得到训练好的模型。可能你第一个想到的是用Python的序列化模块 ``pickle`` 存储 ``model.variables``。但不幸的是,TensorFlow的变量类型 ``ResourceVariable`` 并不能被序列化。
15 |
16 | 好在TensorFlow提供了 `tf.train.Checkpoint `_ 这一强大的变量保存与恢复类,可以使用其 ``save()`` 和 ``restore()`` 方法将TensorFlow中所有包含Checkpointable State的对象进行保存和恢复。具体而言,``tf.train.Optimizer`` 实现, ``tf.Variable``, ``tf.keras.Layer`` 实现或者 ``tf.keras.Model`` 实现都可以被保存。其使用方法非常简单,我们首先声明一个Checkpoint:
17 |
18 | .. code-block:: python
19 |
20 | checkpoint = tf.train.Checkpoint(model=model)
21 |
22 | 这里 ``tf.train.Checkpoint()`` 接受的初始化参数比较特殊,是一个 ``**kwargs`` 。具体而言,是一系列的键值对,键名可以随意取,值为需要保存的对象。例如,如果我们希望保存一个继承 ``tf.keras.Model`` 的模型实例 ``model`` 和一个继承 ``tf.train.Optimizer`` 的优化器 ``optimizer`` ,我们可以这样写:
23 |
24 | .. code-block:: python
25 |
26 | checkpoint = tf.train.Checkpoint(myAwesomeModel=model, myAwesomeOptimizer=optimizer)
27 |
28 | 这里 ``myAwesomeModel`` 是我们为待保存的模型 ``model`` 所取的任意键名。注意,在恢复变量的时候,我们还将使用这一键名。
29 |
30 | 接下来,当模型训练完成需要保存的时候,使用:
31 |
32 | .. code-block:: python
33 |
34 | checkpoint.save(save_path_with_prefix)
35 |
36 | 就可以。 ``save_path_with_prefix`` 是保存文件的目录+前缀。例如,在源代码目录建立一个名为save的文件夹并调用一次 ``checkpoint.save('./save/model.ckpt')`` ,我们就可以在save目录下发现名为 ``checkpoint`` 、 ``model.ckpt-1.index`` 、 ``model.ckpt-1.data-00000-of-00001`` 的三个文件,这些文件就记录了变量信息。``checkpoint.save()`` 方法可以运行多次,每运行一次都会得到一个.index文件和.data文件,序号依次累加。
37 |
38 | 当在其他地方需要为模型重新载入之前保存的参数时,需要再次实例化一个checkpoint,同时保持键名的一致。再调用checkpoint的restore方法。就像下面这样:
39 |
40 | .. code-block:: python
41 |
42 | model_to_be_restored = MyModel() # 待恢复参数的同一模型
43 | checkpoint = tf.train.Checkpoint(myAwesomeModel=model_to_be_restored) # 键名保持为“myAwesomeModel”
44 | checkpoint.restore(save_path_with_prefix_and_index)
45 |
46 | 即可恢复模型变量。 ``save_path_with_prefix_and_index`` 是之前保存的文件的目录+前缀+编号。例如,调用 ``checkpoint.restore('./save/model.ckpt-1')`` 就可以载入前缀为 ``model.ckpt`` ,序号为1的文件来恢复模型。
47 |
48 | 当保存了多个文件时,我们往往想载入最近的一个。可以使用 ``tf.train.latest_checkpoint(save_path)`` 这个辅助函数返回目录下最近一次checkpoint的文件名。例如如果save目录下有 ``model.ckpt-1.index`` 到 ``model.ckpt-10.index`` 的10个保存文件, ``tf.train.latest_checkpoint('./save')`` 即返回 ``./save/model.ckpt-10`` 。
49 |
50 | 总体而言,恢复与保存变量的典型代码框架如下:
51 |
52 | .. code-block:: python
53 |
54 | # train.py 模型训练阶段
55 |
56 | model = MyModel()
57 | checkpoint = tf.train.Checkpoint(myModel=model) # 实例化Checkpoint,指定保存对象为model(如果需要保存Optimizer的参数也可加入)
58 | # 模型训练代码
59 | checkpoint.save('./save/model.ckpt') # 模型训练完毕后将参数保存到文件,也可以在模型训练过程中每隔一段时间就保存一次
60 |
61 | .. code-block:: python
62 |
63 | # test.py 模型使用阶段
64 |
65 | model = MyModel()
66 | checkpoint = tf.train.Checkpoint(myModel=model) # 实例化Checkpoint,指定恢复对象为model
67 | checkpoint.restore(tf.train.latest_checkpoint('./save')) # 从文件恢复模型参数
68 | # 模型使用代码
69 |
70 | 顺便一提, ``tf.train.Checkpoint`` 与以前版本常用的 ``tf.train.Saver`` 相比,强大之处在于其支持在Eager Execution下"延迟"恢复变量。具体而言,当调用了 ``checkpoint.restore()`` ,但模型中的变量还没有被建立的时候,Checkpoint可以等到变量被建立的时候再进行数值的恢复。Eager Execution下,模型中各个层的初始化和变量的建立是在模型第一次被调用的时候才进行的(好处在于可以根据输入的张量形状而自动确定变量形状,无需手动指定)。这意味着当模型刚刚被实例化的时候,其实里面还一个变量都没有,这时候使用以往的方式去恢复变量数值是一定会报错的。比如,你可以试试在train.py调用 ``tf.keras.Model`` 的 ``save_weights()`` 方法保存model的参数,并在test.py中实例化model后立即调用 ``load_weights()`` 方法,就会出错,只有当调用了一遍model之后再运行 ``load_weights()`` 方法才能得到正确的结果。可见, ``tf.train.Checkpoint`` 在这种情况下可以给我们带来相当大的便利。另外, ``tf.train.Checkpoint`` 同时也支持Graph Execution模式。
71 |
72 | 最后提供一个实例,以前章的 :ref:`多层感知机模型 ` 为例展示模型变量的保存和载入:
73 |
74 | .. literalinclude:: ../_static/code/zh/extended/save_and_restore/mnist.py
75 |
76 | 在代码目录下建立save文件夹并运行代码进行训练后,save文件夹内将会存放每隔100个batch保存一次的模型变量数据。将第7行改为 ``mode = 'test'`` 并再次运行代码,将直接使用最后一次保存的变量值恢复模型并在测试集上测试模型性能,可以直接获得95%左右的准确率。
77 |
78 | ..
79 | AutoGraph:动态图转静态图 *
80 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
81 |
82 | `AutoGraph `_
83 |
84 | SavedModel:模型的封装 *
85 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
86 |
87 |
88 | TensorBoard:训练过程可视化
89 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
90 |
91 | 有时,你希望查看模型训练过程中各个参数的变化情况(例如损失函数loss的值)。虽然可以通过命令行输出来查看,但有时显得不够直观。而TensorBoard就是一个能够帮助我们将训练过程可视化的工具。
92 |
93 | 目前,Eager Execution模式下的TensorBoard支持尚在 `tf.contrib.summary `_ 内,可能以后会有较多变化,因此这里只做简单示例。首先在代码目录下建立一个文件夹(如./tensorboard)存放TensorBoard的记录文件,并在代码中实例化一个记录器:
94 |
95 | .. code-block:: python
96 |
97 | summary_writer = tf.contrib.summary.create_file_writer('./tensorboard')
98 |
99 | 接下来,将训练的代码部分通过with语句放在 ``summary_writer.as_default()`` 和 ``tf.contrib.summary.always_record_summaries()`` 的上下文中,并对需要记录的参数(一般是scalar)运行 ``tf.contrib.summary.scalar(name, tensor, step=batch_index)`` 即可。这里的step参数可根据自己的需要自行制定,一般可设置为当前训练过程中的batch序号。整体框架如下:
100 |
101 | .. code-block:: python
102 |
103 | summary_writer = tf.contrib.summary.create_file_writer('./tensorboard')
104 | with summary_writer.as_default(), tf.contrib.summary.always_record_summaries():
105 | # 开始模型训练
106 | for batch_index in range(num_batches):
107 | # 训练代码,当前batch的损失值放入变量loss中
108 | tf.contrib.summary.scalar("loss", loss, step=batch_index)
109 | tf.contrib.summary.scalar("MyScalar", my_scalar, step=batch_index) # 还可以添加其他自定义的变量
110 |
111 | 每运行一次 ``tf.contrib.summary.scalar()`` ,记录器就会向记录文件中写入一条记录。除了最简单的标量(scalar)以外,TensorBoard还可以对其他类型的数据(如图像,音频等)进行可视化,详见 `API文档 `_ 。
112 |
113 | 当我们要对训练过程可视化时,在代码目录打开终端(如需要的话进入TensorFlow的conda环境),运行::
114 |
115 | tensorboard --logdir=./tensorboard
116 |
117 | 然后使用浏览器访问命令行程序所输出的网址(一般是http://计算机名称:6006),即可访问TensorBoard的可视界面,如下图所示:
118 |
119 | .. figure:: ../_static/image/extended/tensorboard.png
120 | :width: 100%
121 | :align: center
122 |
123 | 默认情况下,TensorBoard每30秒更新一次数据。不过也可以点击右上角的刷新按钮手动刷新。
124 |
125 | TensorBoard的使用有以下注意事项:
126 |
127 | * 如果需要重新训练,需要删除掉记录文件夹内的信息并重启TensorBoard(或者建立一个新的记录文件夹并开启TensorBoard, ``--logdir`` 参数设置为新建立的文件夹);
128 | * 记录文件夹目录保持全英文。
129 |
130 | 最后提供一个实例,以前章的 :ref:`多层感知机模型 ` 为例展示TensorBoard的使用:
131 |
132 | .. literalinclude:: ../_static/code/zh/extended/tensorboard/mnist.py
133 |
134 | GPU的使用与分配
135 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
136 |
137 | 很多时候的场景是:实验室/公司研究组里有许多学生/研究员都需要使用GPU,但多卡的机器只有一台,这时就需要注意如何分配显卡资源。
138 |
139 | 命令 ``nvidia-smi`` 可以查看机器上现有的GPU及使用情况(在Windows下,将 ``C:\Program Files\NVIDIA Corporation\NVSMI`` 加入Path环境变量中即可,或Windows 10下可使用任务管理器的“性能”标签查看显卡信息)。
140 |
141 | 使用环境变量 ``CUDA_VISIBLE_DEVICES`` 可以控制程序所使用的GPU。假设发现四卡的机器上显卡0,1使用中,显卡2,3空闲,Linux终端输入::
142 |
143 | export CUDA_VISIBLE_DEVICES=2,3
144 |
145 | 或在代码中加入
146 |
147 | .. code-block:: python
148 |
149 | import os
150 | os.environ['CUDA_VISIBLE_DEVICES'] = "2,3"
151 |
152 | 即可指定程序只在显卡2,3上运行。
153 |
154 | 默认情况下,TensorFlow将使用几乎所有可用的显存,以避免内存碎片化所带来的性能损失。可以通过 ``tf.ConfigProto`` 类来设置TensorFlow使用显存的策略。具体方式是实例化一个 ``tf.ConfigProto`` 类,设置参数,并在运行 ``tf.enable_eager_execution()`` 时指定Config参数。以下代码通过 ``allow_growth`` 选项设置TensorFlow仅在需要时申请显存空间:
155 |
156 | .. code-block:: python
157 |
158 | config = tf.ConfigProto()
159 | config.gpu_options.allow_growth = True
160 | tf.enable_eager_execution(config=config)
161 |
162 | 以下代码通过 ``per_process_gpu_memory_fraction`` 选项设置TensorFlow固定消耗40%的GPU显存:
163 |
164 | .. code-block:: python
165 |
166 | config = tf.ConfigProto()
167 | config.gpu_options.per_process_gpu_memory_fraction = 0.4
168 | tf.enable_eager_execution(config=config)
169 |
170 | Graph Execution下,也可以在实例化新的session时传入 ``tf.ConfigProto`` 类来进行设置。
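   |
   | 例如(沿用上面定义的 ``config`` ,仅作示意):
   |
   | .. code-block:: python
   |
   |     sess = tf.Session(config=config)  # 该会话将按照上述设置使用显存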
171 |
--------------------------------------------------------------------------------
/source/zh/installation.rst:
--------------------------------------------------------------------------------
1 | TensorFlow安装
2 | ================
3 |
4 | TensorFlow的最新安装步骤可参考官方网站上的说明(https://tensorflow.google.cn/install)。TensorFlow支持Python、Java、Go、C等多种编程语言以及Windows、OSX、Linux等多种操作系统,此处及后文均以主流的Python语言为准。
5 |
6 | 以下提供简易安装和正式安装两种途径,供不同层级的读者选用。
7 |
8 | 简易安装
9 | ^^^^^^^^^^^^
10 | 如果只是安装一个运行在自己电脑上的,无需GPU的简易环境,不希望在环境配置上花费太多精力,建议按以下步骤安装(以Windows系统为例):
11 |
12 | - 下载并安装Python集成环境 `Anaconda `_ (Python 3.6版本);
13 | - 下载并安装Python的IDE `PyCharm `_ (Community版本,或学生可申请Professional版本的 `免费授权 `_);
14 | - 打开开始菜单中的“Anaconda Prompt”,输入 ``pip install tensorflow``;
15 | - 启动PyCharm,新建工程(使用默认python interpreter),在工程内新建一个Python文件。
16 |
17 | 完毕。
18 |
19 | 正式安装
20 | ^^^^^^^^^^^^
21 | 该部分包含了更多安装上的细节(如建立conda环境),以及GPU版本TensorFlow的环境配置方法。
22 |
23 | 安装前的环境配置
24 | -------------------------------
25 | 正式安装TensorFlow前,需要为其配置合适的环境。步骤如下:
26 |
27 | 1. 检查自己的电脑是否具有NVIDIA显卡。如有,建议安装GPU版本的TensorFlow,以利用GPU强大的计算加速能力 [#f1]_ ,否则可以安装CPU版本。具体而言,该显卡的CUDA Compute Capability须不低于3.0,可以到 `NVIDIA的官方网站 `_ 查询自己所用显卡的CUDA Compute Capability;
28 | 2. 安装Python环境。此处建议安装Anaconda,这是一个开源的Python发行版本,提供了一个完整的科学计算环境,包括NumPy、SciPy等常用科学计算库。当然,你有权选择自己喜欢的Python环境。注意截至本手册撰写时,TensorFlow在Windows下的安装仅支持Python 3.X版本;
29 |
30 | * 安装Anaconda时,可以选择将Anaconda目录添加到系统的PATH中(虽然安装程序不推荐这样做),这样可以直接在命令行环境下使用Anaconda的各项功能。当然,不添加的话也可以使用开始菜单中的Anaconda Prompt进入命令行的Anaconda环境。
31 |
32 | 3. (针对GPU版本)安装NVIDIA显卡驱动程序、 `CUDA Toolkit `_ 和 `cuDNN `_ 。值得注意的事项有:
33 |
34 | * 建议的顺序是:先安装最新版NVIDIA显卡驱动程序,再安装CUDA(安装时不要选择同时安装驱动),最后安装cuDNN。CUDA附带的显卡驱动程序可能过旧;
35 | * 在Ubuntu下有一个很简易的驱动安装方法:在系统设置(System Setting)里面选软件与更新(Software & Updates),然后点选Additional Drivers里面的“Using NVIDIA binary driver”选项并点选右下角的“Apply Changes”即可,系统即会自动安装NVIDIA驱动。否则,NVIDIA显卡驱动程序在Linux系统上的安装往往不会一帆风顺,注意在安装前禁用系统自带的开源显卡驱动Nouveau、禁用主板的Secure Boot功能。更详细的指导可以参考 `这篇文章 `_ ;
36 | * CUDA Toolkit和cuDNN的版本一定要与TensorFlow官方网站安装说明的版本一致,注意官方网站安装说明里要求安装的版本可能并非最新版本;
37 | * cuDNN的安装方式比较特殊,你需要手动将下载的安装包复制到CUDA的安装目录下。
38 |
39 | 安装
40 | ----------------
41 |
42 | 在Anaconda环境下的安装过程如下(以Windows系统为例):
43 |
44 | 1. 新建一个叫做 ``tensorflow`` 的conda环境
45 |
46 | ::
47 |
48 | conda create -n tensorflow python=X.X # 注意这里的X.X填写自己Python环境的版本,例如3.6
49 |
50 | 2. 激活环境
51 |
52 | ::
53 |
54 | activate tensorflow
55 |
56 | 3. 使用pip安装TensorFlow
57 |
58 | 安装CPU版本
59 | ::
60 |
61 | pip install tensorflow
62 |
63 | 安装GPU版本
64 | ::
65 |
66 | pip install tensorflow-gpu
67 |
68 | 如有需要,也可以安装TensorFlow的Nightly版本,该版本较之于正式版本会具有一些最新的特性(例如在TensorFlow 1.8版本以前,本手册主要使用的Eager Execution模式只在Nightly版本中提供),然而稳定度可能稍弱。在一个新的虚拟环境里运行 ``pip install tf-nightly`` (CPU版本)或 ``pip install tf-nightly-gpu`` (GPU版本)即可。注意,若安装GPU版本,其往往要求安装比正式版要求中更新的CUDA和cuDNN。好在CUDA和cuDNN的不同版本是可以共存的。
69 |
70 | 如果使用pip命令安装速度较慢,可以尝试 `清华大学开源软件镜像站的TensorFlow镜像 `_。
71 |
72 | 第一个程序
73 | ^^^^^^^^^^^^^^^
74 |
75 | 安装完毕后,我们来编写一个简单的程序来验证安装。
76 |
77 | 在命令行下输入 ``activate tensorflow`` 进入之前建立的安装有TensorFlow的conda环境,再输入 ``python`` 进入Python环境,逐行输入以下代码:
78 |
79 | .. code-block:: python
80 |
81 | import tensorflow as tf
82 | tf.enable_eager_execution()
83 |
84 | A = tf.constant([[1, 2], [3, 4]])
85 | B = tf.constant([[5, 6], [7, 8]])
86 | C = tf.matmul(A, B)
87 |
88 | print(C)
89 |
90 | 如果能够最终输出::
91 |
92 | tf.Tensor(
93 | [[19 22]
94 | [43 50]], shape=(2, 2), dtype=int32)
95 |
96 | 说明TensorFlow已安装成功。运行途中可能会输出一些TensorFlow的提示信息,属于正常现象。
97 |
98 | 此处使用的是Python语言,关于Python语言的入门教程可以参考 http://www.runoob.com/python3/python3-tutorial.html 或 https://www.liaoxuefeng.com ,本手册之后将默认读者拥有Python语言的基本知识。不用紧张,Python语言易于上手,而TensorFlow本身也不会用到Python语言的太多高级特性。关于Python的IDE,建议使用 `PyCharm `_ 。如果你是学生并有.edu结尾的邮箱的话,可以在 `这里 `_ 申请免费的授权。如果没有,也可以下载社区版本的PyCharm,主要功能差别不大。
99 |
100 | .. [#f1] GPU加速的效果与模型类型和GPU的性能有关,如果CPU性能较高,但GPU仅有入门级的性能,其实速度提升不大,大概1-2倍。不过如果GPU性能强大的话(例如,本手册写作时,NVIDIA GeForce GTX 1080 Ti或NVIDIA GeForce TITAN系列是市场上性能较强大的显卡型号),对于特定模型,十几倍甚至更高的加速效果也是可以达到的。同时,GPU的加速效果与任务本身也有关。入门级的TensorFlow模型往往不需要太高的计算性能,CPU版本的TensorFlow足以胜任,因此可以待到掌握TensorFlow的基本知识后,再决定是否购入更高级的GPU以得到更快的训练速度。
101 |
102 | 升级到新版本
103 | ^^^^^^^^^^^^^^^^^^^^^
104 |
105 | TensorFlow的版本频繁更新,如果希望升级当前的TensorFlow版本,请进入安装有TensorFlow的conda环境下输入
106 |
107 | ::
108 |
109 | pip install tensorflow --upgrade
110 |
111 | 如果你想安装特定版本的TensorFlow,请输入
112 |
113 | ::
114 |
115 | pip install tensorflow==1.8.0 # 1.8.0为指定版本
116 |
117 | 升级有风险,可能出现升级后TensorFlow导入出错的情况,比较简单的方式是删除当前conda环境后重新安装一遍。以下conda命令可能会有用
118 |
119 | ::
120 |
121 | conda list # 列出当前conda环境下所有package及版本
122 | conda env list # 列出所有conda环境
123 | conda create --name new_env_name --clone old_env_name # 备份当前conda环境`old_env_name`到`new_env_name`
124 | conda env remove -n tensorflow # 删除名为`tensorflow`的conda环境
125 |
--------------------------------------------------------------------------------
/source/zh/models.rst:
--------------------------------------------------------------------------------
1 | TensorFlow模型
2 | ================
3 |
4 | .. _linear:
5 |
6 | 本章介绍如何使用TensorFlow快速搭建动态模型。
7 |
8 | 前置知识:
9 |
10 | * `Python面向对象 `_ (在Python内定义类和方法、类的继承、构造和析构函数,`使用super()函数调用父类方法 `_ ,`使用__call__()方法对实例进行调用 `_ 等);
11 | * 多层感知机、卷积神经网络、循环神经网络和强化学习(每节之前给出参考资料)。
12 |
13 | 模型(Model)与层(Layer)
14 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
15 | .. https://www.tensorflow.org/programmers_guide/eager
16 |
17 | 如上一章所述,为了增强代码的可复用性,我们往往会将模型编写为类,然后在模型调用的地方使用 ``y_pred = model(X)`` 的形式进行调用。 **模型类** 的形式非常简单,主要包含 ``__init__()`` (构造函数,初始化)和 ``call(input)`` (模型调用)两个方法,但也可以根据需要增加自定义的方法。 [#call]_
18 |
19 | .. code-block:: python
20 |
21 | class MyModel(tf.keras.Model):
22 | def __init__(self):
23 | super().__init__() # Python 2 下使用 super(MyModel, self).__init__()
24 | # 此处添加初始化代码(包含call方法中会用到的层)
25 |
26 | def call(self, inputs):
27 | # 此处添加模型调用的代码(处理输入并返回输出)
28 | return output
29 |
30 | 在这里,我们的模型类继承了 ``tf.keras.Model`` 。Keras是一个用Python编写的高级神经网络API,现已得到TensorFlow的官方支持和内置。继承 ``tf.keras.Model`` 的一个好处在于我们可以使用父类的若干方法和属性,例如在实例化类后可以通过 ``model.variables`` 这一属性直接获得模型中的所有变量,免去我们一个个显式指定变量的麻烦。
31 |
32 | 同时,我们引入 **“层”(Layer)** 的概念,层可以视为比模型粒度更细的组件单位,将计算流程和变量进行了封装。我们可以使用层来快速搭建模型。
33 |
34 | 上一章中简单的线性模型 ``y_pred = tf.matmul(X, w) + b`` ,我们可以通过模型类的方式编写如下:
35 |
36 | .. literalinclude:: ../_static/code/zh/model/linear/linear.py
37 |
38 | 这里,我们没有显式地声明 ``w`` 和 ``b`` 两个变量并写出 ``y_pred = tf.matmul(X, w) + b`` 这一线性变换,而是在初始化部分实例化了一个全连接层( ``tf.keras.layers.Dense`` ),并在call方法中对这个层进行调用。全连接层封装了 ``output = activation(tf.matmul(input, kernel) + bias)`` 这一线性变换+激活函数的计算操作,以及 ``kernel`` 和 ``bias`` 两个变量。当不指定激活函数时(即 ``activation(x) = x`` ),这个全连接层就等价于我们上述的线性变换。顺便一提,全连接层可能是我们编写模型时使用最频繁的层。
39 |
40 | 如果我们需要显式地声明自己的变量并使用变量进行自定义运算,请参考 :ref:`自定义层 `。
41 |
42 | .. [#call] 在Python类中,对类的实例 ``myClass`` 进行形如 ``myClass()`` 的调用等价于 ``myClass.__call__()`` 。在这里,我们的模型继承了 ``tf.keras.Model`` 这一父类。该父类中包含 ``__call__()`` 的定义,其中调用了 ``call()`` 方法,同时进行了一些keras的内部操作。这里,我们通过继承 ``tf.keras.Model`` 并重载 ``call()`` 方法,即可在保持keras结构的同时加入模型调用的代码。具体请见本章初“前置知识”的 ``__call__()`` 部分。
43 |
44 | .. _mlp:
45 |
46 | 基础示例:多层感知机(MLP)
47 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
48 |
49 | 我们从编写一个最简单的 `多层感知机 `_ (Multilayer Perceptron, MLP)开始,介绍TensorFlow的模型编写方式。这里,我们使用多层感知机完成MNIST手写体数字图片数据集 [LeCun1998]_ 的分类任务。
50 |
51 | .. figure:: ../_static/image/model/mnist_0-9.png
52 | :align: center
53 |
54 | MNIST手写体数字图片示例
55 |
56 | 先进行预备工作,实现一个简单的 ``DataLoader`` 类来读取MNIST数据集数据。
57 |
58 | .. literalinclude:: ../_static/code/zh/model/mlp/main.py
59 | :lines: 13-23
60 |
61 | 多层感知机的模型类实现与上面的线性模型类似,所不同的地方在于层数增加了(顾名思义,“多层”感知机),以及引入了非线性激活函数(这里使用了 `ReLU函数 `_ , 即下方的 ``activation=tf.nn.relu`` )。该模型输入一个向量(比如这里是拉直的1×784手写体数字图片),输出10维的信号,分别代表这张图片属于0到9的概率。这里我们加入了一个predict方法,对图片对应的数字进行预测。在预测的时候,选择概率最大的数字进行预测输出。
62 |
63 | .. literalinclude:: ../_static/code/zh/model/mlp/mlp.py
64 | :lines: 4-17
65 |
66 | 定义一些模型超参数:
67 |
68 | .. literalinclude:: ../_static/code/zh/model/mlp/main.py
69 | :lines: 8-10
70 |
71 | 实例化模型,数据读取类和优化器:
72 |
73 | .. code-block:: python
74 |
75 | model = MLP()
76 | data_loader = DataLoader()
77 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
78 |
79 | 然后迭代进行以下步骤:
80 |
81 | - 从DataLoader中随机取一批训练数据;
82 | - 将这批数据送入模型,计算出模型的预测值;
83 | - 将模型预测值与真实值进行比较,计算损失函数(loss);
84 | - 计算损失函数关于模型变量的导数;
85 | - 使用优化器更新模型参数以最小化损失函数。
86 |
87 | 具体代码实现如下:
88 |
89 | .. literalinclude:: ../_static/code/zh/model/mlp/main.py
90 | :lines: 32-39
91 |
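   | 与上述步骤对应的训练循环示意如下(实际代码以上方包含的 ``main.py`` 为准;其中 ``data_loader.get_batch()`` 及损失函数的具体写法仅作示意,请以源码为准):
   |
   | .. code-block:: python
   |
   |     for batch_index in range(num_batches):
   |         X, y = data_loader.get_batch(batch_size)           # 1. 随机取一批训练数据
   |         with tf.GradientTape() as tape:
   |             y_logit_pred = model(tf.convert_to_tensor(X))  # 2. 计算模型预测值
   |             loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_logit_pred)  # 3. 计算损失函数
   |         grads = tape.gradient(loss, model.variables)       # 4. 计算损失函数关于模型变量的导数
   |         optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))  # 5. 更新模型参数
   |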
92 | 接下来,我们使用验证集测试模型性能。具体而言,比较验证集上模型预测的结果与真实结果,输出预测正确的样本数占总样本数的比例:
93 |
94 | .. literalinclude:: ../_static/code/zh/model/mlp/main.py
95 | :lines: 41-43
96 |
97 | 输出结果::
98 |
99 | test accuracy: 0.947900
100 |
101 | 可以注意到,使用这样简单的模型,已经可以达到95%左右的准确率。
102 |
103 | 卷积神经网络(CNN)
104 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
105 |
106 | `卷积神经网络 `_ (Convolutional Neural Network, CNN)是一种结构类似于人类或动物的 `视觉系统 `_ 的人工神经网络,包含一个或多个卷积层(Convolutional Layer)、池化层(Pooling Layer)和全连接层(Dense Layer)。具体原理建议可以参考台湾大学李宏毅教授的《机器学习》课程的 `Convolutional Neural Network `_ 一章。
107 |
108 | 具体的实现见下,和MLP很类似,只是新加入了一些卷积层和池化层。
109 |
110 | .. figure:: ../_static/image/model/cnn.png
111 | :align: center
112 |
113 | CNN结构图示
114 |
115 | .. literalinclude:: ../_static/code/zh/model/cnn/cnn.py
116 | :lines: 4-38
117 |
118 | 将前节的 ``model = MLP()`` 更换成 ``model = CNN()`` ,输出如下::
119 |
120 | test accuracy: 0.988100
121 |
122 | 可以发现准确率有非常显著的提高。事实上,通过改变模型的网络结构(比如加入Dropout层防止过拟合),准确率还有进一步提升的空间。
123 |
124 | 循环神经网络(RNN)
125 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
126 |
127 | 循环神经网络(Recurrent Neural Network, RNN)是一种适宜于处理序列数据的神经网络,被广泛用于语言模型、文本生成、机器翻译等。关于RNN的原理,可以参考:
128 |
129 | - `Recurrent Neural Networks Tutorial, Part 1 – Introduction to RNNs `_
130 | - 台湾大学李宏毅教授的《机器学习》课程的 `Recurrent Neural Network (part 1) `_ `Recurrent Neural Network (part 2) `_ 两部分。
131 | - LSTM原理:`Understanding LSTM Networks `_
132 | - RNN序列生成:[Graves2013]_
133 |
134 | 这里,我们使用RNN来进行尼采风格文本的自动生成。 [#rnn_reference]_
135 |
136 | 这个任务的本质其实是预测一段英文文本的接续字母的概率分布。比如,我们有以下句子::
137 |
138 | I am a studen
139 |
140 | 这个句子(序列)一共有13个字符(包含空格)。当我们阅读到这个由13个字符组成的序列后,根据我们的经验,我们可以预测出下一个字符很大概率是“t”。我们希望建立这样一个模型,输入num_batch个由编码后字符组成的,长为seq_length的序列,输入张量形状为[num_batch, seq_length],输出这些序列接续的下一个字符的概率分布,概率分布的维度为字符种类数num_chars,输出张量形状为[num_batch, num_chars]。我们从下一个字符的概率分布中采样作为预测值,然后滚雪球式地生成下两个字符,下三个字符等等,即可完成文本的生成任务。
141 |
142 | 首先,还是实现一个简单的 ``DataLoader`` 类来读取文本,并以字符为单位进行编码。
143 |
144 | .. literalinclude:: ../_static/code/zh/model/rnn/rnn.py
145 | :lines: 31-49
146 |
147 | 接下来进行模型的实现。在 ``__init__`` 方法中我们实例化一个常用的 ``BasicLSTMCell`` 单元,以及一个线性变换用的全连接层。我们首先对序列进行One Hot操作,即将编码i变换为一个n维向量,其第i位为1,其余均为0。这里n为字符种类数num_chars。变换后的序列张量形状为[num_batch, seq_length, num_chars]。接下来,我们将序列从头到尾依序送入RNN单元,即将当前时间t的RNN单元状态 ``state`` 和t时刻的序列 ``inputs[:, t, :]`` 送入RNN单元,得到当前时间的输出 ``output`` 和下一个时间t+1的RNN单元状态。取RNN单元最后一次的输出,通过全连接层变换到num_chars维,即作为模型的输出。
148 |
149 | .. figure:: ../_static/image/model/rnn_single.jpg
150 | :width: 30%
151 | :align: center
152 |
153 | ``output, state = self.cell(inputs[:, t, :], state)`` 图示
154 |
155 | .. figure:: ../_static/image/model/rnn.jpg
156 | :width: 50%
157 | :align: center
158 |
159 | RNN流程图示
160 |
161 | 具体实现如下:
162 |
163 | .. literalinclude:: ../_static/code/zh/model/rnn/rnn.py
164 | :lines: 7-21
165 |
166 | 训练过程与前节基本一致,在此复述:
167 |
168 | - 从DataLoader中随机取一批训练数据;
169 | - 将这批数据送入模型,计算出模型的预测值;
170 | - 将模型预测值与真实值进行比较,计算损失函数(loss);
171 | - 计算损失函数关于模型变量的导数;
172 | - 使用优化器更新模型参数以最小化损失函数。
173 |
174 | .. literalinclude:: ../_static/code/zh/model/rnn/rnn.py
175 | :lines: 59-69
176 |
177 | 关于文本生成的过程有一点需要特别注意。之前,我们一直使用 ``tf.argmax()`` 函数,将对应概率最大的值作为预测值。然而对于文本生成而言,这样的预测方式过于绝对,会使得生成的文本失去丰富性。于是,我们使用 ``np.random.choice()`` 函数按照生成的概率分布取样。这样,即使是对应概率较小的字符,也有机会被取样到。同时,我们加入一个 ``temperature`` 参数控制分布的形状,参数值越大则分布越平缓(最大值和最小值的差值越小),生成文本的丰富度越高;参数值越小则分布越陡峭,生成文本的丰富度越低。
178 |
179 | .. literalinclude:: ../_static/code/zh/model/rnn/rnn.py
180 | :lines: 23-28
181 |
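   | 一种常见的按 ``temperature`` 取样的写法示意如下(实际实现以上方包含的 ``rnn.py`` 为准,此处函数与变量名仅作示意,并假设 ``prob`` 为各字符的概率分布且各项均大于0):
   |
   | .. code-block:: python
   |
   |     import numpy as np
   |
   |     def sample_from(prob, temperature=1.0):
   |         # 将概率取对数后除以temperature再重新归一化:temperature越大,分布越平缓
   |         prob = np.exp(np.log(prob) / temperature)
   |         prob = prob / np.sum(prob)
   |         return np.random.choice(len(prob), p=prob)
   |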
182 | 通过这种方式进行“滚雪球”式的连续预测,即可得到生成文本。
183 |
184 | .. literalinclude:: ../_static/code/zh/model/rnn/rnn.py
185 | :lines: 71-78
186 |
187 | 生成的文本如下::
188 |
189 | diversity 0.200000:
190 | conserted and conseive to the conterned to it is a self--and seast and the selfes as a seast the expecience and and and the self--and the sered is a the enderself and the sersed and as a the concertion of the series of the self in the self--and the serse and and the seried enes and seast and the sense and the eadure to the self and the present and as a to the self--and the seligious and the enders
191 |
192 | diversity 0.500000:
193 | can is reast to as a seligut and the complesed
194 | has fool which the self as it is a the beasing and us immery and seese for entoured underself of the seless and the sired a mears and everyther to out every sone thes and reapres and seralise as a streed liees of the serse to pease the cersess of the selung the elie one of the were as we and man one were perser has persines and conceity of all self-el
195 |
196 | diversity 1.000000:
197 | entoles by
198 | their lisevers de weltaale, arh pesylmered, and so jejurted count have foursies as is
199 | descinty iamo; to semplization refold, we dancey or theicks-welf--atolitious on his
200 | such which
201 | here
202 | oth idey of pire master, ie gerw their endwit in ids, is an trees constenved mase commars is leed mad decemshime to the mor the elige. the fedies (byun their ope wopperfitious--antile and the it as the f
203 |
204 | diversity 1.200000:
205 | cain, elvotidue, madehoublesily
206 | inselfy!--ie the rads incults of to prusely le]enfes patuateded:.--a coud--theiritibaior "nrallysengleswout peessparify oonsgoscess teemind thenry ansken suprerial mus, cigitioum: 4reas. whouph: who
207 | eved
208 | arn inneves to sya" natorne. hag open reals whicame oderedte,[fingo is
209 | zisternethta simalfule dereeg hesls lang-lyes thas quiin turjentimy; periaspedey tomm--whach
210 |
211 | .. [#rnn_reference] 此处的任务及实现参考了 https://github.com/keras-team/keras/blob/master/examples/lstm_text_generation.py
212 |
213 | 深度强化学习(DRL)
214 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
215 |
216 | `强化学习 `_ (Reinforcement learning,RL)强调如何基于环境而行动,以取得最大化的预期利益。结合了深度学习技术后的强化学习更是如虎添翼。这两年广为人知的AlphaGo即是深度强化学习的典型应用。深度强化学习的基础知识可参考:
217 |
218 | - `Demystifying Deep Reinforcement Learning `_ (`中文编译 `_)
219 | - [Mnih2013]_
220 |
221 | 这里,我们使用深度强化学习玩CartPole(平衡杆)游戏。简单说,我们需要让模型控制杆的左右运动,以让其一直保持竖直平衡状态。
222 |
223 | .. only:: html
224 |
225 | .. figure:: ../_static/image/model/cartpole.gif
226 | :width: 500
227 | :align: center
228 |
229 | CartPole游戏
230 |
231 | .. only:: latex
232 |
233 | .. figure:: ../_static/image/model/cartpole.png
234 | :width: 500
235 | :align: center
236 |
237 | CartPole游戏
238 |
239 | 我们使用 `OpenAI推出的Gym环境库 `_ 中的CartPole游戏环境,具体安装步骤和教程可参考 `官方文档 `_ 和 `这里 `_ 。Gym的基本调用方法如下:
240 |
241 | .. code-block:: python
242 |
243 | import gym
244 |
245 | env = gym.make('CartPole-v1') # 实例化一个游戏环境,参数为游戏名称
246 | state = env.reset() # 初始化环境,获得初始状态
247 | while True:
248 | env.render() # 对当前帧进行渲染,绘图到屏幕
249 | action = model.predict(state) # 假设我们有一个训练好的模型,能够通过当前状态预测出这时应该进行的动作
250 | next_state, reward, done, info = env.step(action) # 让环境执行动作,获得执行完动作的下一个状态,动作的奖励,游戏是否已结束以及额外信息
251 | if done: # 如果游戏结束则退出循环
252 | break
253 |
254 | 那么,我们的任务就是训练出一个模型,能够根据当前的状态预测出应该进行的一个好的动作。粗略地说,一个好的动作应当能够最大化整个游戏过程中获得的奖励之和,这也是强化学习的目标。
255 |
256 | 以下代码展示了如何使用深度强化学习中的Deep Q-Learning方法来训练模型。
257 |
258 | .. literalinclude:: ../_static/code/zh/model/rl/rl.py
259 |
260 | .. _custom_layer:
261 |
262 | 自定义层 *
263 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
264 |
265 | 可能你还会问,如果现有的这些层无法满足我的要求,我需要定义自己的层怎么办?
266 |
267 | 事实上,我们不仅可以继承 ``tf.keras.Model`` 编写自己的模型类,也可以继承 ``tf.keras.layers.Layer`` 编写自己的层。
268 |
269 | .. code-block:: python
270 |
271 | class MyLayer(tf.keras.layers.Layer):
272 | def __init__(self):
273 | super().__init__()
274 | # 初始化代码
275 |
276 | def build(self, input_shape): # input_shape 是一个 TensorShape 类型对象,提供输入的形状
277 | # 在第一次使用该层的时候调用该部分代码,在这里创建变量可以使得变量的形状自适应输入的形状
278 | # 而不需要使用者额外指定变量形状。
279 | # 如果已经可以完全确定变量的形状,也可以在__init__部分创建变量
280 | self.variable_0 = self.add_variable(...)
281 | self.variable_1 = self.add_variable(...)
282 |
283 | def call(self, input):
284 | # 模型调用的代码(处理输入并返回输出)
285 | return output
286 |
287 | 例如,如果我们要自己实现一个 :ref:`本章第一节 ` 中的全连接层,但指定输出维度为1,可以按如下方式编写,在 ``build`` 方法中创建两个变量,并在 ``call`` 方法中使用创建的变量进行运算:
288 |
289 | .. literalinclude:: ../_static/code/zh/model/custom_layer/linear.py
290 | :lines: 9-21
291 |
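   | 其大致结构示意如下(实际代码以上方包含的 ``linear.py`` 为准,变量名仅作示意):
   |
   | .. code-block:: python
   |
   |     class LinearLayer(tf.keras.layers.Layer):
   |         def __init__(self):
   |             super().__init__()
   |
   |         def build(self, input_shape):
   |             # 根据输入形状建立变量,输出维度固定为1
   |             self.w = self.add_variable(name='w', shape=[input_shape[-1], 1], initializer=tf.zeros_initializer())
   |             self.b = self.add_variable(name='b', shape=[1], initializer=tf.zeros_initializer())
   |
   |         def call(self, X):
   |             return tf.matmul(X, self.w) + self.b
   |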
292 | 使用相同的方式,可以调用我们自定义的层 ``LinearLayer``:
293 |
294 | .. literalinclude:: ../_static/code/zh/model/custom_layer/linear.py
295 | :lines: 24-31
296 |
297 | Graph Execution模式 *
298 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
299 |
300 | 事实上,只要在编写模型的时候稍加注意,以上的模型都是可以同时兼容Eager Execution模式和Graph Execution模式的 [#rnn_exception]_ 。注意,在Graph Execution模式下, ``model(input_tensor)`` 只需运行一次以完成图的建立操作。
301 |
302 | 例如,通过以下代码,同样可以调用 :ref:`本章第一节 ` 建立的线性模型并进行线性回归:
303 |
304 | .. literalinclude:: ../_static/code/zh/model/custom_layer/linear.py
305 | :lines: 48-59
306 |
307 | .. [#rnn_exception] 除了本章实现的RNN模型以外。在RNN模型的实现中,我们通过Eager Execution动态获取了seq_length的长度,使得我们可以方便地动态控制RNN的展开长度。然而Graph Execution不支持这一点,为了达到相同的效果,我们需要固定seq_length的长度,或者使用 ``tf.nn.dynamic_rnn`` ( `文档 `_ )。
308 |
309 | .. [LeCun1998] Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. "Gradient-based learning applied to document recognition." Proceedings of the IEEE, 86(11):2278-2324, November 1998. http://yann.lecun.com/exdb/mnist/
310 | .. [Graves2013] Graves, Alex. “Generating Sequences With Recurrent Neural Networks.” ArXiv:1308.0850 [Cs], August 4, 2013. http://arxiv.org/abs/1308.0850.
311 | .. [Mnih2013] Mnih, Volodymyr, Koray Kavukcuoglu, David Silver, Alex Graves, Ioannis Antonoglou, Daan Wierstra, and Martin Riedmiller. “Playing Atari with Deep Reinforcement Learning.” ArXiv:1312.5602 [Cs], December 19, 2013. http://arxiv.org/abs/1312.5602.
312 |
313 |
314 |
315 |
--------------------------------------------------------------------------------
/source/zh/preface.rst:
--------------------------------------------------------------------------------
1 | 前言
2 | ======
3 |
4 | 2018年3月30日,Google在加州山景城举行了第二届TensorFlow Dev Summit开发者峰会,并宣布正式发布TensorFlow 1.8版本。笔者有幸获得Google的资助亲临峰会现场,见证了这一具有里程碑式意义的新版本发布。众多新功能的加入和支持展示了TensorFlow的雄心壮志,同时早在2017年秋就开始测试的Eager Execution(动态图机制)在这一版本中终于正式加入,并成为了入门TensorFlow的官方推荐模式。
5 |
6 | The easiest way to get started with TensorFlow is using Eager Execution.
7 |
8 | —— https://www.tensorflow.org/get_started/
9 |
10 | 在此之前,TensorFlow所基于的传统Graph Execution的弊端,如入门门槛高、调试困难、灵活性差、无法使用Python原生控制语句等早已被开发者诟病许久。一些新的基于动态图机制的深度学习框架(如PyTorch)也横空出世,并以其易用性和快速开发的特性而占据了一席之地。尤其是在学术研究等需要快速迭代模型的领域,PyTorch等新兴深度学习框架已经成为主流。笔者所在的数十人的机器学习实验室中,竟只有笔者一人"守旧"地使用TensorFlow。然而,直到目前,市面上TensorFlow相关的中文技术书籍及资料仍然基于传统的Graph Execution模式,让不少初学者(尤其是刚学过机器学习课程的大学生)望而却步。由此,在TensorFlow正式支持Eager Execution之际,有必要出现一本全新的技术手册,帮助初学者及需要快速迭代模型的研究者,以一个全新的角度快速入门TensorFlow。
11 |
12 | 同时,本手册还有第二个任务。市面上与TensorFlow相关的中文技术书籍大部分都以深度学习为主线,而将TensorFlow作为这些深度学习模型的实现方式。这样固然有体系完整的优点,然而对于已经对机器学习或深度学习理论有所了解,希望侧重于学习TensorFlow本身的读者而言,就显得不够友好。同时,虽然TensorFlow有官方的教学文档(https://tensorflow.google.cn/tutorials),然而在体例上显得逻辑性不足,缺乏一般教学文档从浅入深,层次递进的特性,而更类似于一系列技术文档的罗列。于是,笔者希望编写一本手册,以尽量精简的篇幅展示TensorFlow作为一个计算框架的主要特性,并弥补官方手册的不足,力图能让已经有一定机器学习/深度学习知识及编程能力的读者迅速上手TensorFlow,并在实际编程过程中可以随时查阅并解决实际问题。
13 |
14 | 本手册的主要特征有:
15 |
16 | * 主要基于TensorFlow最新的Eager Execution(动态图)模式,以便于模型的快速迭代开发。但依然会包含传统的Graph Execution模式,代码上尽可能兼容两者;
17 | * 定位以教学及工具书为主,编排以TensorFlow的各项概念和功能为核心,力求能够让TensorFlow开发者快速查阅。各章相对独立,不一定需要按顺序阅读。正文中不会出现太多关于深度学习和机器学习的理论介绍,但会提供若干阅读推荐以便初学者掌握相关基础知识;
18 | * 代码实现均进行仔细推敲,力图简洁高效和表意清晰。模型实现均统一使用 `TensorFlow官方文档 `_ 最新提出的继承 ``tf.keras.Model`` 和 ``tf.keras.layers.Layer`` 的方式(在其他技术文档中鲜少介绍),保证代码的高度可复用性。每个完整项目的代码总行数均不过百行,让读者可以快速理解并举一反三;
19 | * 注重详略,少即是多,不追求巨细靡遗和面面俱到,不进行大篇幅的细节论述。
20 |
21 | 在整本手册中,带“*”的部分均为选读。
22 |
23 | 本手册的暂定名称《简单粗暴TensorFlow》是向我的好友兼同学Chris Wu编写的《简单粗暴 :math:`\text{\LaTeX}` 》(https://github.com/wklchris/Note-by-LaTeX)致敬。该手册清晰精炼,是 :math:`\text{\LaTeX}` 领域不可多得的中文资料,也是我在编写这一技术文档时所学习的对象。本手册最初是在我的好友Ji-An Li所组织的深度学习研讨小组中,由我作为预备知识的讲义而编写和使用。好友们的才学卓著与无私分享的品格也是编写此拙作的重要助力。
24 |
25 | 本手册的英文版由我的好友Zida Jin(1-4章)和Ming(5-6章)翻译,并由Ji-An Li和笔者审校。三位朋友牺牲了自己的大量宝贵时间翻译和校对本手册,同时Ji-An Li亦对本手册的教学内容和代码细节提供了诸多宝贵意见。我谨向好友们为本手册的辛勤付出致以衷心的感谢。
26 |
27 | 衷心感谢Google中国开发者关系团队和TensorFlow工程团队的成员们对本手册编写所提供的帮助。其中包括开发者关系团队的Luke Cheng在本手册写作全程提供的思路启发和持续鼓励,开发者关系团队的Rui Li, Pryce Mu和TensorFlow社群维护的小伙伴们在本手册宣发及推广上提供的大力支持,以及TensorFlow团队的Tiezhen Wang在本手册工程细节方面提供的诸多建议和补充。
28 |
29 | 关于本手册的意见和建议,欢迎在 https://github.com/snowkylin/TensorFlow-cn/issues 提交。这是一个开源项目,您的宝贵意见将促进本手册的持续更新。
30 |
31 | |
32 |
33 | Xihan Li(雪麒)
34 |
35 | 2018年8月于燕园
36 |
--------------------------------------------------------------------------------
/source/zh/static.rst:
--------------------------------------------------------------------------------
1 | 附录:静态的TensorFlow
2 | ======================================
3 |
4 | TensorFlow 1+1
5 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6 |
7 | TensorFlow本质上是一个符号式的(基于计算图的)计算框架。这里以计算1+1作为Hello World的示例。
8 |
9 | .. literalinclude:: ../_static/code/zh/basic/graph/1plus1.py
10 |
11 | 输出::
12 |
13 | 2
14 |
15 | 上面这个程序只能计算1+1,以下程序通过 ``tf.placeholder()`` (占位符张量)和 ``sess.run()`` 的 ``feed_dict=`` 参数展示了如何使用TensorFlow计算任意两个数的和:
16 |
17 | .. literalinclude:: ../_static/code/zh/basic/graph/aplusb.py
18 |
19 | 运行程序::
20 |
21 | >>> a = 2
22 | >>> b = 3
23 | a + b = 5
24 |
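   | 以下给出与上述说明一致的简要示意(实际代码以上方包含的 ``aplusb.py`` 为准,输入处理的细节可能略有不同):
   |
   | .. code-block:: python
   |
   |     import tensorflow as tf
   |
   |     a = tf.placeholder(dtype=tf.int32)   # 占位符,具体值在运行时通过feed_dict送入
   |     b = tf.placeholder(dtype=tf.int32)
   |     c = a + b
   |
   |     with tf.Session() as sess:
   |         a_, b_ = int(input("a = ")), int(input("b = "))
   |         print("a + b = %d" % sess.run(c, feed_dict={a: a_, b: b_}))
   |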
25 | **变量**(Variable)是一种特殊类型的张量,使用 ``tf.get_variable()`` 建立,与编程语言中的变量很相似。使用变量前需要先初始化,变量内存储的值可以在计算图的计算过程中被修改。以下示例展示了如何建立一个变量,将其值初始化为0,并逐次累加1。
26 |
27 | .. literalinclude:: ../_static/code/zh/basic/graph/variable.py
28 |
29 | 输出::
30 |
31 | 1.0
32 | 2.0
33 | 3.0
34 | 4.0
35 | 5.0
36 |
37 | 以下代码和上述代码等价,在声明变量时指定初始化器,并通过 ``tf.global_variables_initializer()`` 一次性初始化所有变量,在实际工程中更常用:
38 |
39 | .. literalinclude:: ../_static/code/zh/basic/graph/variable_with_initializer.py
40 |
41 | 矩阵乃至张量运算是科学计算(包括机器学习)的基本操作。以下程序展示如何计算两个矩阵 :math:`\begin{bmatrix} 1 & 1 & 1 \\ 1 & 1 & 1 \end{bmatrix}` 和 :math:`\begin{bmatrix} 1 & 1 \\ 1 & 1 \\ 1 & 1 \end{bmatrix}` 的乘积:
42 |
43 | .. literalinclude:: ../_static/code/zh/basic/graph/AmatmulB.py
44 |
45 | 输出::
46 |
47 | [[3. 3.]
48 | [3. 3.]]
49 |
50 | Placeholder(占位符张量)和Variable(变量张量)也同样可以为向量、矩阵乃至更高维的张量。
51 |
52 | 基础示例:线性回归
53 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^
54 |
55 | 与前面的NumPy和Eager Execution模式不同,TensorFlow的Graph Execution模式使用 **符号式编程** 来进行数值运算。首先,我们需要将待计算的过程抽象为数据流图,将输入、运算和输出都用符号化的节点来表达。然后,我们将数据不断地送入输入节点,让数据沿着数据流图进行计算和流动,最终到达我们需要的特定输出节点。以下代码展示了如何基于TensorFlow的符号式编程方法完成与前节相同的任务。其中, ``tf.placeholder()`` 即可以视为一种“符号化的输入节点”,使用 ``tf.get_variable()`` 定义模型的参数(Variable类型的张量可以使用 ``tf.assign()`` 进行赋值),而 ``sess.run(output_node, feed_dict={input_node: data})`` 可以视作将数据送入输入节点,沿着数据流图计算并到达输出节点并返回值的过程。
56 |
57 | .. literalinclude:: ../_static/code/zh/basic/example/tensorflow_manual_grad.py
58 | :lines: 9-
59 |
60 | 在上面的两个示例中,我们都是手工计算获得损失函数关于各参数的偏导数。但当模型和损失函数都变得十分复杂时(尤其是深度学习模型),这种手动求导的工程量就难以接受了。TensorFlow提供了 **自动求导机制** ,免去了手工计算导数的繁琐。利用TensorFlow的求导函数 ``tf.gradients(ys, xs)`` 求出损失函数loss关于a,b的偏导数。由此,我们可以将上节中的两行手工计算导数的代码
61 |
62 | .. literalinclude:: ../_static/code/zh/basic/example/tensorflow_manual_grad.py
63 | :lines: 21-23
64 |
65 | 替换为
66 |
67 | .. code-block:: python
68 |
69 | grad_a, grad_b = tf.gradients(loss, [a, b])
70 |
71 | 计算结果将不会改变。
72 |
73 | 甚至不仅于此,TensorFlow附带有多种 **优化器** (optimizer),可以将求导和梯度更新一并完成。我们可以将上节的代码
74 |
75 | .. literalinclude:: ../_static/code/zh/basic/example/tensorflow_manual_grad.py
76 | :lines: 21-31
77 |
78 | 整体替换为
79 |
80 | .. code-block:: python
81 |
82 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate_)
83 | grad = optimizer.compute_gradients(loss)
84 | train_op = optimizer.apply_gradients(grad)
85 |
86 | 这里,我们先实例化了一个TensorFlow中的梯度下降优化器 ``tf.train.GradientDescentOptimizer()`` 并设置学习率。然后利用其 ``compute_gradients(loss)`` 方法求出 ``loss`` 对所有变量(参数)的梯度。最后通过 ``apply_gradients(grad)`` 方法,根据前面算出的梯度来梯度下降更新变量(参数)。
87 |
88 | 以上三行代码等价于下面一行代码:
89 |
90 | .. code-block:: python
91 |
92 | train_op = tf.train.GradientDescentOptimizer(learning_rate=learning_rate_).minimize(loss)
93 |
94 | 简化后的代码如下:
95 |
96 | .. literalinclude:: ../_static/code/zh/basic/example/tensorflow_autograd.py
97 | :lines: 9-29
98 |
--------------------------------------------------------------------------------