├── ch06_hmm
    ├── __init__.py
    ├── README.md
    ├── forward.py
    ├── hmm.py
    ├── Concept01_forward.ipynb
    └── Concept02_hmm.ipynb
├── ch08_rl
    ├── .gitignore
    ├── prices.png
    ├── stock_prices.npy
    ├── README.md
    └── rl.py
├── .gitignore
├── ch07_autoencoder
    ├── .gitignore
    ├── checkpoint
    ├── model.ckpt.meta
    ├── main.py
    ├── export_parameters.py
    ├── README.md
    ├── main_imgs.py
    ├── denoising_autoencoder.py
    ├── autoencoder_batch.py
    ├── denoiser.py
    ├── autoencoder.py
    ├── Concept03_denoising.ipynb
    └── Concept01_autoencoder.ipynb
├── ch02_basics
    ├── .gitignore
    ├── main.py
    ├── interactive_session.py
    ├── log_example.py
    ├── logging_example.py
    ├── loading_vars.py
    ├── gradient.py
    ├── types.py
    ├── spikes.py
    ├── moving_avg.py
    ├── README.md
    ├── saving_vars.py
    ├── Concept04_session_logging.ipynb
    ├── Concept03_interactive_session.ipynb
    ├── Concept07_loading_variables.ipynb
    ├── Concept02_evaluating_ops.ipynb
    ├── Concept06_saving_variables.ipynb
    ├── Concept05_variables.ipynb
    ├── Concept01_defining_tensors.ipynb
    └── Concept08_TensorBoard.ipynb
├── ch09_cnn
    ├── .gitignore
    ├── using_cifar.py
    ├── README.md
    ├── cifar_tools.py
    ├── conv_visuals.py
    ├── cnn.py
    ├── cnn_viz.py
    └── Concept03_cnn.ipynb
├── ch10_rnn
    ├── .gitignore
    ├── data_loader.py
    ├── README.md
    ├── simple_regression.py
    ├── regression.py
    ├── Concept02_rnn.ipynb
    └── Concept03_rnn_real_world.ipynb
├── ch05_clustering
    ├── audio_dataset
    │   ├── cough_1.wav
    │   ├── cough_2.wav
    │   ├── scream_1.wav
    │   ├── scream_2.wav
    │   ├── scream_3.wav
    │   ├── cough_1.wav.png
    │   ├── cough_2.wav.png
    │   ├── scream_1.wav.png
    │   ├── scream_2.wav.png
    │   └── scream_3.wav.png
    ├── som_test.py
    ├── README.md
    ├── audio_segmentation.py
    ├── som.py
    ├── audio_clustering.py
    ├── Concept01_clustering.ipynb
    └── Concept02_segmentation.ipynb
├── ch03_regression
    ├── data_reader.py
    ├── README.md
    └── Concept03_regularization.ipynb
├── LICENCE
├── ch04_classification
    ├── logistic_1d.py
    ├── linear_1d.py
    ├── logistic_2d.py
    ├── README.md
    └── softmax.py
└── README.md


/ch06_hmm/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ch08_rl/.gitignore:
--------------------------------------------------------------------------------
1 | checkpoint


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | *.pyc
3 | *~
4 | *.ckpt


--------------------------------------------------------------------------------
/ch07_autoencoder/.gitignore:
--------------------------------------------------------------------------------
1 | cifar-10-batches-py


--------------------------------------------------------------------------------
/ch02_basics/.gitignore:
--------------------------------------------------------------------------------
1 | *.ckpt.meta
2 | checkpoint
3 | logs


--------------------------------------------------------------------------------
/ch09_cnn/.gitignore:
--------------------------------------------------------------------------------
1 | cifar-10-batches-py
2 | *.png
3 | summaries


--------------------------------------------------------------------------------
/ch10_rnn/.gitignore:
--------------------------------------------------------------------------------
1 | *.png
2 | *.csv
3 | checkpoint
4 | *.ckpt*


--------------------------------------------------------------------------------
/ch08_rl/prices.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SudalaiRajkumar/TensorFlow-Book/HEAD/ch08_rl/prices.png


--------------------------------------------------------------------------------
/ch07_autoencoder/checkpoint:
--------------------------------------------------------------------------------
1 | model_checkpoint_path: "model.ckpt"
2 | all_model_checkpoint_paths: "model.ckpt"
3 | 


--------------------------------------------------------------------------------
/ch08_rl/stock_prices.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SudalaiRajkumar/TensorFlow-Book/HEAD/ch08_rl/stock_prices.npy


--------------------------------------------------------------------------------
/ch07_autoencoder/model.ckpt.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SudalaiRajkumar/TensorFlow-Book/HEAD/ch07_autoencoder/model.ckpt.meta


--------------------------------------------------------------------------------
/ch05_clustering/audio_dataset/cough_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SudalaiRajkumar/TensorFlow-Book/HEAD/ch05_clustering/audio_dataset/cough_1.wav


--------------------------------------------------------------------------------
/ch05_clustering/audio_dataset/cough_2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SudalaiRajkumar/TensorFlow-Book/HEAD/ch05_clustering/audio_dataset/cough_2.wav


--------------------------------------------------------------------------------
/ch05_clustering/audio_dataset/scream_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SudalaiRajkumar/TensorFlow-Book/HEAD/ch05_clustering/audio_dataset/scream_1.wav


--------------------------------------------------------------------------------
/ch05_clustering/audio_dataset/scream_2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SudalaiRajkumar/TensorFlow-Book/HEAD/ch05_clustering/audio_dataset/scream_2.wav


--------------------------------------------------------------------------------
/ch05_clustering/audio_dataset/scream_3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SudalaiRajkumar/TensorFlow-Book/HEAD/ch05_clustering/audio_dataset/scream_3.wav


--------------------------------------------------------------------------------
/ch05_clustering/audio_dataset/cough_1.wav.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SudalaiRajkumar/TensorFlow-Book/HEAD/ch05_clustering/audio_dataset/cough_1.wav.png


--------------------------------------------------------------------------------
/ch05_clustering/audio_dataset/cough_2.wav.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SudalaiRajkumar/TensorFlow-Book/HEAD/ch05_clustering/audio_dataset/cough_2.wav.png


--------------------------------------------------------------------------------
/ch05_clustering/audio_dataset/scream_1.wav.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SudalaiRajkumar/TensorFlow-Book/HEAD/ch05_clustering/audio_dataset/scream_1.wav.png


--------------------------------------------------------------------------------
/ch05_clustering/audio_dataset/scream_2.wav.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SudalaiRajkumar/TensorFlow-Book/HEAD/ch05_clustering/audio_dataset/scream_2.wav.png


--------------------------------------------------------------------------------
/ch05_clustering/audio_dataset/scream_3.wav.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SudalaiRajkumar/TensorFlow-Book/HEAD/ch05_clustering/audio_dataset/scream_3.wav.png


--------------------------------------------------------------------------------
/ch02_basics/main.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 | 
 4 | x = tf.constant([[1, 2]])
 5 | neg_x = tf.neg(x)
 6 | 
 7 | print(neg_x)
 8 | 
 9 | with tf.Session() as sess:
10 |     result = sess.run(neg_x)
11 | print(result)
12 | 


--------------------------------------------------------------------------------
/ch02_basics/interactive_session.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | sess = tf.InteractiveSession()
 3 | 
 4 | matrix = tf.constant([[1., 2.]])
 5 | negMatrix = tf.neg(matrix)
 6 | 
 7 | result = negMatrix.eval()
 8 | print(result)
 9 | sess.close()
10 | 


--------------------------------------------------------------------------------
/ch02_basics/log_example.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | matrix = tf.constant([[1., 2.]])
 4 | negMatrix = tf.neg(matrix)
 5 | 
 6 | with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
 7 |     result = sess.run(negMatrix)
 8 | 
 9 | print(result)
10 | 


--------------------------------------------------------------------------------
/ch02_basics/logging_example.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | matrix = tf.constant([[1, 2]])
 4 | neg_matrix = tf.neg(matrix)
 5 | 
 6 | with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
 7 |     result = sess.run(neg_matrix)
 8 | 
 9 | print result
10 | 


--------------------------------------------------------------------------------
/ch07_autoencoder/main.py:
--------------------------------------------------------------------------------
 1 | from autoencoder import Autoencoder
 2 | from sklearn import datasets
 3 | 
 4 | hidden_dim = 1
 5 | data = datasets.load_iris().data
 6 | input_dim = len(data[0])
 7 | ae = Autoencoder(input_dim, hidden_dim)
 8 | ae.train(data)
 9 | ae.test([[8, 4, 6, 2]])
10 | 


--------------------------------------------------------------------------------
/ch02_basics/loading_vars.py:
--------------------------------------------------------------------------------
 1 | # # Loading Variables in TensorFlow
 2 | 
 3 | import tensorflow as tf
 4 | sess = tf.InteractiveSession()
 5 | 
 6 | 
 7 | # Declare a boolean vector variable named `spikes` with 8 entries.
 8 | # It is not initialized here: its value is restored below from the
 9 | # checkpoint file "spikes.ckpt" (the file that saving_vars.py writes).
10 | 
11 | spikes = tf.Variable([False]*8, name='spikes')
12 | saver = tf.train.Saver()
13 | 
14 | saver.restore(sess, "spikes.ckpt")
15 | print(spikes.eval())
16 | 
17 | sess.close()
18 | 


--------------------------------------------------------------------------------
/ch05_clustering/som_test.py:
--------------------------------------------------------------------------------
 1 | #For plotting the images
 2 | from matplotlib import pyplot as plt
 3 | import numpy as np
 4 | from som import SOM
 5 | 
 6 | colors = np.array(
 7 |      [[0., 0., 1.],
 8 |       [0., 0., 0.95],
 9 |       [0., 0.05, 1.],
10 |       [0., 1., 0.],
11 |       [0., 0.95, 0.],
12 |       [0., 1, 0.05],
13 |       [1., 0., 0.],
14 |       [1., 0.05, 0.],
15 |       [1., 0., 0.05],
16 |       [1., 1., 0.]])
17 | 
18 | som = SOM(4, 4, 3)
19 | som.train(colors)
20 | 
21 | plt.imshow(som.centroid_grid)
22 | plt.show()
23 | 


--------------------------------------------------------------------------------
/ch02_basics/gradient.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | def my_loss_function(var, data):
 4 |     return tf.abs(tf.sub(var, data))
 5 | 
 6 | def my_other_loss_function(var, data):
 7 |     return tf.square(tf.sub(var, data))
 8 | 
 9 | data = tf.placeholder(tf.float32)
10 | var = tf.Variable(1.)
11 | loss = my_loss_function(var, data)
12 | var_grad = tf.gradients(loss, [var])[0]
13 | 
14 | with tf.Session() as sess:
15 |     sess.run(tf.initialize_all_variables())
16 |     var_grad_val = sess.run(var_grad, feed_dict={data: 4})
17 |     print(var_grad_val)


--------------------------------------------------------------------------------
/ch02_basics/types.py:
--------------------------------------------------------------------------------
 1 | # # Tensor Types
 2 | 
 3 | import tensorflow as tf
 4 | import numpy as np
 5 | 
 6 | 
 7 | # Define a 2x2 matrix in 3 different ways
 8 | 
 9 | m1 = [[1.0, 2.0], [3.0, 4.0]]
10 | m2 = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
11 | m3 = tf.constant([[1.0, 2.0], [3.0, 4.0]])
12 | 
13 | 
14 | print(type(m1))
15 | print(type(m2))
16 | print(type(m3))
17 | 
18 | 
19 | # Create tensor objects out of various types
20 | 
21 | t1 = tf.convert_to_tensor(m1, dtype=tf.float32)
22 | t2 = tf.convert_to_tensor(m2, dtype=tf.float32)
23 | t3 = tf.convert_to_tensor(m3, dtype=tf.float32)
24 | 
25 | 
26 | print(type(t1))
27 | print(type(t2))
28 | print(type(t3))
29 | 


--------------------------------------------------------------------------------
/ch09_cnn/using_cifar.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | import cifar_tools
 4 | import random
 5 | 
 6 | names, data, labels = \
 7 |     cifar_tools.read_data('/home/binroot/res/cifar-10-batches-py')
 8 | 
 9 | random.seed(1)
10 | 
11 | 
12 | def show_some_examples(names, data, labels):
13 |     plt.figure()
14 |     rows, cols = 4, 4
15 |     random_idxs = random.sample(range(len(data)), rows * cols)
16 |     for i in range(rows * cols):
17 |         plt.subplot(rows, cols, i + 1)
18 |         j = random_idxs[i]
19 |         plt.title(names[labels[j]])
20 |         img = np.reshape(data[j, :], (24, 24))
21 |         plt.imshow(img, cmap='Greys_r')
22 |         plt.axis('off')
23 |     plt.tight_layout()
24 |     plt.savefig('cifar_examples.png')
25 | 
26 | show_some_examples(names, data, labels)


--------------------------------------------------------------------------------
/ch02_basics/spikes.py:
--------------------------------------------------------------------------------
 1 | # # Using Variables in TensorFlow
 2 | 
 3 | import tensorflow as tf
 4 | sess = tf.InteractiveSession()
 5 | 
 6 | 
 7 | # Create a boolean variable called `spike` to detect sudden a sudden increase in a series of numbers.
 8 | # 
 9 | # Since all variables must be initialized, initialize the variable by calling `run()` on its `initializer`.
10 | 
11 | raw_data = [1., 2., 8., -1., 0., 5.5, 6., 13]
12 | spike = tf.Variable(False)
13 | spike.initializer.run()
14 | 
15 | 
16 | # Loop through the data and update the spike variable when there is a significant increase 
17 | 
18 | for i in range(1, len(raw_data)):
19 |     if raw_data[i] - raw_data[i-1] > 5:
20 |         updater = tf.assign(spike, tf.constant(True))
21 |         updater.eval()
22 |     else:
23 |         tf.assign(spike, False).eval()
24 |     print("Spike", spike.eval())
25 | 
26 | sess.close()
27 | 
28 | 
29 | 


--------------------------------------------------------------------------------
/ch07_autoencoder/export_parameters.py:
--------------------------------------------------------------------------------
 1 | from autoencoder import Autoencoder
 2 | from scipy.misc import imread, imresize, imsave
 3 | import numpy as np
 4 | import h5py
 5 | 
 6 | def zero_pad(num, pad):
 7 |     return format(num, '0' + str(pad))
 8 | 
 9 | data_dir = '../vids/'
10 | filename_prefix = 'raw_rgb_'
11 | 
12 | hidden_dim = 1000
13 | 
14 | filepath = data_dir + str(1) + '/' + filename_prefix + zero_pad(20, 5) + '.png'
15 | img = imresize(imread(filepath, True), 1. / 8.)
16 | 
17 | img_data = img.flatten()
18 | 
19 | ae = Autoencoder([img_data], hidden_dim)
20 | 
21 | weights, biases = ae.get_params()
22 | 
23 | print(np.shape(weights))
24 | print(np.shape([biases]))
25 | 
26 | h5f_W = h5py.File('encoder_W.h5', 'w')
27 | h5f_W.create_dataset('dataset_1', data=weights)
28 | h5f_W.close()
29 | 
30 | h5f_b = h5py.File('encoder_b.h5', 'w')
31 | h5f_b.create_dataset('dataset_1', data=[biases])
32 | h5f_b.close()
33 | 


--------------------------------------------------------------------------------
/ch02_basics/moving_avg.py:
--------------------------------------------------------------------------------
 1 | ## Using TensorBoard
 2 | 
 3 | # mkdir logs
 4 | 
 5 | import tensorflow as tf
 6 | import numpy as np
 7 | 
 8 | raw_data = np.random.normal(10, 1, 100)
 9 | 
10 | alpha = tf.constant(0.05)
11 | curr_value = tf.placeholder(tf.float32)
12 | prev_avg = tf.Variable(0.)
13 | update_avg = alpha * curr_value + (1 - alpha) * prev_avg
14 | 
15 | avg_hist = tf.scalar_summary("running average", update_avg)
16 | value_hist = tf.scalar_summary("incoming values", curr_value)
17 | merged = tf.merge_all_summaries()
18 | writer = tf.train.SummaryWriter("./logs")
19 | init = tf.initialize_all_variables()
20 | 
21 | with tf.Session() as sess:
22 |     sess.run(init)
23 |     for i in range(len(raw_data)):
24 |         summary_str, curr_avg = sess.run([merged, update_avg], feed_dict={curr_value: raw_data[i]})
25 |         sess.run(tf.assign(prev_avg, curr_avg))
26 |         print(raw_data[i], curr_avg)
27 |         writer.add_summary(summary_str, i)
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/ch03_regression/data_reader.py:
--------------------------------------------------------------------------------
 1 | import csv
 2 | import time
 3 | 
 4 | # https://data.lacity.org/dataset/311-Call-Center-Tracking-Data/ukiu-8trj
 5 | # crime14_freq = data_reader.read('crimes_2014.csv', 1, '%d-%b-%y %H:%M:%S', 2014)
 6 | # freq = read('311.csv', 0, '%m/%d/%Y', 2014)
 7 | 
 8 | def read(filename, date_idx, date_parse, year, bucket=7):
 9 | 
10 |     days_in_year = 365
11 |     bucket = 7
12 | 
13 |     # Create initial frequency map
14 |     freq = {}
15 |     for period in range(0, int(days_in_year/bucket)):
16 |         freq[period] = 0
17 | 
18 |     # Read data and aggregate crimes per day
19 |     with open(filename, 'rb') as csvfile:
20 |         csvreader = csv.reader(csvfile)
21 |         csvreader.next()
22 |         for row in csvreader:
23 |             t = time.strptime(row[date_idx], date_parse)
24 |             if t.tm_year == year and t.tm_yday < (days_in_year-1):
25 |                 freq[int(t.tm_yday / bucket)] += 1
26 | 
27 |     return freq
28 | 


--------------------------------------------------------------------------------
/ch02_basics/README.md:
--------------------------------------------------------------------------------
 1 | # Chapter 2
 2 | 
 3 | <p align="center"><a href="http://tensorflowbook.com" target="_blank"><img src="http://i.imgur.com/0buO2b2.png"/></a></p>
 4 | 
 5 | Before implementing machine learning algorithms, let’s first familiarize ourselves with how to use TensorFlow. You’re going to get your hands dirty writing some simple code right away! This chapter will cover some essential advantages of TensorFlow to convince you it’s the machine learning library of choice.
 6 | 
 7 | - **Concept 1**: Defining tensors
 8 | - **Concept 2**: Evaluating ops
 9 | - **Concept 3**: Interactive session
10 | - **Concept 4**: Session logging
11 | - **Concept 5**: Variables
12 | - **Concept 6**: Saving variables
13 | - **Concept 7**: Loading variables
14 | - **Concept 8**: TensorBoard
15 | 
16 | ---
17 | 
18 | * Listing 2-4: `types.py`
19 | * Listing 5-6: `main.py`
20 | * Listing 7: `interactive_session.py`
21 | * Listing 8: `logging_example.py`
22 | * Listing 9: `spikes.py`
23 | * Listing 10: `saving_vars.py`
24 | * Listing 11: `loading_vars.py`
25 | * Listing 12-15: `moving_avg.py`


--------------------------------------------------------------------------------
/ch02_basics/saving_vars.py:
--------------------------------------------------------------------------------
 1 | # # Saving Variables in TensorFlow
 2 | 
 3 | import tensorflow as tf
 4 | sess = tf.InteractiveSession()
 5 | 
 6 | 
 7 | # Create a boolean vector called `spike` to locate a sudden spike in data.
 8 | # 
 9 | # Since all variables must be initialized, initialize the variable by calling `run()` on its `initializer`.
10 | 
11 | raw_data = [1., 2., 8., -1., 0., 5.5, 6., 13]
12 | spikes = tf.Variable([False] * len(raw_data), name='spikes')
13 | spikes.initializer.run()
14 | 
15 | 
16 | # The saver op will enable saving and restoring
17 | 
18 | saver = tf.train.Saver()
19 | 
20 | 
21 | # Loop through the data and update the spike variable when there is a significant increase 
22 | 
23 | for i in range(1, len(raw_data)):
24 |     if raw_data[i] - raw_data[i-1] > 5:
25 |         spikes_val = spikes.eval()
26 |         spikes_val[i] = True
27 |         updater = tf.assign(spikes, spikes_val)
28 |         updater.eval()
29 | 
30 | 
31 | save_path = saver.save(sess, "spikes.ckpt")
32 | print("spikes data saved in file: %s" % save_path)
33 | 
34 | 
35 | sess.close()
36 | 


--------------------------------------------------------------------------------
/ch08_rl/README.md:
--------------------------------------------------------------------------------
 1 | # Chapter 8
 2 | 
 3 | <p align="center"><a href="http://tensorflowbook.com" target="_blank"><img src="http://i.imgur.com/9B8Pfkc.png"/></a></p>
 4 | 
 5 | Humans learn from past experiences (or, you know, at least they should). You didn’t get so charming by accident. Years of positive compliments as well as negative criticism have all helped shape who you are today. This chapter is all about designing a machine learning system driven by criticisms and rewards.
 6 | 
 7 | Consider the following examples. You learn what makes people happy by interacting with friends, family, or even strangers, and you figure out how to ride a bike by trying out different muscle movements until it just clicks. When you perform actions, you’re sometimes rewarded immediately. For example, finding a restaurant nearby might yield instant gratification. Other times, the reward doesn’t appear right away, such as travelling a long distance to find an exceptional place to eat. Reinforcement learning is all about making the right actions given any state.
 8 | 
 9 | - **Concept 1**: Reinforcement learning
10 | 
11 | ---
12 | 
13 | * Listing 1-10: `rl.py`


--------------------------------------------------------------------------------
/LICENCE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2016 Nishant Shukla
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/ch10_rnn/data_loader.py:
--------------------------------------------------------------------------------
 1 | import csv
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | 
 6 | def load_series(filename, series_idx=1):
 7 |     try:
 8 |         with open(filename, 'rb') as csvfile:
 9 |             csvreader = csv.reader(csvfile)
10 |             data = [float(row[series_idx]) for row in csvreader if len(row) > 0]
11 |             normalized_data = (data - np.mean(data)) / np.std(data)
12 |         return normalized_data
13 |     except IOError:
14 |         return None
15 | 
16 | 
17 | def split_data(data, percent_train=0.80):
18 |     num_rows = len(data)
19 |     train_data, test_data = [], []
20 |     for idx, row in enumerate(data):
21 |         if idx < num_rows * percent_train:
22 |             train_data.append(row)
23 |         else:
24 |             test_data.append(row)
25 |     return train_data, test_data
26 | 
27 | 
28 | if __name__=='__main__':
29 |     # https://datamarket.com/data/set/22u3/international-airline-passengers-monthly-totals-in-thousands-jan-49-dec-60#!ds=22u3&display=line
30 |     timeseries = load_series('international-airline-passengers.csv')
31 |     print(np.shape(timeseries))
32 | 
33 |     plt.figure()
34 |     plt.plot(timeseries)
35 |     plt.show()
36 | 
37 | 


--------------------------------------------------------------------------------
/ch04_classification/logistic_1d.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import tensorflow as tf
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | learning_rate = 0.01
 6 | training_epochs = 1000
 7 | 
 8 | def sigmoid(x):
 9 |     return 1. / (1. + np.exp(-x))
10 | 
11 | x1 = np.random.normal(-4, 2, 1000)
12 | x2 = np.random.normal(4, 2, 1000)
13 | xs = np.append(x1, x2)
14 | ys = np.asarray([0.] * len(x1) + [1.] * len(x2))
15 | 
16 | plt.scatter(xs, ys)
17 | 
18 | X = tf.placeholder(tf.float32, shape=(None,), name="x")
19 | Y = tf.placeholder(tf.float32, shape=(None,), name="y")
20 | w = tf.Variable([0., 0.], name="parameter", trainable=True)
21 | y_model = tf.sigmoid(-(w[1] * X + w[0]))
22 | cost = tf.reduce_mean(-tf.log(y_model * Y + (1 - y_model) * (1 - Y)))
23 | 
24 | train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
25 | 
26 | with tf.Session() as sess:
27 |     sess.run(tf.initialize_all_variables())
28 |     prev_err = 0
29 |     for epoch in range(training_epochs):
30 |         err, _ = sess.run([cost, train_op], {X: xs, Y: ys})
31 |         print(epoch, err)
32 |         if abs(prev_err - err) < 0.0001:
33 |             break
34 |         prev_err = err
35 |     w_val = sess.run(w, {X: xs, Y: ys})
36 | 
37 | all_xs = np.linspace(-10, 10, 100)
38 | plt.plot(all_xs, sigmoid(all_xs * w_val[1] + w_val[0]))
39 | plt.show()
40 | 


--------------------------------------------------------------------------------
/ch09_cnn/README.md:
--------------------------------------------------------------------------------
 1 | # Chapter 9
 2 | 
 3 | <p align="center"><a href="http://tensorflowbook.com" target="_blank"><img src="http://i.imgur.com/5WIGI1r.png"/></a></p>
 4 | 
 5 | After a long day of work, when I go grocery shopping, my eyes feel bombarded with information. Sales, coupons, colors, toddlers, flashing lights, and crowded aisles are just a few examples of all the signals my eyes pick up, whether or not I actively try to pay attention. The visual system absorbs an abundance of information.
 6 | 
 7 | Ever heard the phrase “a picture is worth a thousand words”? At least that might be true for you or me, but can a machine find meaning within images as well? The photoreceptor cells in our retinas pick up wavelengths of light, but that information doesn’t seem to propagate up to our consciousness. Similarly, a camera picks up pixels, yet we want to squeeze out some form of higher-level knowledge instead.
 8 | 
 9 | To achieve some intelligent meaning from raw sensory input with machine learning, we’ll design a neural network model. In the previous chapters, we’ve seen a few types of neural network models such as fully-connected ones or autoencoders. New to this chapter, there’s another type of model called a convolutional neural network (CNN), which performs exceptionally well on images and other sensory data such as audio.
10 | 
11 | - **Concept 1**: Using CIFAR-10 dataset
12 | - **Concept 2**: Convolutions
13 | - **Concept 3**: Convolutional neural network


--------------------------------------------------------------------------------
/ch07_autoencoder/README.md:
--------------------------------------------------------------------------------
 1 | # Chapter 7
 2 | 
 3 | <p align="center"><a href="http://tensorflowbook.com" target="_blank"><img src="http://i.imgur.com/sBoP8cg.png"/></a></p>
 4 | 
 5 | 
 6 | Have you ever identified a song from a person just humming a melody? It might be easy for you, but I’m comically tone-deaf when it comes to music. Humming, by itself, is an approximation of its corresponding song. An even better approximation could be singing. Include some instrumentals, and sometimes a cover of a song sounds indistinguishable from the original.
 7 | 
 8 | Instead of songs, in this chapter, we will approximate functions. Functions are a very general notion of relations between inputs and outputs. In machine learning, we typically want to find the function that relates inputs to outputs. Finding the best possible function is difficult, but approximating the function is much easier.
 9 | 
10 | Conveniently, artificial neural networks are a model in machine learning that can approximate any function. Given training data, we want to build a neural network model that best approximates the implicit function that might have generated the data.
11 | 
12 | After introducing neural networks in section 7.1, we’ll learn how to use them to encode data into a smaller representation in section 7.2, using a network structure called an autoencoder.
13 | 
14 | - **Concept 1**: Autoencoder
15 | - **Concept 2**: Applying an autoencoder to images
16 | - **Concept 3**: Denoising autoencoder


--------------------------------------------------------------------------------
/ch04_classification/linear_1d.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | x_label0 = np.random.normal(5, 1, 10)
 6 | x_label1 = np.random.normal(2, 1, 10)
 7 | xs = np.append(x_label0, x_label1)
 8 | labels = [0.] * len(x_label0) + [1.] * len(x_label1)
 9 | 
10 | plt.scatter(xs, labels)
11 | 
12 | learning_rate = 0.001
13 | training_epochs = 1000
14 | 
15 | X = tf.placeholder("float")
16 | Y = tf.placeholder("float")
17 | 
18 | def model(X, w):
19 |     return tf.add(tf.mul(w[1], tf.pow(X, 1)),
20 |                   tf.mul(w[0], tf.pow(X, 0)))
21 | 
# Trainable coefficients [intercept, slope], both starting at zero.
w = tf.Variable([0., 0.], name="parameters")
y_model = model(X, w)
# Sum of squared errors between the targets and the model output.
cost = tf.reduce_sum(tf.square(Y-y_model))

train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# A prediction counts as class 1 when the model output exceeds 0.5;
# accuracy is the fraction of samples whose thresholded output equals Y.
correct_prediction = tf.equal(Y, tf.to_float(tf.greater(y_model, 0.5)))
accuracy = tf.reduce_mean(tf.to_float(correct_prediction))

sess = tf.Session()
init = tf.initialize_all_variables()
sess.run(init)

# Full-batch training: every step feeds all samples, then the cost is
# re-evaluated on the same data and printed.
for epoch in range(training_epochs):
    sess.run(train_op, feed_dict={X: xs, Y: labels})
    current_cost = sess.run(cost, feed_dict={X: xs, Y: labels})
    print(epoch, current_cost)

# Pull the learned coefficients out of the graph before closing the session.
w_val = sess.run(w)
print('learned parameters', w_val)

print('accuracy', sess.run(accuracy, feed_dict={X: xs, Y: labels}))

sess.close()

# Draw the fitted line over [0, 10] on top of the scatter plot queued earlier.
all_xs = np.linspace(0, 10, 100)
plt.plot(all_xs, all_xs*w_val[1] + w_val[0])
plt.show()


--------------------------------------------------------------------------------
/ch09_cnn/cifar_tools.py:
--------------------------------------------------------------------------------
 1 | import cPickle
 2 | import numpy as np
 3 | 
 4 | 
 5 | def unpickle(file):
 6 |     fo = open(file, 'rb')
 7 |     dict = cPickle.load(fo)
 8 |     fo.close()
 9 |     return dict
10 | 
11 | 
def clean(data):
    """Convert flat colour images to normalised, cropped grayscale vectors.

    Each row of ``data`` is reshaped to ``(3, 32, 32)``, the three planes are
    averaged, the central 24x24 window is kept and flattened, and the row is
    shifted to zero mean and divided by its standard deviation. The divisor
    is floored at ``1 / sqrt(number of pixels)`` so that (near-)constant
    images do not cause a division by zero.

    Args:
        data: array with 3 * 32 * 32 values per row.

    Returns:
        Array with one normalised 576-value row per input row.
    """
    num_imgs = data.shape[0]
    # Average the three planes, then keep rows/columns 4..27 of each image.
    gray = data.reshape(num_imgs, 3, 32, 32).mean(1)
    flat = gray[:, 4:28, 4:28].reshape(num_imgs, -1)
    num_pixels = flat.shape[1]
    # Per-image statistics as column vectors so they broadcast across pixels.
    row_means = np.mean(flat, axis=1).reshape(-1, 1)
    row_stds = np.std(flat, axis=1).reshape(-1, 1)
    std_floor = 1.0 / np.sqrt(num_pixels)
    return (flat - row_means) / np.maximum(row_stds, std_floor)
25 | 
26 | 
def read_data(directory):
    """Read the five training batches stored under ``directory``.

    Expects the files ``batches.meta`` and ``data_batch_1`` through
    ``data_batch_5`` inside ``directory``. The label names and the raw array
    shapes are printed as a side effect.

    Args:
        directory: path of the folder holding the pickled batch files.

    Returns:
        Tuple ``(names, data, labels)``: the label names from the meta file,
        the images after ``clean`` cast to float32, and the labels of all
        batches joined end-to-end.
    """
    meta = unpickle('{}/batches.meta'.format(directory))
    names = meta['label_names']
    print('names', names)

    batches = [unpickle('{}/data_batch_{}'.format(directory, idx))
               for idx in range(1, 6)]
    # Images are stacked row-wise; labels are concatenated into one vector.
    data = np.vstack([batch['data'] for batch in batches])
    labels = np.hstack([batch['labels'] for batch in batches])

    print(np.shape(data), np.shape(labels))

    data = clean(data).astype(np.float32)
    return names, data, labels
47 | 


--------------------------------------------------------------------------------
/ch03_regression/README.md:
--------------------------------------------------------------------------------
 1 | # Chapter 3
 2 | 
 3 | <p align="center"><a href="http://tensorflowbook.com" target="_blank"><img src="http://i.imgur.com/F2FOdon.png"/></a></p>
 4 | 
 5 | Remember science courses back in grade school? It might have been a while ago, or who knows - maybe you’re in grade school now starting your journey in machine learning early. Either way, whether you took biology, chemistry, or physics, a common technique to analyze data is to plot how changing one variable affects the other.
 6 | 
 7 | Imagine plotting the correlation between rainfall frequency and agriculture production. You may observe that an increase in rainfall produces an increase in agriculture rate. Fitting a line to these data points enables you to make predictions about the agriculture rate under different rain conditions. If you discover the underlying function from a few data points, then that learned function empowers you to make predictions about the values of unseen data.
 8 | 
 9 | Regression is a study of how to best fit a curve to summarize your data. It is one of the most powerful and well-studied types of supervised learning algorithms. In regression, we try to understand the data points by discovering the curve that might have generated them. In doing so, we seek an explanation for why the given data is scattered the way it is. The best fit curve gives us a model for explaining how the dataset might have been produced.
10 | 
11 | This chapter will show you how to formulate a real world problem to use regression. As you’ll see, TensorFlow is just the right tool that endows us with some of the most powerful predictors.
12 | 
13 | - **Concept 1**: Linear regression
14 | - **Concept 2**: Polynomial regression
15 | - **Concept 3**: Regularization
16 | 
17 | ---
18 | 
19 | * Listing 1-2: `simple_model.py`
20 | * Listing 3: `polynomial_model.py`
21 | * Listing 4-5: `regularization.py`
22 | * Listing 6: `data_reader.py`
23 | 


--------------------------------------------------------------------------------
/ch06_hmm/README.md:
--------------------------------------------------------------------------------
 1 | # Chapter 6
 2 | 
 3 | <p align="center"><a href="http://tensorflowbook.com" target="_blank"><img src="http://i.imgur.com/yhpbDGv.png"/></a></p>
 4 | 
 5 | If a rocket blows up, someone’s probably getting fired, so rocket scientists and engineers must be able to make confident decisions about all components and configurations. They do so by physical simulations and mathematical deduction from first principles. You, too, have solved science problems with pure logical thinking. Consider Boyle’s law: pressure and volume of a gas are inversely related under a fixed temperature. You can make insightful inferences from these simple laws that have been discovered about the world. Recently, machine learning has started to play the role of an important side-kick to deductive reasoning.
 6 | 
 7 | “Rocket science” and “machine learning” aren’t phrases that usually appear together.  But nowadays, modeling real-world sensor readings using intelligent data-driven algorithms is more approachable in the aerospace industry. Also, the use of machine learning techniques is flourishing in the healthcare and automotive industries. But why?
 8 | 
 9 | Part of the reason for this influx can be attributed to better understanding of interpretable models, which are machine learning models where the learned parameters have clear interpretations. If a rocket blows up, for example, an interpretable model might help trace the root cause.
10 | 
11 | This chapter is about exposing the hidden explanations behind observations. Consider a puppet-master pulling strings to make a puppet appear alive. Analyzing only the motions of the puppet might lead to over-complicated conclusions about how it’s possible for an inanimate object to move. Once you notice the attached strings, you’ll realize that a puppet-master is the best explanation for the life-like motions. 
12 | 
13 | - **Concept 1**: Forward algorithm
14 | - **Concept 2**: Viterbi decode
15 | 
16 | ---
17 | 
18 | * Listing 1-6: `forward.py`
19 | * Listing 7-11: `hmm.py`
20 | 


--------------------------------------------------------------------------------
/ch04_classification/logistic_2d.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import tensorflow as tf
 3 | import matplotlib.pyplot as plt
 4 | 
# Step size passed to the gradient-descent optimizer.
learning_rate = 0.1
# Upper bound on training steps; the training loop may stop earlier once the
# cost stops changing.
training_epochs = 2000
 7 | 
 8 | 
 9 | def sigmoid(x):
10 |     return 1. / (1. + np.exp(-x))
11 | 
# Two 2-D point clouds of 1000 samples each: the first centred at (3, 2),
# the second at (7, 6), both with unit standard deviation per coordinate.
x1_label1 = np.random.normal(3, 1, 1000)
x2_label1 = np.random.normal(2, 1, 1000)
x1_label2 = np.random.normal(7, 1, 1000)
x2_label2 = np.random.normal(6, 1, 1000)
x1s = np.append(x1_label1, x1_label2)
x2s = np.append(x2_label1, x2_label2)
# Targets: 0. for the first cloud, 1. for the second.
ys = np.asarray([0.] * len(x1_label1) + [1.] * len(x1_label2))

X1 = tf.placeholder(tf.float32, shape=(None,), name="x1")
X2 = tf.placeholder(tf.float32, shape=(None,), name="x2")
Y = tf.placeholder(tf.float32, shape=(None,), name="y")
# Coefficients [bias, weight for x1, weight for x2], all starting at zero.
w = tf.Variable([0., 0., 0.], name="w", trainable=True)

# Model output: sigmoid of the *negated* linear combination. The same sign
# convention is used when the boundary is evaluated with NumPy below.
y_model = tf.sigmoid(-(w[2] * X2 + w[1] * X1 + w[0]))
# Mean negative log of the value the model assigns to the observed label:
# the inner expression equals y_model where Y is 1 and 1 - y_model where Y is 0.
cost = tf.reduce_mean(-tf.log(y_model * Y + (1 - y_model) * (1 - Y)))
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    prev_err = 0
    for epoch in range(training_epochs):
        err, _ = sess.run([cost, train_op], {X1: x1s, X2: x2s, Y: ys})
        print(epoch, err)
        # Stop early once the cost changes by less than 1e-4 between steps.
        if abs(prev_err - err) < 0.0001:
            break
        prev_err = err

    # Read the learned coefficients while the session is still open.
    w_val = sess.run(w, {X1: x1s, X2: x2s, Y: ys})

# Scan a 100 x 100 grid over [0, 10] x [0, 10] and keep every point whose
# model output lies within 0.01 of 0.5; these points trace the boundary.
x1_boundary, x2_boundary = [], []
for x1_test in np.linspace(0, 10, 100):
    for x2_test in np.linspace(0, 10, 100):
        z = sigmoid(-x2_test*w_val[2] - x1_test*w_val[1] - w_val[0])
        if abs(z - 0.5) < 0.01:
            x1_boundary.append(x1_test)
            x2_boundary.append(x2_test)

# Boundary points in blue, first cloud as red crosses, second cloud in green.
plt.scatter(x1_boundary, x2_boundary, c='b', marker='o', s=20)
plt.scatter(x1_label1, x2_label1, c='r', marker='x', s=20)
plt.scatter(x1_label2, x2_label2, c='g', marker='1', s=20)

plt.show()
54 | 


--------------------------------------------------------------------------------
/ch07_autoencoder/main_imgs.py:
--------------------------------------------------------------------------------
 1 | # https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
 2 | 
 3 | import cPickle
 4 | import numpy as np
 5 | from autoencoder import Autoencoder
 6 | #
 7 | # def grayscale(x):
 8 | #     gray = np.zeros(len(x)/3)
 9 | #     for i in range(len(x)/3):
10 | #         gray[i] = (x[i] + x[2*i] + x[3*i]) / 3
11 | 
12 | 
def grayscale(a):
    """Average the three colour planes of flat 32x32 images.

    Args:
        a: array with 3 * 32 * 32 values per row, the three planes stored
            one after the other.

    Returns:
        Array with one 1024-value row per input row, each value being the
        mean of the three planes at that pixel.
    """
    num_imgs = a.shape[0]
    planes = a.reshape(num_imgs, 3, 32, 32)
    return planes.mean(1).reshape(num_imgs, -1)
15 | 
16 | 
def unpickle(file):
    """Load and return the object stored in the pickle file at ``file``.

    The file is opened in binary mode inside a ``with`` block, so the handle
    is closed even when unpickling raises.

    Args:
        file: path of the pickle file to read.

    Returns:
        Whatever object the file contains.
    """
    # NOTE(review): unpickling can execute arbitrary code embedded in the
    # file; only use this on trusted data.
    with open(file, 'rb') as fo:
        return cPickle.load(fo)
22 | 
# Load the label names and the five training batches.
names = unpickle('./cifar-10-batches-py/batches.meta')['label_names']
data, labels = [], []
for i in range(1, 6):
    filename = './cifar-10-batches-py/data_batch_' + str(i)
    batch_data = unpickle(filename)
    if len(data) > 0:
        data = np.vstack((data, batch_data['data']))
        # Labels are one flat sequence per batch, so they are joined
        # end-to-end. Stacking them as rows (vstack) produced a
        # (num_batches, batch_size) array, and the np.where(...)[0] lookup
        # below then returned batch-row indices instead of sample indices.
        labels = np.hstack((labels, batch_data['labels']))
    else:
        data = batch_data['data']
        labels = batch_data['labels']

data = grayscale(data)

x = np.matrix(data)
y = np.array(labels)

# Rows whose label equals 7 (presumably the "horse" class; confirm against
# `names`).
horse_indices = np.where(y == 7)[0]

horse_x = x[horse_indices]

print(np.shape(horse_x))  # (number of selected images, 1024) after grayscale

# Train an autoencoder with a 100-unit hidden layer on the selected images.
input_dim = np.shape(horse_x)[1]
hidden_dim = 100
ae = Autoencoder(input_dim, hidden_dim)
ae.train(horse_x)

test_data = unpickle('./cifar-10-batches-py/test_batch')
test_x = grayscale(test_data['data'])
test_labels = np.array(test_data['labels'])
encoding = ae.classify(test_x, test_labels)
encoding = np.matrix(encoding)
from matplotlib import pyplot as plt

# encoding = np.matrix(np.random.choice([0, 1], size=(hidden_dim,)))

# Show test image number 7 as a 32x32 grayscale picture.
original_img = np.reshape(test_x[7, :], (32, 32))
plt.imshow(original_img, cmap='Greys_r')
plt.show()

print(np.size(encoding))
# Interactive exploration that never terminates on its own: decode the
# current code, display it, then overwrite one randomly chosen entry of the
# code with 0 or 1 and repeat. Interrupt the process to stop.
while True:
    img = ae.decode(encoding)
    plt.imshow(img, cmap='Greys_r')
    plt.show()
    rand_idx = np.random.randint(np.size(encoding))
    encoding[0, rand_idx] = np.random.randint(2)
71 | 


--------------------------------------------------------------------------------
/ch04_classification/README.md:
--------------------------------------------------------------------------------
 1 | # Chapter 4
 2 | 
 3 | <p align="center"><a href="http://tensorflowbook.com" target="_blank"><img src="http://i.imgur.com/8pYWN0k.png"/></a></p>
 4 | 
 5 | Imagine an advertisement agency collecting information about user interactions to decide what type of ad to show. That’s not so uncommon. Google, Twitter, Facebook, and other big tech giants that rely on ads have creepy-good personal profiles of their users to help deliver personalized ads. A user who’s recently searched for gaming keyboards or graphics cards is probably more likely to click ads about the latest and greatest video games.
 6 | 
 7 | It may be difficult to cater a specially crafted advertisement for each individual, so grouping users into categories is a common technique. For example, a user may be categorized as a “gamer” to receive relevant video game related ads.
 8 | 
 9 | Machine learning has been the go-to tool to accomplish such a task. At the most fundamental level, machine learning practitioners want to build a tool to help them understand data. Being able to label data items into separate categories is an excellent way to characterize it for specific needs.
10 | 
11 | The previous chapter dealt with regression, which was about fitting a curve to data. If you recall, the best-fit curve is a function that takes as input a data item and assigns it a number. Creating a machine learning model that instead assigns discrete labels to its inputs is called classification. It is a supervised learning algorithm for dealing with discrete output. (Each discrete value is called a class.) The input is typically a feature vector, and the output is a class. If there are only two class labels (for example, True/False, On/Off, Yes/No), then we call this learning algorithm a binary classifier. Otherwise, it’s called a multiclass classifier.
12 | 
13 | - **Concept 1**: Linear regression for classification
14 | - **Concept 2**: Logistic regression
15 | - **Concept 3**: 2D Logistic regression
16 | - **Concept 4**: Softmax classification
17 | 
18 | ---
19 | 
20 | * Listing 1-3: `linear_1d.py`
21 | * Listing 4: `logistic_1d.py`
22 | * Listing 5: `logistic_2d.py`
23 | * Listing 6-10: `softmax.py`
24 | 


--------------------------------------------------------------------------------
/ch10_rnn/README.md:
--------------------------------------------------------------------------------
 1 | # Chapter 10
 2 | 
 3 | <p align="center"><a href="http://tensorflowbook.com" target="_blank"><img src="http://i.imgur.com/IZbRx4E.png"/></a></p>
 4 | 
 5 | Back in school, I remember the sigh of relief when one of my midterm exams was made up of only true-or-false questions. I can’t be the only one that assumed half the answers would be “true” and the other half would be “false.”
 6 | 
 7 | I figured out answers to most of the questions, and left the rest to random guessing. Actually, I did something clever, a strategy that you might have employed as well. After counting my number of “true” answers, I realized that “false” answers were disproportionately lacking. So, a majority of my guesses were “false” to balance the distribution. 
 8 | 
 9 | It worked. I sure felt sly in the moment. What exactly is this feeling of craftiness that makes us feel so confident in our decisions, and how can we give a neural network the same power? 
10 | 
11 | One answer to this question is to use context to answer questions. Contextual cues are important signals that can also improve the performance of machine learning algorithms. For example, imagine you want to examine an English sentence and tag the part of speech of each word. The naive approach is to individually classify each word as a “noun,” “adjective,” and so on, without acknowledging its neighboring words. Consider trying that technique on the words in this sentence. The word “trying” was used as a verb, but depending on the context, you can also use it as an adjective, making parts-of-speech tagging a very trying problem. 
12 | 
13 | A better approach would consider the context. To bestow neural networks with contextual cues, we’ll study an architecture called a recurrent neural network. Instead of natural language data, we’ll be dealing with continuous timeseries data, similar to stock-market prices, as covered in previous chapters. By the end of the chapter, you’ll be able to model the patterns in timeseries data to make predictions about future values.
14 | 
15 | - **Concept 1**: Loading timeseries data
16 | - **Concept 2**: Recurrent neural networks
17 | - **Concept 3**: Applying RNN to real-world data for timeseries prediction


--------------------------------------------------------------------------------
/ch05_clustering/README.md:
--------------------------------------------------------------------------------
 1 | # Chapter 5
 2 | 
 3 | <p align="center"><a href="http://tensorflowbook.com" target="_blank"><img src="http://i.imgur.com/OMR8tkf.png"/></a></p>
 4 | 
 5 | Suppose there’s a collection of not-pirated-totally-legal mp3s on your hard drive. All your songs are crowded in one massive folder. But it might help to automatically group together similar songs and organize them into categories like “country,” “rap,” “rock,” and so on. This act of assigning an item to a group (such as an mp3 to a playlist) in an unsupervised fashion is called clustering.
 6 | 
 7 | The previous chapter on classification assumes you’re given a training dataset of correctly labeled data. Unfortunately, we don’t always have that luxury when we collect data in the real-world. For example, suppose we would like to divide up a large amount of music into interesting playlists. How could we possibly group together songs if we don’t have direct access to their metadata?
 8 | 
 9 | Spotify, SoundCloud, Google Music, Pandora, and many other music streaming services try to solve this problem to recommend similar songs to customers. Their approach includes a mixture of various machine learning techniques, but clustering is often at the heart of the solution.
10 | 
11 | The overall idea of clustering is that two items in the same cluster are “closer” to each other than items that belong to separate clusters. That is the general definition, leaving the interpretation of “closeness” open. For example, perhaps cheetahs and leopards belong in the same cluster, whereas elephants belong to another when closeness is measured by how similar two species are in the hierarchy of biological classification (family, genus, and species).
12 | 
13 | You can imagine there are many clustering algorithms out there. In this chapter we’ll focus on two types, namely k-means and self-organizing map. These approaches are completely unsupervised, meaning they fit a model without ground-truth examples.
14 | 
15 | - **Concept 1**: Clustering
16 | - **Concept 2**: Segmentation
17 | - **Concept 3**: Self-organizing map
18 | 
19 | ---
20 | 
21 | * Listing 1-4: `audio_clustering.py`
22 | * Listing 5-6: `audio_segmentation.py`
23 | * Listing 7-12: `som.py`
24 | 


--------------------------------------------------------------------------------
/ch02_basics/Concept04_session_logging.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "# Ch `02`: Concept `04`"
 8 |    ]
 9 |   },
10 |   {
11 |    "cell_type": "markdown",
12 |    "metadata": {},
13 |    "source": [
14 |     "## Session logging"
15 |    ]
16 |   },
17 |   {
18 |    "cell_type": "markdown",
19 |    "metadata": {},
20 |    "source": [
21 |     "Define an op on a tensor. Here's an example:"
22 |    ]
23 |   },
24 |   {
25 |    "cell_type": "code",
26 |    "execution_count": 18,
27 |    "metadata": {
28 |     "collapsed": false
29 |    },
30 |    "outputs": [],
31 |    "source": [
32 |     "import tensorflow as tf\n",
33 |     "\n",
34 |     "x = tf.constant([[1, 2]])\n",
35 |     "neg_op = tf.neg(x)"
36 |    ]
37 |   },
38 |   {
39 |    "cell_type": "markdown",
40 |    "metadata": {
41 |     "collapsed": true
42 |    },
43 |    "source": [
44 |     "Now let's use a session with a special argument passed in."
45 |    ]
46 |   },
47 |   {
48 |    "cell_type": "code",
49 |    "execution_count": 19,
50 |    "metadata": {
51 |     "collapsed": false
52 |    },
53 |    "outputs": [
54 |     {
55 |      "name": "stdout",
56 |      "output_type": "stream",
57 |      "text": [
58 |       "[[-1 -2]]\n"
59 |      ]
60 |     }
61 |    ],
62 |    "source": [
63 |     "with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:\n",
64 |     "    result = sess.run(neg_op)\n",
65 |     "    print result"
66 |    ]
67 |   },
68 |   {
69 |    "cell_type": "markdown",
70 |    "metadata": {},
71 |    "source": [
72 |     "Try this from a terminal. Jupyter notebooks won't show the logging info."
73 |    ]
74 |   }
75 |  ],
76 |  "metadata": {
77 |   "kernelspec": {
78 |    "display_name": "Python 2",
79 |    "language": "python",
80 |    "name": "python2"
81 |   },
82 |   "language_info": {
83 |    "codemirror_mode": {
84 |     "name": "ipython",
85 |     "version": 2
86 |    },
87 |    "file_extension": ".py",
88 |    "mimetype": "text/x-python",
89 |    "name": "python",
90 |    "nbconvert_exporter": "python",
91 |    "pygments_lexer": "ipython2",
92 |    "version": "2.7.12"
93 |   }
94 |  },
95 |  "nbformat": 4,
96 |  "nbformat_minor": 0
97 | }
98 | 


--------------------------------------------------------------------------------
/ch06_hmm/forward.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import tensorflow as tf
 3 | 
 4 | 
 5 | class HMM(object):
 6 |     def __init__(self, initial_prob, trans_prob, obs_prob):
 7 |         self.N = np.size(initial_prob)
 8 |         self.initial_prob = initial_prob
 9 |         self.trans_prob = trans_prob
10 |         self.emission = tf.constant(obs_prob)
11 | 
12 |         assert self.initial_prob.shape == (self.N, 1)
13 |         assert self.trans_prob.shape == (self.N, self.N)
14 |         assert obs_prob.shape[0] == self.N
15 | 
16 |         self.obs_idx = tf.placeholder(tf.int32)
17 |         self.fwd = tf.placeholder(tf.float64)
18 | 
19 |     def get_emission(self, obs_idx):
20 |         slice_location = [0, obs_idx]
21 |         num_rows = tf.shape(self.emission)[0]
22 |         slice_shape = [num_rows, 1]
23 |         return tf.slice(self.emission, slice_location, slice_shape)
24 | 
25 |     def forward_init_op(self):
26 |         obs_prob = self.get_emission(self.obs_idx)
27 |         fwd = tf.mul(self.initial_prob, obs_prob)
28 |         return fwd
29 | 
30 |     def forward_op(self):
31 |         transitions = tf.matmul(self.fwd, tf.transpose(self.get_emission(self.obs_idx)))
32 |         weighted_transitions = transitions * self.trans_prob
33 |         fwd = tf.reduce_sum(weighted_transitions, 0)
34 |         return tf.reshape(fwd, tf.shape(self.fwd))
35 | 
36 | 
def forward_algorithm(sess, hmm, observations):
    """Run the forward algorithm and return the total probability.

    Args:
        sess: session object used to evaluate the ops (``sess.run``).
        hmm: model providing ``forward_init_op()``, ``forward_op()`` and the
            placeholders ``obs_idx`` and ``fwd``.
        observations: non-empty sequence of observation indices.

    Returns:
        Sum of the entries of the final forward array.

    Raises:
        IndexError: if ``observations`` is an empty list.
    """
    fwd = sess.run(hmm.forward_init_op(), feed_dict={hmm.obs_idx: observations[0]})
    # Build the recursion op once and re-run it with new feeds. Calling
    # hmm.forward_op() inside the loop added a fresh copy of the same ops
    # to the graph on every iteration.
    step_op = hmm.forward_op()
    for obs in observations[1:]:
        fwd = sess.run(step_op, feed_dict={hmm.obs_idx: obs, hmm.fwd: fwd})
    # `fwd` is already a fetched array at this point, so it is summed
    # directly rather than through another graph op and session call.
    prob = np.sum(fwd)
    return prob
43 | 
# Demo: a two-state model with three possible observation symbols.
if __name__ == '__main__':
    # Column vector of start probabilities, one row per state.
    initial_prob = np.array([[0.6], [0.4]])
    # 2 x 2 transition matrix.
    trans_prob = np.array([[0.7, 0.3], [0.4, 0.6]])
    # 2 x 3 emission matrix: one row per state, one column per symbol.
    obs_prob = np.array([[0.5, 0.4, 0.1], [0.1, 0.3, 0.6]])

    hmm = HMM(initial_prob=initial_prob, trans_prob=trans_prob, obs_prob=obs_prob)

    # Observation sequence given as column indices into obs_prob.
    observations = [0, 1, 1, 2, 1]
    with tf.Session() as sess:
        prob = forward_algorithm(sess, hmm, observations)
        print('Probability of observing {} is {}'.format(observations, prob))
55 | 


--------------------------------------------------------------------------------
/ch09_cnn/conv_visuals.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | import cifar_tools
 4 | import tensorflow as tf
 5 | 
# Load the dataset at import time from a hard-coded absolute path; adjust
# the path to the local copy of the batch files before running.
names, data, labels = \
    cifar_tools.read_data('/home/binroot/res/cifar-10-batches-py')
 8 | 
 9 | 
def show_conv_results(data, filename=None):
    """Plot every channel of the first item in ``data`` as a grayscale tile.

    Args:
        data: 4-D array; ``data[0, :, :, i]`` is drawn for each ``i`` along
            the last axis.
        filename: if given (and non-empty), the figure is saved there;
            otherwise it is shown interactively.
    """
    plt.figure()
    num_channels = np.shape(data)[3]
    cols = 8
    # Keep the original 4 x 8 grid for up to 32 channels and add rows beyond
    # that; a fixed 4 x 8 grid makes plt.subplot fail on the 33rd tile.
    rows = max(4, -(-num_channels // cols))
    for i in range(num_channels):
        img = data[0, :, :, i]
        plt.subplot(rows, cols, i + 1)
        plt.imshow(img, cmap='Greys_r', interpolation='none')
        plt.axis('off')
    if filename:
        plt.savefig(filename)
    else:
        plt.show()
22 | 
23 | 
def show_weights(W, filename=None):
    """Plot every output filter of ``W`` as a grayscale tile.

    Args:
        W: 4-D array; ``W[:, :, 0, i]`` (first input channel only) is drawn
            for each ``i`` along the last axis.
        filename: if given (and non-empty), the figure is saved there;
            otherwise it is shown interactively.
    """
    plt.figure()
    num_filters = np.shape(W)[3]
    cols = 8
    # Keep the original 4 x 8 grid for up to 32 filters and add rows beyond
    # that; a fixed 4 x 8 grid makes plt.subplot fail on the 33rd tile.
    rows = max(4, -(-num_filters // cols))
    for i in range(num_filters):
        img = W[:, :, 0, i]
        plt.subplot(rows, cols, i + 1)
        plt.imshow(img, cmap='Greys_r', interpolation='none')
        plt.axis('off')
    if filename:
        plt.savefig(filename)
    else:
        plt.show()
36 | 
# Take the fifth image of the dataset and save it as a 24 x 24 picture.
raw_data = data[4, :]
raw_img = np.reshape(raw_data, (24, 24))
plt.figure()
plt.imshow(raw_img, cmap='Greys_r')
plt.savefig('input_image.png')

# Single-image batch with one channel, plus 32 random 5 x 5 filters and
# random biases (nothing is trained in this script).
x = tf.reshape(raw_data, shape=[-1, 24, 24, 1])
W = tf.Variable(tf.random_normal([5, 5, 1, 32]))
b = tf.Variable(tf.random_normal([32]))

# Convolution -> bias -> ReLU; stride 1 with 'SAME' padding.
conv = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
conv_with_b = tf.nn.bias_add(conv, b)
conv_out = tf.nn.relu(conv_with_b)

# k x k max pooling with stride k.
k = 2
maxpool = tf.nn.max_pool(conv_out, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())

    # Save one figure per stage: filters, raw convolution (before bias and
    # ReLU), ReLU output and max-pooled output; print each result's shape.
    W_val = sess.run(W)
    show_weights(W_val, 'step0_weights.png')

    conv_val = sess.run(conv)
    show_conv_results(conv_val, 'step1_convs.png')
    print(np.shape(conv_val))

    conv_out_val = sess.run(conv_out)
    show_conv_results(conv_out_val, 'step2_conv_outs.png')
    print(np.shape(conv_out_val))

    maxpool_val = sess.run(maxpool)
    show_conv_results(maxpool_val, 'step3_maxpool.png')
    print(np.shape(maxpool_val))
71 | 
72 | 
73 | 
74 | 
75 | 


--------------------------------------------------------------------------------
/ch07_autoencoder/denoising_autoencoder.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 | 
 4 | def get_batch(X, size):
 5 |     a = np.random.choice(len(X), size, replace=False)
 6 |     return X[a]
 7 | 
 8 | class Autoencoder:
 9 |     def __init__(self, input_dim, hidden_dim, epoch=1000, batch_size=50, learning_rate=0.001):
10 |         self.epoch = epoch
11 |         self.batch_size = batch_size
12 |         self.learning_rate = learning_rate
13 | 
14 |         x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim])
15 |         with tf.name_scope('encode'):
16 |             weights = tf.Variable(tf.random_normal([input_dim, hidden_dim], dtype=tf.float32))
17 |             biases = tf.Variable(tf.zeros([hidden_dim]))
18 |             encoded = tf.nn.sigmoid(tf.matmul(x, weights) + biases)
19 |         with tf.name_scope('decode'):
20 |             weights = tf.Variable(tf.random_normal([hidden_dim, input_dim], dtype=tf.float32))
21 |             biases = tf.Variable(tf.zeros([input_dim]))
22 |             decoded = tf.matmul(encoded, weights) + biases
23 | 
24 |         self.x = x
25 |         self.encoded = encoded
26 |         self.decoded = decoded
27 | 
28 |         self.loss = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(self.x, self.decoded))))
29 |         self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)
30 |         self.saver = tf.train.Saver()
31 | 
32 |     def train(self, data):
33 |         with tf.Session() as sess:
34 |             sess.run(tf.initialize_all_variables())
35 |             for i in range(self.epoch):
36 |                 for j in range(50):
37 |                     batch_data = get_batch(data, self.batch_size)
38 |                     l, _ = sess.run([self.loss, self.train_op], feed_dict={self.x: batch_data})
39 |                 if i % 10 == 0:
40 |                     print('epoch {0}: loss = {1}'.format(i, l))
41 |                     self.saver.save(sess, './model.ckpt')
42 |             self.saver.save(sess, './model.ckpt')
43 |         
44 |     def test(self, data):
45 |         with tf.Session() as sess:
46 |             self.saver.restore(sess, './model.ckpt')
47 |             hidden, reconstructed = sess.run([self.encoded, self.decoded], feed_dict={self.x: data})
48 |         print('input', data)
49 |         print('compressed', hidden)
50 |         print('reconstructed', reconstructed)
51 |         return reconstructed
52 | 
53 |     def get_params(self):
54 |         with tf.Session() as sess:
55 |             self.saver.restore(sess, './model.ckpt')
56 |             weights, biases = sess.run([self.weights1, self.biases1])
57 |         return weights, biases
58 | 


--------------------------------------------------------------------------------
/ch05_clustering/audio_segmentation.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 | from bregman.suite import *
 4 | 
# Number of clusters.
k = 4
# Number of chromagram columns grouped into one segment.
segment_size = 50  # out of 24,526
# Upper bound on clustering iterations.
max_iterations = 100


# Graph input for one chromagram segment and an op returning, for every
# column, the index of the row holding the largest value.
chromo = tf.placeholder(tf.float32)
max_freqs = tf.argmax(chromo, 0)
12 | 
def get_chromogram(audio_file):
    """Return the ``X`` matrix of the ``Chromagram`` built from ``audio_file``.

    The chromagram is created with ``nfft=16384``, ``wfft=8192`` and
    ``nhop=2205``.
    """
    chromagram = Chromagram(audio_file, nfft=16384, wfft=8192, nhop=2205)
    return chromagram.X
16 | 
def get_dataset(sess, audio_file):
    """Split an audio file's chromagram into segments and featurise each one.

    The chromagram is cut into consecutive, non-overlapping windows of
    ``segment_size`` columns; leftover columns that do not fill a whole
    window are dropped. Each window is turned into a feature vector by
    ``extract_feature_vector``. The chromagram's shape is printed as a side
    effect.

    Args:
        sess: session used to evaluate the feature-extraction op.
        audio_file: path of the audio file to analyse.

    Returns:
        2-D array with one feature row per segment, or an empty array when
        the file is shorter than one segment.
    """
    chromo_data = get_chromogram(audio_file)
    print('chromo_data', np.shape(chromo_data))
    chromo_length = np.shape(chromo_data)[1]
    # Floor division keeps the segment count an integer under true division
    # as well; range() rejects a float.
    num_segments = chromo_length // segment_size
    features = []
    for i in range(num_segments):
        chromo_segment = chromo_data[:, i*segment_size:(i+1)*segment_size]
        features.append(extract_feature_vector(sess, chromo_segment))
    # Stack once at the end. A single segment now also yields a 2-D array,
    # which row/column indexing by callers requires.
    return np.array(features)
30 | 
31 | 
def initial_cluster_centroids(X, k):
    """Use the first ``k`` rows of ``X`` as the starting centroids."""
    leading_rows = slice(0, k)
    return X[leading_rows, :]
34 | 
35 | 
36 | # op
def assign_cluster(X, centroids):
    """Build the op that maps every row of ``X`` to its nearest centroid.

    Both inputs get an extra axis (``X`` in front, ``centroids`` in the
    middle) so their difference broadcasts to every centroid/point pair.
    Squared differences are summed over the last axis and the arg-min is
    taken over the centroid axis.

    Returns:
        Op yielding, per row of ``X``, the index of the closest centroid.
    """
    points = tf.expand_dims(X, 0)
    centers = tf.expand_dims(centroids, 1)
    squared_diffs = tf.square(tf.sub(points, centers))
    distances = tf.reduce_sum(squared_diffs, 2)
    return tf.argmin(distances, 0)
43 | 
44 | 
45 | # op
def recompute_centroids(X, Y):
    """Build the op computing each cluster's mean from assignments ``Y``.

    Rows of ``X`` are summed per cluster id and divided by the per-cluster
    row count; the number of clusters is the module-level ``k``.

    NOTE(review): a cluster with no members has a count of zero, so its
    centroid is a 0/0 division -- confirm whether that can occur.
    """
    cluster_sums = tf.unsorted_segment_sum(X, Y, k)
    cluster_sizes = tf.unsorted_segment_sum(tf.ones_like(X), Y, k)
    centroids = cluster_sums / cluster_sizes
    return centroids
50 | 
51 | 
def extract_feature_vector(sess, chromo_data):
    """Summarise a chromagram segment as a normalised histogram.

    For every column of ``chromo_data`` the row index of its largest value
    is evaluated through the module-level ``max_freqs`` op. The indices are
    counted in one bin per row, and the counts are divided by the number of
    columns.

    Args:
        sess: session used to run ``max_freqs``.
        chromo_data: 2-D array, rows by columns.

    Returns:
        Float array with one entry per row of ``chromo_data``.
    """
    num_features, num_samples = np.shape(chromo_data)
    dominant_rows = sess.run(max_freqs, feed_dict={chromo: chromo_data})
    bin_edges = range(num_features + 1)
    counts, _ = np.histogram(dominant_rows, bins=bin_edges)
    return counts.astype(float) / num_samples
57 | 
58 | 
# Cluster the segments of 'sysk.wav' and print each segment's start time
# next to the cluster it was assigned to.
with tf.Session() as sess:
    X = get_dataset(sess, 'sysk.wav')
    print(np.shape(X))
    centroids = initial_cluster_centroids(X, k)
    i, converged = 0, False
    # NOTE: `converged` is never set to True, so the loop always runs for
    # max_iterations rounds. A real convergence test would have to compare
    # the evaluated assignments of consecutive rounds.
    while not converged and i < max_iterations:
        i += 1
        Y = assign_cluster(X, centroids)
        centroids = sess.run(recompute_centroids(X, Y))
        if i % 50 == 0:
            print('iteration', i)
    segments = sess.run(Y)
    for segment_idx, cluster in enumerate(segments):
        # Start time of the segment; the divisor 10 presumably is the number
        # of chromagram columns per second -- confirm against the settings
        # used to build the chromagram.
        seconds = (segment_idx * segment_size) / float(10)
        # Named `minutes` so the builtin min() is not shadowed.
        minutes, sec = divmod(seconds, 60)
        time_str = str(minutes) + 'm ' + str(sec) + 's'
        print(time_str, cluster)
81 | 
82 | 


--------------------------------------------------------------------------------
/ch07_autoencoder/autoencoder_batch.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 | 
 4 | def get_batch(X, size):
 5 |     a = np.random.choice(len(X), size, replace=False)
 6 |     return X[a]
 7 | 
 8 | class Autoencoder:
    def __init__(self, input_dim, hidden_dim, epoch=1000, batch_size=10, learning_rate=0.001):
        """Build the autoencoder graph.

        Args:
            input_dim: number of values per input row.
            hidden_dim: number of hidden (encoded) units.
            epoch: number of outer training iterations.
            batch_size: rows drawn per training step.
            learning_rate: step size for the Adam optimizer.
        """
        self.epoch = epoch
        self.batch_size = batch_size
        self.learning_rate = learning_rate

        x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim])
        # Encoder: sigmoid layer mapping input_dim -> hidden_dim.
        with tf.name_scope('encode'):
            weights = tf.Variable(tf.random_normal([input_dim, hidden_dim], dtype=tf.float32), name='weights')
            biases = tf.Variable(tf.zeros([hidden_dim]), name='biases')
            encoded = tf.nn.sigmoid(tf.matmul(x, weights) + biases)
        # Decoder: linear layer mapping hidden_dim -> input_dim. The local
        # names `weights`/`biases` are rebound here; the encoder variables
        # stay reachable only through the graph.
        with tf.name_scope('decode'):
            weights = tf.Variable(tf.random_normal([hidden_dim, input_dim], dtype=tf.float32), name='weights')
            biases = tf.Variable(tf.zeros([input_dim]), name='biases')
            decoded = tf.matmul(encoded, weights) + biases

        self.x = x
        self.encoded = encoded
        self.decoded = decoded

        # Root-mean-square reconstruction error over the whole batch.
        self.loss = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(self.x, self.decoded))))

        # Same error computed separately for every row (mean over axis 1).
        self.all_loss = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(self.x, self.decoded)), 1))
        self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)
        self.saver = tf.train.Saver()
33 | 
34 |     def train(self, data):
35 |         with tf.Session() as sess:
36 |             sess.run(tf.initialize_all_variables())
37 |             for i in range(self.epoch):
38 |                 for j in range(500):
39 |                     batch_data = get_batch(data, self.batch_size)
40 |                     l, _ = sess.run([self.loss, self.train_op], feed_dict={self.x: batch_data})
41 |                 if i % 10 == 0:
42 |                     print('epoch {0}: loss = {1}'.format(i, l))
43 |                     self.saver.save(sess, './model.ckpt')
44 |             self.saver.save(sess, './model.ckpt')
45 |         
46 |     def test(self, data):
47 |         with tf.Session() as sess:
48 |             self.saver.restore(sess, './model.ckpt')
49 |             hidden, reconstructed = sess.run([self.encoded, self.decoded], feed_dict={self.x: data})
50 |         print('input', data)
51 |         print('compressed', hidden)
52 |         print('reconstructed', reconstructed)
53 |         return reconstructed
54 | 
55 |     def get_params(self):
56 |         with tf.Session() as sess:
57 |             self.saver.restore(sess, './model.ckpt')
58 |             weights, biases = sess.run([self.weights1, self.biases1])
59 |         return weights, biases
60 | 
61 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # [Machine Learning with TensorFlow](http://www.tensorflowbook.com/)
 2 | 
 3 | [This](https://github.com/BinRoot/TensorFlow-Book) is the official code repository for [Machine Learning with TensorFlow](http://www.tensorflowbook.com/).
 4 | 
 5 | :warning: **Warning**: The book will be released in a month or two, so this repo is a **pre-release** of the entire code. I will be heavily updating this repo in the coming weeks. Stay tuned, and follow along! :)
 6 | 
 7 | Get started with machine learning using TensorFlow, Google's latest and greatest machine learning library.
 8 | 
 9 | # Summary
10 | 
11 | ## [Chapter 2](https://github.com/BinRoot/TensorFlow-Book/tree/master/ch02_basics) - TensorFlow Basics
12 | 
13 | - **Concept 1**: Defining tensors
14 | - **Concept 2**: Evaluating ops
15 | - **Concept 3**: Interactive session
16 | - **Concept 4**: Session logging
17 | - **Concept 5**: Variables
18 | - **Concept 6**: Saving variables
19 | - **Concept 7**: Loading variables
20 | - **Concept 8**: TensorBoard
21 | 
22 | ## [Chapter 3](https://github.com/BinRoot/TensorFlow-Book/tree/master/ch03_regression) - Regression
23 | 
24 | - **Concept 1**: Linear regression
25 | - **Concept 2**: Polynomial regression
26 | - **Concept 3**: Regularization
27 | 
28 | ## [Chapter 4](https://github.com/BinRoot/TensorFlow-Book/tree/master/ch04_classification) - Classification
29 | 
30 | - **Concept 1**: Linear regression for classification
31 | - **Concept 2**: Logistic regression
32 | - **Concept 3**: 2D Logistic regression
33 | - **Concept 4**: Softmax classification
34 | 
35 | ## [Chapter 5](https://github.com/BinRoot/TensorFlow-Book/tree/master/ch05_clustering) - Clustering
36 | 
37 | - **Concept 1**: Clustering
38 | - **Concept 2**: Segmentation
39 | - **Concept 3**: Self-organizing map
40 | 
41 | ## [Chapter 6](https://github.com/BinRoot/TensorFlow-Book/tree/master/ch06_hmm) - Hidden Markov models
42 | 
43 | - **Concept 1**: Forward algorithm
44 | - **Concept 2**: Viterbi decode
45 | 
46 | ## [Chapter 7](https://github.com/BinRoot/TensorFlow-Book/tree/master/ch07_autoencoder) - Autoencoders
47 | 
48 | - **Concept 1**: Autoencoder
49 | - **Concept 2**: Applying an autoencoder to images
50 | - **Concept 3**: Denoising autoencoder
51 | 
52 | ## [Chapter 8](https://github.com/BinRoot/TensorFlow-Book/tree/master/ch08_rl) - Reinforcement learning
53 | 
54 | - **Concept 1**: Reinforcement learning
55 | 
56 | ## [Chapter 9](https://github.com/BinRoot/TensorFlow-Book/tree/master/ch09_cnn) - Convolutional Neural Networks
57 | 
58 | - **Concept 1**: Using CIFAR-10 dataset
59 | - **Concept 2**: Convolutions
60 | - **Concept 3**: Convolutional neural network
61 | 
62 | ## [Chapter 10](https://github.com/BinRoot/TensorFlow-Book/tree/master/ch10_rnn) - Recurrent Neural Network
63 | 
64 | - **Concept 1**: Loading timeseries data
65 | - **Concept 2**: Recurrent neural networks
66 | - **Concept 3**: Applying RNN to real-world data for timeseries prediction


--------------------------------------------------------------------------------
/ch02_basics/Concept03_interactive_session.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Ch `02`: Concept `03`"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Interactive session"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "Interactive sessions are another way to use a session. Go ahead and define one:"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 13,
 27 |    "metadata": {
 28 |     "collapsed": true
 29 |    },
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "import tensorflow as tf\n",
 33 |     "sess = tf.InteractiveSession()"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "markdown",
 38 |    "metadata": {
 39 |     "collapsed": true
 40 |    },
 41 |    "source": [
 42 |     "We have a matrix we want to negate:"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": 14,
 48 |    "metadata": {
 49 |     "collapsed": false
 50 |    },
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "x = tf.constant([[1., 2.]])\n",
 54 |     "neg_op = tf.neg(x)"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "markdown",
 59 |    "metadata": {},
 60 |    "source": [
 61 |     "Since we're using an interactive session, we can just call the `eval()` method on the op."
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "code",
 66 |    "execution_count": 15,
 67 |    "metadata": {
 68 |     "collapsed": false
 69 |    },
 70 |    "outputs": [
 71 |     {
 72 |      "name": "stdout",
 73 |      "output_type": "stream",
 74 |      "text": [
 75 |       "[[-1. -2.]]\n"
 76 |      ]
 77 |     }
 78 |    ],
 79 |    "source": [
 80 |     "result = neg_op.eval()\n",
 81 |     "\n",
 82 |     "print(result)\n"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "markdown",
 87 |    "metadata": {},
 88 |    "source": [
 89 |     "That code's a little cleaner when using Jupyter notebooks (like this one).\n",
 90 |     "\n",
 91 |     "Don't forget to close the session:"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": 16,
 97 |    "metadata": {
 98 |     "collapsed": false
 99 |    },
100 |    "outputs": [],
101 |    "source": [
102 |     "sess.close()"
103 |    ]
104 |   }
105 |  ],
106 |  "metadata": {
107 |   "kernelspec": {
108 |    "display_name": "Python 2",
109 |    "language": "python",
110 |    "name": "python2"
111 |   },
112 |   "language_info": {
113 |    "codemirror_mode": {
114 |     "name": "ipython",
115 |     "version": 2
116 |    },
117 |    "file_extension": ".py",
118 |    "mimetype": "text/x-python",
119 |    "name": "python",
120 |    "nbconvert_exporter": "python",
121 |    "pygments_lexer": "ipython2",
122 |    "version": "2.7.12"
123 |   }
124 |  },
125 |  "nbformat": 4,
126 |  "nbformat_minor": 0
127 | }
128 | 


--------------------------------------------------------------------------------
/ch02_basics/Concept07_loading_variables.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Ch `02`: Concept `07`"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Loading variables"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "Concept 06 was about saving variables. This one's about loading what you saved. Start by creating an interactive session:"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 6,
 27 |    "metadata": {
 28 |     "collapsed": true
 29 |    },
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "import tensorflow as tf\n",
 33 |     "sess = tf.InteractiveSession()"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "markdown",
 38 |    "metadata": {},
 39 |    "source": [
 40 |     "Create a boolean vector called `spikes` of the same dimensions as before:"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": 7,
 46 |    "metadata": {
 47 |     "collapsed": false
 48 |    },
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "spikes = tf.Variable([False]*8, name='spikes')"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "markdown",
 56 |    "metadata": {},
 57 |    "source": [
 58 |     "Restore the variable data from disk, serve warm, and enjoy:"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": 17,
 64 |    "metadata": {
 65 |     "collapsed": false
 66 |    },
 67 |    "outputs": [
 68 |     {
 69 |      "name": "stdout",
 70 |      "output_type": "stream",
 71 |      "text": [
 72 |       "file not found\n"
 73 |      ]
 74 |     }
 75 |    ],
 76 |    "source": [
 77 |     "saver = tf.train.Saver()\n",
 78 |     "\n",
 79 |     "try:\n",
 80 |     "    saver.restore(sess, 'spikes.ckpt')\n",
 81 |     "    print(spikes.eval())\n",
 82 |     "except:\n",
 83 |     "    print('file not found')\n"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "markdown",
 88 |    "metadata": {},
 89 |    "source": [
 90 |     "Show's over, goodnight:"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": 19,
 96 |    "metadata": {
 97 |     "collapsed": true
 98 |    },
 99 |    "outputs": [],
100 |    "source": [
101 |     "sess.close()"
102 |    ]
103 |   }
104 |  ],
105 |  "metadata": {
106 |   "kernelspec": {
107 |    "display_name": "Python 2",
108 |    "language": "python",
109 |    "name": "python2"
110 |   },
111 |   "language_info": {
112 |    "codemirror_mode": {
113 |     "name": "ipython",
114 |     "version": 2
115 |    },
116 |    "file_extension": ".py",
117 |    "mimetype": "text/x-python",
118 |    "name": "python",
119 |    "nbconvert_exporter": "python",
120 |    "pygments_lexer": "ipython2",
121 |    "version": "2.7.12"
122 |   }
123 |  },
124 |  "nbformat": 4,
125 |  "nbformat_minor": 0
126 | }
127 | 


--------------------------------------------------------------------------------
/ch10_rnn/simple_regression.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import tensorflow as tf
 3 | from tensorflow.python.ops import rnn, rnn_cell
 4 | 
 5 | 
 6 | class SeriesPredictor:
 7 | 
 8 |     def __init__(self, input_dim, seq_size, hidden_dim=10):
 9 |         # Hyperparameters
10 |         self.input_dim = input_dim
11 |         self.seq_size = seq_size
12 |         self.hidden_dim = hidden_dim
13 | 
14 |         # Weight variables and input placeholders
15 |         self.W_out = tf.Variable(tf.random_normal([hidden_dim, 1]), name='W_out')
16 |         self.b_out = tf.Variable(tf.random_normal([1]), name='b_out')
17 |         self.x = tf.placeholder(tf.float32, [None, seq_size, input_dim])
18 |         self.y = tf.placeholder(tf.float32, [None, seq_size])
19 | 
20 |         # Cost optimizer
21 |         self.cost = tf.reduce_mean(tf.square(self.model() - self.y))
22 |         self.train_op = tf.train.AdamOptimizer().minimize(self.cost)
23 | 
24 |         # Auxiliary ops
25 |         self.saver = tf.train.Saver()
26 | 
27 |     def model(self):
28 |         """
29 |         :param x: inputs of size [T, batch_size, input_size]
30 |         :param W: matrix of fully-connected output layer weights
31 |         :param b: vector of fully-connected output layer biases
32 |         """
33 |         cell = rnn_cell.BasicLSTMCell(self.hidden_dim)
34 |         outputs, states = rnn.dynamic_rnn(cell, self.x, dtype=tf.float32)
35 |         num_examples = tf.shape(self.x)[0]
36 |         W_repeated = tf.tile(tf.expand_dims(self.W_out, 0), [num_examples, 1, 1])
37 |         out = tf.batch_matmul(outputs, W_repeated) + self.b_out
38 |         out = tf.squeeze(out)
39 |         return out
40 | 
41 |     def train(self, train_x, train_y):
42 |         with tf.Session() as sess:
43 |             tf.get_variable_scope().reuse_variables()
44 |             sess.run(tf.initialize_all_variables())
45 |             for i in range(1000):
46 |                 _, mse = sess.run([self.train_op, self.cost], feed_dict={self.x: train_x, self.y: train_y})
47 |                 if i % 100 == 0:
48 |                     print(i, mse)
49 |             save_path = self.saver.save(sess, 'model.ckpt')
50 |             print('Model saved to {}'.format(save_path))
51 | 
52 |     def test(self, test_x):
53 |         with tf.Session() as sess:
54 |             tf.get_variable_scope().reuse_variables()
55 |             self.saver.restore(sess, 'model.ckpt')
56 |             output = sess.run(self.model(), feed_dict={self.x: test_x})
57 |             print(output)
58 | 
59 | 
if __name__ == '__main__':
    # Toy task: each target is the current input plus the previous input
    # (the first target is just the first input).
    train_x = [
        [[1], [2], [5], [6]],
        [[5], [7], [7], [8]],
        [[3], [4], [5], [7]],
    ]
    train_y = [
        [1, 3, 7, 11],
        [5, 12, 14, 15],
        [3, 7, 9, 12],
    ]
    test_x = [
        [[1], [2], [3], [4]],  # 1, 3, 5, 7
        [[4], [5], [6], [7]],  # 4, 9, 11, 13
    ]

    predictor = SeriesPredictor(input_dim=1, seq_size=4, hidden_dim=10)
    predictor.train(train_x, train_y)
    predictor.test(test_x)


--------------------------------------------------------------------------------
/ch05_clustering/som.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 | 
 4 | class SOM:
 5 |     def __init__(self, width, height, dim):
 6 |         self.num_iters = 100
 7 |         self.width = width
 8 |         self.height = height
 9 |         self.dim = dim
10 |         self.node_locs = self.get_locs()
11 | 
12 |         # Each node is a vector of dimension `dim`
13 |         # For a 2D grid, there are `width * height` nodes
14 |         nodes = tf.Variable(tf.random_normal([width*height, dim]))
15 |         self.nodes = nodes
16 | 
17 |         # These two ops are inputs at each iteration
18 |         x = tf.placeholder(tf.float32, [dim])
19 |         iter = tf.placeholder(tf.float32)
20 | 
21 |         self.x = x
22 |         self.iter = iter
23 | 
24 |         # Find the node that matches closest to the input
25 |         bmu_loc = self.get_bmu_loc(x)
26 | 
27 |         self.propagate_nodes = self.get_propagation(bmu_loc, x, iter)
28 | 
29 |     def get_propagation(self, bmu_loc, x, iter):
30 |         num_nodes = self.width * self.height
31 |         rate = 1.0 - tf.div(iter, self.num_iters)
32 |         alpha = rate * 0.5
33 |         sigma = rate * tf.to_float(tf.maximum(self.width, self.height)) / 2.
34 |         expanded_bmu_loc = tf.expand_dims(tf.to_float(bmu_loc), 0)
35 |         sqr_dists_from_bmu = tf.reduce_sum(tf.square(tf.sub(expanded_bmu_loc, self.node_locs)), 1)
36 |         neigh_factor = tf.exp(-tf.div(sqr_dists_from_bmu, 2 * tf.square(sigma)))
37 |         rate = tf.mul(alpha, neigh_factor)
38 |         rate_factor = tf.pack([tf.tile(tf.slice(rate, [i], [1]), [self.dim]) for i in range(num_nodes)])
39 |         nodes_diff = tf.mul(rate_factor, tf.sub(tf.pack([x for i in range(num_nodes)]), self.nodes))
40 |         update_nodes = tf.add(self.nodes, nodes_diff)
41 |         return tf.assign(self.nodes, update_nodes)
42 | 
43 |     def get_bmu_loc(self, x):
44 |         expanded_x = tf.expand_dims(x, 0)
45 |         sqr_diff = tf.square(tf.sub(expanded_x, self.nodes))
46 |         dists = tf.reduce_sum(sqr_diff, 1)
47 |         bmu_idx = tf.argmin(dists, 0)
48 |         bmu_loc = tf.pack([tf.mod(bmu_idx, self.width), tf.div(bmu_idx, self.width)])
49 |         return bmu_loc
50 | 
51 |     def get_locs(self):
52 |         locs = [[x, y]
53 |                 for y in range(self.height)
54 |                 for x in range(self.width)]
55 |         return tf.to_float(locs)
56 | 
57 |     def train(self, data):
58 |         with tf.Session() as sess:
59 |             sess.run(tf.initialize_all_variables())
60 |             for i in range(self.num_iters):
61 |                 for data_x in data:
62 |                     sess.run(self.propagate_nodes, feed_dict={self.x: data_x, self.iter: i})
63 |             centroid_grid = [[] for i in range(self.width)]
64 |             self.nodes_val = list(sess.run(self.nodes))
65 |             self.locs_val = list(sess.run(self.node_locs))
66 |             for i, l in enumerate(self.locs_val):
67 |                 centroid_grid[int(l[0])].append(self.nodes_val[i])
68 |             self.centroid_grid = centroid_grid


--------------------------------------------------------------------------------
/ch05_clustering/audio_clustering.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 | from bregman.suite import *
 4 | 
 5 | k = 2
 6 | max_iterations = 100
 7 | 
 8 | filenames = tf.train.match_filenames_once('./audio_dataset/*.wav')
 9 | count_num_files = tf.size(filenames)
10 | filename_queue = tf.train.string_input_producer(filenames)
11 | reader = tf.WholeFileReader()
12 | filename, file_contents = reader.read(filename_queue)
13 | 
14 | chromo = tf.placeholder(tf.float32)
15 | max_freqs = tf.argmax(chromo, 0)
16 | 
17 | 
def get_next_chromogram(sess):
    """Pull the next filename off the queue and compute its chromagram.

    :param sess: session used to evaluate the module-level ``filename`` op
    :return: ``(chromagram.X, audio_path)`` for the file that was dequeued
    """
    audio_path = sess.run(filename)
    chromagram = Chromagram(audio_path, nfft=16384, wfft=8192, nhop=2205)
    return chromagram.X, audio_path
22 | 
23 | 
def extract_feature_vector(sess, chromo_data):
    """Reduce a 2-D chromagram to a normalised histogram of its column argmaxes.

    ``max_freqs`` yields, for every column of ``chromo_data``, the index of
    its largest row. Those indices are counted into unit-wide bins
    ``0 .. num_rows`` and the counts are divided by the number of columns.

    :param sess: session used to evaluate ``max_freqs``
    :param chromo_data: 2-D array fed into the ``chromo`` placeholder
    :return: float array with one entry per row of ``chromo_data``
    """
    row_count, col_count = np.shape(chromo_data)
    dominant = sess.run(max_freqs, feed_dict={chromo: chromo_data})
    counts = np.histogram(dominant, bins=range(row_count + 1))[0]
    return counts.astype(float) / col_count
30 | 
31 | 
def get_dataset(sess):
    """Build the feature matrix for every matched audio file.

    For each file the chromagram and a histogram of its column argmaxes are
    plotted side by side and saved as ``<filename>.png``; the file's feature
    vector is then appended to the dataset.

    :param sess: session used to run the input pipeline and feature ops
    :return: ``(xs, names)`` -- a matrix with one feature row per file, and
        the filenames in the same order
    """
    num_files = sess.run(count_num_files)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    xs = []
    names = []
    try:
        plt.figure()
        for _ in range(num_files):
            # `audio_name` rather than `filename`, so the module-level
            # `filename` tensor is not shadowed inside this function.
            chromo_data, audio_name = get_next_chromogram(sess)

            plt.subplot(1, 2, 1)
            plt.imshow(chromo_data, cmap='Greys', interpolation='nearest')
            plt.title('Visualization of Sound Spectrum')

            plt.subplot(1, 2, 2)
            freq_vals = sess.run(max_freqs, feed_dict={chromo: chromo_data})
            plt.hist(freq_vals)
            plt.title('Histogram of Notes')
            plt.xlabel('Musical Note')
            plt.ylabel('Count')
            plt.savefig('{}.png'.format(audio_name))
            plt.clf()

            names.append(audio_name)
            xs.append(extract_feature_vector(sess, chromo_data))
    finally:
        # Stop the queue-runner threads once the files are read (or on
        # error) so they do not outlive this function.
        coord.request_stop()
        coord.join(threads)
    return np.asmatrix(xs), names
61 | 
62 | 
def initial_cluster_centroids(X, k):
    """Seed the centroids with the first ``k`` rows of the 2-D array ``X``."""
    seeds = X[:k, :]
    return seeds
65 | 
66 | 
def assign_cluster(X, centroids):
    """Return an op giving, for each row of ``X``, the index of its nearest centroid.

    ``X`` is expanded on axis 0 and ``centroids`` on axis 1 so the subtraction
    broadcasts to every (centroid, point) pair. Squared differences are
    summed over axis 2 and the argmin is taken over the centroid axis.
    """
    points = tf.expand_dims(X, 0)
    centers = tf.expand_dims(centroids, 1)
    squared_diff = tf.square(tf.sub(points, centers))
    distances = tf.reduce_sum(squared_diff, 2)
    return tf.argmin(distances, 0)
73 | 
74 | 
def recompute_centroids(X, Y):
    """Return an op computing the mean of the rows of ``X`` within each cluster.

    ``Y`` holds the cluster id of every row. Rows are summed per id into the
    module-level ``k`` segments, and a matching sum over ``ones_like(X)``
    supplies the per-cluster counts used as the divisor.
    """
    member_counts = tf.unsorted_segment_sum(tf.ones_like(X), Y, k)
    totals = tf.unsorted_segment_sum(X, Y, k)
    return totals / member_counts
79 | 
80 | 
# Driver: load the dataset, run k-means and print (cluster, filename) pairs.
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    X, names = get_dataset(sess)
    centroids = initial_cluster_centroids(X, k)
    i, converged = 0, False
    prev_labels = None
    cluster_labels = None
    while not converged and i < max_iterations:
        i += 1
        cluster_labels = sess.run(assign_cluster(X, centroids))
        if prev_labels is not None and np.array_equal(prev_labels, cluster_labels):
            # Unchanged assignments would reproduce the same centroids, so
            # the remaining iterations cannot alter the result.
            converged = True
            break
        prev_labels = cluster_labels
        centroids = sess.run(recompute_centroids(X, cluster_labels))
    # list(...) so the pairs are printed on Python 3 as well, where zip()
    # returns a lazy iterator.
    print(list(zip(cluster_labels, names)))
91 | 


--------------------------------------------------------------------------------
/ch04_classification/softmax.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import tensorflow as tf
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | learning_rate = 0.01
 6 | training_epochs = 1000
 7 | num_labels = 3
 8 | batch_size = 100
 9 | 
10 | x1_label0 = np.random.normal(1, 1, (100, 1))
11 | x2_label0 = np.random.normal(1, 1, (100, 1))
12 | x1_label1 = np.random.normal(5, 1, (100, 1))
13 | x2_label1 = np.random.normal(4, 1, (100, 1))
14 | x1_label2 = np.random.normal(8, 1, (100, 1))
15 | x2_label2 = np.random.normal(0, 1, (100, 1))
16 | 
17 | plt.scatter(x1_label0, x2_label0, c='r', marker='o', s=60)
18 | plt.scatter(x1_label1, x2_label1, c='g', marker='x', s=60)
19 | plt.scatter(x1_label2, x2_label2, c='b', marker='_', s=60)
20 | plt.show()
21 | 
22 | xs_label0 = np.hstack((x1_label0, x2_label0))
23 | xs_label1 = np.hstack((x1_label1, x2_label1))
24 | xs_label2 = np.hstack((x1_label2, x2_label2))
25 | 
26 | xs = np.vstack((xs_label0, xs_label1, xs_label2))
27 | labels = np.matrix([[1., 0., 0.]] * len(x1_label0) + [[0., 1., 0.]] * len(x1_label1) + [[0., 0., 1.]] * len(x1_label2))
28 | 
29 | arr = np.arange(xs.shape[0])
30 | np.random.shuffle(arr)
31 | xs = xs[arr, :]
32 | labels = labels[arr, :]
33 | 
34 | test_x1_label0 = np.random.normal(1, 1, (10, 1))
35 | test_x2_label0 = np.random.normal(1, 1, (10, 1))
36 | test_x1_label1 = np.random.normal(5, 1, (10, 1))
37 | test_x2_label1 = np.random.normal(4, 1, (10, 1))
38 | test_x1_label2 = np.random.normal(8, 1, (10, 1))
39 | test_x2_label2 = np.random.normal(0, 1, (10, 1))
40 | test_xs_label0 = np.hstack((test_x1_label0, test_x2_label0))
41 | test_xs_label1 = np.hstack((test_x1_label1, test_x2_label1))
42 | test_xs_label2 = np.hstack((test_x1_label2, test_x2_label2))
43 | 
44 | test_xs = np.vstack((test_xs_label0, test_xs_label1, test_xs_label2))
45 | test_labels = np.matrix([[1., 0., 0.]] * 10 + [[0., 1., 0.]] * 10 + [[0., 0., 1.]] * 10)
46 | 
47 | train_size, num_features = xs.shape
48 | 
49 | X = tf.placeholder("float", shape=[None, num_features])
50 | Y = tf.placeholder("float", shape=[None, num_labels])
51 | 
52 | W = tf.Variable(tf.zeros([num_features, num_labels]))
53 | b = tf.Variable(tf.zeros([num_labels]))
54 | y_model = tf.nn.softmax(tf.matmul(X, W) + b)
55 | 
56 | cost = -tf.reduce_sum(Y * tf.log(y_model))
57 | train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
58 | 
59 | correct_prediction = tf.equal(tf.argmax(y_model, 1), tf.argmax(Y, 1))
60 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
61 | 
# Train with mini-batches that wrap around the shuffled data, then report the
# learned parameters and the accuracy on the held-out test points.
with tf.Session() as sess:
    tf.initialize_all_variables().run()

    # `range` and the print() call form keep this runnable on Python 3 too;
    # the original mixed them with `xrange` and a Python 2 print statement.
    num_steps = training_epochs * train_size // batch_size
    for step in range(num_steps):
        offset = (step * batch_size) % train_size
        batch_xs = xs[offset:(offset + batch_size), :]
        batch_labels = labels[offset:(offset + batch_size)]
        err, _ = sess.run([cost, train_op], feed_dict={X: batch_xs, Y: batch_labels})
        print(step, err)

    W_val = sess.run(W)
    print('w', W_val)
    b_val = sess.run(b)
    print('b', b_val)
    print('accuracy', accuracy.eval(feed_dict={X: test_xs, Y: test_labels}))
77 | 


--------------------------------------------------------------------------------
/ch09_cnn/cnn.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import matplotlib.pyplot as plt
  3 | import cifar_tools
  4 | import tensorflow as tf
  5 | 
  6 | learning_rate = 0.001
  7 | 
  8 | names, data, labels = \
  9 |     cifar_tools.read_data('/home/binroot/res/cifar-10-batches-py')
 10 | 
 11 | x = tf.placeholder(tf.float32, [None, 24 * 24])
 12 | y = tf.placeholder(tf.float32, [None, len(names)])
 13 | W1 = tf.Variable(tf.random_normal([5, 5, 1, 64]))
 14 | b1 = tf.Variable(tf.random_normal([64]))
 15 | W2 = tf.Variable(tf.random_normal([5, 5, 64, 64]))
 16 | b2 = tf.Variable(tf.random_normal([64]))
 17 | W3 = tf.Variable(tf.random_normal([6*6*64, 1024]))
 18 | b3 = tf.Variable(tf.random_normal([1024]))
 19 | W_out = tf.Variable(tf.random_normal([1024, len(names)]))
 20 | b_out = tf.Variable(tf.random_normal([len(names)]))
 21 | 
 22 | 
 23 | def conv_layer(x, W, b):
 24 |     conv = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
 25 |     conv_with_b = tf.nn.bias_add(conv, b)
 26 |     conv_out = tf.nn.relu(conv_with_b)
 27 |     return conv_out
 28 | 
 29 | 
 30 | def maxpool_layer(conv, k=2):
 31 |     return tf.nn.max_pool(conv, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')
 32 | 
 33 | 
 34 | def model():
 35 |     x_reshaped = tf.reshape(x, shape=[-1, 24, 24, 1])
 36 | 
 37 |     conv_out1 = conv_layer(x_reshaped, W1, b1)
 38 |     maxpool_out1 = maxpool_layer(conv_out1)
 39 |     norm1 = tf.nn.lrn(maxpool_out1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
 40 |     conv_out2 = conv_layer(norm1, W2, b2)
 41 |     norm2 = tf.nn.lrn(conv_out2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
 42 |     maxpool_out2 = maxpool_layer(norm2)
 43 | 
 44 |     maxpool_reshaped = tf.reshape(maxpool_out2, [-1, W3.get_shape().as_list()[0]])
 45 |     local = tf.add(tf.matmul(maxpool_reshaped, W3), b3)
 46 |     local_out = tf.nn.relu(local)
 47 | 
 48 |     out = tf.add(tf.matmul(local_out, W_out), b_out)
 49 |     return out
 50 | 
 51 | model_op = model()
 52 | 
 53 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(model_op, y))
 54 | train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
 55 | 
 56 | correct_pred = tf.equal(tf.argmax(model_op, 1), tf.argmax(y, 1))
 57 | accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
 58 | 
 59 | with tf.Session() as sess:
 60 |     sess.run(tf.initialize_all_variables())
 61 |     onehot_labels = tf.one_hot(labels, len(names), on_value=1., off_value=0., axis=-1)
 62 |     onehot_vals = sess.run(onehot_labels)
 63 |     batch_size = len(data) / 200
 64 |     print('batch size', batch_size)
 65 |     for j in range(0, 1000):
 66 |         print('EPOCH', j)
 67 |         for i in range(0, len(data), batch_size):
 68 |             batch_data = data[i:i+batch_size, :]
 69 |             batch_onehot_vals = onehot_vals[i:i+batch_size, :]
 70 |             _, accuracy_val = sess.run([train_op, accuracy], feed_dict={x: batch_data, y: batch_onehot_vals})
 71 |             if i % 1000 == 0:
 72 |                 print(i, accuracy_val)
 73 |         print('DONE WITH EPOCH')
 74 | 
 75 | 
 76 | 
 77 | 
 78 | 
 79 | 
 80 | 
 81 | 
 82 | 
 83 | 
 84 | 
 85 | 
 86 | 
 87 | 
 88 | 
 89 | 
 90 | 
 91 | 
 92 | 
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | 
104 | 
105 | 


--------------------------------------------------------------------------------
/ch02_basics/Concept02_evaluating_ops.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Ch `02`: Concept `02`"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Evaluating ops"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "Import TensorFlow:"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 13,
 27 |    "metadata": {
 28 |     "collapsed": true
 29 |    },
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "import tensorflow as tf"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "markdown",
 37 |    "metadata": {
 38 |     "collapsed": true
 39 |    },
 40 |    "source": [
 41 |     "Start with a 1x2 matrix:"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 14,
 47 |    "metadata": {
 48 |     "collapsed": false
 49 |    },
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "x = tf.constant([[1, 2]])"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "markdown",
 57 |    "metadata": {},
 58 |    "source": [
 59 |     "Let's negate it. Define the negation op to be run on the matrix:"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "code",
 64 |    "execution_count": 15,
 65 |    "metadata": {
 66 |     "collapsed": false
 67 |    },
 68 |    "outputs": [],
 69 |    "source": [
 70 |     "neg_x = tf.neg(x)"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "markdown",
 75 |    "metadata": {},
 76 |    "source": [
 77 |     "It's nothing special if you print it out. In fact, it doesn't even perform the negation computation. Check out what happens when you simply print it:"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": 16,
 83 |    "metadata": {
 84 |     "collapsed": false
 85 |    },
 86 |    "outputs": [
 87 |     {
 88 |      "name": "stdout",
 89 |      "output_type": "stream",
 90 |      "text": [
 91 |       "Tensor(\"Neg_3:0\", shape=(1, 2), dtype=int32)\n"
 92 |      ]
 93 |     }
 94 |    ],
 95 |    "source": [
 96 |     "print(neg_x)"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "markdown",
101 |    "metadata": {},
102 |    "source": [
103 |     "You need to summon a session so you can launch the negation op:"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": 17,
109 |    "metadata": {
110 |     "collapsed": false
111 |    },
112 |    "outputs": [
113 |     {
114 |      "name": "stdout",
115 |      "output_type": "stream",
116 |      "text": [
117 |       "[[-1 -2]]\n"
118 |      ]
119 |     }
120 |    ],
121 |    "source": [
122 |     "with tf.Session() as sess:\n",
123 |     "    result = sess.run(neg_x)\n",
124 |     "    print(result)"
125 |    ]
126 |   }
127 |  ],
128 |  "metadata": {
129 |   "kernelspec": {
130 |    "display_name": "Python 2",
131 |    "language": "python",
132 |    "name": "python2"
133 |   },
134 |   "language_info": {
135 |    "codemirror_mode": {
136 |     "name": "ipython",
137 |     "version": 2
138 |    },
139 |    "file_extension": ".py",
140 |    "mimetype": "text/x-python",
141 |    "name": "python",
142 |    "nbconvert_exporter": "python",
143 |    "pygments_lexer": "ipython2",
144 |    "version": "2.7.12"
145 |   }
146 |  },
147 |  "nbformat": 4,
148 |  "nbformat_minor": 0
149 | }
150 | 


--------------------------------------------------------------------------------
/ch02_basics/Concept06_saving_variables.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Ch `02`: Concept `06`"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Saving variables"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "Create an interactive session and initialize a variable:"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 1,
 27 |    "metadata": {
 28 |     "collapsed": true
 29 |    },
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "import tensorflow as tf\n",
 33 |     "sess = tf.InteractiveSession()\n",
 34 |     "\n",
 35 |     "raw_data = [1., 2., 8., -1., 0., 5.5, 6., 13]\n",
 36 |     "spikes = tf.Variable([False] * len(raw_data), name='spikes')\n",
 37 |     "spikes.initializer.run()"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "markdown",
 42 |    "metadata": {},
 43 |    "source": [
 44 |     "The saver op will enable saving and restoring:"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": 2,
 50 |    "metadata": {
 51 |     "collapsed": true
 52 |    },
 53 |    "outputs": [],
 54 |    "source": [
 55 |     "saver = tf.train.Saver()"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "markdown",
 60 |    "metadata": {},
 61 |    "source": [
 62 |     "Loop through the data and update the spike variable when there is a significant increase:"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 3,
 68 |    "metadata": {
 69 |     "collapsed": false
 70 |    },
 71 |    "outputs": [],
 72 |    "source": [
 73 |     "for i in range(1, len(raw_data)):\n",
 74 |     "    if raw_data[i] - raw_data[i-1] > 5:\n",
 75 |     "        spikes_val = spikes.eval()\n",
 76 |     "        spikes_val[i] = True\n",
 77 |     "        updater = tf.assign(spikes, spikes_val)\n",
 78 |     "        updater.eval()"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "markdown",
 83 |    "metadata": {},
 84 |    "source": [
 85 |     "Now, save your variable to disk!"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": 5,
 91 |    "metadata": {
 92 |     "collapsed": false
 93 |    },
 94 |    "outputs": [
 95 |     {
 96 |      "name": "stdout",
 97 |      "output_type": "stream",
 98 |      "text": [
 99 |       "spikes data saved in file: spikes.ckpt\n"
100 |      ]
101 |     }
102 |    ],
103 |    "source": [
104 |     "save_path = saver.save(sess, \"spikes.ckpt\")\n",
105 |     "print(\"spikes data saved in file: %s\" % save_path)"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "markdown",
110 |    "metadata": {},
111 |    "source": [
112 |     "Adieu:"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": 6,
118 |    "metadata": {
119 |     "collapsed": true
120 |    },
121 |    "outputs": [],
122 |    "source": [
123 |     "sess.close()"
124 |    ]
125 |   }
126 |  ],
127 |  "metadata": {
128 |   "kernelspec": {
129 |    "display_name": "Python 2",
130 |    "language": "python",
131 |    "name": "python2"
132 |   },
133 |   "language_info": {
134 |    "codemirror_mode": {
135 |     "name": "ipython",
136 |     "version": 2
137 |    },
138 |    "file_extension": ".py",
139 |    "mimetype": "text/x-python",
140 |    "name": "python",
141 |    "nbconvert_exporter": "python",
142 |    "pygments_lexer": "ipython2",
143 |    "version": "2.7.12"
144 |   }
145 |  },
146 |  "nbformat": 4,
147 |  "nbformat_minor": 0
148 | }
149 | 


--------------------------------------------------------------------------------
/ch09_cnn/cnn_viz.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import matplotlib.pyplot as plt
  3 | import cifar_tools
  4 | import tensorflow as tf
  5 | 
  6 | learning_rate = 0.001
  7 | 
  8 | names, data, labels = \
  9 |     cifar_tools.read_data('/home/binroot/res/cifar-10-batches-py')
 10 | 
 11 | x = tf.placeholder(tf.float32, [None, 24 * 24], name='input')
 12 | y = tf.placeholder(tf.float32, [None, len(names)], name='prediction')
 13 | W1 = tf.Variable(tf.random_normal([5, 5, 1, 64]), name='W1')
 14 | b1 = tf.Variable(tf.random_normal([64]), name='b1')
 15 | W2 = tf.Variable(tf.random_normal([5, 5, 64, 64]), name='W2')
 16 | b2 = tf.Variable(tf.random_normal([64]), name='b2')
 17 | W3 = tf.Variable(tf.random_normal([6*6*64, 1024]), name='W3')
 18 | b3 = tf.Variable(tf.random_normal([1024]), name='b3')
 19 | W_out = tf.Variable(tf.random_normal([1024, len(names)]), name='W_out')
 20 | b_out = tf.Variable(tf.random_normal([len(names)]), name='b_out')
 21 | 
 22 | 
 23 | W1_summary = tf.image_summary('W1_img', W1)
 24 | 
 25 | def conv_layer(x, W, b):
 26 |     conv = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
 27 |     conv_with_b = tf.nn.bias_add(conv, b)
 28 |     conv_out = tf.nn.relu(conv_with_b)
 29 |     return conv_out
 30 | 
 31 | 
 32 | def maxpool_layer(conv, k=2):
 33 |     return tf.nn.max_pool(conv, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')
 34 | 
 35 | 
 36 | def model():
 37 |     x_reshaped = tf.reshape(x, shape=[-1, 24, 24, 1])
 38 | 
 39 |     conv_out1 = conv_layer(x_reshaped, W1, b1)
 40 |     maxpool_out1 = maxpool_layer(conv_out1)
 41 |     norm1 = tf.nn.lrn(maxpool_out1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
 42 |     conv_out2 = conv_layer(norm1, W2, b2)
 43 |     norm2 = tf.nn.lrn(conv_out2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
 44 |     maxpool_out2 = maxpool_layer(norm2)
 45 | 
 46 |     maxpool_reshaped = tf.reshape(maxpool_out2, [-1, W3.get_shape().as_list()[0]])
 47 |     local = tf.add(tf.matmul(maxpool_reshaped, W3), b3)
 48 |     local_out = tf.nn.relu(local)
 49 | 
 50 |     out = tf.add(tf.matmul(local_out, W_out), b_out)
 51 |     return out
 52 | 
 53 | model_op = model()
 54 | 
 55 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(model_op, y))
 56 | tf.scalar_summary('cost', cost)
 57 | train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
 58 | 
 59 | correct_pred = tf.equal(tf.argmax(model_op, 1), tf.argmax(y, 1))
 60 | accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
 61 | 
 62 | merged = tf.merge_all_summaries()
 63 | 
 64 | with tf.Session() as sess:
 65 |     summary_writer = tf.train.SummaryWriter('summaries/train', sess.graph)
 66 |     sess.run(tf.initialize_all_variables())
 67 |     onehot_labels = tf.one_hot(labels, len(names), on_value=1., off_value=0., axis=-1)
 68 |     onehot_vals = sess.run(onehot_labels)
 69 |     batch_size = len(data) / 200
 70 |     print('batch size', batch_size)
 71 |     for j in range(0, 1000):
 72 |         print('EPOCH', j)
 73 |         for i in range(0, len(data), batch_size):
 74 |             batch_data = data[i:i+batch_size, :]
 75 |             batch_onehot_vals = onehot_vals[i:i+batch_size, :]
 76 |             _, accuracy_val, summary = sess.run([train_op, accuracy, merged], feed_dict={x: batch_data, y: batch_onehot_vals})
 77 |             summary_writer.add_summary(summary, i)
 78 |             if i % 1000 == 0:
 79 |                 print(i, accuracy_val)
 80 |         print('DONE WITH EPOCH')
 81 | 
 82 | 
 83 | 
 84 | 
 85 | 
 86 | 
 87 | 
 88 | 
 89 | 
 90 | 
 91 | 
 92 | 
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | 
104 | 
105 | 
106 | 
107 | 
108 | 
109 | 
110 | 
111 | 
112 | 


--------------------------------------------------------------------------------
/ch07_autoencoder/denoiser.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 | import time
 4 | 
 5 | def get_batch(X, Xn, size):
 6 |     a = np.random.choice(len(X), size, replace=False)
 7 |     return X[a], Xn[a]
 8 | 
 9 | class Denoiser:
10 | 
11 |     def __init__(self, input_dim, hidden_dim, epoch=10000, batch_size=50, learning_rate=0.001):
12 |         self.epoch = epoch
13 |         self.batch_size = batch_size
14 |         self.learning_rate = learning_rate
15 | 
16 |         self.x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x')
17 |         self.x_noised = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x_noised')
18 |         with tf.name_scope('encode'):
19 |             self.weights1 = tf.Variable(tf.random_normal([input_dim, hidden_dim], dtype=tf.float32), name='weights')
20 |             self.biases1 = tf.Variable(tf.zeros([hidden_dim]), name='biases')
21 |             self.encoded = tf.nn.sigmoid(tf.matmul(self.x_noised, self.weights1) + self.biases1, name='encoded')
22 |         with tf.name_scope('decode'):
23 |             weights = tf.Variable(tf.random_normal([hidden_dim, input_dim], dtype=tf.float32), name='weights')
24 |             biases = tf.Variable(tf.zeros([input_dim]), name='biases')
25 |             self.decoded = tf.matmul(self.encoded, weights) + biases
26 |         self.loss = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(self.x, self.decoded))))
27 |         self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)
28 |         self.saver = tf.train.Saver()
29 | 
30 |     def add_noise(self, data):
31 |         noise_type = 'mask-0.2'
32 |         if noise_type == 'gaussian':
33 |             n = np.random.normal(0, 0.1, np.shape(data))
34 |             return data + n
35 |         if 'mask' in noise_type:
36 |             frac = float(noise_type.split('-')[1])
37 |             temp = np.copy(data)
38 |             for i in temp:
39 |                 n = np.random.choice(len(i), round(frac * len(i)), replace=False)
40 |                 i[n] = 0
41 |             return temp
42 | 
43 |     def train(self, data):
44 |         data_noised = self.add_noise(data)
45 |         with open('log.csv', 'w') as writer:
46 |             with tf.Session() as sess:
47 |                 sess.run(tf.initialize_all_variables())
48 |                 for i in range(self.epoch):
49 |                     for j in range(50):
50 |                         batch_data, batch_data_noised = get_batch(data, data_noised, self.batch_size)
51 |                         l, _ = sess.run([self.loss, self.train_op], feed_dict={self.x: batch_data, self.x_noised: batch_data_noised})
52 |                     if i % 10 == 0:
53 |                         print('epoch {0}: loss = {1}'.format(i, l))
54 |                         self.saver.save(sess, './model.ckpt')
55 |                         epoch_time = int(time.time())
56 |                         row_str = str(epoch_time) + ',' + str(i) + ',' + str(l) + '\n'
57 |                         writer.write(row_str)
58 |                         writer.flush()
59 |                 self.saver.save(sess, './model.ckpt')
60 | 
61 |     def test(self, data):
62 |         with tf.Session() as sess:
63 |             self.saver.restore(sess, './model.ckpt')
64 |             hidden, reconstructed = sess.run([self.encoded, self.decoded], feed_dict={self.x: data})
65 |         print('input', data)
66 |         print('compressed', hidden)
67 |         print('reconstructed', reconstructed)
68 |         return reconstructed
69 | 
70 |     def get_params(self):
71 |         with tf.Session() as sess:
72 |             self.saver.restore(sess, './model.ckpt')
73 |             weights, biases = sess.run([self.weights1, self.biases1])
74 |         return weights, biases
75 | 


--------------------------------------------------------------------------------
/ch02_basics/Concept05_variables.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Ch `02`: Concept `05`"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Using variables"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "Here we go, here we go, here we go! Moving on from those simple examples, let's get a better understanding of variables. Start with a session:"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 27,
 27 |    "metadata": {
 28 |     "collapsed": false
 29 |    },
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "import tensorflow as tf\n",
 33 |     "sess = tf.InteractiveSession()"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "markdown",
 38 |    "metadata": {},
 39 |    "source": [
 40 |     "Below is a series of numbers. Don't worry what they mean. Just for fun, let's think of them as neural activations."
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": 28,
 46 |    "metadata": {
 47 |     "collapsed": true
 48 |    },
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "raw_data = [1., 2., 8., -1., 0., 5.5, 6., 13]"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "markdown",
 56 |    "metadata": {
 57 |     "collapsed": true
 58 |    },
 59 |    "source": [
 60 |     "Create a boolean variable called `spike` to detect a sudden increase in the values.\n",
 61 |     "\n",
 62 |     "All variables must be initialized. Go ahead and initialize the variable by calling `run()` on its `initializer`:"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 29,
 68 |    "metadata": {
 69 |     "collapsed": false
 70 |    },
 71 |    "outputs": [],
 72 |    "source": [
 73 |     "spike = tf.Variable(False)\n",
 74 |     "spike.initializer.run()"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "markdown",
 79 |    "metadata": {},
 80 |    "source": [
 81 |     "Loop through the data and update the spike variable when there is a significant increase:"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": 30,
 87 |    "metadata": {
 88 |     "collapsed": false
 89 |    },
 90 |    "outputs": [
 91 |     {
 92 |      "name": "stdout",
 93 |      "output_type": "stream",
 94 |      "text": [
 95 |       "('Spike', False)\n",
 96 |       "('Spike', True)\n",
 97 |       "('Spike', False)\n",
 98 |       "('Spike', False)\n",
 99 |       "('Spike', True)\n",
100 |       "('Spike', False)\n",
101 |       "('Spike', True)\n"
102 |      ]
103 |     }
104 |    ],
105 |    "source": [
106 |     "for i in range(1, len(raw_data)):\n",
107 |     "    if raw_data[i] - raw_data[i-1] > 5:\n",
108 |     "        updater = tf.assign(spike, tf.constant(True))\n",
109 |     "        updater.eval()\n",
110 |     "    else:\n",
111 |     "        tf.assign(spike, False).eval()\n",
112 |     "    print(\"Spike\", spike.eval())"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "markdown",
117 |    "metadata": {},
118 |    "source": [
119 |     "You forgot to close the session! Here, let me do it:"
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "code",
124 |    "execution_count": 31,
125 |    "metadata": {
126 |     "collapsed": true
127 |    },
128 |    "outputs": [],
129 |    "source": [
130 |     "sess.close()"
131 |    ]
132 |   }
133 |  ],
134 |  "metadata": {
135 |   "kernelspec": {
136 |    "display_name": "Python 2",
137 |    "language": "python",
138 |    "name": "python2"
139 |   },
140 |   "language_info": {
141 |    "codemirror_mode": {
142 |     "name": "ipython",
143 |     "version": 2
144 |    },
145 |    "file_extension": ".py",
146 |    "mimetype": "text/x-python",
147 |    "name": "python",
148 |    "nbconvert_exporter": "python",
149 |    "pygments_lexer": "ipython2",
150 |    "version": "2.7.12"
151 |   }
152 |  },
153 |  "nbformat": 4,
154 |  "nbformat_minor": 0
155 | }
156 | 


--------------------------------------------------------------------------------
/ch07_autoencoder/autoencoder.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 | 
 4 | def get_batch(X, size):
 5 |     a = np.random.choice(len(X), size, replace=False)
 6 |     return X[a]
 7 | 
 8 | class Autoencoder:
 9 |     def __init__(self, input_dim, hidden_dim, epoch=200, batch_size=10, learning_rate=0.001):
10 |         self.epoch = epoch
11 |         self.batch_size = batch_size
12 |         self.learning_rate = learning_rate
13 | 
14 |         x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim])
15 |         with tf.name_scope('encode'):
16 |             weights = tf.Variable(tf.random_normal([input_dim, hidden_dim], dtype=tf.float32), name='weights')
17 |             biases = tf.Variable(tf.zeros([hidden_dim]), name='biases')
18 |             encoded = tf.nn.sigmoid(tf.matmul(x, weights) + biases)
19 |         with tf.name_scope('decode'):
20 |             weights = tf.Variable(tf.random_normal([hidden_dim, input_dim], dtype=tf.float32), name='weights')
21 |             biases = tf.Variable(tf.zeros([input_dim]), name='biases')
22 |             decoded = tf.matmul(encoded, weights) + biases
23 | 
24 |         self.x = x
25 |         self.encoded = encoded
26 |         self.decoded = decoded
27 | 
28 |         self.loss = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(self.x, self.decoded))))
29 | 
30 |         self.all_loss = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(self.x, self.decoded)), 1))
31 |         self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)
32 |         self.saver = tf.train.Saver()
33 | 
34 |     def train(self, data):
35 |         with tf.Session() as sess:
36 |             sess.run(tf.initialize_all_variables())
37 |             for i in range(self.epoch):
38 |                 for j in range(500):
39 |                     batch_data = get_batch(data, self.batch_size)
40 |                     l, _ = sess.run([self.loss, self.train_op], feed_dict={self.x: batch_data})
41 |                 if i % 10 == 0:
42 |                     print('epoch {0}: loss = {1}'.format(i, l))
43 |                     self.saver.save(sess, './model.ckpt')
44 |             self.saver.save(sess, './model.ckpt')
45 |         
46 |     def test(self, data):
47 |         with tf.Session() as sess:
48 |             self.saver.restore(sess, './model.ckpt')
49 |             hidden, reconstructed = sess.run([self.encoded, self.decoded], feed_dict={self.x: data})
50 |         print('input', data)
51 |         print('compressed', hidden)
52 |         print('reconstructed', reconstructed)
53 |         return reconstructed
54 | 
55 |     def get_params(self):
56 |         with tf.Session() as sess:
57 |             self.saver.restore(sess, './model.ckpt')
58 |             weights, biases = sess.run([self.weights1, self.biases1])
59 |         return weights, biases
60 | 
61 |     def classify(self, data, labels):
62 |         with tf.Session() as sess:
63 |             sess.run(tf.initialize_all_variables())
64 |             self.saver.restore(sess, './model.ckpt')
65 |             hidden, reconstructed = sess.run([self.encoded, self.decoded], feed_dict={self.x: data})
66 |             reconstructed = reconstructed[0]
67 |             # loss = sess.run(self.all_loss, feed_dict={self.x: data})
68 |             print('data', np.shape(data))
69 |             print('reconstructed', np.shape(reconstructed))
70 |             loss = np.sqrt(np.mean(np.square(data - reconstructed), axis=1))
71 |             print('loss', np.shape(loss))
72 |             horse_indices = np.where(labels == 7)[0]
73 |             not_horse_indices = np.where(labels != 7)[0]
74 |             horse_loss = np.mean(loss[horse_indices])
75 |             not_horse_loss = np.mean(loss[not_horse_indices])
76 |             print('horse', horse_loss)
77 |             print('not horse', not_horse_loss)
78 |             return hidden[7,:]
79 | 
80 |     def decode(self, encoding):
81 |         with tf.Session() as sess:
82 |             sess.run(tf.initialize_all_variables())
83 |             self.saver.restore(sess, './model.ckpt')
84 |             reconstructed = sess.run(self.decoded, feed_dict={self.encoded: encoding})
85 |         img = np.reshape(reconstructed, (32, 32))
86 |         return img
87 | 


--------------------------------------------------------------------------------
/ch02_basics/Concept01_defining_tensors.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Ch `02`: Concept `01`"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Defining tensors"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "Import TensorFlow and Numpy:"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 2,
 27 |    "metadata": {
 28 |     "collapsed": true
 29 |    },
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "import tensorflow as tf\n",
 33 |     "import numpy as np"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "markdown",
 38 |    "metadata": {
 39 |     "collapsed": true
 40 |    },
 41 |    "source": [
 42 |     "Now, define a 2x2 matrix in different ways:"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": 7,
 48 |    "metadata": {
 49 |     "collapsed": false
 50 |    },
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "m1 = [[1.0, 2.0], \n",
 54 |     "      [3.0, 4.0]]\n",
 55 |     "\n",
 56 |     "m2 = np.array([[1.0, 2.0], \n",
 57 |     "               [3.0, 4.0]], dtype=np.float32)\n",
 58 |     "\n",
 59 |     "m3 = tf.constant([[1.0, 2.0], \n",
 60 |     "                  [3.0, 4.0]])"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "markdown",
 65 |    "metadata": {},
 66 |    "source": [
 67 |     "Let's see what happens when we print them:"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": 8,
 73 |    "metadata": {
 74 |     "collapsed": false
 75 |    },
 76 |    "outputs": [
 77 |     {
 78 |      "name": "stdout",
 79 |      "output_type": "stream",
 80 |      "text": [
 81 |       "<type 'list'>\n",
 82 |       "<type 'numpy.ndarray'>\n",
 83 |       "<class 'tensorflow.python.framework.ops.Tensor'>\n"
 84 |      ]
 85 |     }
 86 |    ],
 87 |    "source": [
 88 |     "print(type(m1))\n",
 89 |     "print(type(m2))\n",
 90 |     "print(type(m3))"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "markdown",
 95 |    "metadata": {},
 96 |    "source": [
 97 |     "So, that's what we're dealing with. Interesting. \n",
 98 |     "\n",
 99 |     "By the way, there's a function called `convert_to_tensor(...)` that does exactly what you might expect. \n",
100 |     "\n",
101 |     "Let's use it to create tensor objects out of various types:"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "code",
106 |    "execution_count": 11,
107 |    "metadata": {
108 |     "collapsed": true
109 |    },
110 |    "outputs": [],
111 |    "source": [
112 |     "t1 = tf.convert_to_tensor(m1, dtype=tf.float32)\n",
113 |     "t2 = tf.convert_to_tensor(m2, dtype=tf.float32)\n",
114 |     "t3 = tf.convert_to_tensor(m3, dtype=tf.float32)"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "markdown",
119 |    "metadata": {},
120 |    "source": [
121 |     "Ok, ok! Time for the reveal:"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "code",
126 |    "execution_count": 15,
127 |    "metadata": {
128 |     "collapsed": false
129 |    },
130 |    "outputs": [
131 |     {
132 |      "name": "stdout",
133 |      "output_type": "stream",
134 |      "text": [
135 |       "<class 'tensorflow.python.framework.ops.Tensor'>\n",
136 |       "<class 'tensorflow.python.framework.ops.Tensor'>\n",
137 |       "<class 'tensorflow.python.framework.ops.Tensor'>\n"
138 |      ]
139 |     }
140 |    ],
141 |    "source": [
142 |     "print(type(t1))\n",
143 |     "print(type(t2))\n",
144 |     "print(type(t3))"
145 |    ]
146 |   }
147 |  ],
148 |  "metadata": {
149 |   "kernelspec": {
150 |    "display_name": "Python 2",
151 |    "language": "python",
152 |    "name": "python2"
153 |   },
154 |   "language_info": {
155 |    "codemirror_mode": {
156 |     "name": "ipython",
157 |     "version": 2
158 |    },
159 |    "file_extension": ".py",
160 |    "mimetype": "text/x-python",
161 |    "name": "python",
162 |    "nbconvert_exporter": "python",
163 |    "pygments_lexer": "ipython2",
164 |    "version": "2.7.12"
165 |   }
166 |  },
167 |  "nbformat": 4,
168 |  "nbformat_minor": 0
169 | }
170 | 


--------------------------------------------------------------------------------
/ch06_hmm/hmm.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import tensorflow as tf
 3 | 
 4 | 
 5 | # initial parameters can be learned on training data
 6 | # theory reference https://web.stanford.edu/~jurafsky/slp3/8.pdf
 7 | # code reference https://phvu.net/2013/12/06/sweet-implementation-of-viterbi-in-python/
 8 | class HMM(object):
 9 |     def __init__(self, initial_prob, trans_prob, obs_prob):
10 |         self.N = np.size(initial_prob)
11 |         self.initial_prob = initial_prob
12 |         self.trans_prob = trans_prob
13 |         self.obs_prob = obs_prob
14 |         self.emission = tf.constant(obs_prob)
15 |         assert self.initial_prob.shape == (self.N, 1)
16 |         assert self.trans_prob.shape == (self.N, self.N)
17 |         assert self.obs_prob.shape[0] == self.N
18 |         self.obs = tf.placeholder(tf.int32)
19 |         self.fwd = tf.placeholder(tf.float64)
20 |         self.viterbi = tf.placeholder(tf.float64)
21 | 
22 |     def get_emission(self, obs_idx):
23 |         slice_location = [0, obs_idx]
24 |         num_rows = tf.shape(self.emission)[0]
25 |         slice_shape = [num_rows, 1]
26 |         return tf.slice(self.emission, slice_location, slice_shape)
27 | 
28 |     def forward_init_op(self):
29 |         obs_prob = self.get_emission(self.obs)
30 |         fwd = tf.mul(self.initial_prob, obs_prob)
31 |         return fwd
32 | 
33 |     def forward_op(self):
34 |         transitions = tf.matmul(self.fwd, tf.transpose(self.get_emission(self.obs)))
35 |         weighted_transitions = transitions * self.trans_prob
36 |         fwd = tf.reduce_sum(weighted_transitions, 0)
37 |         return tf.reshape(fwd, tf.shape(self.fwd))
38 | 
39 |     def decode_op(self):
40 |         transitions = tf.matmul(self.viterbi, tf.transpose(self.get_emission(self.obs)))
41 |         weighted_transitions = transitions * self.trans_prob
42 |         viterbi = tf.reduce_max(weighted_transitions, 0)
43 |         return tf.reshape(viterbi, tf.shape(self.viterbi))
44 | 
45 |     def backpt_op(self):
46 |         back_transitions = tf.matmul(self.viterbi, np.ones((1, self.N)))
47 |         weighted_back_transitions = back_transitions * self.trans_prob
48 |         return tf.argmax(weighted_back_transitions, 0)
49 | 
50 | 
def forward_algorithm(sess, hmm, observations):
    """Return the probability of `observations` under `hmm` (forward algorithm).

    :param sess: session used to evaluate the HMM ops
    :param hmm: object exposing `forward_init_op()`, `forward_op()` and the
        `obs` / `fwd` placeholders
    :param observations: non-empty sequence of observation indices
    :return: sum of the final forward column
    """
    fwd = sess.run(hmm.forward_init_op(), feed_dict={hmm.obs: observations[0]})
    # Build the step op once; calling forward_op() inside the loop would add a
    # fresh copy of the same nodes to the graph for every observation.
    step_op = hmm.forward_op()
    for t in range(1, len(observations)):
        fwd = sess.run(step_op, feed_dict={hmm.obs: observations[t], hmm.fwd: fwd})
    # `fwd` is already a NumPy array, so no extra graph op is needed to sum it.
    prob = np.sum(fwd)
    return prob
57 | 
def viterbi_decode(sess, hmm, observations):
    """Return the most likely hidden-state index sequence for `observations`.

    :param sess: session used to evaluate the HMM ops
    :param hmm: object exposing `N`, `forward_init_op()`, `decode_op()`,
        `backpt_op()` and the `obs` / `viterbi` placeholders
    :param observations: non-empty sequence of observation indices
    :return: list of state indices, one per observation
    """
    viterbi = sess.run(hmm.forward_init_op(), feed_dict={hmm.obs: observations[0]})
    # Column t holds, for each state, its best predecessor at time t;
    # -1 marks entries that were never filled (column 0).
    backpts = np.ones((hmm.N, len(observations)), 'int32') * -1
    # Build both step ops once instead of re-adding them to the graph per step.
    step_ops = [hmm.decode_op(), hmm.backpt_op()]
    for t in range(1, len(observations)):
        viterbi, backpt = sess.run(step_ops,
                                   feed_dict={hmm.obs: observations[t],
                                              hmm.viterbi: viterbi})
        backpts[:, t] = backpt
    # Start from the best final state and walk the back-pointers to time 0.
    tokens = [viterbi[:, -1].argmax()]
    for i in range(len(observations) - 1, 0, -1):
        tokens.append(backpts[tokens[-1], i])
    return tokens[::-1]
70 | 
if __name__ == '__main__':
    # Toy model: two hidden states and three observable symptoms.
    states = ('Healthy', 'Fever')
    symptoms = ('normal', 'cold', 'dizzy')
    start_probability = {'Healthy': 0.6, 'Fever': 0.4}
    transition_probability = {
        'Healthy': {'Healthy': 0.7, 'Fever': 0.3},
        'Fever': {'Healthy': 0.4, 'Fever': 0.6}
    }
    emission_probability = {
        'Healthy': {'normal': 0.5, 'cold': 0.4, 'dizzy': 0.1},
        'Fever': {'normal': 0.1, 'cold': 0.3, 'dizzy': 0.6}
    }

    # Lay the tables out as arrays, ordered by `states` and `symptoms`.
    initial_prob = np.array([[start_probability[s]] for s in states])
    trans_prob = np.array([[transition_probability[src][dst] for dst in states]
                           for src in states])
    obs_prob = np.array([[emission_probability[s][o] for o in symptoms]
                         for s in states])
    hmm = HMM(initial_prob=initial_prob, trans_prob=trans_prob, obs_prob=obs_prob)

    # Observation sequence given as indices into `symptoms`.
    observations = [0, 1, 1, 2, 1]
    with tf.Session() as sess:
        prob = forward_algorithm(sess, hmm, observations)
        print('Probability of observing {} is {}'.format(observations, prob))

        seq = viterbi_decode(sess, hmm, observations)
        print('Most likely hidden states are {}'.format(seq))
95 | 
96 | 


--------------------------------------------------------------------------------
/ch10_rnn/regression.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import tensorflow as tf
  3 | from tensorflow.python.ops import rnn, rnn_cell
  4 | import data_loader
  5 | import matplotlib.pyplot as plt
  6 | 
  7 | class SeriesPredictor:
  8 | 
  9 |     def __init__(self, input_dim, seq_size, hidden_dim):
 10 |         # Hyperparameters
 11 |         self.input_dim = input_dim
 12 |         self.seq_size = seq_size
 13 |         self.hidden_dim = hidden_dim
 14 | 
 15 |         # Weight variables and input placeholders
 16 |         self.W_out = tf.Variable(tf.random_normal([hidden_dim, 1]), name='W_out')
 17 |         self.b_out = tf.Variable(tf.random_normal([1]), name='b_out')
 18 |         self.x = tf.placeholder(tf.float32, [None, seq_size, input_dim])
 19 |         self.y = tf.placeholder(tf.float32, [None, seq_size])
 20 | 
 21 |         # Cost optimizer
 22 |         self.cost = tf.reduce_mean(tf.square(self.model() - self.y))
 23 |         self.train_op = tf.train.AdamOptimizer(learning_rate=0.003).minimize(self.cost)
 24 | 
 25 |         # Auxiliary ops
 26 |         self.saver = tf.train.Saver()
 27 | 
 28 |     def model(self):
 29 |         """
 30 |         :param x: inputs of size [T, batch_size, input_size]
 31 |         :param W: matrix of fully-connected output layer weights
 32 |         :param b: vector of fully-connected output layer biases
 33 |         """
 34 |         cell = rnn_cell.BasicLSTMCell(self.hidden_dim)
 35 |         outputs, states = rnn.dynamic_rnn(cell, self.x, dtype=tf.float32)
 36 |         num_examples = tf.shape(self.x)[0]
 37 |         W_repeated = tf.tile(tf.expand_dims(self.W_out, 0), [num_examples, 1, 1])
 38 |         out = tf.batch_matmul(outputs, W_repeated) + self.b_out
 39 |         out = tf.squeeze(out)
 40 |         return out
 41 | 
 42 |     def train(self, train_x, train_y, test_x, test_y):
 43 |         with tf.Session() as sess:
 44 |             tf.get_variable_scope().reuse_variables()
 45 |             sess.run(tf.initialize_all_variables())
 46 |             max_patience = 3
 47 |             patience = max_patience
 48 |             min_test_err = float('inf')
 49 |             step = 0
 50 |             while patience > 0:
 51 |                 _, train_err = sess.run([self.train_op, self.cost], feed_dict={self.x: train_x, self.y: train_y})
 52 |                 if step % 100 == 0:
 53 |                     test_err = sess.run(self.cost, feed_dict={self.x: test_x, self.y: test_y})
 54 |                     print('step: {}\t\ttrain err: {}\t\ttest err: {}'.format(step, train_err, test_err))
 55 |                     if test_err < min_test_err:
 56 |                         min_test_err = test_err
 57 |                         patience = max_patience
 58 |                     else:
 59 |                         patience -= 1
 60 |                 step += 1
 61 |             save_path = self.saver.save(sess, 'model.ckpt')
 62 |             print('Model saved to {}'.format(save_path))
 63 | 
 64 |     def test(self, sess, test_x):
 65 |         tf.get_variable_scope().reuse_variables()
 66 |         self.saver.restore(sess, 'model.ckpt')
 67 |         output = sess.run(self.model(), feed_dict={self.x: test_x})
 68 |         return output
 69 | 
 70 | 
 71 | def plot_results(train_x, predictions, actual, filename):
 72 |     plt.figure()
 73 |     num_train = len(train_x)
 74 |     plt.plot(list(range(num_train)), train_x, color='b', label='training data')
 75 |     plt.plot(list(range(num_train, num_train + len(predictions))), predictions, color='r', label='predicted')
 76 |     plt.plot(list(range(num_train, num_train + len(actual))), actual, color='g', label='test data')
 77 |     plt.legend()
 78 |     if filename is not None:
 79 |         plt.savefig(filename)
 80 |     else:
 81 |         plt.show()
 82 | 
 83 | 
 84 | if __name__ == '__main__':
 85 |     seq_size = 5
 86 |     predictor = SeriesPredictor(input_dim=1, seq_size=seq_size, hidden_dim=5)
 87 |     data = data_loader.load_series('international-airline-passengers.csv')
 88 |     train_data, actual_vals = data_loader.split_data(data)
 89 | 
 90 |     train_x, train_y = [], []
 91 |     for i in range(len(train_data) - seq_size - 1):
 92 |         train_x.append(np.expand_dims(train_data[i:i+seq_size], axis=1).tolist())
 93 |         train_y.append(train_data[i+1:i+seq_size+1])
 94 | 
 95 |     test_x, test_y = [], []
 96 |     for i in range(len(actual_vals) - seq_size - 1):
 97 |         test_x.append(np.expand_dims(actual_vals[i:i+seq_size], axis=1).tolist())
 98 |         test_y.append(actual_vals[i+1:i+seq_size+1])
 99 | 
100 |     predictor.train(train_x, train_y, test_x, test_y)
101 | 
102 |     with tf.Session() as sess:
103 |         predicted_vals = predictor.test(sess, test_x)[:,0]
104 |         print('predicted_vals', np.shape(predicted_vals))
105 |         plot_results(train_data, predicted_vals, actual_vals, 'predictions.png')
106 | 
107 |         prev_seq = train_x[-1]
108 |         predicted_vals = []
109 |         for i in range(20):
110 |             next_seq = predictor.test(sess, [prev_seq])
111 |             predicted_vals.append(next_seq[-1])
112 |             prev_seq = np.vstack((prev_seq[1:], next_seq[-1]))
113 |         plot_results(train_data, predicted_vals, actual_vals, 'hallucinations.png')
114 | 


--------------------------------------------------------------------------------
/ch06_hmm/Concept01_forward.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Ch `06`: Concept `01`"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Hidden Markov model forward algorithm"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "This code is a bit complicated if you don't already know how HMMs work, so please see the book chapter for step-by-step explanations. I'll try to improve the documentation; feel free to send a pull request with your own!\n",
 22 |     "\n",
 23 |     "First, let's import TensorFlow and NumPy:"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": 2,
 29 |    "metadata": {
 30 |     "collapsed": true
 31 |    },
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "import numpy as np\n",
 35 |     "import tensorflow as tf"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "markdown",
 40 |    "metadata": {},
 41 |    "source": [
 42 |     "Define the HMM model:"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": 3,
 48 |    "metadata": {
 49 |     "collapsed": true
 50 |    },
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "class HMM(object):\n",
 54 |     "    def __init__(self, initial_prob, trans_prob, obs_prob):\n",
 55 |     "        self.N = np.size(initial_prob)\n",
 56 |     "        self.initial_prob = initial_prob\n",
 57 |     "        self.trans_prob = trans_prob\n",
 58 |     "        self.emission = tf.constant(obs_prob)\n",
 59 |     "\n",
 60 |     "        assert self.initial_prob.shape == (self.N, 1)\n",
 61 |     "        assert self.trans_prob.shape == (self.N, self.N)\n",
 62 |     "        assert obs_prob.shape[0] == self.N\n",
 63 |     "\n",
 64 |     "        self.obs_idx = tf.placeholder(tf.int32)\n",
 65 |     "        self.fwd = tf.placeholder(tf.float64)\n",
 66 |     "\n",
 67 |     "    def get_emission(self, obs_idx):\n",
 68 |     "        slice_location = [0, obs_idx]\n",
 69 |     "        num_rows = tf.shape(self.emission)[0]\n",
 70 |     "        slice_shape = [num_rows, 1]\n",
 71 |     "        return tf.slice(self.emission, slice_location, slice_shape)\n",
 72 |     "\n",
 73 |     "    def forward_init_op(self):\n",
 74 |     "        obs_prob = self.get_emission(self.obs_idx)\n",
 75 |     "        fwd = tf.mul(self.initial_prob, obs_prob)\n",
 76 |     "        return fwd\n",
 77 |     "\n",
 78 |     "    def forward_op(self):\n",
 79 |     "        transitions = tf.matmul(self.fwd, tf.transpose(self.get_emission(self.obs_idx)))\n",
 80 |     "        weighted_transitions = transitions * self.trans_prob\n",
 81 |     "        fwd = tf.reduce_sum(weighted_transitions, 0)\n",
 82 |     "        return tf.reshape(fwd, tf.shape(self.fwd))"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "markdown",
 87 |    "metadata": {},
 88 |    "source": [
 89 |     "Define the forward algorithm:"
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "code",
 94 |    "execution_count": 5,
 95 |    "metadata": {
 96 |     "collapsed": true
 97 |    },
 98 |    "outputs": [],
 99 |    "source": [
100 |     "def forward_algorithm(sess, hmm, observations):\n",
101 |     "    fwd = sess.run(hmm.forward_init_op(), feed_dict={hmm.obs_idx: observations[0]})\n",
102 |     "    for t in range(1, len(observations)):\n",
103 |     "        fwd = sess.run(hmm.forward_op(), feed_dict={hmm.obs_idx: observations[t], hmm.fwd: fwd})\n",
104 |     "    prob = sess.run(tf.reduce_sum(fwd))\n",
105 |     "    return prob"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "markdown",
110 |    "metadata": {},
111 |    "source": [
112 |     "Let's try it out:"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": 6,
118 |    "metadata": {
119 |     "collapsed": false
120 |    },
121 |    "outputs": [
122 |     {
123 |      "name": "stdout",
124 |      "output_type": "stream",
125 |      "text": [
126 |       "Probability of observing [0, 1, 1, 2, 1] is 0.0046421488\n"
127 |      ]
128 |     }
129 |    ],
130 |    "source": [
131 |     "if __name__ == '__main__':\n",
132 |     "    initial_prob = np.array([[0.6], [0.4]])\n",
133 |     "    trans_prob = np.array([[0.7, 0.3], [0.4, 0.6]])\n",
134 |     "    obs_prob = np.array([[0.5, 0.4, 0.1], [0.1, 0.3, 0.6]])\n",
135 |     "\n",
136 |     "    hmm = HMM(initial_prob=initial_prob, trans_prob=trans_prob, obs_prob=obs_prob)\n",
137 |     "\n",
138 |     "    observations = [0, 1, 1, 2, 1]\n",
139 |     "    with tf.Session() as sess:\n",
140 |     "        prob = forward_algorithm(sess, hmm, observations)\n",
141 |     "        print('Probability of observing {} is {}'.format(observations, prob))"
142 |    ]
143 |   }
144 |  ],
145 |  "metadata": {
146 |   "kernelspec": {
147 |    "display_name": "Python 2",
148 |    "language": "python",
149 |    "name": "python2"
150 |   },
151 |   "language_info": {
152 |    "codemirror_mode": {
153 |     "name": "ipython",
154 |     "version": 2
155 |    },
156 |    "file_extension": ".py",
157 |    "mimetype": "text/x-python",
158 |    "name": "python",
159 |    "nbconvert_exporter": "python",
160 |    "pygments_lexer": "ipython2",
161 |    "version": "2.7.12"
162 |   }
163 |  },
164 |  "nbformat": 4,
165 |  "nbformat_minor": 1
166 | }
167 | 


--------------------------------------------------------------------------------
/ch08_rl/rl.py:
--------------------------------------------------------------------------------
  1 | from yahoo_finance import Share
  2 | from matplotlib import pyplot as plt
  3 | import numpy as np
  4 | import random
  5 | import tensorflow as tf
  6 | import random
  7 | 
  8 | 
  9 | class DecisionPolicy:
 10 |     def select_action(self, current_state, step):
 11 |         pass
 12 | 
 13 |     def update_q(self, state, action, reward, next_state):
 14 |         pass
 15 | 
 16 | 
 17 | class RandomDecisionPolicy(DecisionPolicy):
 18 |     def __init__(self, actions):
 19 |         self.actions = actions
 20 | 
 21 |     def select_action(self, current_state, step):
 22 |         action = self.actions[random.randint(0, len(self.actions) - 1)]
 23 |         return action
 24 | 
 25 | 
 26 | class QLearningDecisionPolicy(DecisionPolicy):
 27 |     def __init__(self, actions, input_dim):
 28 |         self.epsilon = 0.9
 29 |         self.gamma = 0.001
 30 |         self.actions = actions
 31 |         output_dim = len(actions)
 32 |         h1_dim = 200
 33 | 
 34 |         self.x = tf.placeholder(tf.float32, [None, input_dim])
 35 |         self.y = tf.placeholder(tf.float32, [output_dim])
 36 |         W1 = tf.Variable(tf.random_normal([input_dim, h1_dim]))
 37 |         b1 = tf.Variable(tf.constant(0.1, shape=[h1_dim]))
 38 |         h1 = tf.nn.relu(tf.matmul(self.x, W1) + b1)
 39 |         W2 = tf.Variable(tf.random_normal([h1_dim, output_dim]))
 40 |         b2 = tf.Variable(tf.constant(0.1, shape=[output_dim]))
 41 |         self.q = tf.nn.relu(tf.matmul(h1, W2) + b2)
 42 | 
 43 |         loss = tf.square(self.y - self.q)
 44 |         self.train_op = tf.train.AdagradOptimizer(0.01).minimize(loss)
 45 |         self.sess = tf.Session()
 46 |         self.sess.run(tf.initialize_all_variables())
 47 | 
 48 |     def select_action(self, current_state, step):
 49 |         threshold = min(self.epsilon, step / 1000.)
 50 |         if random.random() < threshold:
 51 |             # Exploit best option with probability epsilon
 52 |             action_q_vals = self.sess.run(self.q, feed_dict={self.x: current_state})
 53 |             action_idx = np.argmax(action_q_vals)  # TODO: replace w/ tensorflow's argmax
 54 |             action = self.actions[action_idx]
 55 |         else:
 56 |             # Explore random option with probability 1 - epsilon
 57 |             action = self.actions[random.randint(0, len(self.actions) - 1)]
 58 |         return action
 59 | 
 60 |     def update_q(self, state, action, reward, next_state):
 61 |         action_q_vals = self.sess.run(self.q, feed_dict={self.x: state})
 62 |         next_action_q_vals = self.sess.run(self.q, feed_dict={self.x: next_state})
 63 |         next_action_idx = np.argmax(next_action_q_vals)
 64 |         action_q_vals[0, next_action_idx] = reward + self.gamma * next_action_q_vals[0, next_action_idx]
 65 |         action_q_vals = np.squeeze(np.asarray(action_q_vals))
 66 |         self.sess.run(self.train_op, feed_dict={self.x: state, self.y: action_q_vals})
 67 | 
 68 | 
 69 | def run_simulation(policy, initial_budget, initial_num_stocks, prices, hist, debug=False):
 70 |     budget = initial_budget
 71 |     num_stocks = initial_num_stocks
 72 |     share_value = 0
 73 |     transitions = list()
 74 |     for i in range(len(prices) - hist - 1):
 75 |         if i % 100 == 0:
 76 |             print('progress {:.2f}%'.format(float(100*i) / (len(prices) - hist - 1)))
 77 |         current_state = np.asmatrix(np.hstack((prices[i:i+hist], budget, num_stocks)))
 78 |         current_portfolio = budget + num_stocks * share_value
 79 |         action = policy.select_action(current_state, i)
 80 |         share_value = float(prices[i + hist + 1])
 81 |         if action == 'Buy' and budget >= share_value:
 82 |             budget -= share_value
 83 |             num_stocks += 1
 84 |         elif action == 'Sell' and num_stocks > 0:
 85 |             budget += share_value
 86 |             num_stocks -= 1
 87 |         else:
 88 |             action = 'Hold'
 89 |         new_portfolio = budget + num_stocks * share_value
 90 |         reward = new_portfolio - current_portfolio
 91 |         next_state = np.asmatrix(np.hstack((prices[i+1:i+hist+1], budget, num_stocks)))
 92 |         transitions.append((current_state, action, reward, next_state))
 93 |         policy.update_q(current_state, action, reward, next_state)
 94 | 
 95 |     portfolio = budget + num_stocks * share_value
 96 |     if debug:
 97 |         print('${}\t{} shares'.format(budget, num_stocks))
 98 |     return portfolio
 99 | 
100 | 
101 | def run_simulations(policy, budget, num_stocks, prices, hist):
102 |     num_tries = 10
103 |     final_portfolios = list()
104 |     for i in range(num_tries):
105 |         final_portfolio = run_simulation(policy, budget, num_stocks, prices, hist)
106 |         final_portfolios.append(final_portfolio)
107 |     avg, std = np.mean(final_portfolios), np.std(final_portfolios)
108 |     return avg, std
109 | 
110 | 
111 | def get_prices(share_symbol, start_date, end_date, cache_filename='stock_prices.npy'):
112 |     try:
113 |         stock_prices = np.load(cache_filename)
114 |     except IOError:
115 |         share = Share(share_symbol)
116 |         stock_hist = share.get_historical(start_date, end_date)
117 |         stock_prices = [stock_price['Open'] for stock_price in stock_hist]
118 |         np.save(cache_filename, stock_prices)
119 | 
120 |     return stock_prices
121 | 
122 | 
123 | def plot_prices(prices):
124 |     plt.title('Opening stock prices')
125 |     plt.xlabel('day')
126 |     plt.ylabel('price ($)')
127 |     plt.plot(prices)
128 |     plt.savefig('prices.png')
129 | 
130 | 
131 | if __name__ == '__main__':
132 |     prices = get_prices('MSFT', '1992-07-22', '2016-07-22')
133 |     plot_prices(prices)
134 |     actions = ['Buy', 'Sell', 'Hold']
135 |     hist = 200
136 |     # policy = RandomDecisionPolicy(actions)
137 |     policy = QLearningDecisionPolicy(actions, hist + 2)
138 |     budget = 1000.0
139 |     num_stocks = 0
140 |     avg, std = run_simulations(policy, budget, num_stocks, prices, hist)
141 |     print(avg, std)
142 | 
143 | 


--------------------------------------------------------------------------------
/ch07_autoencoder/Concept03_denoising.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Ch `07`: Concept `03`"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Denoising autoencoder"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {
 20 |     "collapsed": true
 21 |    },
 22 |    "source": [
 23 |     "A denoising autoencoder has pretty much the same architecture as a normal autoencoder. The input is corrupted with noise, and the cost function trains the network to denoise it by minimizing the reconstruction error between the output decoded from the noisy input and the clean original."
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": 1,
 29 |    "metadata": {
 30 |     "collapsed": true
 31 |    },
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "import tensorflow as tf\n",
 35 |     "import numpy as np\n",
 36 |     "import time\n",
 37 |     "\n",
 38 |     "def get_batch(X, Xn, size):\n",
 39 |     "    a = np.random.choice(len(X), size, replace=False)\n",
 40 |     "    return X[a], Xn[a]\n",
 41 |     "\n",
 42 |     "class Denoiser:\n",
 43 |     "\n",
 44 |     "    def __init__(self, input_dim, hidden_dim, epoch=10000, batch_size=50, learning_rate=0.001):\n",
 45 |     "        self.epoch = epoch\n",
 46 |     "        self.batch_size = batch_size\n",
 47 |     "        self.learning_rate = learning_rate\n",
 48 |     "\n",
 49 |     "        self.x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x')\n",
 50 |     "        self.x_noised = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x_noised')\n",
 51 |     "        with tf.name_scope('encode'):\n",
 52 |     "            self.weights1 = tf.Variable(tf.random_normal([input_dim, hidden_dim], dtype=tf.float32), name='weights')\n",
 53 |     "            self.biases1 = tf.Variable(tf.zeros([hidden_dim]), name='biases')\n",
 54 |     "            self.encoded = tf.nn.sigmoid(tf.matmul(self.x_noised, self.weights1) + self.biases1, name='encoded')\n",
 55 |     "        with tf.name_scope('decode'):\n",
 56 |     "            weights = tf.Variable(tf.random_normal([hidden_dim, input_dim], dtype=tf.float32), name='weights')\n",
 57 |     "            biases = tf.Variable(tf.zeros([input_dim]), name='biases')\n",
 58 |     "            self.decoded = tf.matmul(self.encoded, weights) + biases\n",
 59 |     "        self.loss = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(self.x, self.decoded))))\n",
 60 |     "        self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)\n",
 61 |     "        self.saver = tf.train.Saver()\n",
 62 |     "\n",
 63 |     "    def add_noise(self, data):\n",
 64 |     "        noise_type = 'mask-0.2'\n",
 65 |     "        if noise_type == 'gaussian':\n",
 66 |     "            n = np.random.normal(0, 0.1, np.shape(data))\n",
 67 |     "            return data + n\n",
 68 |     "        if 'mask' in noise_type:\n",
 69 |     "            frac = float(noise_type.split('-')[1])\n",
 70 |     "            temp = np.copy(data)\n",
 71 |     "            for i in temp:\n",
 72 |     "                n = np.random.choice(len(i), round(frac * len(i)), replace=False)\n",
 73 |     "                i[n] = 0\n",
 74 |     "            return temp\n",
 75 |     "\n",
 76 |     "    def train(self, data):\n",
 77 |     "        data_noised = self.add_noise(data)\n",
 78 |     "        with open('log.csv', 'w') as writer:\n",
 79 |     "            with tf.Session() as sess:\n",
 80 |     "                sess.run(tf.initialize_all_variables())\n",
 81 |     "                for i in range(self.epoch):\n",
 82 |     "                    for j in range(50):\n",
 83 |     "                        batch_data, batch_data_noised = get_batch(data, data_noised, self.batch_size)\n",
 84 |     "                        l, _ = sess.run([self.loss, self.train_op], feed_dict={self.x: batch_data, self.x_noised: batch_data_noised})\n",
 85 |     "                    if i % 10 == 0:\n",
 86 |     "                        print('epoch {0}: loss = {1}'.format(i, l))\n",
 87 |     "                        self.saver.save(sess, './model.ckpt')\n",
 88 |     "                        epoch_time = int(time.time())\n",
 89 |     "                        row_str = str(epoch_time) + ',' + str(i) + ',' + str(l) + '\\n'\n",
 90 |     "                        writer.write(row_str)\n",
 91 |     "                        writer.flush()\n",
 92 |     "                self.saver.save(sess, './model.ckpt')\n",
 93 |     "\n",
 94 |     "    def test(self, data):\n",
 95 |     "        with tf.Session() as sess:\n",
 96 |     "            self.saver.restore(sess, './model.ckpt')\n",
 97 |     "            hidden, reconstructed = sess.run([self.encoded, self.decoded], feed_dict={self.x: data})\n",
 98 |     "        print('input', data)\n",
 99 |     "        print('compressed', hidden)\n",
100 |     "        print('reconstructed', reconstructed)\n",
101 |     "        return reconstructed\n",
102 |     "\n",
103 |     "    def get_params(self):\n",
104 |     "        with tf.Session() as sess:\n",
105 |     "            self.saver.restore(sess, './model.ckpt')\n",
106 |     "            weights, biases = sess.run([self.weights1, self.biases1])\n",
107 |     "        return weights, biases"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": null,
113 |    "metadata": {
114 |     "collapsed": true
115 |    },
116 |    "outputs": [],
117 |    "source": []
118 |   }
119 |  ],
120 |  "metadata": {
121 |   "kernelspec": {
122 |    "display_name": "Python 2",
123 |    "language": "python",
124 |    "name": "python2"
125 |   },
126 |   "language_info": {
127 |    "codemirror_mode": {
128 |     "name": "ipython",
129 |     "version": 2
130 |    },
131 |    "file_extension": ".py",
132 |    "mimetype": "text/x-python",
133 |    "name": "python",
134 |    "nbconvert_exporter": "python",
135 |    "pygments_lexer": "ipython2",
136 |    "version": "2.7.12"
137 |   }
138 |  },
139 |  "nbformat": 4,
140 |  "nbformat_minor": 1
141 | }
142 | 


--------------------------------------------------------------------------------
/ch10_rnn/Concept02_rnn.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Ch `10`: Concept `02`"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Recurrent Neural Network"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "Import the relevant libraries:"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 1,
 27 |    "metadata": {
 28 |     "collapsed": true
 29 |    },
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "import numpy as np\n",
 33 |     "import tensorflow as tf\n",
 34 |     "from tensorflow.python.ops import rnn, rnn_cell"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "markdown",
 39 |    "metadata": {},
 40 |    "source": [
 41 |     "Define the RNN model:"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 2,
 47 |    "metadata": {
 48 |     "collapsed": true
 49 |    },
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "class SeriesPredictor:\n",
 53 |     "\n",
 54 |     "    def __init__(self, input_dim, seq_size, hidden_dim=10):\n",
 55 |     "        # Hyperparameters\n",
 56 |     "        self.input_dim = input_dim\n",
 57 |     "        self.seq_size = seq_size\n",
 58 |     "        self.hidden_dim = hidden_dim\n",
 59 |     "\n",
 60 |     "        # Weight variables and input placeholders\n",
 61 |     "        self.W_out = tf.Variable(tf.random_normal([hidden_dim, 1]), name='W_out')\n",
 62 |     "        self.b_out = tf.Variable(tf.random_normal([1]), name='b_out')\n",
 63 |     "        self.x = tf.placeholder(tf.float32, [None, seq_size, input_dim])\n",
 64 |     "        self.y = tf.placeholder(tf.float32, [None, seq_size])\n",
 65 |     "\n",
 66 |     "        # Cost optimizer\n",
 67 |     "        self.cost = tf.reduce_mean(tf.square(self.model() - self.y))\n",
 68 |     "        self.train_op = tf.train.AdamOptimizer().minimize(self.cost)\n",
 69 |     "\n",
 70 |     "        # Auxiliary ops\n",
 71 |     "        self.saver = tf.train.Saver()\n",
 72 |     "\n",
 73 |     "    def model(self):\n",
 74 |     "        \"\"\"\n",
 75 |     "        :param x: inputs of size [T, batch_size, input_size]\n",
 76 |     "        :param W: matrix of fully-connected output layer weights\n",
 77 |     "        :param b: vector of fully-connected output layer biases\n",
 78 |     "        \"\"\"\n",
 79 |     "        cell = rnn_cell.BasicLSTMCell(self.hidden_dim)\n",
 80 |     "        outputs, states = rnn.dynamic_rnn(cell, self.x, dtype=tf.float32)\n",
 81 |     "        num_examples = tf.shape(self.x)[0]\n",
 82 |     "        W_repeated = tf.tile(tf.expand_dims(self.W_out, 0), [num_examples, 1, 1])\n",
 83 |     "        out = tf.batch_matmul(outputs, W_repeated) + self.b_out\n",
 84 |     "        out = tf.squeeze(out)\n",
 85 |     "        return out\n",
 86 |     "\n",
 87 |     "    def train(self, train_x, train_y):\n",
 88 |     "        with tf.Session() as sess:\n",
 89 |     "            tf.get_variable_scope().reuse_variables()\n",
 90 |     "            sess.run(tf.initialize_all_variables())\n",
 91 |     "            for i in range(1000):\n",
 92 |     "                _, mse = sess.run([self.train_op, self.cost], feed_dict={self.x: train_x, self.y: train_y})\n",
 93 |     "                if i % 100 == 0:\n",
 94 |     "                    print(i, mse)\n",
 95 |     "            save_path = self.saver.save(sess, 'model.ckpt')\n",
 96 |     "            print('Model saved to {}'.format(save_path))\n",
 97 |     "\n",
 98 |     "    def test(self, test_x):\n",
 99 |     "        with tf.Session() as sess:\n",
100 |     "            tf.get_variable_scope().reuse_variables()\n",
101 |     "            self.saver.restore(sess, 'model.ckpt')\n",
102 |     "            output = sess.run(self.model(), feed_dict={self.x: test_x})\n",
103 |     "            print(output)\n",
104 |     "\n",
105 |     "\n"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "markdown",
110 |    "metadata": {},
111 |    "source": [
112 |     "Let's train our series predictor:"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": 3,
118 |    "metadata": {
119 |     "collapsed": false
120 |    },
121 |    "outputs": [
122 |     {
123 |      "name": "stdout",
124 |      "output_type": "stream",
125 |      "text": [
126 |       "(0, 114.29322)\n",
127 |       "(100, 89.61042)\n",
128 |       "(200, 59.922985)\n",
129 |       "(300, 30.993044)\n",
130 |       "(400, 14.117287)\n",
131 |       "(500, 5.8190427)\n",
132 |       "(600, 3.971853)\n",
133 |       "(700, 2.5290568)\n",
134 |       "(800, 1.6387771)\n",
135 |       "(900, 1.1218418)\n",
136 |       "Model saved to model.ckpt\n",
137 |       "[[  1.08202362   2.7941215    4.96916628   6.67084217]\n",
138 |       " [  4.17165422   9.28973103  11.73881912  12.62569427]]\n"
139 |      ]
140 |     }
141 |    ],
142 |    "source": [
143 |     "if __name__ == '__main__':\n",
144 |     "    predictor = SeriesPredictor(input_dim=1, seq_size=4, hidden_dim=10)\n",
145 |     "    train_x = [[[1], [2], [5], [6]],\n",
146 |     "               [[5], [7], [7], [8]],\n",
147 |     "               [[3], [4], [5], [7]]]\n",
148 |     "    train_y = [[1, 3, 7, 11],\n",
149 |     "               [5, 12, 14, 15],\n",
150 |     "               [3, 7, 9, 12]]\n",
151 |     "    predictor.train(train_x, train_y)\n",
152 |     "\n",
153 |     "    test_x = [[[1], [2], [3], [4]],  # 1, 3, 5, 7\n",
154 |     "              [[4], [5], [6], [7]]]  # 4, 9, 11, 13\n",
155 |     "    predictor.test(test_x)"
156 |    ]
157 |   }
158 |  ],
159 |  "metadata": {
160 |   "kernelspec": {
161 |    "display_name": "Python 2",
162 |    "language": "python",
163 |    "name": "python2"
164 |   },
165 |   "language_info": {
166 |    "codemirror_mode": {
167 |     "name": "ipython",
168 |     "version": 2
169 |    },
170 |    "file_extension": ".py",
171 |    "mimetype": "text/x-python",
172 |    "name": "python",
173 |    "nbconvert_exporter": "python",
174 |    "pygments_lexer": "ipython2",
175 |    "version": "2.7.12"
176 |   }
177 |  },
178 |  "nbformat": 4,
179 |  "nbformat_minor": 1
180 | }
181 | 


--------------------------------------------------------------------------------
/ch09_cnn/Concept03_cnn.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Ch `09`: Concept `03`"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Convolutional Neural Network"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "Load data from CIFAR-10."
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 5,
 27 |    "metadata": {
 28 |     "collapsed": false
 29 |    },
 30 |    "outputs": [
 31 |     {
 32 |      "name": "stdout",
 33 |      "output_type": "stream",
 34 |      "text": [
 35 |       "('names', ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'])\n",
 36 |       "((50000, 3072), (50000,))\n"
 37 |      ]
 38 |     }
 39 |    ],
 40 |    "source": [
 41 |     "import numpy as np\n",
 42 |     "import matplotlib.pyplot as plt\n",
 43 |     "import cifar_tools\n",
 44 |     "import tensorflow as tf\n",
 45 |     "\n",
 46 |     "learning_rate = 0.001\n",
 47 |     "\n",
 48 |     "names, data, labels = \\\n",
 49 |     "    cifar_tools.read_data('./cifar-10-batches-py')\n",
 50 |     "\n"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "markdown",
 55 |    "metadata": {},
 56 |    "source": [
 57 |     "Define the placeholders and variables for the CNN model:"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": 6,
 63 |    "metadata": {
 64 |     "collapsed": true
 65 |    },
 66 |    "outputs": [],
 67 |    "source": [
 68 |     "x = tf.placeholder(tf.float32, [None, 24 * 24])\n",
 69 |     "y = tf.placeholder(tf.float32, [None, len(names)])\n",
 70 |     "W1 = tf.Variable(tf.random_normal([5, 5, 1, 64]))\n",
 71 |     "b1 = tf.Variable(tf.random_normal([64]))\n",
 72 |     "W2 = tf.Variable(tf.random_normal([5, 5, 64, 64]))\n",
 73 |     "b2 = tf.Variable(tf.random_normal([64]))\n",
 74 |     "W3 = tf.Variable(tf.random_normal([6*6*64, 1024]))\n",
 75 |     "b3 = tf.Variable(tf.random_normal([1024]))\n",
 76 |     "W_out = tf.Variable(tf.random_normal([1024, len(names)]))\n",
 77 |     "b_out = tf.Variable(tf.random_normal([len(names)]))"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "markdown",
 82 |    "metadata": {},
 83 |    "source": [
 84 |     "Define helper functions for the convolution and maxpool layers:"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "code",
 89 |    "execution_count": 7,
 90 |    "metadata": {
 91 |     "collapsed": true
 92 |    },
 93 |    "outputs": [],
 94 |    "source": [
 95 |     "def conv_layer(x, W, b):\n",
 96 |     "    conv = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')\n",
 97 |     "    conv_with_b = tf.nn.bias_add(conv, b)\n",
 98 |     "    conv_out = tf.nn.relu(conv_with_b)\n",
 99 |     "    return conv_out\n",
100 |     "\n",
101 |     "\n",
102 |     "def maxpool_layer(conv, k=2):\n",
103 |     "    return tf.nn.max_pool(conv, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "markdown",
108 |    "metadata": {},
109 |    "source": [
110 |     "The entire CNN model is defined within the following function:"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": 8,
116 |    "metadata": {
117 |     "collapsed": true
118 |    },
119 |    "outputs": [],
120 |    "source": [
121 |     "def model():\n",
122 |     "    x_reshaped = tf.reshape(x, shape=[-1, 24, 24, 1])\n",
123 |     "\n",
124 |     "    conv_out1 = conv_layer(x_reshaped, W1, b1)\n",
125 |     "    maxpool_out1 = maxpool_layer(conv_out1)\n",
126 |     "    norm1 = tf.nn.lrn(maxpool_out1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)\n",
127 |     "    conv_out2 = conv_layer(norm1, W2, b2)\n",
128 |     "    norm2 = tf.nn.lrn(conv_out2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)\n",
129 |     "    maxpool_out2 = maxpool_layer(norm2)\n",
130 |     "\n",
131 |     "    maxpool_reshaped = tf.reshape(maxpool_out2, [-1, W3.get_shape().as_list()[0]])\n",
132 |     "    local = tf.add(tf.matmul(maxpool_reshaped, W3), b3)\n",
133 |     "    local_out = tf.nn.relu(local)\n",
134 |     "\n",
135 |     "    out = tf.add(tf.matmul(local_out, W_out), b_out)\n",
136 |     "    return out"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "markdown",
141 |    "metadata": {},
142 |    "source": [
143 |     "Here's the cost function used to train the classifier, along with the training op and an accuracy measure:"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": 9,
149 |    "metadata": {
150 |     "collapsed": true
151 |    },
152 |    "outputs": [],
153 |    "source": [
154 |     "model_op = model()\n",
155 |     "\n",
156 |     "cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(model_op, y))\n",
157 |     "train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)\n",
158 |     "\n",
159 |     "correct_pred = tf.equal(tf.argmax(model_op, 1), tf.argmax(y, 1))\n",
160 |     "accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "markdown",
165 |    "metadata": {},
166 |    "source": [
167 |     "Let's train the classifier on our data:"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "code",
172 |    "execution_count": null,
173 |    "metadata": {
174 |     "collapsed": false
175 |    },
176 |    "outputs": [
177 |     {
178 |      "name": "stdout",
179 |      "output_type": "stream",
180 |      "text": [
181 |       "('batch size', 250)\n",
182 |       "('EPOCH', 0)\n",
183 |       "(0, 0.092)\n",
184 |       "(1000, 0.12)\n"
185 |      ]
186 |     }
187 |    ],
188 |    "source": [
189 |     "with tf.Session() as sess:\n",
190 |     "    sess.run(tf.initialize_all_variables())\n",
191 |     "    onehot_labels = tf.one_hot(labels, len(names), on_value=1., off_value=0., axis=-1)\n",
192 |     "    onehot_vals = sess.run(onehot_labels)\n",
193 |     "    batch_size = len(data) / 200\n",
194 |     "    print('batch size', batch_size)\n",
195 |     "    for j in range(0, 1000):\n",
196 |     "        print('EPOCH', j)\n",
197 |     "        for i in range(0, len(data), batch_size):\n",
198 |     "            batch_data = data[i:i+batch_size, :]\n",
199 |     "            batch_onehot_vals = onehot_vals[i:i+batch_size, :]\n",
200 |     "            _, accuracy_val = sess.run([train_op, accuracy], feed_dict={x: batch_data, y: batch_onehot_vals})\n",
201 |     "            if i % 1000 == 0:\n",
202 |     "                print(i, accuracy_val)\n",
203 |     "        print('DONE WITH EPOCH')\n"
204 |    ]
205 |   }
206 |  ],
207 |  "metadata": {
208 |   "kernelspec": {
209 |    "display_name": "Python 2",
210 |    "language": "python",
211 |    "name": "python2"
212 |   },
213 |   "language_info": {
214 |    "codemirror_mode": {
215 |     "name": "ipython",
216 |     "version": 2
217 |    },
218 |    "file_extension": ".py",
219 |    "mimetype": "text/x-python",
220 |    "name": "python",
221 |    "nbconvert_exporter": "python",
222 |    "pygments_lexer": "ipython2",
223 |    "version": "2.7.12"
224 |   }
225 |  },
226 |  "nbformat": 4,
227 |  "nbformat_minor": 1
228 | }
229 | 


--------------------------------------------------------------------------------
/ch06_hmm/Concept02_hmm.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Ch `06`: Concept `02`"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Viterbi parse of a Hidden Markov model"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "Import TensorFlow and NumPy:"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 4,
 27 |    "metadata": {
 28 |     "collapsed": true
 29 |    },
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "import numpy as np\n",
 33 |     "import tensorflow as tf"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "markdown",
 38 |    "metadata": {},
 39 |    "source": [
 40 |     "Create the same HMM model as before. This time, we'll include a couple of additional functions."
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": 5,
 46 |    "metadata": {
 47 |     "collapsed": true
 48 |    },
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "# initial parameters can be learned on training data\n",
 52 |     "# theory reference https://web.stanford.edu/~jurafsky/slp3/8.pdf\n",
 53 |     "# code reference https://phvu.net/2013/12/06/sweet-implementation-of-viterbi-in-python/\n",
 54 |     "class HMM(object):\n",
 55 |     "    def __init__(self, initial_prob, trans_prob, obs_prob):\n",
 56 |     "        self.N = np.size(initial_prob)\n",
 57 |     "        self.initial_prob = initial_prob\n",
 58 |     "        self.trans_prob = trans_prob\n",
 59 |     "        self.obs_prob = obs_prob\n",
 60 |     "        self.emission = tf.constant(obs_prob)\n",
 61 |     "        assert self.initial_prob.shape == (self.N, 1)\n",
 62 |     "        assert self.trans_prob.shape == (self.N, self.N)\n",
 63 |     "        assert self.obs_prob.shape[0] == self.N\n",
 64 |     "        self.obs = tf.placeholder(tf.int32)\n",
 65 |     "        self.fwd = tf.placeholder(tf.float64)\n",
 66 |     "        self.viterbi = tf.placeholder(tf.float64)\n",
 67 |     "\n",
 68 |     "    def get_emission(self, obs_idx):\n",
 69 |     "        slice_location = [0, obs_idx]\n",
 70 |     "        num_rows = tf.shape(self.emission)[0]\n",
 71 |     "        slice_shape = [num_rows, 1]\n",
 72 |     "        return tf.slice(self.emission, slice_location, slice_shape)\n",
 73 |     "\n",
 74 |     "    def forward_init_op(self):\n",
 75 |     "        obs_prob = self.get_emission(self.obs)\n",
 76 |     "        fwd = tf.mul(self.initial_prob, obs_prob)\n",
 77 |     "        return fwd\n",
 78 |     "\n",
 79 |     "    def forward_op(self):\n",
 80 |     "        transitions = tf.matmul(self.fwd, tf.transpose(self.get_emission(self.obs)))\n",
 81 |     "        weighted_transitions = transitions * self.trans_prob\n",
 82 |     "        fwd = tf.reduce_sum(weighted_transitions, 0)\n",
 83 |     "        return tf.reshape(fwd, tf.shape(self.fwd))\n",
 84 |     "\n",
 85 |     "    def decode_op(self):\n",
 86 |     "        transitions = tf.matmul(self.viterbi, tf.transpose(self.get_emission(self.obs)))\n",
 87 |     "        weighted_transitions = transitions * self.trans_prob\n",
 88 |     "        viterbi = tf.reduce_max(weighted_transitions, 0)\n",
 89 |     "        return tf.reshape(viterbi, tf.shape(self.viterbi))\n",
 90 |     "\n",
 91 |     "    def backpt_op(self):\n",
 92 |     "        back_transitions = tf.matmul(self.viterbi, np.ones((1, self.N)))\n",
 93 |     "        weighted_back_transitions = back_transitions * self.trans_prob\n",
 94 |     "        return tf.argmax(weighted_back_transitions, 0)"
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "markdown",
 99 |    "metadata": {},
100 |    "source": [
101 |     "Define the forward algorithm from Concept01."
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "code",
106 |    "execution_count": 7,
107 |    "metadata": {
108 |     "collapsed": true
109 |    },
110 |    "outputs": [],
111 |    "source": [
112 |     "def forward_algorithm(sess, hmm, observations):\n",
113 |     "    fwd = sess.run(hmm.forward_init_op(), feed_dict={hmm.obs: observations[0]})\n",
114 |     "    for t in range(1, len(observations)):\n",
115 |     "        fwd = sess.run(hmm.forward_op(), feed_dict={hmm.obs: observations[t], hmm.fwd: fwd})\n",
116 |     "    prob = sess.run(tf.reduce_sum(fwd))\n",
117 |     "    return prob"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "markdown",
122 |    "metadata": {},
123 |    "source": [
124 |     "Now, let's define Viterbi decoding, which recovers the most likely sequence of hidden states for the observed sequence:"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": 8,
130 |    "metadata": {
131 |     "collapsed": true
132 |    },
133 |    "outputs": [],
134 |    "source": [
135 |     "def viterbi_decode(sess, hmm, observations):\n",
136 |     "    viterbi = sess.run(hmm.forward_init_op(), feed_dict={hmm.obs: observations[0]})\n",
137 |     "    backpts = np.ones((hmm.N, len(observations)), 'int32') * -1\n",
138 |     "    for t in range(1, len(observations)):\n",
139 |     "        viterbi, backpt = sess.run([hmm.decode_op(), hmm.backpt_op()],\n",
140 |     "                                    feed_dict={hmm.obs: observations[t],\n",
141 |     "                                               hmm.viterbi: viterbi})\n",
142 |     "        backpts[:, t] = backpt\n",
143 |     "    tokens = [viterbi[:, -1].argmax()]\n",
144 |     "    for i in range(len(observations) - 1, 0, -1):\n",
145 |     "        tokens.append(backpts[tokens[-1], i])\n",
146 |     "    return tokens[::-1]"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "markdown",
151 |    "metadata": {},
152 |    "source": [
153 |     "Let's try it out on some example data:"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "code",
158 |    "execution_count": 11,
159 |    "metadata": {
160 |     "collapsed": false
161 |    },
162 |    "outputs": [
163 |     {
164 |      "name": "stdout",
165 |      "output_type": "stream",
166 |      "text": [
167 |       "Probability of observing [0, 1, 1, 2, 1] is 0.0046421488\n",
168 |       "Most likely hidden states are [0, 0, 0, 1, 1]\n"
169 |      ]
170 |     }
171 |    ],
172 |    "source": [
173 |     "if __name__ == '__main__':\n",
174 |     "    states = ('Healthy', 'Fever')\n",
175 |     "#     observations = ('normal', 'cold', 'dizzy')\n",
176 |     "#     start_probability = {'Healthy': 0.6, 'Fever': 0.4}\n",
177 |     "#     transition_probability = {\n",
178 |     "#         'Healthy': {'Healthy': 0.7, 'Fever': 0.3},\n",
179 |     "#         'Fever': {'Healthy': 0.4, 'Fever': 0.6}\n",
180 |     "#     }\n",
181 |     "#     emission_probability = {\n",
182 |     "#         'Healthy': {'normal': 0.5, 'cold': 0.4, 'dizzy': 0.1},\n",
183 |     "#         'Fever': {'normal': 0.1, 'cold': 0.3, 'dizzy': 0.6}\n",
184 |     "#     }\n",
185 |     "    initial_prob = np.array([[0.6], [0.4]])\n",
186 |     "    trans_prob = np.array([[0.7, 0.3], [0.4, 0.6]])\n",
187 |     "    obs_prob = np.array([[0.5, 0.4, 0.1], [0.1, 0.3, 0.6]])\n",
188 |     "    hmm = HMM(initial_prob=initial_prob, trans_prob=trans_prob, obs_prob=obs_prob)\n",
189 |     "\n",
190 |     "    observations = [0, 1, 1, 2, 1]\n",
191 |     "    with tf.Session() as sess:\n",
192 |     "        prob = forward_algorithm(sess, hmm, observations)\n",
193 |     "        print('Probability of observing {} is {}'.format(observations, prob))\n",
194 |     "\n",
195 |     "        seq = viterbi_decode(sess, hmm, observations)\n",
196 |     "        print('Most likely hidden states are {}'.format(seq))"
197 |    ]
198 |   }
199 |  ],
200 |  "metadata": {
201 |   "kernelspec": {
202 |    "display_name": "Python 2",
203 |    "language": "python",
204 |    "name": "python2"
205 |   },
206 |   "language_info": {
207 |    "codemirror_mode": {
208 |     "name": "ipython",
209 |     "version": 2
210 |    },
211 |    "file_extension": ".py",
212 |    "mimetype": "text/x-python",
213 |    "name": "python",
214 |    "nbconvert_exporter": "python",
215 |    "pygments_lexer": "ipython2",
216 |    "version": "2.7.12"
217 |   }
218 |  },
219 |  "nbformat": 4,
220 |  "nbformat_minor": 1
221 | }
222 | 


--------------------------------------------------------------------------------
/ch02_basics/Concept08_TensorBoard.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Ch `02`: Concept `08`"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Using TensorBoard"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "TensorBoard is a great way to visualize what's happening behind the code. \n",
 22 |     "\n",
 23 |     "In this example, we'll loop through some numbers to improve our guess of the average value. Then we can visualize the results on TensorBoard. \n",
 24 |     "\n",
 25 |     "Let's just set ourselves up with some data to work with:"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": 2,
 31 |    "metadata": {
 32 |     "collapsed": false
 33 |    },
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "import tensorflow as tf\n",
 37 |     "import numpy as np\n",
 38 |     "\n",
 39 |     "raw_data = np.random.normal(10, 1, 100)"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "The moving average is defined as follows:"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": 3,
 52 |    "metadata": {
 53 |     "collapsed": true
 54 |    },
 55 |    "outputs": [],
 56 |    "source": [
 57 |     "alpha = tf.constant(0.05)\n",
 58 |     "curr_value = tf.placeholder(tf.float32)\n",
 59 |     "prev_avg = tf.Variable(0.)\n",
 60 |     "\n",
 61 |     "update_avg = alpha * curr_value + (1 - alpha) * prev_avg"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "markdown",
 66 |    "metadata": {},
 67 |    "source": [
 68 |     "Here's what we want to visualize:"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": 4,
 74 |    "metadata": {
 75 |     "collapsed": true
 76 |    },
 77 |    "outputs": [],
 78 |    "source": [
 79 |     "avg_hist = tf.scalar_summary(\"running average\", update_avg)\n",
 80 |     "value_hist = tf.scalar_summary(\"incoming values\", curr_value)\n",
 81 |     "\n",
 82 |     "merged = tf.merge_all_summaries()\n",
 83 |     "writer = tf.train.SummaryWriter(\"./logs\")"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "markdown",
 88 |    "metadata": {},
 89 |    "source": [
 90 |     "Time to compute the moving averages. We'll also run the `merged` op to track how the values change:"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": 5,
 96 |    "metadata": {
 97 |     "collapsed": false
 98 |    },
 99 |    "outputs": [
100 |     {
101 |      "name": "stdout",
102 |      "output_type": "stream",
103 |      "text": [
104 |       "(9.9219328082194433, 0.49609667)\n",
105 |       "(10.861675551185391, 1.0143757)\n",
106 |       "(9.9106617779805415, 1.45919)\n",
107 |       "(9.5849268381192587, 1.8654768)\n",
108 |       "(11.00720280405182, 2.3225632)\n",
109 |       "(10.612008840443297, 2.7370355)\n",
110 |       "(9.9125171356090611, 3.0958097)\n",
111 |       "(10.301745545416653, 3.4561064)\n",
112 |       "(9.0511758379087368, 3.7358599)\n",
113 |       "(10.942890307806293, 4.0962114)\n",
114 |       "(10.015208512849741, 4.3921614)\n",
115 |       "(9.3332763467657074, 4.6392169)\n",
116 |       "(10.962021001932852, 4.9553571)\n",
117 |       "(9.0722027682605031, 5.1611991)\n",
118 |       "(10.050949472375269, 5.4056864)\n",
119 |       "(10.831545986616975, 5.6769795)\n",
120 |       "(8.9709517646419936, 5.8416777)\n",
121 |       "(8.3373492656893404, 5.9664612)\n",
122 |       "(9.1772602301158415, 6.1270008)\n",
123 |       "(10.38990880186355, 6.3401461)\n",
124 |       "(8.808227775219283, 6.4635501)\n",
125 |       "(10.119509135452704, 6.6463475)\n",
126 |       "(10.12893016093162, 6.8204765)\n",
127 |       "(11.653614487744555, 7.0621333)\n",
128 |       "(8.6165361752502871, 7.139853)\n",
129 |       "(7.3070414531630119, 7.1482124)\n",
130 |       "(10.24150784730451, 7.3028769)\n",
131 |       "(9.2929367488494456, 7.40238)\n",
132 |       "(9.3953573027948245, 7.5020289)\n",
133 |       "(10.16671191219559, 7.635263)\n",
134 |       "(10.481513911689815, 7.777575)\n",
135 |       "(8.0437101254460153, 7.7908816)\n",
136 |       "(8.7769836489562927, 7.8401866)\n",
137 |       "(9.8227328223083337, 7.9393139)\n",
138 |       "(8.8020632506507184, 7.982451)\n",
139 |       "(8.7889073987865807, 8.0227737)\n",
140 |       "(10.15422875259706, 8.1293468)\n",
141 |       "(12.651362475259415, 8.3554478)\n",
142 |       "(9.2913568854657775, 8.4022436)\n",
143 |       "(10.412863798797048, 8.5027742)\n",
144 |       "(10.371544529285533, 8.5962133)\n",
145 |       "(12.563423435493572, 8.7945738)\n",
146 |       "(9.4860079112860909, 8.8291454)\n",
147 |       "(9.9303262258577494, 8.8842039)\n",
148 |       "(10.414121138287168, 8.9607)\n",
149 |       "(9.5808883451271338, 8.9917088)\n",
150 |       "(10.072867620470879, 9.0457659)\n",
151 |       "(11.553245521318047, 9.1711397)\n",
152 |       "(8.6044959537034504, 9.142807)\n",
153 |       "(9.4322434559683934, 9.1572781)\n",
154 |       "(10.990419701720093, 9.2489357)\n",
155 |       "(10.216710072121764, 9.2973242)\n",
156 |       "(9.0131111108592528, 9.2831135)\n",
157 |       "(10.287105453794908, 9.333313)\n",
158 |       "(10.748689516026573, 9.4040813)\n",
159 |       "(12.224440745681218, 9.5450993)\n",
160 |       "(10.300900427008118, 9.5828896)\n",
161 |       "(9.907324401745079, 9.5991106)\n",
162 |       "(9.4367912658650308, 9.5909948)\n",
163 |       "(9.4759772803599276, 9.5852442)\n",
164 |       "(10.311548223446287, 9.6215591)\n",
165 |       "(10.639115852392747, 9.6724367)\n",
166 |       "(9.7615621071829981, 9.6768932)\n",
167 |       "(9.6928206643783916, 9.6776896)\n",
168 |       "(10.455708947515985, 9.7165899)\n",
169 |       "(11.376151790081369, 9.7995682)\n",
170 |       "(9.4929577187259486, 9.7842369)\n",
171 |       "(10.626763765482758, 9.8263626)\n",
172 |       "(10.064452981816418, 9.8382664)\n",
173 |       "(10.716562608289381, 9.8821812)\n",
174 |       "(10.248293233436049, 9.9004869)\n",
175 |       "(11.132245844706093, 9.9620743)\n",
176 |       "(10.08952559223532, 9.9684467)\n",
177 |       "(9.7647245207736528, 9.9582605)\n",
178 |       "(10.913137813209302, 10.006004)\n",
179 |       "(9.5252081608248549, 9.9819641)\n",
180 |       "(11.467667394565416, 10.056249)\n",
181 |       "(7.9967455606815143, 9.9532738)\n",
182 |       "(8.6061314167812917, 9.8859167)\n",
183 |       "(10.572490966550214, 9.9202452)\n",
184 |       "(10.81286729477598, 9.9648762)\n",
185 |       "(10.257291348548044, 9.979496)\n",
186 |       "(8.7181484758406054, 9.9164286)\n",
187 |       "(9.2027202255623468, 9.880743)\n",
188 |       "(9.7831564586312201, 9.8758631)\n",
189 |       "(10.421150461115603, 9.9031267)\n",
190 |       "(10.978998086154267, 9.9569206)\n",
191 |       "(7.6687364827055511, 9.8425112)\n",
192 |       "(8.9482739188491998, 9.7977991)\n",
193 |       "(9.54681335244425, 9.7852497)\n",
194 |       "(8.88792515708111, 9.7403831)\n",
195 |       "(9.6287778090333855, 9.7348022)\n",
196 |       "(10.325869408477462, 9.7643557)\n",
197 |       "(9.0225220025034396, 9.7272635)\n",
198 |       "(10.891011102607536, 9.7854509)\n",
199 |       "(8.5599147797649415, 9.7241735)\n",
200 |       "(9.2050931655996884, 9.6982193)\n",
201 |       "(8.8681245615622721, 9.6567144)\n",
202 |       "(10.440651113767204, 9.6959114)\n",
203 |       "(9.1913661767896464, 9.6706839)\n"
204 |      ]
205 |     }
206 |    ],
207 |    "source": [
208 |     "init = tf.initialize_all_variables()\n",
209 |     "\n",
210 |     "with tf.Session() as sess:\n",
211 |     "    sess.run(init)\n",
212 |     "    for i in range(len(raw_data)):\n",
213 |     "        summary_str, curr_avg = sess.run([merged, update_avg], feed_dict={curr_value: raw_data[i]})\n",
214 |     "        sess.run(tf.assign(prev_avg, curr_avg))\n",
215 |     "        print(raw_data[i], curr_avg)\n",
216 |     "        writer.add_summary(summary_str, i)"
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "markdown",
221 |    "metadata": {},
222 |    "source": [
223 |     "Check out the visualization by running TensorBoard from the terminal:\n",
224 |     "\n",
225 |     "    $ tensorboard --logdir=path/to/logs"
226 |    ]
227 |   }
228 |  ],
229 |  "metadata": {
230 |   "kernelspec": {
231 |    "display_name": "Python 2",
232 |    "language": "python",
233 |    "name": "python2"
234 |   },
235 |   "language_info": {
236 |    "codemirror_mode": {
237 |     "name": "ipython",
238 |     "version": 2
239 |    },
240 |    "file_extension": ".py",
241 |    "mimetype": "text/x-python",
242 |    "name": "python",
243 |    "nbconvert_exporter": "python",
244 |    "pygments_lexer": "ipython2",
245 |    "version": "2.7.12"
246 |   }
247 |  },
248 |  "nbformat": 4,
249 |  "nbformat_minor": 0
250 | }
251 | 


--------------------------------------------------------------------------------
/ch05_clustering/Concept01_clustering.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Ch `05`: Concept `01`"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Clustering"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "We're going to need a Python library called the BregmanToolkit. It's available here: https://github.com/BinRoot/BregmanToolkit\n",
 22 |     "\n",
 23 |     "One way to install it is by downloading from the GitHub link above, and then running `sudo python setup.py install` in the downloaded directory.\n"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": 1,
 29 |    "metadata": {
 30 |     "collapsed": false
 31 |    },
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "%matplotlib inline\n",
 35 |     "import tensorflow as tf\n",
 36 |     "import numpy as np\n",
 37 |     "from bregman.suite import *"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "markdown",
 42 |    "metadata": {},
 43 |    "source": [
 44 |     "Define some hyper-parameters for clustering:"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": 2,
 50 |    "metadata": {
 51 |     "collapsed": true
 52 |    },
 53 |    "outputs": [],
 54 |    "source": [
 55 |     "k = 2\n",
 56 |     "max_iterations = 100"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "markdown",
 61 |    "metadata": {},
 62 |    "source": [
 63 |     "Select the location for the audio files:"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": 3,
 69 |    "metadata": {
 70 |     "collapsed": true
 71 |    },
 72 |    "outputs": [],
 73 |    "source": [
 74 |     "filenames = tf.train.match_filenames_once('./audio_dataset/*.wav')\n",
 75 |     "count_num_files = tf.size(filenames)\n",
 76 |     "filename_queue = tf.train.string_input_producer(filenames)\n",
 77 |     "reader = tf.WholeFileReader()\n",
 78 |     "filename, file_contents = reader.read(filename_queue)\n",
 79 |     "\n",
 80 |     "chromo = tf.placeholder(tf.float32)\n",
 81 |     "max_freqs = tf.argmax(chromo, 0)"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "markdown",
 86 |    "metadata": {},
 87 |    "source": [
 88 |     "Create a helper function to get the next audio file's chromagram:"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": 4,
 94 |    "metadata": {
 95 |     "collapsed": true
 96 |    },
 97 |    "outputs": [],
 98 |    "source": [
 99 |     "def get_next_chromogram(sess):\n",
100 |     "    audio_file = sess.run(filename)\n",
101 |     "    F = Chromagram(audio_file, nfft=16384, wfft=8192, nhop=2205)\n",
102 |     "    return F.X, audio_file"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "markdown",
107 |    "metadata": {},
108 |    "source": [
109 |     "And create a helper function to extract a feature vector from the chromagram data:"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "code",
114 |    "execution_count": 5,
115 |    "metadata": {
116 |     "collapsed": true
117 |    },
118 |    "outputs": [],
119 |    "source": [
120 |     "def extract_feature_vector(sess, chromo_data):\n",
121 |     "    num_features, num_samples = np.shape(chromo_data)\n",
122 |     "    freq_vals = sess.run(max_freqs, feed_dict={chromo: chromo_data})\n",
123 |     "    hist, bins = np.histogram(freq_vals, bins=range(num_features + 1))\n",
124 |     "    normalized_hist = hist.astype(float) / num_samples\n",
125 |     "    return normalized_hist"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "markdown",
130 |    "metadata": {},
131 |    "source": [
132 |     "Extract a dataset of feature vectors by calling our helper functions above:"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": 6,
138 |    "metadata": {
139 |     "collapsed": true
140 |    },
141 |    "outputs": [],
142 |    "source": [
143 |     "def get_dataset(sess):\n",
144 |     "    num_files = sess.run(count_num_files)\n",
145 |     "    coord = tf.train.Coordinator()\n",
146 |     "    threads = tf.train.start_queue_runners(coord=coord)\n",
147 |     "    xs = list()\n",
148 |     "    names = list()\n",
149 |     "    plt.figure()\n",
150 |     "    for _ in range(num_files):\n",
151 |     "        chromo_data, filename = get_next_chromogram(sess)\n",
152 |     "\n",
153 |     "        plt.subplot(1, 2, 1)\n",
154 |     "        plt.imshow(chromo_data, cmap='Greys', interpolation='nearest')\n",
155 |     "        plt.title('Visualization of Sound Spectrum')\n",
156 |     "\n",
157 |     "        plt.subplot(1, 2, 2)\n",
158 |     "        freq_vals = sess.run(max_freqs, feed_dict={chromo: chromo_data})\n",
159 |     "        plt.hist(freq_vals)\n",
160 |     "        plt.title('Histogram of Notes')\n",
161 |     "        plt.xlabel('Musical Note')\n",
162 |     "        plt.ylabel('Count')\n",
163 |     "        plt.savefig('{}.png'.format(filename))\n",
164 |     "        plt.clf()\n",
165 |     "\n",
166 |     "        plt.clf()\n",
167 |     "        names.append(filename)\n",
168 |     "        x = extract_feature_vector(sess, chromo_data)\n",
169 |     "        xs.append(x)\n",
170 |     "    xs = np.asmatrix(xs)\n",
171 |     "    return xs, names"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "markdown",
176 |    "metadata": {},
177 |    "source": [
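178 |     "k-means clustering alternates between two main phases: assignment (each point is assigned to its nearest centroid) and re-centering (each centroid is recomputed as the mean of its assigned points). We'll also throw in an initialization helper function for good measure. Here are the three components:\n"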
179 |    ]
180 |   },
181 |   {
182 |    "cell_type": "code",
183 |    "execution_count": 7,
184 |    "metadata": {
185 |     "collapsed": true
186 |    },
187 |    "outputs": [],
188 |    "source": [
189 |     "def initial_cluster_centroids(X, k):\n",
190 |     "    return X[0:k, :]\n",
191 |     "\n",
192 |     "\n",
193 |     "def assign_cluster(X, centroids):\n",
194 |     "    expanded_vectors = tf.expand_dims(X, 0)\n",
195 |     "    expanded_centroids = tf.expand_dims(centroids, 1)\n",
196 |     "    distances = tf.reduce_sum(tf.square(tf.sub(expanded_vectors, expanded_centroids)), 2)\n",
197 |     "    mins = tf.argmin(distances, 0)\n",
198 |     "    return mins\n",
199 |     "\n",
200 |     "\n",
201 |     "def recompute_centroids(X, Y):\n",
202 |     "    sums = tf.unsorted_segment_sum(X, Y, k)\n",
203 |     "    counts = tf.unsorted_segment_sum(tf.ones_like(X), Y, k)\n",
204 |     "    return sums / counts\n"
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "markdown",
209 |    "metadata": {},
210 |    "source": [
211 |     "Open a session, obtain a dataset, and cluster the data:"
212 |    ]
213 |   },
214 |   {
215 |    "cell_type": "code",
216 |    "execution_count": 8,
217 |    "metadata": {
218 |     "collapsed": false
219 |    },
220 |    "outputs": [
221 |     {
222 |      "name": "stderr",
223 |      "output_type": "stream",
224 |      "text": [
225 |       "/usr/local/lib/python2.7/dist-packages/bregman/features_base.py:353: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future\n",
226 |       "  mxnorm = P.empty(self._cqtN) # Normalization coefficients\n",
227 |       "/usr/local/lib/python2.7/dist-packages/bregman/features_base.py:357: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future\n",
228 |       "  for i in P.arange(self._cqtN)])\n"
229 |      ]
230 |     },
231 |     {
232 |      "name": "stdout",
233 |      "output_type": "stream",
234 |      "text": [
235 |       "[(0, './audio_dataset/cough_2.wav'), (1, './audio_dataset/scream_1.wav'), (0, './audio_dataset/cough_1.wav'), (1, './audio_dataset/scream_2.wav'), (1, './audio_dataset/scream_3.wav')]\n"
236 |      ]
237 |     },
238 |     {
239 |      "data": {
240 |       "text/plain": [
241 |        "<matplotlib.figure.Figure at 0x7fe871c3c150>"
242 |       ]
243 |      },
244 |      "metadata": {},
245 |      "output_type": "display_data"
246 |     }
247 |    ],
248 |    "source": [
249 |     "with tf.Session() as sess:\n",
250 |     "    sess.run(tf.initialize_all_variables())\n",
251 |     "    X, names = get_dataset(sess)\n",
252 |     "    centroids = initial_cluster_centroids(X, k)\n",
253 |     "    i, converged = 0, False\n",
254 |     "    while not converged and i < max_iterations:\n",
255 |     "        i += 1\n",
256 |     "        Y = assign_cluster(X, centroids)\n",
257 |     "        centroids = sess.run(recompute_centroids(X, Y))\n",
258 |     "    print(zip(sess.run(Y), names))\n"
259 |    ]
260 |   }
261 |  ],
262 |  "metadata": {
263 |   "kernelspec": {
264 |    "display_name": "Python 2",
265 |    "language": "python",
266 |    "name": "python2"
267 |   },
268 |   "language_info": {
269 |    "codemirror_mode": {
270 |     "name": "ipython",
271 |     "version": 2
272 |    },
273 |    "file_extension": ".py",
274 |    "mimetype": "text/x-python",
275 |    "name": "python",
276 |    "nbconvert_exporter": "python",
277 |    "pygments_lexer": "ipython2",
278 |    "version": "2.7.12"
279 |   }
280 |  },
281 |  "nbformat": 4,
282 |  "nbformat_minor": 1
283 | }
284 | 


--------------------------------------------------------------------------------
/ch07_autoencoder/Concept01_autoencoder.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Ch `07`: Concept `01`"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Autoencoder"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "All we'll need is TensorFlow and NumPy:"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 4,
 27 |    "metadata": {
 28 |     "collapsed": true
 29 |    },
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "import tensorflow as tf\n",
 33 |     "import numpy as np"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "markdown",
 38 |    "metadata": {},
 39 |    "source": [
 40 |     "Instead of feeding all the training data to the training op, we will feed data in small batches:"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": 5,
 46 |    "metadata": {
 47 |     "collapsed": true
 48 |    },
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "def get_batch(X, size):\n",
 52 |     "    a = np.random.choice(len(X), size, replace=False)\n",
 53 |     "    return X[a]"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "markdown",
 58 |    "metadata": {},
 59 |    "source": [
 60 |     "Define the autoencoder class:"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": 11,
 66 |    "metadata": {
 67 |     "collapsed": true
 68 |    },
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "class Autoencoder:\n",
 72 |     "    def __init__(self, input_dim, hidden_dim, epoch=500, batch_size=10, learning_rate=0.001):\n",
 73 |     "        self.epoch = epoch\n",
 74 |     "        self.batch_size = batch_size\n",
 75 |     "        self.learning_rate = learning_rate\n",
 76 |     "\n",
 77 |     "        # Define input placeholder\n",
 78 |     "        x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim])\n",
 79 |     "        \n",
 80 |     "        # Define variables\n",
 81 |     "        with tf.name_scope('encode'):\n",
 82 |     "            weights = tf.Variable(tf.random_normal([input_dim, hidden_dim], dtype=tf.float32), name='weights')\n",
 83 |     "            biases = tf.Variable(tf.zeros([hidden_dim]), name='biases')\n",
 84 |     "            encoded = tf.nn.sigmoid(tf.matmul(x, weights) + biases)\n",
 85 |     "        with tf.name_scope('decode'):\n",
 86 |     "            weights = tf.Variable(tf.random_normal([hidden_dim, input_dim], dtype=tf.float32), name='weights')\n",
 87 |     "            biases = tf.Variable(tf.zeros([input_dim]), name='biases')\n",
 88 |     "            decoded = tf.matmul(encoded, weights) + biases\n",
 89 |     "\n",
 90 |     "        self.x = x\n",
 91 |     "        self.encoded = encoded\n",
 92 |     "        self.decoded = decoded\n",
 93 |     "\n",
 94 |     "        # Define cost function and training op\n",
 95 |     "        self.loss = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(self.x, self.decoded))))\n",
 96 |     "\n",
 97 |     "        self.all_loss = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(self.x, self.decoded)), 1))\n",
 98 |     "        self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)\n",
 99 |     "        \n",
100 |     "        # Define a saver op\n",
101 |     "        self.saver = tf.train.Saver()\n",
102 |     "\n",
103 |     "    def train(self, data):\n",
104 |     "        with tf.Session() as sess:\n",
105 |     "            sess.run(tf.initialize_all_variables())\n",
106 |     "            for i in range(self.epoch):\n",
107 |     "                for j in range(500):\n",
108 |     "                    batch_data = get_batch(data, self.batch_size)\n",
109 |     "                    l, _ = sess.run([self.loss, self.train_op], feed_dict={self.x: batch_data})\n",
110 |     "                if i % 50 == 0:\n",
111 |     "                    print('epoch {0}: loss = {1}'.format(i, l))\n",
112 |     "                    self.saver.save(sess, './model.ckpt')\n",
113 |     "            self.saver.save(sess, './model.ckpt')\n",
114 |     "        \n",
115 |     "    def test(self, data):\n",
116 |     "        with tf.Session() as sess:\n",
117 |     "            self.saver.restore(sess, './model.ckpt')\n",
118 |     "            hidden, reconstructed = sess.run([self.encoded, self.decoded], feed_dict={self.x: data})\n",
119 |     "        print('input', data)\n",
120 |     "        print('compressed', hidden)\n",
121 |     "        print('reconstructed', reconstructed)\n",
122 |     "        return reconstructed\n",
123 |     "\n",
124 |     "    def get_params(self):\n",
125 |     "        with tf.Session() as sess:\n",
126 |     "            self.saver.restore(sess, './model.ckpt')\n",
127 |     "            weights, biases = sess.run([self.weights1, self.biases1])\n",
128 |     "        return weights, biases\n",
129 |     "\n",
130 |     "    def classify(self, data, labels):\n",
131 |     "        with tf.Session() as sess:\n",
132 |     "            sess.run(tf.initialize_all_variables())\n",
133 |     "            self.saver.restore(sess, './model.ckpt')\n",
134 |     "            hidden, reconstructed = sess.run([self.encoded, self.decoded], feed_dict={self.x: data})\n",
135 |     "            reconstructed = reconstructed[0]\n",
136 |     "            # loss = sess.run(self.all_loss, feed_dict={self.x: data})\n",
137 |     "            print('data', np.shape(data))\n",
138 |     "            print('reconstructed', np.shape(reconstructed))\n",
139 |     "            loss = np.sqrt(np.mean(np.square(data - reconstructed), axis=1))\n",
140 |     "            print('loss', np.shape(loss))\n",
141 |     "            horse_indices = np.where(labels == 7)[0]\n",
142 |     "            not_horse_indices = np.where(labels != 7)[0]\n",
143 |     "            horse_loss = np.mean(loss[horse_indices])\n",
144 |     "            not_horse_loss = np.mean(loss[not_horse_indices])\n",
145 |     "            print('horse', horse_loss)\n",
146 |     "            print('not horse', not_horse_loss)\n",
147 |     "            return hidden[7,:]\n",
148 |     "\n",
149 |     "    def decode(self, encoding):\n",
150 |     "        with tf.Session() as sess:\n",
151 |     "            sess.run(tf.initialize_all_variables())\n",
152 |     "            self.saver.restore(sess, './model.ckpt')\n",
153 |     "            reconstructed = sess.run(self.decoded, feed_dict={self.encoded: encoding})\n",
154 |     "        img = np.reshape(reconstructed, (32, 32))\n",
155 |     "        return img\n"
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "markdown",
160 |    "metadata": {},
161 |    "source": [
162 |     "The *Iris dataset* is often used as a simple training dataset to check whether a classification algorithm is working. It ships with the scikit-learn library, which you can install with `pip install scikit-learn`."
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": 10,
168 |    "metadata": {
169 |     "collapsed": false
170 |    },
171 |    "outputs": [
172 |     {
173 |      "name": "stdout",
174 |      "output_type": "stream",
175 |      "text": [
176 |       "epoch 0: loss = 3.95399570465\n",
177 |       "epoch 100: loss = 0.383291095495\n",
178 |       "epoch 200: loss = 0.271494656801\n",
179 |       "epoch 300: loss = 0.318220198154\n",
180 |       "epoch 400: loss = 0.293813735247\n",
181 |       "('input', [[8, 4, 6, 2]])\n",
182 |       "('compressed', array([[ 0.27284986]], dtype=float32))\n",
183 |       "('reconstructed', array([[ 6.86461449,  2.81402731,  6.21785831,  2.23264408]], dtype=float32))\n"
184 |      ]
185 |     },
186 |     {
187 |      "data": {
188 |       "text/plain": [
189 |        "array([[ 6.86461449,  2.81402731,  6.21785831,  2.23264408]], dtype=float32)"
190 |       ]
191 |      },
192 |      "execution_count": 10,
193 |      "metadata": {},
194 |      "output_type": "execute_result"
195 |     }
196 |    ],
197 |    "source": [
198 |     "from sklearn import datasets\n",
199 |     "\n",
200 |     "hidden_dim = 1\n",
201 |     "data = datasets.load_iris().data\n",
202 |     "input_dim = len(data[0])\n",
203 |     "ae = Autoencoder(input_dim, hidden_dim)\n",
204 |     "ae.train(data)\n",
205 |     "ae.test([[8, 4, 6, 2]])"
206 |    ]
207 |   },
208 |   {
209 |    "cell_type": "code",
210 |    "execution_count": null,
211 |    "metadata": {
212 |     "collapsed": true
213 |    },
214 |    "outputs": [],
215 |    "source": []
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": null,
220 |    "metadata": {
221 |     "collapsed": true
222 |    },
223 |    "outputs": [],
224 |    "source": []
225 |   }
226 |  ],
227 |  "metadata": {
228 |   "kernelspec": {
229 |    "display_name": "Python 2",
230 |    "language": "python",
231 |    "name": "python2"
232 |   },
233 |   "language_info": {
234 |    "codemirror_mode": {
235 |     "name": "ipython",
236 |     "version": 2
237 |    },
238 |    "file_extension": ".py",
239 |    "mimetype": "text/x-python",
240 |    "name": "python",
241 |    "nbconvert_exporter": "python",
242 |    "pygments_lexer": "ipython2",
243 |    "version": "2.7.12"
244 |   }
245 |  },
246 |  "nbformat": 4,
247 |  "nbformat_minor": 1
248 | }
249 | 


--------------------------------------------------------------------------------
/ch10_rnn/Concept03_rnn_real_world.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Ch `10`: Concept `03`"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Recurrent Neural Network on real data"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": null,
 20 |    "metadata": {
 21 |     "collapsed": true
 22 |    },
 23 |    "outputs": [],
 24 |    "source": []
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": 2,
 29 |    "metadata": {
 30 |     "collapsed": true
 31 |    },
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "import numpy as np\n",
 35 |     "import tensorflow as tf\n",
 36 |     "from tensorflow.python.ops import rnn, rnn_cell\n",
 37 |     "import data_loader\n",
 38 |     "import matplotlib.pyplot as plt\n",
 39 |     "\n",
 40 |     "class SeriesPredictor:\n",
 41 |     "\n",
 42 |     "    def __init__(self, input_dim, seq_size, hidden_dim):\n",
 43 |     "        # Hyperparameters\n",
 44 |     "        self.input_dim = input_dim\n",
 45 |     "        self.seq_size = seq_size\n",
 46 |     "        self.hidden_dim = hidden_dim\n",
 47 |     "\n",
 48 |     "        # Weight variables and input placeholders\n",
 49 |     "        self.W_out = tf.Variable(tf.random_normal([hidden_dim, 1]), name='W_out')\n",
 50 |     "        self.b_out = tf.Variable(tf.random_normal([1]), name='b_out')\n",
 51 |     "        self.x = tf.placeholder(tf.float32, [None, seq_size, input_dim])\n",
 52 |     "        self.y = tf.placeholder(tf.float32, [None, seq_size])\n",
 53 |     "\n",
 54 |     "        # Cost optimizer\n",
 55 |     "        self.cost = tf.reduce_mean(tf.square(self.model() - self.y))\n",
 56 |     "        self.train_op = tf.train.AdamOptimizer(learning_rate=0.003).minimize(self.cost)\n",
 57 |     "\n",
 58 |     "        # Auxiliary ops\n",
 59 |     "        self.saver = tf.train.Saver()\n",
 60 |     "\n",
 61 |     "    def model(self):\n",
 62 |     "        \"\"\"\n",
 63 |     "        Build the LSTM graph over self.x, a placeholder of shape [batch_size, seq_size, input_dim].\n",
 64 |     "        self.W_out: matrix of fully-connected output layer weights\n",
 65 |     "        self.b_out: vector of fully-connected output layer biases\n",
 66 |     "        \"\"\"\n",
 67 |     "        cell = rnn_cell.BasicLSTMCell(self.hidden_dim)\n",
 68 |     "        outputs, states = rnn.dynamic_rnn(cell, self.x, dtype=tf.float32)\n",
 69 |     "        num_examples = tf.shape(self.x)[0]\n",
 70 |     "        W_repeated = tf.tile(tf.expand_dims(self.W_out, 0), [num_examples, 1, 1])\n",
 71 |     "        out = tf.batch_matmul(outputs, W_repeated) + self.b_out\n",
 72 |     "        out = tf.squeeze(out)\n",
 73 |     "        return out\n",
 74 |     "\n",
 75 |     "    def train(self, train_x, train_y, test_x, test_y):\n",
 76 |     "        with tf.Session() as sess:\n",
 77 |     "            tf.get_variable_scope().reuse_variables()\n",
 78 |     "            sess.run(tf.initialize_all_variables())\n",
 79 |     "            max_patience = 3\n",
 80 |     "            patience = max_patience\n",
 81 |     "            min_test_err = float('inf')\n",
 82 |     "            step = 0\n",
 83 |     "            while patience > 0:\n",
 84 |     "                _, train_err = sess.run([self.train_op, self.cost], feed_dict={self.x: train_x, self.y: train_y})\n",
 85 |     "                if step % 100 == 0:\n",
 86 |     "                    test_err = sess.run(self.cost, feed_dict={self.x: test_x, self.y: test_y})\n",
 87 |     "                    print('step: {}\\t\\ttrain err: {}\\t\\ttest err: {}'.format(step, train_err, test_err))\n",
 88 |     "                    if test_err < min_test_err:\n",
 89 |     "                        min_test_err = test_err\n",
 90 |     "                        patience = max_patience\n",
 91 |     "                    else:\n",
 92 |     "                        patience -= 1\n",
 93 |     "                step += 1\n",
 94 |     "            save_path = self.saver.save(sess, 'model.ckpt')\n",
 95 |     "            print('Model saved to {}'.format(save_path))\n",
 96 |     "\n",
 97 |     "    def test(self, sess, test_x):\n",
 98 |     "        tf.get_variable_scope().reuse_variables()\n",
 99 |     "        self.saver.restore(sess, 'model.ckpt')\n",
100 |     "        output = sess.run(self.model(), feed_dict={self.x: test_x})\n",
101 |     "        return output"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "code",
106 |    "execution_count": null,
107 |    "metadata": {
108 |     "collapsed": true
109 |    },
110 |    "outputs": [],
111 |    "source": []
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": 3,
116 |    "metadata": {
117 |     "collapsed": true
118 |    },
119 |    "outputs": [],
120 |    "source": [
121 |     "def plot_results(train_x, predictions, actual, filename):\n",
122 |     "    plt.figure()\n",
123 |     "    num_train = len(train_x)\n",
124 |     "    plt.plot(list(range(num_train)), train_x, color='b', label='training data')\n",
125 |     "    plt.plot(list(range(num_train, num_train + len(predictions))), predictions, color='r', label='predicted')\n",
126 |     "    plt.plot(list(range(num_train, num_train + len(actual))), actual, color='g', label='test data')\n",
127 |     "    plt.legend()\n",
128 |     "    if filename is not None:\n",
129 |     "        plt.savefig(filename)\n",
130 |     "    else:\n",
131 |     "        plt.show()"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "code",
136 |    "execution_count": null,
137 |    "metadata": {
138 |     "collapsed": true
139 |    },
140 |    "outputs": [],
141 |    "source": []
142 |   },
143 |   {
144 |    "cell_type": "code",
145 |    "execution_count": 4,
146 |    "metadata": {
147 |     "collapsed": false
148 |    },
149 |    "outputs": [
150 |     {
151 |      "name": "stdout",
152 |      "output_type": "stream",
153 |      "text": [
154 |       "step: 0\t\ttrain err: 3.87396216393\t\ttest err: 1.34220945835\n",
155 |       "step: 100\t\ttrain err: 0.603672504425\t\ttest err: 1.43012559414\n",
156 |       "step: 200\t\ttrain err: 0.220897972584\t\ttest err: 1.13115489483\n",
157 |       "step: 300\t\ttrain err: 0.127372398973\t\ttest err: 1.02137005329\n",
158 |       "step: 400\t\ttrain err: 0.0927080661058\t\ttest err: 0.940221071243\n",
159 |       "step: 500\t\ttrain err: 0.0732159689069\t\ttest err: 0.856326341629\n",
160 |       "step: 600\t\ttrain err: 0.0618191249669\t\ttest err: 0.781472921371\n",
161 |       "step: 700\t\ttrain err: 0.0551925823092\t\ttest err: 0.719470381737\n",
162 |       "step: 800\t\ttrain err: 0.0517075285316\t\ttest err: 0.670733273029\n",
163 |       "step: 900\t\ttrain err: 0.0492312312126\t\ttest err: 0.627119839191\n",
164 |       "step: 1000\t\ttrain err: 0.0470333322883\t\ttest err: 0.581817626953\n",
165 |       "step: 1100\t\ttrain err: 0.0451201759279\t\ttest err: 0.535747170448\n",
166 |       "step: 1200\t\ttrain err: 0.0435940213501\t\ttest err: 0.493058353662\n",
167 |       "step: 1300\t\ttrain err: 0.0424766205251\t\ttest err: 0.460727632046\n",
168 |       "step: 1400\t\ttrain err: 0.0416616015136\t\ttest err: 0.444208920002\n",
169 |       "step: 1500\t\ttrain err: 0.0410179980099\t\ttest err: 0.444220513105\n",
170 |       "step: 1600\t\ttrain err: 0.040466774255\t\ttest err: 0.457963436842\n",
171 |       "step: 1700\t\ttrain err: 0.0399809591472\t\ttest err: 0.480032652617\n",
172 |       "Model saved to model.ckpt\n",
173 |       "('predicted_vals', (22,))\n"
174 |      ]
175 |     }
176 |    ],
177 |    "source": [
178 |     "if __name__ == '__main__':\n",
179 |     "    seq_size = 5\n",
180 |     "    predictor = SeriesPredictor(input_dim=1, seq_size=seq_size, hidden_dim=5)\n",
181 |     "    data = data_loader.load_series('international-airline-passengers.csv')\n",
182 |     "    train_data, actual_vals = data_loader.split_data(data)\n",
183 |     "\n",
184 |     "    train_x, train_y = [], []\n",
185 |     "    for i in range(len(train_data) - seq_size - 1):\n",
186 |     "        train_x.append(np.expand_dims(train_data[i:i+seq_size], axis=1).tolist())\n",
187 |     "        train_y.append(train_data[i+1:i+seq_size+1])\n",
188 |     "\n",
189 |     "    test_x, test_y = [], []\n",
190 |     "    for i in range(len(actual_vals) - seq_size - 1):\n",
191 |     "        test_x.append(np.expand_dims(actual_vals[i:i+seq_size], axis=1).tolist())\n",
192 |     "        test_y.append(actual_vals[i+1:i+seq_size+1])\n",
193 |     "\n",
194 |     "    predictor.train(train_x, train_y, test_x, test_y)\n",
195 |     "\n",
196 |     "    with tf.Session() as sess:\n",
197 |     "        predicted_vals = predictor.test(sess, test_x)[:,0]\n",
198 |     "        print('predicted_vals', np.shape(predicted_vals))\n",
199 |     "        plot_results(train_data, predicted_vals, actual_vals, 'predictions.png')\n",
200 |     "\n",
201 |     "        prev_seq = train_x[-1]\n",
202 |     "        predicted_vals = []\n",
203 |     "        for i in range(20):\n",
204 |     "            next_seq = predictor.test(sess, [prev_seq])\n",
205 |     "            predicted_vals.append(next_seq[-1])\n",
206 |     "            prev_seq = np.vstack((prev_seq[1:], next_seq[-1]))\n",
207 |     "        plot_results(train_data, predicted_vals, actual_vals, 'hallucinations.png')"
208 |    ]
209 |   },
210 |   {
211 |    "cell_type": "code",
212 |    "execution_count": null,
213 |    "metadata": {
214 |     "collapsed": true
215 |    },
216 |    "outputs": [],
217 |    "source": []
218 |   },
219 |   {
220 |    "cell_type": "code",
221 |    "execution_count": null,
222 |    "metadata": {
223 |     "collapsed": true
224 |    },
225 |    "outputs": [],
226 |    "source": []
227 |   },
228 |   {
229 |    "cell_type": "code",
230 |    "execution_count": null,
231 |    "metadata": {
232 |     "collapsed": true
233 |    },
234 |    "outputs": [],
235 |    "source": []
236 |   }
237 |  ],
238 |  "metadata": {
239 |   "kernelspec": {
240 |    "display_name": "Python 2",
241 |    "language": "python",
242 |    "name": "python2"
243 |   },
244 |   "language_info": {
245 |    "codemirror_mode": {
246 |     "name": "ipython",
247 |     "version": 2
248 |    },
249 |    "file_extension": ".py",
250 |    "mimetype": "text/x-python",
251 |    "name": "python",
252 |    "nbconvert_exporter": "python",
253 |    "pygments_lexer": "ipython2",
254 |    "version": "2.7.12"
255 |   }
256 |  },
257 |  "nbformat": 4,
258 |  "nbformat_minor": 1
259 | }
260 | 


--------------------------------------------------------------------------------
/ch03_regression/Concept03_regularization.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Ch `03`: Concept `03`"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Regularization"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "Import the relevant libraries and initialize the hyper-parameters:"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 1,
 27 |    "metadata": {
 28 |     "collapsed": true
 29 |    },
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "import tensorflow as tf\n",
 33 |     "import numpy as np\n",
 34 |     "import matplotlib.pyplot as plt\n",
 35 |     "\n",
 36 |     "np.random.seed(100)\n",
 37 |     "\n",
 38 |     "learning_rate = 0.001\n",
 39 |     "training_epochs = 1000\n",
 40 |     "reg_lambda = 0."
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "markdown",
 45 |    "metadata": {},
 46 |    "source": [
 47 |     "Create a helper function to randomly split the dataset into training and test sets:"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": 2,
 53 |    "metadata": {
 54 |     "collapsed": true
 55 |    },
 56 |    "outputs": [],
 57 |    "source": [
 58 |     "def split_dataset(x_dataset, y_dataset, ratio):\n",
 59 |     "    arr = np.arange(x_dataset.size)\n",
 60 |     "    np.random.shuffle(arr)\n",
 61 |     "    num_train = int(ratio * x_dataset.size)\n",
 62 |     "    x_train = x_dataset[arr[0:num_train]]\n",
 63 |     "    y_train = y_dataset[arr[0:num_train]]\n",
 64 |     "    x_test = x_dataset[arr[num_train:x_dataset.size]]\n",
 65 |     "    y_test = y_dataset[arr[num_train:x_dataset.size]]\n",
 66 |     "    return x_train, x_test, y_train, y_test"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "markdown",
 71 |    "metadata": {},
 72 |    "source": [
 73 |     "Create a fake dataset: `y = x^2` plus some Gaussian noise."
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "execution_count": 3,
 79 |    "metadata": {
 80 |     "collapsed": true
 81 |    },
 82 |    "outputs": [],
 83 |    "source": [
 84 |     "x_dataset = np.linspace(-1, 1, 100)\n",
 85 |     "\n",
 86 |     "num_coeffs = 9\n",
 87 |     "y_dataset_params = [0.] * num_coeffs\n",
 88 |     "y_dataset_params[2] = 1\n",
 89 |     "y_dataset = 0\n",
 90 |     "for i in range(num_coeffs):\n",
 91 |     "    y_dataset += y_dataset_params[i] * np.power(x_dataset, i)\n",
 92 |     "y_dataset += np.random.randn(*x_dataset.shape) * 0.3"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "markdown",
 97 |    "metadata": {},
 98 |    "source": [
 99 |     "Split the dataset into 70% training and 30% testing:"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "code",
104 |    "execution_count": 4,
105 |    "metadata": {
106 |     "collapsed": false
107 |    },
108 |    "outputs": [],
109 |    "source": [
110 |     "(x_train, x_test, y_train, y_test) = split_dataset(x_dataset, y_dataset, 0.7)\n"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "markdown",
115 |    "metadata": {},
116 |    "source": [
117 |     "Set up the input/output placeholders"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": 5,
123 |    "metadata": {
124 |     "collapsed": true
125 |    },
126 |    "outputs": [],
127 |    "source": [
128 |     "X = tf.placeholder(\"float\")\n",
129 |     "Y = tf.placeholder(\"float\")"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "markdown",
134 |    "metadata": {},
135 |    "source": [
136 |     "Define our model"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "code",
141 |    "execution_count": 6,
142 |    "metadata": {
143 |     "collapsed": true
144 |    },
145 |    "outputs": [],
146 |    "source": [
147 |     "def model(X, w):\n",
148 |     "    terms = []\n",
149 |     "    for i in range(num_coeffs):\n",
150 |     "        term = tf.mul(w[i], tf.pow(X, i))\n",
151 |     "        terms.append(term)\n",
152 |     "    return tf.add_n(terms)"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "markdown",
157 |    "metadata": {},
158 |    "source": [
159 |     "Define the regularized cost function"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "code",
164 |    "execution_count": 7,
165 |    "metadata": {
166 |     "collapsed": true
167 |    },
168 |    "outputs": [],
169 |    "source": [
170 |     "w = tf.Variable([0.] * num_coeffs, name=\"parameters\")\n",
171 |     "y_model = model(X, w)\n",
172 |     "cost = tf.div(tf.add(tf.reduce_sum(tf.square(Y-y_model)),\n",
173 |     "                     tf.mul(reg_lambda, tf.reduce_sum(tf.square(w)))),\n",
174 |     "              2*x_train.size)\n",
175 |     "train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)"
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "markdown",
180 |    "metadata": {},
181 |    "source": [
182 |     "Set up the session"
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "code",
187 |    "execution_count": 8,
188 |    "metadata": {
189 |     "collapsed": true
190 |    },
191 |    "outputs": [],
192 |    "source": [
193 |     "sess = tf.Session()\n",
194 |     "init = tf.initialize_all_variables()\n",
195 |     "sess.run(init)"
196 |    ]
197 |   },
198 |   {
199 |    "cell_type": "markdown",
200 |    "metadata": {},
201 |    "source": [
202 |     "Try out various regularization parameters:"
203 |    ]
204 |   },
205 |   {
206 |    "cell_type": "code",
207 |    "execution_count": null,
208 |    "metadata": {
209 |     "collapsed": false
210 |    },
211 |    "outputs": [
212 |     {
213 |      "name": "stdout",
214 |      "output_type": "stream",
215 |      "text": [
216 |       "('reg lambda', 0.0)\n",
217 |       "('final cost', 0.032030996)\n",
218 |       "('reg lambda', 0.010101010101010102)\n",
219 |       "('final cost', 0.028902497)\n",
220 |       "('reg lambda', 0.020202020202020204)\n",
221 |       "('final cost', 0.027128801)\n",
222 |       "('reg lambda', 0.030303030303030304)\n",
223 |       "('final cost', 0.025800284)\n",
224 |       "('reg lambda', 0.040404040404040407)\n",
225 |       "('final cost', 0.024812995)\n",
226 |       "('reg lambda', 0.050505050505050511)\n",
227 |       "('final cost', 0.02407708)\n",
228 |       "('reg lambda', 0.060606060606060608)\n",
229 |       "('final cost', 0.023520416)\n",
230 |       "('reg lambda', 0.070707070707070718)\n",
231 |       "('final cost', 0.023091596)\n",
232 |       "('reg lambda', 0.080808080808080815)\n",
233 |       "('final cost', 0.022754973)\n",
234 |       "('reg lambda', 0.090909090909090912)\n",
235 |       "('final cost', 0.02248578)\n",
236 |       "('reg lambda', 0.10101010101010102)\n",
237 |       "('final cost', 0.022266606)\n",
238 |       "('reg lambda', 0.11111111111111112)\n",
239 |       "('final cost', 0.022085056)\n",
240 |       "('reg lambda', 0.12121212121212122)\n",
241 |       "('final cost', 0.021932183)\n",
242 |       "('reg lambda', 0.13131313131313133)\n",
243 |       "('final cost', 0.021801468)\n",
244 |       "('reg lambda', 0.14141414141414144)\n",
245 |       "('final cost', 0.021688091)\n",
246 |       "('reg lambda', 0.15151515151515152)\n",
247 |       "('final cost', 0.021588458)\n",
248 |       "('reg lambda', 0.16161616161616163)\n",
249 |       "('final cost', 0.021499878)\n",
250 |       "('reg lambda', 0.17171717171717174)\n",
251 |       "('final cost', 0.021420266)\n",
252 |       "('reg lambda', 0.18181818181818182)\n",
253 |       "('final cost', 0.0213481)\n",
254 |       "('reg lambda', 0.19191919191919193)\n",
255 |       "('final cost', 0.021282142)\n",
256 |       "('reg lambda', 0.20202020202020204)\n",
257 |       "('final cost', 0.021221504)\n",
258 |       "('reg lambda', 0.21212121212121213)\n",
259 |       "('final cost', 0.021165388)\n",
260 |       "('reg lambda', 0.22222222222222224)\n",
261 |       "('final cost', 0.021113267)\n",
262 |       "('reg lambda', 0.23232323232323235)\n",
263 |       "('final cost', 0.021064682)\n",
264 |       "('reg lambda', 0.24242424242424243)\n",
265 |       "('final cost', 0.021019241)\n",
266 |       "('reg lambda', 0.25252525252525254)\n",
267 |       "('final cost', 0.020976664)\n",
268 |       "('reg lambda', 0.26262626262626265)\n",
269 |       "('final cost', 0.020936726)\n",
270 |       "('reg lambda', 0.27272727272727276)\n",
271 |       "('final cost', 0.020899175)\n",
272 |       "('reg lambda', 0.28282828282828287)\n",
273 |       "('final cost', 0.020863945)\n",
274 |       "('reg lambda', 0.29292929292929293)\n",
275 |       "('final cost', 0.020830829)\n",
276 |       "('reg lambda', 0.30303030303030304)\n",
277 |       "('final cost', 0.020799706)\n",
278 |       "('reg lambda', 0.31313131313131315)\n",
279 |       "('final cost', 0.020770466)\n",
280 |       "('reg lambda', 0.32323232323232326)\n",
281 |       "('final cost', 0.020743057)\n",
282 |       "('reg lambda', 0.33333333333333337)\n",
283 |       "('final cost', 0.020717327)\n",
284 |       "('reg lambda', 0.34343434343434348)\n",
285 |       "('final cost', 0.020693347)\n",
286 |       "('reg lambda', 0.35353535353535359)\n",
287 |       "('final cost', 0.020670896)\n",
288 |       "('reg lambda', 0.36363636363636365)\n",
289 |       "('final cost', 0.020649901)\n",
290 |       "('reg lambda', 0.37373737373737376)\n",
291 |       "('final cost', 0.020630507)\n",
292 |       "('reg lambda', 0.38383838383838387)\n",
293 |       "('final cost', 0.020612456)\n",
294 |       "('reg lambda', 0.39393939393939398)\n",
295 |       "('final cost', 0.020595714)\n",
296 |       "('reg lambda', 0.40404040404040409)\n",
297 |       "('final cost', 0.020580363)\n",
298 |       "('reg lambda', 0.4141414141414142)\n",
299 |       "('final cost', 0.020566262)\n",
300 |       "('reg lambda', 0.42424242424242425)\n",
301 |       "('final cost', 0.02055335)\n",
302 |       "('reg lambda', 0.43434343434343436)\n",
303 |       "('final cost', 0.020541588)\n",
304 |       "('reg lambda', 0.44444444444444448)\n",
305 |       "('final cost', 0.020531092)\n",
306 |       "('reg lambda', 0.45454545454545459)\n",
307 |       "('final cost', 0.020521631)\n",
308 |       "('reg lambda', 0.4646464646464647)\n",
309 |       "('final cost', 0.020513181)\n"
310 |      ]
311 |     }
312 |    ],
313 |    "source": [
314 |     "for reg_lambda in np.linspace(0,1,100):\n",
315 |     "    for epoch in range(training_epochs):\n",
316 |     "        sess.run(train_op, feed_dict={X: x_train, Y: y_train})\n",
317 |     "    final_cost = sess.run(cost, feed_dict={X: x_test, Y:y_test})\n",
318 |     "    print('reg lambda', reg_lambda)\n",
319 |     "    print('final cost', final_cost)"
320 |    ]
321 |   },
322 |   {
323 |    "cell_type": "markdown",
324 |    "metadata": {},
325 |    "source": [
326 |     "Close the session"
327 |    ]
328 |   },
329 |   {
330 |    "cell_type": "code",
331 |    "execution_count": null,
332 |    "metadata": {
333 |     "collapsed": true
334 |    },
335 |    "outputs": [],
336 |    "source": [
337 |     "sess.close()"
338 |    ]
339 |   }
340 |  ],
341 |  "metadata": {
342 |   "kernelspec": {
343 |    "display_name": "Python 2",
344 |    "language": "python",
345 |    "name": "python2"
346 |   },
347 |   "language_info": {
348 |    "codemirror_mode": {
349 |     "name": "ipython",
350 |     "version": 2
351 |    },
352 |    "file_extension": ".py",
353 |    "mimetype": "text/x-python",
354 |    "name": "python",
355 |    "nbconvert_exporter": "python",
356 |    "pygments_lexer": "ipython2",
357 |    "version": "2.7.12"
358 |   }
359 |  },
360 |  "nbformat": 4,
361 |  "nbformat_minor": 0
362 | }
363 | 


--------------------------------------------------------------------------------
/ch05_clustering/Concept02_segmentation.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Ch `05`: Concept `02`"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Segmentation"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "Import libraries and define hyper-parameters:"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 1,
 27 |    "metadata": {
 28 |     "collapsed": true
 29 |    },
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "import tensorflow as tf\n",
 33 |     "import numpy as np\n",
 34 |     "from bregman.suite import *\n",
 35 |     "\n",
 36 |     "k = 4\n",
 37 |     "segment_size = 50  # out of 24,526\n",
 38 |     "max_iterations = 100"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "markdown",
 43 |    "metadata": {},
 44 |    "source": [
 45 |     "Define functions to get the chromagram and the dataset:"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "code",
 50 |    "execution_count": 2,
 51 |    "metadata": {
 52 |     "collapsed": true
 53 |    },
 54 |    "outputs": [],
 55 |    "source": [
 56 |     "chromo = tf.placeholder(tf.float32)\n",
 57 |     "max_freqs = tf.argmax(chromo, 0)\n",
 58 |     "\n",
 59 |     "def get_chromogram(audio_file):\n",
 60 |     "    F = Chromagram(audio_file, nfft=16384, wfft=8192, nhop=2205)\n",
 61 |     "    return F.X\n",
 62 |     "\n",
 63 |     "def get_dataset(sess, audio_file):\n",
 64 |     "    chromo_data = get_chromogram(audio_file)\n",
 65 |     "    print('chromo_data', np.shape(chromo_data))\n",
 66 |     "    chromo_length = np.shape(chromo_data)[1]\n",
 67 |     "    xs = []\n",
 68 |     "    for i in range(chromo_length/segment_size):\n",
 69 |     "        chromo_segment = chromo_data[:, i*segment_size:(i+1)*segment_size]\n",
 70 |     "        x = extract_feature_vector(sess, chromo_segment)\n",
 71 |     "        if len(xs) == 0:\n",
 72 |     "            xs = x\n",
 73 |     "        else:\n",
 74 |     "            xs = np.vstack((xs, x))\n",
 75 |     "    return xs"
 76 |    ]
 77 |   },
 78 |   {
 79 |    "cell_type": "markdown",
 80 |    "metadata": {},
 81 |    "source": [
 82 |     "As required for the k-means algorithm, specify the assignment and re-centering code:"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": 3,
 88 |    "metadata": {
 89 |     "collapsed": true
 90 |    },
 91 |    "outputs": [],
 92 |    "source": [
 93 |     "def initial_cluster_centroids(X, k):\n",
 94 |     "    return X[0:k, :]\n",
 95 |     "\n",
 96 |     "\n",
 97 |     "def assign_cluster(X, centroids):\n",
 98 |     "    expanded_vectors = tf.expand_dims(X, 0)\n",
 99 |     "    expanded_centroids = tf.expand_dims(centroids, 1)\n",
100 |     "    distances = tf.reduce_sum(tf.square(tf.sub(expanded_vectors, expanded_centroids)), 2)\n",
101 |     "    mins = tf.argmin(distances, 0)\n",
102 |     "    return mins\n",
103 |     "\n",
104 |     "\n",
105 |     "def recompute_centroids(X, Y):\n",
106 |     "    sums = tf.unsorted_segment_sum(X, Y, k)\n",
107 |     "    counts = tf.unsorted_segment_sum(tf.ones_like(X), Y, k)\n",
108 |     "    return sums / counts\n"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "markdown",
113 |    "metadata": {},
114 |    "source": [
115 |     "Given a chromagram, extract a normalized histogram of the strongest frequency bin in each time frame as our feature vector:"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": 4,
121 |    "metadata": {
122 |     "collapsed": true
123 |    },
124 |    "outputs": [],
125 |    "source": [
126 |     "def extract_feature_vector(sess, chromo_data):\n",
127 |     "    num_features, num_samples = np.shape(chromo_data)\n",
128 |     "    freq_vals = sess.run(max_freqs, feed_dict={chromo: chromo_data})\n",
129 |     "    hist, bins = np.histogram(freq_vals, bins=range(num_features + 1))\n",
130 |     "    return hist.astype(float) / num_samples"
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "markdown",
135 |    "metadata": {},
136 |    "source": [
137 |     "In a session, segment an audio file using k-means:"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "code",
142 |    "execution_count": 5,
143 |    "metadata": {
144 |     "collapsed": false
145 |    },
146 |    "outputs": [
147 |     {
148 |      "ename": "IOError",
149 |      "evalue": "[Errno 2] No such file or directory: 'sysk.wav'",
150 |      "output_type": "error",
151 |      "traceback": [
152 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
153 |       "\u001b[0;31mIOError\u001b[0m                                   Traceback (most recent call last)",
154 |       "\u001b[0;32m<ipython-input-5-9117f832815d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSession\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0msess\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m     \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msess\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'sysk.wav'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m     \u001b[0;32mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m     \u001b[0mcentroids\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minitial_cluster_centroids\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m     \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconverged\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
155 |       "\u001b[0;32m<ipython-input-2-b7f46b0533cf>\u001b[0m in \u001b[0;36mget_dataset\u001b[0;34m(sess, audio_file)\u001b[0m\n\u001b[1;32m      7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      8\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msess\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maudio_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m     \u001b[0mchromo_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_chromogram\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maudio_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     10\u001b[0m     \u001b[0;32mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'chromo_data'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mchromo_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     11\u001b[0m     \u001b[0mchromo_length\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mchromo_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
156 |       "\u001b[0;32m<ipython-input-2-b7f46b0533cf>\u001b[0m in \u001b[0;36mget_chromogram\u001b[0;34m(audio_file)\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_chromogram\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maudio_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m     \u001b[0mF\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mChromagram\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maudio_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnfft\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m16384\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwfft\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m8192\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnhop\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m2205\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      6\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
157 |       "\u001b[0;32m/usr/local/lib/python2.7/dist-packages/bregman/features.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, arg, **feature_params)\u001b[0m\n\u001b[1;32m    141\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marg\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfeature_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    142\u001b[0m         \u001b[0mfeature_params\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'feature'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'chroma'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 143\u001b[0;31m         \u001b[0mFeatures\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeature_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    144\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    145\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0mHighQuefrencyChromagram\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mFeatures\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
158 |       "\u001b[0;32m/usr/local/lib/python2.7/dist-packages/bregman/features_base.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, arg, feature_params)\u001b[0m\n\u001b[1;32m     67\u001b[0m         \u001b[0;32melif\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m==\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     68\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0marg\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 69\u001b[0;31m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_audio\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# open file as MONO signal\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     70\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextract\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     71\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
159 |       "\u001b[0;32m/usr/local/lib/python2.7/dist-packages/bregman/features_base.pyc\u001b[0m in \u001b[0;36mload_audio\u001b[0;34m(self, filename)\u001b[0m\n\u001b[1;32m    175\u001b[0m             \u001b[0mOpen\u001b[0m \u001b[0ma\u001b[0m \u001b[0mWAV\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0mAIFC\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0mAU\u001b[0m \u001b[0mfile\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0ma\u001b[0m \u001b[0mMONO\u001b[0m \u001b[0msignal\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mL\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msets\u001b[0m \u001b[0maudio\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    176\u001b[0m         \"\"\"\n\u001b[0;32m--> 177\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mWavOpen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnhop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mverbosity\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    178\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_have_x\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    179\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msample_rate\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msample_rate\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
160 |       "\u001b[0;32m/usr/local/lib/python2.7/dist-packages/bregman/sound.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, arg, n, verbosity)\u001b[0m\n\u001b[1;32m     35\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbosity\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     36\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msound\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 37\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbosity\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     38\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     39\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__iter__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
161 |       "\u001b[0;32m/usr/local/lib/python2.7/dist-packages/bregman/sound.pyc\u001b[0m in \u001b[0;36mopen\u001b[0;34m(self, filename, n, verbosity)\u001b[0m\n\u001b[1;32m     43\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msound\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     44\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfilename\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfilename\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 45\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msound\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mwave\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"r\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     46\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     47\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msample_rate\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msound\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetframerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
162 |       "\u001b[0;32m/usr/lib/python2.7/wave.pyc\u001b[0m in \u001b[0;36mopen\u001b[0;34m(f, mode)\u001b[0m\n\u001b[1;32m    509\u001b[0m             \u001b[0mmode\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'rb'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    510\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'r'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 511\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mWave_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    512\u001b[0m     \u001b[0;32melif\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'w'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'wb'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    513\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mWave_write\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
163 |       "\u001b[0;32m/usr/lib/python2.7/wave.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, f)\u001b[0m\n\u001b[1;32m    158\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_i_opened_the_file\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    159\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbasestring\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 160\u001b[0;31m             \u001b[0mf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m__builtin__\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    161\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_i_opened_the_file\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    162\u001b[0m         \u001b[0;31m# else, assume it is an open file object already\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
164 |       "\u001b[0;31mIOError\u001b[0m: [Errno 2] No such file or directory: 'sysk.wav'"
165 |      ]
166 |     }
167 |    ],
168 |    "source": [
169 |     "with tf.Session() as sess:\n",
170 |     "    X = get_dataset(sess, 'sysk.wav')\n",
171 |     "    print(np.shape(X))\n",
172 |     "    centroids = initial_cluster_centroids(X, k)\n",
173 |     "    i, converged = 0, False\n",
174 |     "    # prev_Y = None\n",
175 |     "    while not converged and i < max_iterations:\n",
176 |     "        i += 1\n",
177 |     "        Y = assign_cluster(X, centroids)\n",
178 |     "        # if prev_Y == Y:\n",
179 |     "        #     converged = True\n",
180 |     "        #     break\n",
181 |     "        # prev_Y = Y\n",
182 |     "        centroids = sess.run(recompute_centroids(X, Y))\n",
183 |     "        if i % 50 == 0:\n",
184 |     "            print('iteration', i)\n",
185 |     "    segments = sess.run(Y)\n",
186 |     "    for i in range(len(segments)):\n",
187 |     "        seconds = (i * segment_size) / float(10)\n",
188 |     "        min, sec = divmod(seconds, 60)\n",
189 |     "        time_str = str(min) + 'm ' + str(sec) + 's'\n",
190 |     "        print(time_str, segments[i])"
191 |    ]
192 |   }
193 |  ],
194 |  "metadata": {
195 |   "kernelspec": {
196 |    "display_name": "Python 2",
197 |    "language": "python",
198 |    "name": "python2"
199 |   },
200 |   "language_info": {
201 |    "codemirror_mode": {
202 |     "name": "ipython",
203 |     "version": 2
204 |    },
205 |    "file_extension": ".py",
206 |    "mimetype": "text/x-python",
207 |    "name": "python",
208 |    "nbconvert_exporter": "python",
209 |    "pygments_lexer": "ipython2",
210 |    "version": "2.7.12"
211 |   }
212 |  },
213 |  "nbformat": 4,
214 |  "nbformat_minor": 1
215 | }
216 | 


--------------------------------------------------------------------------------