├── tf-slides ├── tf-slides.snm ├── tf-slides.pdf ├── figs │ ├── tensors.png │ ├── 3-add-bias.png │ ├── full-graph.png │ ├── 1-initialization.png │ ├── 2-apply-weights.png │ ├── 4-apply-sigmoid.png │ ├── 5-loss-function.png │ ├── 6-optimization.png │ ├── thats-all-folks.jpg │ └── define-X-Y-matrices.png ├── tf-slides.out ├── tf-slides.toc ├── tf-slides.vrb ├── tf-slides.nav ├── tf-slides.aux ├── tf-slides.tex~ └── tf-slides.tex ├── requirements.txt ├── data.tar.gz ├── trained_variables.ckpt ├── summary_logs ├── events.out.tfevents.1456006109.N550JV ├── events.out.tfevents.1456040112.yoga ├── events.out.tfevents.1488987088.yoga ├── events.out.tfevents.1488987154.yoga ├── events.out.tfevents.1488987254.yoga └── events.out.tfevents.1488988866.yoga ├── README.md ├── .gitignore ├── LICENSE ├── make-your-own-data ├── README.txt ├── format_SMS.sh └── read_corpus.py ├── logistic_regression_predict.py ├── email_input.py └── logistic_regression_train.py /tf-slides/tf-slides.snm: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow 2 | matplotlib 3 | numpy 4 | tkinter 5 | -------------------------------------------------------------------------------- /data.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JRMeyer/tensorflow-tutorial/HEAD/data.tar.gz -------------------------------------------------------------------------------- /trained_variables.ckpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JRMeyer/tensorflow-tutorial/HEAD/trained_variables.ckpt -------------------------------------------------------------------------------- /tf-slides/tf-slides.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/JRMeyer/tensorflow-tutorial/HEAD/tf-slides/tf-slides.pdf -------------------------------------------------------------------------------- /tf-slides/figs/tensors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JRMeyer/tensorflow-tutorial/HEAD/tf-slides/figs/tensors.png -------------------------------------------------------------------------------- /tf-slides/figs/3-add-bias.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JRMeyer/tensorflow-tutorial/HEAD/tf-slides/figs/3-add-bias.png -------------------------------------------------------------------------------- /tf-slides/figs/full-graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JRMeyer/tensorflow-tutorial/HEAD/tf-slides/figs/full-graph.png -------------------------------------------------------------------------------- /tf-slides/figs/1-initialization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JRMeyer/tensorflow-tutorial/HEAD/tf-slides/figs/1-initialization.png -------------------------------------------------------------------------------- /tf-slides/figs/2-apply-weights.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JRMeyer/tensorflow-tutorial/HEAD/tf-slides/figs/2-apply-weights.png -------------------------------------------------------------------------------- /tf-slides/figs/4-apply-sigmoid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JRMeyer/tensorflow-tutorial/HEAD/tf-slides/figs/4-apply-sigmoid.png 
-------------------------------------------------------------------------------- /tf-slides/figs/5-loss-function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JRMeyer/tensorflow-tutorial/HEAD/tf-slides/figs/5-loss-function.png -------------------------------------------------------------------------------- /tf-slides/figs/6-optimization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JRMeyer/tensorflow-tutorial/HEAD/tf-slides/figs/6-optimization.png -------------------------------------------------------------------------------- /tf-slides/figs/thats-all-folks.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JRMeyer/tensorflow-tutorial/HEAD/tf-slides/figs/thats-all-folks.jpg -------------------------------------------------------------------------------- /tf-slides/figs/define-X-Y-matrices.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JRMeyer/tensorflow-tutorial/HEAD/tf-slides/figs/define-X-Y-matrices.png -------------------------------------------------------------------------------- /summary_logs/events.out.tfevents.1456006109.N550JV: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JRMeyer/tensorflow-tutorial/HEAD/summary_logs/events.out.tfevents.1456006109.N550JV -------------------------------------------------------------------------------- /summary_logs/events.out.tfevents.1456040112.yoga: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JRMeyer/tensorflow-tutorial/HEAD/summary_logs/events.out.tfevents.1456040112.yoga -------------------------------------------------------------------------------- 
/summary_logs/events.out.tfevents.1488987088.yoga: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JRMeyer/tensorflow-tutorial/HEAD/summary_logs/events.out.tfevents.1488987088.yoga -------------------------------------------------------------------------------- /summary_logs/events.out.tfevents.1488987154.yoga: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JRMeyer/tensorflow-tutorial/HEAD/summary_logs/events.out.tfevents.1488987154.yoga -------------------------------------------------------------------------------- /summary_logs/events.out.tfevents.1488987254.yoga: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JRMeyer/tensorflow-tutorial/HEAD/summary_logs/events.out.tfevents.1488987254.yoga -------------------------------------------------------------------------------- /summary_logs/events.out.tfevents.1488988866.yoga: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JRMeyer/tensorflow-tutorial/HEAD/summary_logs/events.out.tfevents.1488988866.yoga -------------------------------------------------------------------------------- /tf-slides/tf-slides.out: -------------------------------------------------------------------------------- 1 | \BOOKMARK [2][]{Outline0.1}{What's Going to Happen}{}% 1 2 | \BOOKMARK [2][]{Outline0.2}{TensorFlow Structures}{}% 2 3 | \BOOKMARK [2][]{Outline0.3}{The Flow of TensorFlow}{}% 3 4 | -------------------------------------------------------------------------------- /tf-slides/tf-slides.toc: -------------------------------------------------------------------------------- 1 | \beamer@endinputifotherversion {3.24pt} 2 | \beamer@sectionintoc {1}{What's Going to Happen}{3}{0}{1} 3 | \beamer@sectionintoc {2}{TensorFlow Structures}{4}{0}{2} 4 | \beamer@sectionintoc {3}{The 
Flow of TensorFlow}{6}{0}{3} 5 | -------------------------------------------------------------------------------- /tf-slides/tf-slides.vrb: -------------------------------------------------------------------------------- 1 | \frametitle{Reuse, Recycle} 2 | \begin{python} 3 | ############################## 4 | ### SAVE TRAINED VARIABLES ### 5 | ############################## 6 | 7 | # Create Saver 8 | saver = tf.train.Saver() 9 | # Save variables to .ckpt file 10 | # saver.save(sess, "trained_variables.ckpt") 11 | 12 | # Close tensorflow session 13 | sess.close() 14 | \end{python} 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # TL;DR 3 | 4 | You can find a tutorial and walk-through of the code here: 5 | 6 | [http://jrmeyer.github.io/machinelearning/2016/02/01/TensorFlow-Tutorial.html](http://jrmeyer.github.io/machinelearning/2016/02/01/TensorFlow-Tutorial.html) 7 | 8 | # Application 9 | 10 | To ground this tutorial in some real-world application, we decided to use a common beginner problem from Natural Language Processing (NLP): **email classification**. The idea is simple - given an email you’ve never seen before, determine whether or not that email is Spam or not (aka Ham). 
11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | 3 | # extracted data 4 | data/ 5 | 6 | ### Python template 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | *.pyc 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | env/ 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *,cover 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | 61 | # Sphinx documentation 62 | docs/_build/ 63 | 64 | # PyBuilder 65 | target/ 66 | 67 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Josh Meyer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 
| The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /make-your-own-data/README.txt: -------------------------------------------------------------------------------- 1 | # Joshua Meyer (2017) 2 | 3 | I've added this main script to give an idea of how 4 | to get raw text documents into the format required by 5 | logistic_regression_train.py. 6 | 7 | read_corpus.py expects data in this format: 8 | 9 | data/ 10 | ham/ 11 | hamfile1.txt 12 | hamfile2.txt 13 | hamfile3.txt 14 | 15 | spam/ 16 | spamfile1.txt 17 | spamfile2.txt 18 | spamfile3.txt 19 | 20 | 21 | To give a real case, I made to format an easy to get 22 | corpus, a dataset used in Kaggle , originally from . 23 | 24 | So, if you can: 25 | 26 | (1) download the SMS corpus 27 | (2) format it with 28 | (3) process new data/ dir with read_corpus.py 29 | 30 | You should be able to process your own data with the same pipeline. 31 | 32 | This is just one way to get features from text, and it makes very 33 | sparse matrices, so be careful, you can use up a lot of RAM by training on a very sparse trainX.csv. 
34 | 35 | 36 | -------------------------------------------------------------------------------- /make-your-own-data/format_SMS.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Joshua Meyer (2017) 4 | 5 | # Input: dataset used in Kaggle , originally from 6 | # 7 | # This script just splits above dataset into format that 8 | # works for input script, namely: 9 | # 10 | # data/ 11 | # ham/ 12 | # hamText1.txt 13 | # hamText2.txt 14 | # hamText3.txt 15 | # 16 | # spam/ 17 | # spamText1.txt 18 | # spamText2.txt 19 | # spamText3.txt 20 | 21 | 22 | SMSSpamCollection=$1 23 | 24 | if [ ! -f $SMSSpamCollection ]; then 25 | echo "$0: Input dataset not found!" 26 | fi 27 | 28 | mkdir -p data/ham data/spam 29 | 30 | iter=0; 31 | while read line; 32 | do line=( $line ); 33 | label=${line[0]}; 34 | if [ "$label" == "ham" ]; then 35 | sms="${line[@]:3}"; 36 | else 37 | sms="${line[@]:4}"; 38 | fi; 39 | 40 | # don't know why but I'm getting empty sms files, 41 | # just ignore them (< 100 out of thousands) 42 | if [ "$sms" == "" ]; then 43 | echo "empty SMS:/ Skipping..." 
44 | else 45 | echo $sms > data/${label}/${iter}.txt; 46 | ((iter++)); 47 | fi 48 | 49 | done<$SMSSpamCollection 50 | -------------------------------------------------------------------------------- /tf-slides/tf-slides.nav: -------------------------------------------------------------------------------- 1 | \beamer@endinputifotherversion {3.24pt} 2 | \headcommand {\slideentry {0}{0}{1}{1/1}{}{0}} 3 | \headcommand {\beamer@framepages {1}{1}} 4 | \headcommand {\slideentry {0}{0}{2}{2/2}{}{0}} 5 | \headcommand {\beamer@framepages {2}{2}} 6 | \headcommand {\sectionentry {1}{What's Going to Happen}{3}{What's Going to Happen}{0}} 7 | \headcommand {\beamer@sectionpages {1}{2}} 8 | \headcommand {\beamer@subsectionpages {1}{2}} 9 | \headcommand {\slideentry {1}{0}{3}{3/3}{}{0}} 10 | \headcommand {\beamer@framepages {3}{3}} 11 | \headcommand {\sectionentry {2}{TensorFlow Structures}{4}{TensorFlow Structures}{0}} 12 | \headcommand {\beamer@sectionpages {3}{3}} 13 | \headcommand {\beamer@subsectionpages {3}{3}} 14 | \headcommand {\slideentry {2}{0}{4}{4/4}{}{0}} 15 | \headcommand {\beamer@framepages {4}{4}} 16 | \headcommand {\slideentry {2}{0}{5}{5/5}{}{0}} 17 | \headcommand {\beamer@framepages {5}{5}} 18 | \headcommand {\sectionentry {3}{The Flow of TensorFlow}{6}{The Flow of TensorFlow}{0}} 19 | \headcommand {\beamer@sectionpages {4}{5}} 20 | \headcommand {\beamer@subsectionpages {4}{5}} 21 | \headcommand {\slideentry {3}{0}{6}{6/6}{}{0}} 22 | \headcommand {\beamer@framepages {6}{6}} 23 | \headcommand {\slideentry {3}{0}{7}{7/7}{}{0}} 24 | \headcommand {\beamer@framepages {7}{7}} 25 | \headcommand {\slideentry {3}{0}{8}{8/8}{}{0}} 26 | \headcommand {\beamer@framepages {8}{8}} 27 | \headcommand {\slideentry {3}{0}{9}{9/9}{}{0}} 28 | \headcommand {\beamer@framepages {9}{9}} 29 | \headcommand {\slideentry {3}{0}{10}{10/10}{}{0}} 30 | \headcommand {\beamer@framepages {10}{10}} 31 | \headcommand {\slideentry {3}{0}{11}{11/11}{}{0}} 32 | \headcommand {\beamer@framepages 
{11}{11}} 33 | \headcommand {\slideentry {3}{0}{12}{12/12}{}{0}} 34 | \headcommand {\beamer@framepages {12}{12}} 35 | \headcommand {\slideentry {3}{0}{13}{13/13}{}{0}} 36 | \headcommand {\beamer@framepages {13}{13}} 37 | \headcommand {\slideentry {3}{0}{14}{14/14}{}{0}} 38 | \headcommand {\beamer@framepages {14}{14}} 39 | \headcommand {\slideentry {3}{0}{15}{15/15}{}{0}} 40 | \headcommand {\beamer@framepages {15}{15}} 41 | \headcommand {\slideentry {3}{0}{16}{16/16}{}{0}} 42 | \headcommand {\beamer@framepages {16}{16}} 43 | \headcommand {\slideentry {3}{0}{17}{17/17}{}{0}} 44 | \headcommand {\beamer@framepages {17}{17}} 45 | \headcommand {\slideentry {3}{0}{18}{18/18}{}{0}} 46 | \headcommand {\beamer@framepages {18}{18}} 47 | \headcommand {\slideentry {3}{0}{19}{19/19}{}{0}} 48 | \headcommand {\beamer@framepages {19}{19}} 49 | \headcommand {\slideentry {3}{0}{20}{20/20}{}{0}} 50 | \headcommand {\beamer@framepages {20}{20}} 51 | \headcommand {\slideentry {3}{0}{21}{21/21}{}{0}} 52 | \headcommand {\beamer@framepages {21}{21}} 53 | \headcommand {\slideentry {3}{0}{22}{22/22}{}{0}} 54 | \headcommand {\beamer@framepages {22}{22}} 55 | \headcommand {\slideentry {3}{0}{23}{23/23}{}{0}} 56 | \headcommand {\beamer@framepages {23}{23}} 57 | \headcommand {\slideentry {3}{0}{24}{24/24}{}{0}} 58 | \headcommand {\beamer@framepages {24}{24}} 59 | \headcommand {\slideentry {3}{0}{25}{25/25}{}{0}} 60 | \headcommand {\beamer@framepages {25}{25}} 61 | \headcommand {\slideentry {3}{0}{26}{26/26}{}{0}} 62 | \headcommand {\beamer@framepages {26}{26}} 63 | \headcommand {\slideentry {3}{0}{27}{27/27}{}{0}} 64 | \headcommand {\beamer@framepages {27}{27}} 65 | \headcommand {\slideentry {3}{0}{28}{28/28}{}{0}} 66 | \headcommand {\beamer@framepages {28}{28}} 67 | \headcommand {\slideentry {3}{0}{29}{29/29}{}{0}} 68 | \headcommand {\beamer@framepages {29}{29}} 69 | \headcommand {\slideentry {3}{0}{30}{30/30}{}{0}} 70 | \headcommand {\beamer@framepages {30}{30}} 71 | \headcommand 
{\beamer@partpages {1}{30}} 72 | \headcommand {\beamer@subsectionpages {6}{30}} 73 | \headcommand {\beamer@sectionpages {6}{30}} 74 | \headcommand {\beamer@documentpages {30}} 75 | \headcommand {\def \inserttotalframenumber {30}} 76 | -------------------------------------------------------------------------------- /logistic_regression_predict.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import tarfile 4 | import os 5 | 6 | def csv_to_numpy_array(filePath, delimiter): 7 | return np.genfromtxt(filePath, delimiter=delimiter, dtype=None) 8 | 9 | def import_data(): 10 | if "data" not in os.listdir(os.getcwd()): 11 | # Untar directory of data if we haven't already 12 | tarObject = tarfile.open("data.tar.gz") 13 | tarObject.extractall() 14 | tarObject.close() 15 | print("Extracted tar to current directory") 16 | else: 17 | # we've already extracted the files 18 | pass 19 | 20 | print("loading training data") 21 | trainX = csv_to_numpy_array("data/trainX.csv", delimiter="\t") 22 | trainY = csv_to_numpy_array("data/trainY.csv", delimiter="\t") 23 | print("loading test data") 24 | testX = csv_to_numpy_array("data/testX.csv", delimiter="\t") 25 | testY = csv_to_numpy_array("data/testY.csv", delimiter="\t") 26 | return trainX,trainY,testX,testY 27 | 28 | 29 | ################### 30 | ### IMPORT DATA ### 31 | ################### 32 | 33 | trainX,trainY,testX,testY = import_data() 34 | 35 | 36 | ######################### 37 | ### GLOBAL PARAMETERS ### 38 | ######################### 39 | 40 | # Get our dimensions for our different variables and placeholders: 41 | # numFeatures = the number of words extracted from each email 42 | numFeatures = trainX.shape[1] 43 | # numLabels = number of classes we are predicting (here just 2: ham or spam) 44 | numLabels = trainY.shape[1] 45 | 46 | #create a tensorflow session 47 | sess = tf.Session() 48 | 49 | 50 | #################### 51 | ### 
PLACEHOLDERS ### 52 | #################### 53 | 54 | # X = X-matrix / feature-matrix / data-matrix... It's a tensor to hold our email 55 | # data. 'None' here means that we can hold any number of emails 56 | X = tf.placeholder(tf.float32, [None, numFeatures]) 57 | # yGold = Y-matrix / label-matrix / labels... This will be our correct answers 58 | # matrix. Every row has either [1,0] for SPAM or [0,1] for HAM. 'None' here 59 | # means that we can hold any number of emails 60 | yGold = tf.placeholder(tf.float32, [None, numLabels]) 61 | 62 | 63 | ################# 64 | ### VARIABLES ### 65 | ################# 66 | 67 | #all values must be initialized to a value before loading can occur 68 | 69 | weights = tf.Variable(tf.zeros([numFeatures,numLabels])) 70 | 71 | bias = tf.Variable(tf.zeros([1,numLabels])) 72 | 73 | ######################## 74 | ### OPS / OPERATIONS ### 75 | ######################## 76 | 77 | #since we don't have to train the model, the only Ops are the prediction operations 78 | 79 | apply_weights_OP = tf.matmul(X, weights, name="apply_weights") 80 | add_bias_OP = tf.add(apply_weights_OP, bias, name="add_bias") 81 | activation_OP = tf.nn.sigmoid(add_bias_OP, name="activation") 82 | 83 | 84 | # argmax(activation_OP, 1) gives the label our model thought was most likely 85 | # argmax(yGold, 1) is the correct label 86 | correct_predictions_OP = tf.equal(tf.argmax(activation_OP,1),tf.argmax(yGold,1)) 87 | 88 | # False is 0 and True is 1, what was our average? 
89 | accuracy_OP = tf.reduce_mean(tf.cast(correct_predictions_OP, "float")) 90 | 91 | # Initializes everything we've defined made above, but doesn't run anything 92 | # until sess.run() 93 | init_OP = tf.initialize_all_variables() 94 | 95 | sess.run(init_OP) #initialize variables BEFORE loading 96 | 97 | #load variables from file 98 | saver = tf.train.Saver() 99 | saver.restore(sess, "trained_variables.ckpt") 100 | 101 | ##################### 102 | ### RUN THE GRAPH ### 103 | ##################### 104 | 105 | # Initialize all tensorflow objects 106 | # sess.run(init_OP) 107 | 108 | #method for converting tensor label to string label 109 | def labelToString(label): 110 | if np.argmax(label) == 0: 111 | return "ham" 112 | else: 113 | return "spam" 114 | 115 | #make prediction on a given test set item 116 | def predict(features, goldLabel): 117 | #run through graph 118 | tensor_prediction = sess.run(activation_OP, feed_dict={X: features.reshape(1, len(features)), yGold: goldLabel.reshape(1, len(goldLabel))}) #had to make sure that each input in feed_dict was an array 119 | prediction = labelToString(tensor_prediction) 120 | actual = labelToString(goldLabel) 121 | print("regression predicts email to be %s and is actually %s" %(prediction, actual)) 122 | 123 | if __name__ == "__main__": 124 | 125 | #show predictions and accuracy of entire test set 126 | prediction, evaluation = sess.run([activation_OP, accuracy_OP], feed_dict={X: testX, yGold: testY}) 127 | 128 | for i in range(len(testX)): 129 | print("regression predicts email %s to be %s and is actually %s" %(str(i + 1), labelToString(prediction[i]), labelToString(testY[i]))) 130 | print("overall accuracy of dataset: %s percent" %str(evaluation)) 131 | 132 | -------------------------------------------------------------------------------- /tf-slides/tf-slides.aux: -------------------------------------------------------------------------------- 1 | \relax 2 | \providecommand\hyper@newdestlabel[2]{} 3 | 
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument} 4 | \HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined 5 | \global\let\oldcontentsline\contentsline 6 | \gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}} 7 | \global\let\oldnewlabel\newlabel 8 | \gdef\newlabel#1#2{\newlabelxx{#1}#2} 9 | \gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}} 10 | \AtEndDocument{\ifx\hyper@anchor\@undefined 11 | \let\contentsline\oldcontentsline 12 | \let\newlabel\oldnewlabel 13 | \fi} 14 | \fi} 15 | \global\let\hyper@last\relax 16 | \gdef\HyperFirstAtBeginDocument#1{#1} 17 | \providecommand\HyField@AuxAddToFields[1]{} 18 | \providecommand\HyField@AuxAddToCoFields[2]{} 19 | \providecommand \oddpage@label [2]{} 20 | \@writefile{toc}{\beamer@endinputifotherversion {3.24pt}} 21 | \@writefile{nav}{\beamer@endinputifotherversion {3.24pt}} 22 | \@writefile{nav}{\headcommand {\slideentry {0}{0}{1}{1/1}{}{0}}} 23 | \@writefile{nav}{\headcommand {\beamer@framepages {1}{1}}} 24 | \@writefile{nav}{\headcommand {\slideentry {0}{0}{2}{2/2}{}{0}}} 25 | \@writefile{nav}{\headcommand {\beamer@framepages {2}{2}}} 26 | \@writefile{toc}{\beamer@sectionintoc {1}{What's Going to Happen}{3}{0}{1}} 27 | \@writefile{nav}{\headcommand {\sectionentry {1}{What's Going to Happen}{3}{What's Going to Happen}{0}}} 28 | \@writefile{nav}{\headcommand {\beamer@sectionpages {1}{2}}} 29 | \@writefile{nav}{\headcommand {\beamer@subsectionpages {1}{2}}} 30 | \@writefile{nav}{\headcommand {\slideentry {1}{0}{3}{3/3}{}{0}}} 31 | \@writefile{nav}{\headcommand {\beamer@framepages {3}{3}}} 32 | \@writefile{toc}{\beamer@sectionintoc {2}{TensorFlow Structures}{4}{0}{2}} 33 | \@writefile{nav}{\headcommand {\sectionentry {2}{TensorFlow Structures}{4}{TensorFlow Structures}{0}}} 34 | \@writefile{nav}{\headcommand {\beamer@sectionpages {3}{3}}} 35 | \@writefile{nav}{\headcommand {\beamer@subsectionpages {3}{3}}} 36 | \@writefile{nav}{\headcommand {\slideentry {2}{0}{4}{4/4}{}{0}}} 37 | 
\@writefile{nav}{\headcommand {\beamer@framepages {4}{4}}} 38 | \@writefile{nav}{\headcommand {\slideentry {2}{0}{5}{5/5}{}{0}}} 39 | \@writefile{nav}{\headcommand {\beamer@framepages {5}{5}}} 40 | \@writefile{toc}{\beamer@sectionintoc {3}{The Flow of TensorFlow}{6}{0}{3}} 41 | \@writefile{nav}{\headcommand {\sectionentry {3}{The Flow of TensorFlow}{6}{The Flow of TensorFlow}{0}}} 42 | \@writefile{nav}{\headcommand {\beamer@sectionpages {4}{5}}} 43 | \@writefile{nav}{\headcommand {\beamer@subsectionpages {4}{5}}} 44 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{6}{6/6}{}{0}}} 45 | \@writefile{nav}{\headcommand {\beamer@framepages {6}{6}}} 46 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{7}{7/7}{}{0}}} 47 | \@writefile{nav}{\headcommand {\beamer@framepages {7}{7}}} 48 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{8}{8/8}{}{0}}} 49 | \@writefile{nav}{\headcommand {\beamer@framepages {8}{8}}} 50 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{9}{9/9}{}{0}}} 51 | \@writefile{nav}{\headcommand {\beamer@framepages {9}{9}}} 52 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{10}{10/10}{}{0}}} 53 | \@writefile{nav}{\headcommand {\beamer@framepages {10}{10}}} 54 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{11}{11/11}{}{0}}} 55 | \@writefile{nav}{\headcommand {\beamer@framepages {11}{11}}} 56 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{12}{12/12}{}{0}}} 57 | \@writefile{nav}{\headcommand {\beamer@framepages {12}{12}}} 58 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{13}{13/13}{}{0}}} 59 | \@writefile{nav}{\headcommand {\beamer@framepages {13}{13}}} 60 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{14}{14/14}{}{0}}} 61 | \@writefile{nav}{\headcommand {\beamer@framepages {14}{14}}} 62 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{15}{15/15}{}{0}}} 63 | \@writefile{nav}{\headcommand {\beamer@framepages {15}{15}}} 64 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{16}{16/16}{}{0}}} 65 | \@writefile{nav}{\headcommand 
{\beamer@framepages {16}{16}}} 66 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{17}{17/17}{}{0}}} 67 | \@writefile{nav}{\headcommand {\beamer@framepages {17}{17}}} 68 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{18}{18/18}{}{0}}} 69 | \@writefile{nav}{\headcommand {\beamer@framepages {18}{18}}} 70 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{19}{19/19}{}{0}}} 71 | \@writefile{nav}{\headcommand {\beamer@framepages {19}{19}}} 72 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{20}{20/20}{}{0}}} 73 | \@writefile{nav}{\headcommand {\beamer@framepages {20}{20}}} 74 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{21}{21/21}{}{0}}} 75 | \@writefile{nav}{\headcommand {\beamer@framepages {21}{21}}} 76 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{22}{22/22}{}{0}}} 77 | \@writefile{nav}{\headcommand {\beamer@framepages {22}{22}}} 78 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{23}{23/23}{}{0}}} 79 | \@writefile{nav}{\headcommand {\beamer@framepages {23}{23}}} 80 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{24}{24/24}{}{0}}} 81 | \@writefile{nav}{\headcommand {\beamer@framepages {24}{24}}} 82 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{25}{25/25}{}{0}}} 83 | \@writefile{nav}{\headcommand {\beamer@framepages {25}{25}}} 84 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{26}{26/26}{}{0}}} 85 | \@writefile{nav}{\headcommand {\beamer@framepages {26}{26}}} 86 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{27}{27/27}{}{0}}} 87 | \@writefile{nav}{\headcommand {\beamer@framepages {27}{27}}} 88 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{28}{28/28}{}{0}}} 89 | \@writefile{nav}{\headcommand {\beamer@framepages {28}{28}}} 90 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{29}{29/29}{}{0}}} 91 | \@writefile{nav}{\headcommand {\beamer@framepages {29}{29}}} 92 | \@writefile{nav}{\headcommand {\slideentry {3}{0}{30}{30/30}{}{0}}} 93 | \@writefile{nav}{\headcommand {\beamer@framepages {30}{30}}} 94 | 
\@writefile{nav}{\headcommand {\beamer@partpages {1}{30}}} 95 | \@writefile{nav}{\headcommand {\beamer@subsectionpages {6}{30}}} 96 | \@writefile{nav}{\headcommand {\beamer@sectionpages {6}{30}}} 97 | \@writefile{nav}{\headcommand {\beamer@documentpages {30}}} 98 | \@writefile{nav}{\headcommand {\def \inserttotalframenumber {30}}} 99 | -------------------------------------------------------------------------------- /make-your-own-data/read_corpus.py: -------------------------------------------------------------------------------- 1 | # Joshua Meyer (2017) 2 | # 3 | 4 | # USAGE: $ python3 ./read_corpus.py -ham data/ham -spam data/spam 5 | 6 | # Script expects data in the format: 7 | # Where the dirnames 'ham' and 'spam' 8 | # Are imporant. Filenames are not. 9 | # 10 | # data/ 11 | # ham/ 12 | # hamText1.txt 13 | # hamText2.txt 14 | # hamText3.txt 15 | # 16 | # spam/ 17 | # spamText1.txt 18 | # spamText2.txt 19 | # spamText3.txt 20 | 21 | 22 | import glob 23 | import random 24 | import re 25 | from collections import Counter 26 | import numpy as np 27 | 28 | def create_bag_of_words(filePaths): 29 | ''' 30 | Input: 31 | filePaths: Array. A list of absolute filepaths 32 | Returns: 33 | bagOfWords: Array. 
All tokens in files 34 | ''' 35 | bagOfWords = [] 36 | # this regex filtering is specific to original dataset 37 | regex = re.compile("X-Spam.*\n") 38 | for filePath in filePaths: 39 | with open(filePath, encoding ="latin-1") as f: 40 | raw = f.read() 41 | raw = re.sub(regex,'',raw) 42 | tokens = raw.split() 43 | for token in tokens: 44 | bagOfWords.append(token) 45 | return bagOfWords 46 | 47 | def get_feature_matrix(filePaths, featureDict): 48 | ''' 49 | create feature/x matrix from multiple text files 50 | rows = files, cols = features 51 | ''' 52 | featureMatrix = np.zeros(shape=(len(filePaths), 53 | len(featureDict)), 54 | dtype=float) 55 | regex = re.compile("X-Spam.*\n") 56 | for i,filePath in enumerate(filePaths): 57 | with open(filePath, encoding ="latin-1") as f: 58 | _raw = f.read() 59 | raw = re.sub(regex,'',_raw) 60 | tokens = raw.split() 61 | fileUniDist = Counter(tokens) 62 | for key,value in fileUniDist.items(): 63 | if key in featureDict: 64 | featureMatrix[i,featureDict[key]] = value 65 | return featureMatrix 66 | 67 | def regularize_vectors(featureMatrix): 68 | ''' 69 | Input: 70 | featureMatrix: matrix, where docs are rows and features are columns 71 | Returns: 72 | featureMatrix: matrix, updated by dividing each feature value by the total 73 | number of features for a given document 74 | ''' 75 | for doc in range(featureMatrix.shape[0]): 76 | totalWords = np.sum(featureMatrix[doc,:],axis=0) 77 | # some reason getting docs with 0 total words, just hacking right now 78 | # with +1 in denominator 79 | featureMatrix[doc,:] = np.multiply(featureMatrix[doc,:],(1/(totalWords+1))) 80 | 81 | return featureMatrix 82 | 83 | def input_data(hamDir,spamDir,percentTest): 84 | ''' 85 | Input: 86 | hamDir: String. dir of ham text files 87 | spamDir: String. dir of spam text file 88 | percentTest: Float. percentage of all data to be assigned to testset 89 | Returns: 90 | trainPaths: Array. Absolute paths to training emails 91 | trainY: Array. 
Training labels, 0 or 1 int. 92 | testPaths: Array. Absolute paths to testing emails 93 | testY: Array. Testing labels, 0 or 1 int. 94 | ''' 95 | pathLabelPairs={} 96 | for hamPath in glob.glob(hamDir+'/*'): 97 | pathLabelPairs.update({hamPath:"0.,1."}) 98 | for spamPath in glob.glob(spamDir+'/*'): 99 | pathLabelPairs.update({spamPath:"1.,0."}) 100 | 101 | # get test set as random subsample of all data 102 | numTest = int(percentTest * len(pathLabelPairs)) 103 | testing = set(random.sample(pathLabelPairs.items(),numTest)) 104 | 105 | # delete testing data from superset of all data 106 | for entry in testing: 107 | del pathLabelPairs[entry[0]] 108 | 109 | # split training tuples of (path,label) into separate lists 110 | trainPaths=[] 111 | trainY=[] 112 | for item in pathLabelPairs.items(): 113 | trainPaths.append(item[0]) 114 | trainY.append([float(i) for i in item[1].split(',')]) 115 | del pathLabelPairs 116 | trainY=np.asarray(trainY) 117 | 118 | # split testing tuples of (path,label) into separate lists 119 | testPaths=[] 120 | testY=[] 121 | for item in testing: 122 | testPaths.append(item[0]) 123 | testY.append([float(i) for i in item[1].split(',')]) 124 | del testing 125 | testY=np.asarray(testY) 126 | 127 | # create feature dictionary of n-grams 128 | bagOfWords = create_bag_of_words(trainPaths) 129 | 130 | # throw out low freq words if you want (set k=FreqCutOff) 131 | k=5 132 | freqDist = Counter(bagOfWords) 133 | newBagOfWords=[] 134 | for word,freq in freqDist.items(): 135 | if freq > k: 136 | newBagOfWords.append(word) 137 | features = set(newBagOfWords) 138 | featureDict = {feature:i for i,feature in enumerate(features)} 139 | 140 | # make feature matrices 141 | trainX = get_feature_matrix(trainPaths,featureDict) 142 | testX = get_feature_matrix(testPaths,featureDict) 143 | 144 | # regularize length 145 | trainX = regularize_vectors(trainX) 146 | testX = regularize_vectors(testX) 147 | 148 | return trainX, trainY, testX, testY 149 | 150 | 151 | def 
parse_user_args(): 152 | parser = argparse.ArgumentParser() 153 | parser.add_argument('-ham','--hamDir') 154 | parser.add_argument('-spam','--spamDir') 155 | args = parser.parse_args() 156 | return args 157 | 158 | 159 | if __name__ == '__main__': 160 | import sys, argparse 161 | # get user input 162 | args = parse_user_args() 163 | hamDir = args.hamDir 164 | spamDir= args.spamDir 165 | 166 | print("### Looking for ham files in",hamDir, 167 | "and spam files in", spamDir, "###") 168 | 169 | trainX,trainY,testX,testY = input_data(hamDir,spamDir,.1) 170 | 171 | np.savetxt("trainX.csv", trainX, delimiter=",") 172 | np.savetxt("trainY.csv", trainY, delimiter=",") 173 | np.savetxt("testX.csv", testX, delimiter=",") 174 | np.savetxt("testY.csv", testY, delimiter=",") 175 | -------------------------------------------------------------------------------- /email_input.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import re 4 | from collections import Counter 5 | import numpy as np 6 | 7 | class DocReader(): 8 | def __init__(self): 9 | pass 10 | 11 | def create_bag_of_words(self,filePaths): 12 | ''' 13 | Input: 14 | filePaths: Array. A list of absolute filepaths 15 | Returns: 16 | bagOfWords: Array. 
All tokens in files 17 | ''' 18 | bagOfWords = [] 19 | regex = re.compile("X-Spam.*\n") 20 | for filePath in filePaths: 21 | with open(filePath, encoding ="latin-1") as f: 22 | raw = f.read() 23 | raw = re.sub(regex,'',raw) 24 | tokens = raw.split() 25 | for token in tokens: 26 | bagOfWords.append(token) 27 | return bagOfWords 28 | 29 | def get_feature_matrix(self,filePaths, featureDict): 30 | ''' 31 | create feature/x matrix from multiple text files 32 | rows = files, cols = features 33 | ''' 34 | featureMatrix = np.zeros(shape=(len(filePaths), 35 | len(featureDict)), 36 | dtype=float) 37 | regex = re.compile("X-Spam.*\n") 38 | for i,filePath in enumerate(filePaths): 39 | with open(filePath, encoding ="latin-1") as f: 40 | _raw = f.read() 41 | raw = re.sub(regex,'',_raw) 42 | tokens = raw.split() 43 | fileUniDist = Counter(tokens) 44 | for key,value in fileUniDist.items(): 45 | if key in featureDict: 46 | featureMatrix[i,featureDict[key]] = value 47 | return featureMatrix 48 | 49 | def regularize_vectors(self,featureMatrix): 50 | ''' 51 | Input: 52 | featureMatrix: matrix, where docs are rows and features are columns 53 | Returns: 54 | featureMatrix: matrix, updated by dividing each feature value by the total 55 | number of features for a given document 56 | ''' 57 | for doc in range(featureMatrix.shape[0]): 58 | totalWords = np.sum(featureMatrix[doc,:],axis=0) 59 | featureMatrix[doc,:] = np.multiply(featureMatrix[doc,:],(1/totalWords)) 60 | return featureMatrix 61 | 62 | def input_data(self,hamDir,spamDir,percentTest,cutoff): 63 | ''' 64 | Input: 65 | hamDir: String. dir of ham text files 66 | spamDir: String. dir of spam text file 67 | percentTest: Float. percentage of all data to be assigned to testset 68 | Returns: 69 | trainPaths: Array. Absolute paths to training emails 70 | trainY: Array. Training labels, 0 or 1 int. 71 | testPaths: Array. Absolute paths to testing emails 72 | testY: Array. Testing labels, 0 or 1 int. 
73 | ''' 74 | pathLabelPairs={} 75 | for hamPath in glob.glob(hamDir+'*'): 76 | pathLabelPairs.update({hamPath:(0,1)}) 77 | for spamPath in glob.glob(spamDir+'*'): 78 | pathLabelPairs.update({spamPath:(1,0)}) 79 | 80 | # get test set as random subsample of all data 81 | numTest = int(percentTest * len(pathLabelPairs)) 82 | testing = set(random.sample(pathLabelPairs.items(),numTest)) 83 | 84 | # delete testing data from superset of all data 85 | for entry in testing: 86 | del pathLabelPairs[entry[0]] 87 | 88 | # split training tuples of (path,label) into separate lists 89 | trainPaths=[] 90 | trainY=[] 91 | for item in pathLabelPairs.items(): 92 | trainPaths.append(item[0]) 93 | trainY.append(item[1]) 94 | 95 | # split testing tuples of (path,label) into separate lists 96 | testPaths=[] 97 | testY=[] 98 | for item in testing: 99 | testPaths.append(item[0]) 100 | testY.append(item[1]) 101 | 102 | # create feature dictionary of n-grams 103 | bagOfWords = self.create_bag_of_words(trainPaths) 104 | 105 | # throw out low freq words 106 | freqDist = Counter(bagOfWords) 107 | newBagOfWords=[] 108 | for word,freq in freqDist.items(): 109 | if freq > cutoff: 110 | newBagOfWords.append(word) 111 | features = set(newBagOfWords) 112 | featureDict = {feature:i for i,feature in enumerate(features)} 113 | 114 | # make feature matrices 115 | trainX = self.get_feature_matrix(trainPaths,featureDict) 116 | testX = self.get_feature_matrix(testPaths,featureDict) 117 | 118 | # regularize length 119 | trainX = self.regularize_vectors(trainX) 120 | testX = self.regularize_vectors(testX) 121 | 122 | # cast as ndarrays 123 | trainY = np.asarray(trainY) 124 | testY = np.asarray(testY) 125 | 126 | return trainX, trainY, testX, testY 127 | 128 | 129 | def parse_user_args(): 130 | parser = argparse.ArgumentParser() 131 | parser.add_argument('-ham','--hamDir') 132 | parser.add_argument('-spam','--spamDir') 133 | args = parser.parse_args() 134 | return args 135 | 136 | 137 | if __name__ == 
'__main__': 138 | import sys, argparse 139 | # get user input 140 | args = parse_user_args() 141 | hamDir = args.hamDir 142 | spamDir= args.spamDir 143 | 144 | reader = DocReader() 145 | 146 | trainX,trainY,testX,testY = reader.input_data(hamDir=hamDir, 147 | spamDir=spamDir, 148 | percentTest=.1, 149 | cutoff=15) 150 | 151 | print(trainX.shape) 152 | print(trainY.shape) 153 | print(testX.shape) 154 | print(testY.shape) 155 | 156 | np.savetxt("trainX.csv", trainX, delimiter="\t") 157 | np.savetxt("trainY.csv", trainY, delimiter="\t") 158 | np.savetxt("testX.csv", testX, delimiter="\t") 159 | np.savetxt("testY.csv", testY, delimiter="\t") 160 | 161 | print(trainX[:10,:]) 162 | print(trainY[:10,:]) 163 | -------------------------------------------------------------------------------- /tf-slides/tf-slides.tex~: -------------------------------------------------------------------------------- 1 | \documentclass[14pt]{beamer} 2 | \usepackage{multimedia} 3 | 4 | \usetheme[width=1.4cm]{PaloAlto} 5 | \usecolortheme{default} 6 | \useoutertheme{shadow} 7 | 8 | \usepackage[utf8]{inputenc} 9 | \usepackage{booktabs,tabu} 10 | \usepackage{adjustbox} 11 | \usepackage{graphicx} 12 | \usepackage{caption} 13 | \graphicspath{{figs/}} 14 | \DeclareGraphicsRule{.tif}{png}{.png}{`convert #1 `dirname #1`/`basename #1 .tif`.png} 15 | 16 | 17 | %%%%%%%%%%%%%%%%%%%% 18 | %%% BEGIN PYTHON %%% 19 | %%%%%%%%%%%%%%%%%%%% 20 | 21 | \usepackage{color} 22 | \long\def\ans#1{{\color{blue}{\em #1}}} 23 | \long\def\ansnem#1{{\color{blue}#1}} 24 | \long\def\boldred#1{{\color{red}{\bf #1}}} 25 | 26 | \usepackage{listings} 27 | \usepackage{textcomp} 28 | \renewcommand{\lstlistlistingname}{Code Listings} 29 | \renewcommand{\lstlistingname}{Code Listing} 30 | 31 | %%% Specific for python listings 32 | \definecolor{gray}{gray}{0.5} 33 | \definecolor{green}{rgb}{0,0.5,0} 34 | 35 | \lstnewenvironment{python}[1][]{ 36 | \lstset{ 37 | language=python, 38 | basicstyle=\tiny, 39 | stringstyle=\color{red}, 40 | 
showstringspaces=false, 41 | alsoletter={1234567890}, 42 | otherkeywords={\ , \}, \{}, 43 | keywordstyle=\color{blue}, 44 | emph={access,and,break,class,continue,def,del,elif ,else,% 45 | except,exec,finally,for,from,global,if,import,in,i s,% 46 | lambda,not,or,pass,print,raise,return,try,while}, 47 | emphstyle=\color{black}\bfseries, 48 | emph={[2]True, False, None, self}, 49 | emphstyle=[2]\color{green}, 50 | emph={[3]from, import, as}, 51 | emphstyle=[3]\color{blue}, 52 | upquote=true, 53 | morecomment=[s]{"""}{"""}, 54 | commentstyle=\color{gray}\slshape, 55 | emph={[4]1, 2, 3, 4, 5, 6, 7, 8, 9, 0}, 56 | emphstyle=[4]\color{blue}, 57 | literate=*{:}{{\textcolor{blue}:}}{1}% 58 | {=}{{\textcolor{blue}=}}{1}% 59 | {-}{{\textcolor{blue}-}}{1}% 60 | {+}{{\textcolor{blue}+}}{1}% 61 | {*}{{\textcolor{blue}*}}{1}% 62 | {!}{{\textcolor{blue}!}}{1}% 63 | {(}{{\textcolor{blue}(}}{1}% 64 | {)}{{\textcolor{blue})}}{1}% 65 | {[}{{\textcolor{blue}[}}{1}% 66 | {]}{{\textcolor{blue}]}}{1}% 67 | {<}{{\textcolor{blue}<}}{1}% 68 | {>}{{\textcolor{blue}>}}{1},% 69 | %framexleftmargin=1mm, framextopmargin=1mm, frame=shadowbox, rulesepcolor=\color{blue},#1 70 | framexleftmargin=1mm, framextopmargin=1mm, frame=single,#1 71 | }}{} 72 | 73 | %%%%%%%%%%%%%%%%%% 74 | %%% END PYTHON %%% 75 | %%%%%%%%%%%%%%%%%% 76 | 77 | 78 | % remove navigation symbols 79 | \setbeamertemplate{navigation symbols}{} 80 | \setbeamerfont{page number in head/foot}{size=\fontsize{12}{12}} 81 | \setbeamertemplate{footline}[frame number] 82 | 83 | % make image sized to frame 84 | \newcommand {\framedgraphic}[2] { % args = {frametitle}{image.ext} 85 | \begin{frame}{#1} 86 | \begin{center} 87 | \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{#2} 88 | \end{center} 89 | \end{frame} 90 | } 91 | 92 | % This block will remove authornames and talk title from sidebar 93 | \makeatletter 94 | \setbeamertemplate{sidebar \beamer@sidebarside}%{sidebar theme} 95 | { 96 | 
\beamer@tempdim=\beamer@sidebarwidth% 97 | \advance\beamer@tempdim by -6pt% 98 | \insertverticalnavigation{\beamer@sidebarwidth}% 99 | \vfill 100 | \ifx\beamer@sidebarside\beamer@lefttext% 101 | \else% 102 | \usebeamercolor{normal text}% 103 | \llap{\usebeamertemplate***{navigation symbols}\hskip0.1cm}% 104 | \vskip2pt% 105 | \fi% 106 | }% 107 | \makeatother 108 | 109 | 110 | 111 | 112 | \begin{document} 113 | 114 | \title{A TensorFlow Tutorial} 115 | \subtitle{Email Classification with Logistic Regression} 116 | \author{Josh Meyer \inst{1} \and Michael Capizzi \inst{2}} 117 | \institute{ \inst{1} University of Arizona \and \inst{2} PitchVantage} 118 | \date{} 119 | 120 | \frame{\titlepage} 121 | 122 | \frame{\frametitle{Table of contents}\tableofcontents} 123 | 124 | 125 | 126 | \section{The Flow of TensorFlow} 127 | \frame{\frametitle{} 128 | \centering 129 | \begingroup 130 | \fontsize{20pt}{20pt}\selectfont 131 | The \textit{Flow} \\ 132 | of TensorFlow \\ 133 | \endgroup 134 | } 135 | 136 | 137 | \framedgraphic{Initialize Weights \& Bias Terms}{1-initialization.png} 138 | 139 | \begin{frame}[fragile] 140 | \begin{python} 141 | ################# 142 | ### VARIABLES ### 143 | ################# 144 | 145 | # all values are randomly assigned: 146 | # sqrt(6 / (numInputNodes + numOutputNodes + 1)) 147 | 148 | weights = tf.Variable(tf.random_normal([numFeatures,numLabels], 149 | mean=0, 150 | stddev=(np.sqrt(6/numFeatures+numLabels+1)), 151 | name="weights")) 152 | 153 | bias = tf.Variable(tf.random_normal([1,numLabels], 154 | mean=0, 155 | stddev=(np.sqrt(6/numFeatures+numLabels+1)), 156 | name="bias")) 157 | 158 | 159 | # INITIALIZE our weights and biases 160 | init_OP = tf.initialize_all_variables() 161 | \end{python} 162 | \end{frame} 163 | 164 | 165 | \framedgraphic{Apply Weights to Features}{2-apply-weights.png} 166 | 167 | \begin{frame}[fragile] 168 | \begin{python} 169 | apply_weights_OP = tf.matmul(X, weights, name="apply_weights") 170 | \end{python} 171 | 
\end{frame} 172 | 173 | 174 | \framedgraphic{Add Bias to Weighted Features}{3-add-bias.png} 175 | 176 | \begin{frame}[fragile] 177 | \begin{python} 178 | add_bias_OP = tf.add(apply_weights_OP, bias, name="add_bias") 179 | \end{python} 180 | \end{frame} 181 | 182 | 183 | \framedgraphic{Apply Sigmoid Activation}{4-apply-sigmoid.png} 184 | 185 | \begin{frame}[fragile] 186 | \begin{python} 187 | activation_OP = tf.nn.sigmoid(add_bias_OP, name="activation") 188 | \end{python} 189 | \end{frame} 190 | 191 | 192 | 193 | 194 | 195 | 196 | \frame{\frametitle{} 197 | \centering 198 | \begingroup 199 | \fontsize{12pt}{12pt}\selectfont 200 | Thank you! \\ 201 | \endgroup 202 | \vspace{1cm} 203 | } 204 | 205 | 206 | \end{document} 207 | 208 | 209 | 210 | 211 | %% \frame{\frametitle{The Flow of TensorFlow} 212 | %% \begin{table}[H] 213 | %% \begin{adjustbox}{max width=\textwidth} 214 | %% \centering 215 | %% \begin{tabular}{lccc} 216 | %% \toprule 217 | %% \textbf{Study} & \textbf{Languages} & \textbf{Phonemic Feature} & \textbf{Best Predictor}\\ 218 | %% \midrule 219 | %% Cutler et al. 
(1986, 1992) & French-English & Metrical Structure & Preference \\ 220 | %% \bottomrule 221 | %% \end{tabular} 222 | %% \end{adjustbox} 223 | %% \end{table} 224 | %% } 225 | -------------------------------------------------------------------------------- /logistic_regression_train.py: -------------------------------------------------------------------------------- 1 | ################ 2 | ### PREAMBLE ### 3 | ################ 4 | 5 | from __future__ import division 6 | import tensorflow as tf 7 | import numpy as np 8 | import tarfile 9 | import os 10 | import matplotlib.pyplot as plt 11 | import time 12 | 13 | 14 | 15 | ################### 16 | ### IMPORT DATA ### 17 | ################### 18 | 19 | def csv_to_numpy_array(filePath, delimiter): 20 | return np.genfromtxt(filePath, delimiter=delimiter, dtype=None) 21 | 22 | def import_data(): 23 | if "data" not in os.listdir(os.getcwd()): 24 | # Untar directory of data if we haven't already 25 | tarObject = tarfile.open("data.tar.gz") 26 | tarObject.extractall() 27 | tarObject.close() 28 | print("Extracted tar to current directory") 29 | else: 30 | # we've already extracted the files 31 | pass 32 | 33 | print("loading training data") 34 | trainX = csv_to_numpy_array("data/trainX.csv", delimiter="\t") 35 | trainY = csv_to_numpy_array("data/trainY.csv", delimiter="\t") 36 | print("loading test data") 37 | testX = csv_to_numpy_array("data/testX.csv", delimiter="\t") 38 | testY = csv_to_numpy_array("data/testY.csv", delimiter="\t") 39 | return trainX,trainY,testX,testY 40 | 41 | trainX,trainY,testX,testY = import_data() 42 | 43 | 44 | 45 | ######################### 46 | ### GLOBAL PARAMETERS ### 47 | ######################### 48 | 49 | ## DATA SET PARAMETERS 50 | # Get our dimensions for our different variables and placeholders: 51 | # numFeatures = the number of words extracted from each email 52 | numFeatures = trainX.shape[1] 53 | # numLabels = number of classes we are predicting (here just 2: Ham or Spam) 54 | 
numLabels = trainY.shape[1] 55 | 56 | ## TRAINING SESSION PARAMETERS 57 | # number of times we iterate through training data 58 | # tensorboard shows that accuracy plateaus at ~25k epochs 59 | numEpochs = 27000 60 | # a smarter learning rate for gradientOptimizer 61 | learningRate = tf.train.exponential_decay(learning_rate=0.0008, 62 | global_step= 1, 63 | decay_steps=trainX.shape[0], 64 | decay_rate= 0.95, 65 | staircase=True) 66 | 67 | 68 | 69 | 70 | 71 | #################### 72 | ### PLACEHOLDERS ### 73 | #################### 74 | 75 | # X = X-matrix / feature-matrix / data-matrix... It's a tensor to hold our email 76 | # data. 'None' here means that we can hold any number of emails 77 | X = tf.placeholder(tf.float32, [None, numFeatures]) 78 | # yGold = Y-matrix / label-matrix / labels... This will be our correct answers 79 | # matrix. Every row has either [1,0] for SPAM or [0,1] for HAM. 'None' here 80 | # means that we can hold any number of emails 81 | yGold = tf.placeholder(tf.float32, [None, numLabels]) 82 | 83 | 84 | 85 | ################# 86 | ### VARIABLES ### 87 | ################# 88 | 89 | # Values are randomly sampled from a Gaussian with a standard deviation of: 90 | # sqrt(6 / (numInputNodes + numOutputNodes + 1)) 91 | 92 | weights = tf.Variable(tf.random_normal([numFeatures,numLabels], 93 | mean=0, 94 | stddev=(np.sqrt(6/(numFeatures+ 95 | numLabels+1))), 96 | name="weights")) 97 | 98 | bias = tf.Variable(tf.random_normal([1,numLabels], 99 | mean=0, 100 | stddev=(np.sqrt(6/(numFeatures+numLabels+1))), 101 | name="bias")) 102 | 103 | 104 | 105 | ###################### 106 | ### PREDICTION OPS ### 107 | ###################### 108 | 109 | # INITIALIZE our weights and biases 110 | init_OP = tf.global_variables_initializer() 111 | 112 | # PREDICTION ALGORITHM i.e. 
FEEDFORWARD ALGORITHM 113 | apply_weights_OP = tf.matmul(X, weights, name="apply_weights") 114 | add_bias_OP = tf.add(apply_weights_OP, bias, name="add_bias") 115 | activation_OP = tf.nn.sigmoid(add_bias_OP, name="activation") 116 | 117 | 118 | ##################### 119 | ### EVALUATION OP ### 120 | ##################### 121 | 122 | # COST FUNCTION i.e. MEAN SQUARED ERROR 123 | cost_OP = tf.nn.l2_loss(activation_OP-yGold, name="squared_error_cost") 124 | 125 | 126 | ####################### 127 | ### OPTIMIZATION OP ### 128 | ####################### 129 | 130 | # OPTIMIZATION ALGORITHM i.e. GRADIENT DESCENT 131 | training_OP = tf.train.GradientDescentOptimizer(learningRate).minimize(cost_OP) 132 | 133 | 134 | ########################### 135 | ### GRAPH LIVE UPDATING ### 136 | ########################### 137 | 138 | epoch_values=[] 139 | accuracy_values=[] 140 | cost_values=[] 141 | # Turn on interactive plotting 142 | plt.ion() 143 | # Create the main, super plot 144 | fig = plt.figure() 145 | # Create two subplots on their own axes and give titles 146 | ax1 = plt.subplot("211") 147 | ax1.set_title("TRAINING ACCURACY", fontsize=18) 148 | ax2 = plt.subplot("212") 149 | ax2.set_title("TRAINING COST", fontsize=18) 150 | plt.tight_layout() 151 | 152 | 153 | 154 | ##################### 155 | ### RUN THE GRAPH ### 156 | ##################### 157 | 158 | # Create a tensorflow session 159 | sess = tf.Session() 160 | 161 | # Initialize all tensorflow variables 162 | sess.run(init_OP) 163 | 164 | ## Ops for vizualization 165 | # argmax(activation_OP, 1) gives the label our model thought was most likely 166 | # argmax(yGold, 1) is the correct label 167 | correct_predictions_OP = tf.equal(tf.argmax(activation_OP,1),tf.argmax(yGold,1)) 168 | # False is 0 and True is 1, what was our average? 
169 | accuracy_OP = tf.reduce_mean(tf.cast(correct_predictions_OP, "float")) 170 | # Summary op for regression output 171 | activation_summary_OP = tf.summary.histogram("output", activation_OP) 172 | # Summary op for accuracy 173 | accuracy_summary_OP = tf.summary.scalar("accuracy", accuracy_OP) 174 | # Summary op for cost 175 | cost_summary_OP = tf.summary.scalar("cost", cost_OP) 176 | # Summary ops to check how variables (W, b) are updating after each iteration 177 | weightSummary = tf.summary.histogram("weights", weights.eval(session=sess)) 178 | biasSummary = tf.summary.histogram("biases", bias.eval(session=sess)) 179 | # Merge all summaries 180 | all_summary_OPS = tf.summary.merge_all() 181 | # Summary writer 182 | writer = tf.summary.FileWriter("summary_logs", sess.graph) 183 | 184 | # Initialize reporting variables 185 | cost = 0 186 | diff = 1 187 | 188 | # Training epochs 189 | for i in range(numEpochs): 190 | if i > 1 and diff < .0001: 191 | print("change in cost %g; convergence."%diff) 192 | break 193 | else: 194 | # Run training step 195 | step = sess.run(training_OP, feed_dict={X: trainX, yGold: trainY}) 196 | # Report occasional stats 197 | if i % 10 == 0: 198 | # Add epoch to epoch_values 199 | epoch_values.append(i) 200 | # Generate accuracy stats on test data 201 | summary_results, train_accuracy, newCost = sess.run( 202 | [all_summary_OPS, accuracy_OP, cost_OP], 203 | feed_dict={X: trainX, yGold: trainY} 204 | ) 205 | # Add accuracy to live graphing variable 206 | accuracy_values.append(train_accuracy) 207 | # Add cost to live graphing variable 208 | cost_values.append(newCost) 209 | # Write summary stats to writer 210 | writer.add_summary(summary_results, i) 211 | # Re-assign values for variables 212 | diff = abs(newCost - cost) 213 | cost = newCost 214 | 215 | #generate print statements 216 | print("step %d, training accuracy %g"%(i, train_accuracy)) 217 | print("step %d, cost %g"%(i, newCost)) 218 | print("step %d, change in cost %g"%(i, diff)) 
219 | 220 | # Plot progress to our two subplots 221 | accuracyLine, = ax1.plot(epoch_values, accuracy_values) 222 | costLine, = ax2.plot(epoch_values, cost_values) 223 | fig.canvas.draw() 224 | time.sleep(1) 225 | 226 | 227 | # How well do we perform on held-out test data? 228 | print("final accuracy on test set: %s" %str(sess.run(accuracy_OP, 229 | feed_dict={X: testX, 230 | yGold: testY}))) 231 | 232 | 233 | ############################## 234 | ### SAVE TRAINED VARIABLES ### 235 | ############################## 236 | 237 | # Create Saver 238 | saver = tf.train.Saver() 239 | # Save variables to .ckpt file 240 | # saver.save(sess, "trained_variables.ckpt") 241 | 242 | 243 | ############################ 244 | ### MAKE NEW PREDICTIONS ### 245 | ############################ 246 | 247 | # Close tensorflow session 248 | sess.close() 249 | 250 | # To view tensorboard: 251 | #1. run: tensorboard --logdir=/path/to/log-directory 252 | #2. open your browser to http://localhost:6006/ 253 | # See tutorial here for graph visualization: 254 | # https://www.tensorflow.org/versions/0.6.0/how_tos/graph_viz/index.html 255 | -------------------------------------------------------------------------------- /tf-slides/tf-slides.tex: -------------------------------------------------------------------------------- 1 | \documentclass[14pt]{beamer} 2 | \usepackage{multimedia} 3 | 4 | \usetheme[width=1.4cm]{PaloAlto} 5 | \usecolortheme{default} 6 | \useoutertheme{shadow} 7 | 8 | \usepackage[utf8]{inputenc} 9 | \usepackage{booktabs,tabu} 10 | \usepackage{adjustbox} 11 | \usepackage{graphicx} 12 | \usepackage{caption} 13 | \graphicspath{{figs/}} 14 | \DeclareGraphicsRule{.tif}{png}{.png}{`convert #1 `dirname #1`/`basename #1 .tif`.png} 15 | 16 | 17 | %%%%%%%%%%%%%%%%%%%% 18 | %%% BEGIN PYTHON %%% 19 | %%%%%%%%%%%%%%%%%%%% 20 | 21 | \usepackage{color} 22 | \long\def\ans#1{{\color{blue}{\em #1}}} 23 | \long\def\ansnem#1{{\color{blue}#1}} 24 | \long\def\boldred#1{{\color{red}{\bf #1}}} 25 | 26 | 
\usepackage{listings} 27 | \usepackage{textcomp} 28 | \renewcommand{\lstlistlistingname}{Code Listings} 29 | \renewcommand{\lstlistingname}{Code Listing} 30 | 31 | %%% Specific for python listings 32 | \definecolor{gray}{gray}{0.5} 33 | \definecolor{green}{rgb}{0,0.5,0} 34 | 35 | \lstnewenvironment{python}[1][]{ 36 | \lstset{ 37 | language=python, 38 | basicstyle=\tiny, 39 | stringstyle=\color{red}, 40 | showstringspaces=false, 41 | alsoletter={1234567890}, 42 | otherkeywords={\ , \}, \{}, 43 | keywordstyle=\color{blue}, 44 | emph={access,and,break,class,continue,def,del,elif ,else,% 45 | except,exec,finally,for,from,global,if,import,in,i s,% 46 | lambda,not,or,pass,print,raise,return,try,while}, 47 | emphstyle=\color{black}\bfseries, 48 | emph={[2]True, False, None, self}, 49 | emphstyle=[2]\color{green}, 50 | emph={[3]from, import, as}, 51 | emphstyle=[3]\color{blue}, 52 | upquote=true, 53 | morecomment=[s]{"""}{"""}, 54 | commentstyle=\color{gray}\slshape, 55 | emph={[4]1, 2, 3, 4, 5, 6, 7, 8, 9, 0}, 56 | emphstyle=[4]\color{blue}, 57 | literate=*{:}{{\textcolor{blue}:}}{1}% 58 | {=}{{\textcolor{blue}=}}{1}% 59 | {-}{{\textcolor{blue}-}}{1}% 60 | {+}{{\textcolor{blue}+}}{1}% 61 | {*}{{\textcolor{blue}*}}{1}% 62 | {!}{{\textcolor{blue}!}}{1}% 63 | {(}{{\textcolor{blue}(}}{1}% 64 | {)}{{\textcolor{blue})}}{1}% 65 | {[}{{\textcolor{blue}[}}{1}% 66 | {]}{{\textcolor{blue}]}}{1}% 67 | {<}{{\textcolor{blue}<}}{1}% 68 | {>}{{\textcolor{blue}>}}{1},% 69 | %framexleftmargin=1mm, framextopmargin=1mm, frame=shadowbox, rulesepcolor=\color{blue},#1 70 | framexleftmargin=1mm, framextopmargin=1mm, frame=single,#1 71 | }}{} 72 | 73 | %%%%%%%%%%%%%%%%%% 74 | %%% END PYTHON %%% 75 | %%%%%%%%%%%%%%%%%% 76 | 77 | 78 | % remove navigation symbols 79 | \setbeamertemplate{navigation symbols}{} 80 | \setbeamerfont{page number in head/foot}{size=\fontsize{12}{12}} 81 | \setbeamertemplate{footline}[frame number] 82 | 83 | % make image sized to frame 84 | \newcommand {\framedgraphic}[2] { 
% args = {frametitle}{image.ext} 85 | \begin{frame}{#1} 86 | \begin{center} 87 | \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{#2} 88 | \end{center} 89 | \end{frame} 90 | } 91 | 92 | % This block will remove authornames and talk title from sidebar 93 | \makeatletter 94 | \setbeamertemplate{sidebar \beamer@sidebarside}%{sidebar theme} 95 | { 96 | \beamer@tempdim=\beamer@sidebarwidth% 97 | \advance\beamer@tempdim by -6pt% 98 | \insertverticalnavigation{\beamer@sidebarwidth}% 99 | \vfill 100 | \ifx\beamer@sidebarside\beamer@lefttext% 101 | \else% 102 | \usebeamercolor{normal text}% 103 | \llap{\usebeamertemplate***{navigation symbols}\hskip0.1cm}% 104 | \vskip2pt% 105 | \fi% 106 | }% 107 | \makeatother 108 | 109 | 110 | 111 | 112 | \begin{document} 113 | 114 | \title{A TensorFlow Tutorial} 115 | \subtitle{Email Classification with Logistic Regression} 116 | \author{Josh Meyer \inst{1} \and Michael Capizzi \inst{2}} 117 | \institute{ \inst{1} University of Arizona \and \inst{2} PitchVantage} 118 | \date{} 119 | 120 | \frame{\titlepage} 121 | 122 | \frame{\frametitle{Table of contents}\tableofcontents} 123 | 124 | 125 | \section{What's Going to Happen} 126 | 127 | \frame{ 128 | Tonight we will... 129 | \begin{itemize} 130 | \item describe the basic TensorFlow structures 131 | \item build a working example of text classification 132 | \item point out places where \textit{other} TensorFlow "built-ins" apply {\footnotesize(optimizers, cost functions, etc)} 133 | \item "hand-wave" liberally \\ 134 | {\footnotesize(when we don't want to get into it or don't know the answer)} 135 | \end{itemize} 136 | Tonight we will \textbf{not}... 
137 | \begin{itemize} 138 | \item discuss details of NLP feature selection 139 | \item discuss details of Machine Learning \\ 140 | {\footnotesize(linear algebra, backpropogation, etc.)} 141 | \end{itemize} 142 | } 143 | 144 | \section{TensorFlow Structures} 145 | 146 | \frame{\frametitle{} 147 | \centering 148 | \begingroup 149 | \fontsize{20pt}{20pt}\selectfont 150 | TensorFlow Structures 151 | \endgroup 152 | \begin{flushleft} 153 | tensor = \textit{n}-dimensional matrix 154 | \end{flushleft} 155 | \includegraphics[width=\textwidth,height=0.9\textheight,keepaspectratio]{tensors.png}} 156 | 157 | \begin{frame}[fragile] 158 | \frametitle{TensorFlow Structures} 159 | \begin{itemize} 160 | \item{\textbf{constants}}: {\small \textit{never} changes its value(s)} \\ 161 | \begin{python} 162 | c = tf.constant(2.0, name="constantC") #can be int, float, or tensor 163 | \end{python} 164 | \item{\textbf{placeholders}}: {\small shell into which tensors can be iteratively inserted} \\ 165 | \begin{python} 166 | X = tf.placeholder(tf.float32, [None, 200], name="input") 167 | \end{python} 168 | \item{\textbf{variables}}: {\small value(s) can be updated} 169 | \begin{python} 170 | weights = tf.Variable(tf.random_normal([1, 200], name="weights")) 171 | \end{python} 172 | \item{\textbf{operations}}: {\small computations that will act on tensors} 173 | \begin{python} 174 | apply_weights_OP = tf.matmul(X, weights, name="apply_weights") 175 | add_bias_OP = tf.add(apply_weights_OP, bias, name="add_bias") 176 | activation_OP = tf.nn.sigmoid(add_bias_OP, name="activation")\end{python} 177 | \end{itemize} 178 | \end{frame} 179 | 180 | \section{The Flow of TensorFlow} 181 | 182 | \frame{\frametitle{} 183 | \centering 184 | \begingroup 185 | \fontsize{20pt}{20pt}\selectfont 186 | Let's get to the script!\\ 187 | \endgroup 188 | } 189 | 190 | \begin{frame}[fragile] 191 | \frametitle{Preamble} 192 | \begin{python} 193 | ################ 194 | ### PREAMBLE ### 195 | ################ 196 | 197 
from __future__ import division
import tensorflow as tf
import numpy as np
import tarfile
import os
import matplotlib.pyplot as plt
import time
\end{python}
\end{frame}


\begin{frame}[fragile]
\frametitle{Import the Email Data}
\begin{python}
###################
### IMPORT DATA ###
###################

def csv_to_numpy_array(filePath, delimiter):
    # Load a delimited text file into a numpy array; dtype=None lets
    # genfromtxt infer the element type from the data.
    return np.genfromtxt(filePath, delimiter=delimiter, dtype=None)

def import_data():
    if "data" not in os.listdir(os.getcwd()):
        # Untar directory of data if we haven't already
        tarObject = tarfile.open("data.tar.gz")
        tarObject.extractall()
        tarObject.close()
        print("Extracted tar to current directory")
    else:
        # we've already extracted the files
        pass

    print("loading training data")
    trainX = csv_to_numpy_array("data/trainX.csv", delimiter="\t")
    trainY = csv_to_numpy_array("data/trainY.csv", delimiter="\t")
    print("loading test data")
    testX = csv_to_numpy_array("data/testX.csv", delimiter="\t")
    testY = csv_to_numpy_array("data/testY.csv", delimiter="\t")
    return trainX,trainY,testX,testY

trainX,trainY,testX,testY = import_data()
\end{python}
\end{frame}


\begin{frame}[fragile]
\frametitle{Some Global Parameters}
\begin{python}
#########################
### GLOBAL PARAMETERS ###
#########################

# DATA SET PARAMETERS
# Get our dimensions for our different variables and placeholders:
# numFeatures = the number of words extracted from each email
numFeatures = trainX.shape[1]
# numLabels = number of classes we are predicting (here just 2: Ham or Spam)
numLabels = trainY.shape[1]

# TRAINING SESSION PARAMETERS
# number of times we iterate through training data
# tensorboard shows that accuracy plateaus at ~25k epochs
numEpochs = 27000
# a smarter learning rate for gradientOptimizer
# BUGFIX: global_step must be a variable that the optimizer increments;
# with the literal "global_step=1" the decay schedule is frozen and the
# learning rate never actually decays.
globalStep = tf.Variable(0, trainable=False, name="global_step")
learningRate = tf.train.exponential_decay(learning_rate=0.0008,
                                          global_step=globalStep,
                                          decay_steps=trainX.shape[0],
                                          decay_rate=0.95,
                                          staircase=True)
\end{python}
\end{frame}

\frame{\frametitle{}
\centering
\begingroup
\fontsize{20pt}{20pt}\selectfont
The Computational Graph\\
\endgroup
}


\framedgraphic{The Full Computational Graph}{full-graph.png}

\framedgraphic{Define Feature and Label Placeholders}{define-X-Y-matrices.png}

\begin{frame}[fragile]
\frametitle{Define Feature and Label Placeholders}
\begin{python}
####################
### PLACEHOLDERS ###
####################

# X = X-matrix / feature-matrix / data-matrix... It's a tensor to hold our
# email data. 'None' here means that we can hold any number of emails
X = tf.placeholder(tf.float32, [None, numFeatures])

# yGold = Y-matrix / label-matrix / labels... This will be our correct answers
# matrix. Every row has either [1,0] for SPAM or [0,1] for HAM. 'None' here
# means that we can hold any number of emails
yGold = tf.placeholder(tf.float32, [None, numLabels])
\end{python}
\end{frame}


\framedgraphic{Initialize Weights \& Bias Terms Op}{1-initialization.png}

\begin{frame}[fragile]
\frametitle{Initialize Weights \& Bias Terms Op}
\begin{python}
#################
### VARIABLES ###
#################

# all values are randomly assigned with Xavier-style stddev:
# sqrt(6 / (numInputNodes + numOutputNodes + 1))
# BUGFIX: the parentheses matter -- the old code computed
# 6/numFeatures + numLabels + 1, and passed name= to tf.random_normal
# instead of tf.Variable.

weights = tf.Variable(tf.random_normal([numFeatures,numLabels],
                                       mean=0,
                                       stddev=np.sqrt(6/(numFeatures+
                                                         numLabels+1))),
                      name="weights")

bias = tf.Variable(tf.random_normal([1,numLabels],
                                    mean=0,
                                    stddev=np.sqrt(6/(numFeatures+
                                                      numLabels+1))),
                   name="bias")


# INITIALIZE our weights and biases
init_OP = tf.initialize_all_variables()
\end{python}
\end{frame}


\framedgraphic{Apply Weights to Features Op}{2-apply-weights.png}

\begin{frame}[fragile]
\frametitle{Apply Weights to Features Op}
\begin{python}
apply_weights_OP = tf.matmul(X, weights, name="apply_weights")
\end{python}
\end{frame}


\framedgraphic{Add Bias to Weighted Features Op}{3-add-bias.png}

\begin{frame}[fragile]
\frametitle{Add Bias to Weighted Features Op}
\begin{python}
add_bias_OP = tf.add(apply_weights_OP, bias, name="add_bias")
\end{python}
\end{frame}


\framedgraphic{Activation Op}{4-apply-sigmoid.png}

\begin{frame}[fragile]
\frametitle{Activation Op}
\begin{python}
activation_OP = tf.nn.sigmoid(add_bias_OP, name="activation")
\end{python}
\end{frame}


\framedgraphic{Evaluation Op: Mean Squared Error}{5-loss-function.png}

\begin{frame}[fragile]
\frametitle{Evaluation Op: Mean Squared Error}
\begin{python}
#####################
### EVALUATION OP ###
#####################

# COST FUNCTION i.e. MEAN SQUARED ERROR
cost_OP = tf.nn.l2_loss(activation_OP-yGold, name="squared_error_cost")
\end{python}
\end{frame}

\framedgraphic{Optimization Op: Gradient Descent}{6-optimization.png}

\begin{frame}[fragile]
\frametitle{Optimization Op: Gradient Descent}
\begin{python}
#######################
### OPTIMIZATION OP ###
#######################

# OPTIMIZATION ALGORITHM i.e. GRADIENT DESCENT
# globalStep is incremented once per training step so the exponential
# learning-rate decay defined above actually takes effect.
training_OP = tf.train.GradientDescentOptimizer(learningRate).minimize(
    cost_OP, global_step=globalStep)
\end{python}
\end{frame}



\begin{frame}[fragile]
\frametitle{Run the Graph}
\begin{python}
#####################
### RUN THE GRAPH ###
#####################

# Create a tensorflow session
sess = tf.Session()
# Initialize all tensorflow variables
sess.run(init_OP)

## Ops for vizualization
# argmax(activation_OP, 1) gives the label our model thought was most likely
# argmax(yGold, 1) is the correct label
correct_predictions_OP=tf.equal(tf.argmax(activation_OP,1),tf.argmax(yGold,1))
# False is 0 and True is 1, what was our average?
accuracy_OP = tf.reduce_mean(tf.cast(correct_predictions_OP, "float"))
# Summary op for regression output
activation_summary_OP = tf.histogram_summary("output", activation_OP)
# Summary op for accuracy
accuracy_summary_OP = tf.scalar_summary("accuracy", accuracy_OP)
# Summary op for cost
cost_summary_OP = tf.scalar_summary("cost", cost_OP)
# Summary ops to check how variables (W, b) are updating after each iteration
# BUGFIX: pass the Variable tensors themselves -- the old
# weights.eval(session=sess) baked a one-time numpy snapshot into the
# summary op, so the histograms never changed during training.
weightSummary = tf.histogram_summary("weights", weights)
biasSummary = tf.histogram_summary("biases", bias)
# Merge all summaries
all_summary_OPS = tf.merge_all_summaries()
# Summary writer
writer = tf.train.SummaryWriter("summary_logs", sess.graph_def)
\end{python}
\end{frame}

\begin{frame}[fragile]
\frametitle{Still 'Running the Graph'}
\begin{python}
# Initialize reporting variables
cost = 0
diff = 1
# Training epochs
for i in range(numEpochs):
    if i > 1 and diff < .0001:
        print("change in cost %g; convergence."%diff)
        break
    else:
        # Run training step
        step = sess.run(training_OP, feed_dict={X: trainX, yGold: trainY})
        # Report occasional stats
        if i % 10 == 0:
            # Add epoch to epoch_values
            # NOTE(review): epoch_values / accuracy_values / cost_values are
            # assumed to be initialized on an earlier slide -- confirm.
            epoch_values.append(i)
            # Generate accuracy stats on test data
            summary_results, train_accuracy, newCost = sess.run(
                [all_summary_OPS, accuracy_OP, cost_OP],
                feed_dict={X: trainX, yGold: trainY}
            )
            # Add accuracy to live graphing variable
            accuracy_values.append(train_accuracy)
            # Add cost to live graphing variable
            cost_values.append(newCost)
            # Write summary stats to writer
            writer.add_summary(summary_results, i)
            # Re-assign values for variables
            diff = abs(newCost - cost)
            cost = newCost
\end{python}
\end{frame}

\begin{frame}[fragile]
\frametitle{Still 'Still Running the Graph'}
\begin{python}
# generate print statements
# NOTE(review): these reporting lines use i / train_accuracy / newCost /
# diff from the loop on the previous slide -- presumably they belong
# inside its "i % 10 == 0" block; confirm against the full script.
print("step %d, training accuracy %g"%(i, train_accuracy))
print("step %d, cost %g"%(i, newCost))
print("step %d, change in cost %g"%(i, diff))

# Plot progress to our two subplots
accuracyLine, = ax1.plot(epoch_values, accuracy_values)
costLine, = ax2.plot(epoch_values, cost_values)
fig.canvas.draw()
time.sleep(1)


# How well do we perform on held-out test data?
print("final accuracy on test set: %s" %str(sess.run(accuracy_OP,
                                                     feed_dict={X: testX,
                                                                yGold: testY})))
\end{python}
\end{frame}

\begin{frame}[fragile]
\frametitle{Reuse, Recycle}
\begin{python}
##############################
### SAVE TRAINED VARIABLES ###
##############################

# Create Saver
saver = tf.train.Saver()
# Save variables to .ckpt file
# saver.save(sess, "trained_variables.ckpt")

# Close tensorflow session
sess.close()
\end{python}
\end{frame}

\framedgraphic{}{thats-all-folks.jpg}


\end{document}