├── Scratch Neural Network.ipynb
├── __init__.py
├── __pycache__
    └── __init__.cpython-35.pyc
├── docs
    ├── Gemfile
    ├── License.md
    ├── Rakefile
    ├── ReadMe.md
    ├── _config.yml
    ├── _includes
    │   ├── JB
    │   │   ├── analytics
    │   │   ├── analytics-providers
    │   │   │   ├── getclicky
    │   │   │   ├── google
    │   │   │   └── mixpanel
    │   │   ├── categories_list
    │   │   ├── comments
    │   │   ├── comments-providers
    │   │   │   ├── disqus
    │   │   │   ├── facebook
    │   │   │   ├── intensedebate
    │   │   │   └── livefyre
    │   │   ├── liquid_raw
    │   │   ├── pages_list
    │   │   ├── posts_collate
    │   │   ├── setup
    │   │   ├── sharing
    │   │   └── tags_list
    │   ├── head.html
    │   └── themes
    │   │   └── twitter
    │   │       ├── default.html
    │   │       ├── page.html
    │   │       ├── post.html
    │   │       └── settings.yml
    ├── _layouts
    │   ├── default.html
    │   ├── page.html
    │   └── post.html
    ├── _plugins
    │   └── debug.rb
    ├── assets
    │   ├── all_3neurons_lr_0.003_reg_0.0.gif
    │   ├── all_50neurons_lr_0.003_reg_0.000001.gif
    │   ├── all_50neurons_lr_0.003_reg_0.0001.gif
    │   ├── chain_w1.png
    │   ├── chain_w1_numbers.png
    │   ├── chain_w1_numbers_final.png
    │   ├── chain_w2.png
    │   ├── chain_w2_detailed.png
    │   ├── chain_w2_numbers.png
    │   ├── code.png
    │   ├── copy_values.png
    │   ├── example.png
    │   ├── forward.png
    │   ├── h1.png
    │   ├── h2.png
    │   ├── initialized_network.png
    │   ├── loss.png
    │   ├── nonlinear_xor.png
    │   ├── overview.png
    │   ├── overview2.png
    │   ├── themes
    │   │   └── twitter
    │   │   │   ├── bootstrap
    │   │   │       ├── css
    │   │   │       │   └── bootstrap.2.2.2.min.css
    │   │   │       └── img
    │   │   │       │   ├── glyphicons-halflings-white.png
    │   │   │       │   └── glyphicons-halflings.png
    │   │   │   └── css
    │   │   │       ├── kbroman.css
    │   │   │       └── style.css
    │   ├── update_w1.png
    │   ├── update_w2.png
    │   ├── z1.png
    │   └── z2.png
    ├── index.md
    ├── index_es.md
    └── pages
    │   ├── independent_site.md
    │   ├── local_test.md
    │   ├── nojekyll.md
    │   ├── overview.md
    │   ├── project_site.md
    │   ├── resources.md
    │   └── user_site.md
├── scratch_mlp.py
├── slides
    └── 2017_Summer_School_LACCI.pdf
└── utils.py


/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/__init__.py


--------------------------------------------------------------------------------
/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/docs/Gemfile:
--------------------------------------------------------------------------------
# Gem server Bundler resolves the gems below from; without a source
# declaration Bundler has nowhere to fetch them.
source 'https://rubygems.org'

# Dependency set used to build the site.
gem 'github-pages'
2 | 


--------------------------------------------------------------------------------
/docs/License.md:
--------------------------------------------------------------------------------
1 | To the extent possible under law,
2 | [Karl Broman](https://github.com/kbroman)
3 | has waived all copyright and related or neighboring rights to
4 | &ldquo;[simple site](https://github.com/kbroman/simple_site)&rdquo;.
5 | This work is published from the United States.
6 | <br/>
7 | [![CC0](https://i.creativecommons.org/p/zero/1.0/88x31.png)](https://creativecommons.org/publicdomain/zero/1.0/)
8 | 


--------------------------------------------------------------------------------
/docs/Rakefile:
--------------------------------------------------------------------------------
  1 | require "rubygems"
  2 | require 'rake'
  3 | require 'yaml'
  4 | require 'time'
  5 | 
  6 | SOURCE = "."
  7 | CONFIG = {
  8 |   'version' => "0.3.0",
  9 |   'themes' => File.join(SOURCE, "_includes", "themes"),
 10 |   'layouts' => File.join(SOURCE, "_layouts"),
 11 |   'posts' => File.join(SOURCE, "_posts"),
 12 |   'post_ext' => "md",
 13 |   'theme_package_version' => "0.1.0"
 14 | }
 15 | 
 16 | # Path configuration helper
 17 | module JB
 18 |   class Path
 19 |     SOURCE = "."
 20 |     Paths = {
 21 |       :layouts => "_layouts",
 22 |       :themes => "_includes/themes",
 23 |       :theme_assets => "assets/themes",
 24 |       :theme_packages => "_theme_packages",
 25 |       :posts => "_posts"
 26 |     }
 27 |     
 28 |     def self.base
 29 |       SOURCE
 30 |     end
 31 | 
 32 |     # build a path relative to configured path settings.
 33 |     def self.build(path, opts = {})
 34 |       opts[:root] ||= SOURCE
 35 |       path = "#{opts[:root]}/#{Paths[path.to_sym]}/#{opts[:node]}".split("/")
 36 |       path.compact!
 37 |       File.__send__ :join, path
 38 |     end
 39 |   
 40 |   end #Path
 41 | end #JB
 42 | 
 43 | # Usage: rake post title="A Title" [date="2012-02-09"] [tags=[tag1, tag2]]
 44 | desc "Begin a new post in #{CONFIG['posts']}"
 45 | task :post do
 46 |   abort("rake aborted: '#{CONFIG['posts']}' directory not found.") unless FileTest.directory?(CONFIG['posts'])
 47 |   title = ENV["title"] || "new-post"
 48 |   tags = ENV["tags"] || "[]"
 49 |   slug = title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
 50 |   begin
 51 |     date = (ENV['date'] ? Time.parse(ENV['date']) : Time.now).strftime('%Y-%m-%d')
 52 |   rescue Exception => e
 53 |     puts "Error - date format must be YYYY-MM-DD, please check you typed it correctly!"
 54 |     exit -1
 55 |   end
 56 |   filename = File.join(CONFIG['posts'], "#{date}-#{slug}.#{CONFIG['post_ext']}")
 57 |   if File.exist?(filename)
 58 |     abort("rake aborted!") if ask("#{filename} already exists. Do you want to overwrite?", ['y', 'n']) == 'n'
 59 |   end
 60 |   
 61 |   puts "Creating new post: #{filename}"
 62 |   open(filename, 'w') do |post|
 63 |     post.puts "---"
 64 |     post.puts "layout: post"
 65 |     post.puts "title: \"#{title.gsub(/-/,' ')}\""
 66 |     post.puts 'description: ""'
 67 |     post.puts "category: "
 68 |     post.puts "tags: []"
 69 |     post.puts "---"
 70 |     post.puts "{% include JB/setup %}"
 71 |   end
 72 | end # task :post
 73 | 
 74 | # Usage: rake page name="about.html"
 75 | # You can also specify a sub-directory path.
 76 | # If you don't specify a file extention we create an index.html at the path specified
 77 | desc "Create a new page."
 78 | task :page do
 79 |   name = ENV["name"] || "new-page.md"
 80 |   filename = File.join(SOURCE, "#{name}")
 81 |   filename = File.join(filename, "index.html") if File.extname(filename) == ""
 82 |   title = File.basename(filename, File.extname(filename)).gsub(/[\W\_]/, " ").gsub(/\b\w/){$&.upcase}
 83 |   if File.exist?(filename)
 84 |     abort("rake aborted!") if ask("#{filename} already exists. Do you want to overwrite?", ['y', 'n']) == 'n'
 85 |   end
 86 |   
 87 |   mkdir_p File.dirname(filename)
 88 |   puts "Creating new page: #{filename}"
 89 |   open(filename, 'w') do |post|
 90 |     post.puts "---"
 91 |     post.puts "layout: page"
 92 |     post.puts "title: \"#{title}\""
 93 |     post.puts 'description: ""'
 94 |     post.puts "---"
 95 |     post.puts "{% include JB/setup %}"
 96 |   end
 97 | end # task :page
 98 | 
 99 | desc "Launch preview environment"
100 | task :preview do
101 |   system "jekyll --auto --server"
102 | end # task :preview
103 | 
104 | # Public: Alias - Maintains backwards compatibility for theme switching.
105 | task :switch_theme => "theme:switch"
106 | 
107 | namespace :theme do
108 |   
109 |   # Public: Switch from one theme to another for your blog.
110 |   #
111 |   # name - String, Required. name of the theme you want to switch to.
112 |   #        The the theme must be installed into your JB framework.
113 |   #
114 |   # Examples
115 |   #
116 |   #   rake theme:switch name="the-program"
117 |   #
118 |   # Returns Success/failure messages.
119 |   desc "Switch between Jekyll-bootstrap themes."
120 |   task :switch do
121 |     theme_name = ENV["name"].to_s
122 |     theme_path = File.join(CONFIG['themes'], theme_name)
123 |     settings_file = File.join(theme_path, "settings.yml")
124 |     non_layout_files = ["settings.yml"]
125 | 
126 |     abort("rake aborted: name cannot be blank") if theme_name.empty?
127 |     abort("rake aborted: '#{theme_path}' directory not found.") unless FileTest.directory?(theme_path)
128 |     abort("rake aborted: '#{CONFIG['layouts']}' directory not found.") unless FileTest.directory?(CONFIG['layouts'])
129 | 
130 |     Dir.glob("#{theme_path}/*") do |filename|
131 |       next if non_layout_files.include?(File.basename(filename).downcase)
132 |       puts "Generating '#{theme_name}' layout: #{File.basename(filename)}"
133 | 
134 |       open(File.join(CONFIG['layouts'], File.basename(filename)), 'w') do |page|
135 |         if File.basename(filename, ".html").downcase == "default"
136 |           page.puts "---"
137 |           page.puts File.read(settings_file) if File.exist?(settings_file)
138 |           page.puts "---"
139 |         else
140 |           page.puts "---"
141 |           page.puts "layout: default"
142 |           page.puts "---"
143 |         end 
144 |         page.puts "{% include JB/setup %}"
145 |         page.puts "{% include themes/#{theme_name}/#{File.basename(filename)} %}" 
146 |       end
147 |     end
148 |     
149 |     puts "=> Theme successfully switched!"
150 |     puts "=> Reload your web-page to check it out =)"
151 |   end # task :switch
152 |   
153 |   # Public: Install a theme using the theme packager.
154 |   # Version 0.1.0 simple 1:1 file matching.
155 |   #
156 |   # git  - String, Optional path to the git repository of the theme to be installed.
157 |   # name - String, Optional name of the theme you want to install.
158 |   #        Passing name requires that the theme package already exist.
159 |   #
160 |   # Examples
161 |   #
162 |   #   rake theme:install git="https://github.com/jekyllbootstrap/theme-twitter.git"
163 |   #   rake theme:install name="cool-theme"
164 |   #
165 |   # Returns Success/failure messages.
166 |   desc "Install theme"
167 |   task :install do
168 |     if ENV["git"]
169 |       manifest = theme_from_git_url(ENV["git"])
170 |       name = manifest["name"]
171 |     else
172 |       name = ENV["name"].to_s.downcase
173 |     end
174 | 
175 |     packaged_theme_path = JB::Path.build(:theme_packages, :node => name)
176 |     
177 |     abort("rake aborted!
178 |       => ERROR: 'name' cannot be blank") if name.empty?
179 |     abort("rake aborted! 
180 |       => ERROR: '#{packaged_theme_path}' directory not found.
181 |       => Installable themes can be added via git. You can find some here: http://github.com/jekyllbootstrap
182 |       => To download+install run: `rake theme:install git='[PUBLIC-CLONE-URL]'`
183 |       => example : rake theme:install git='git@github.com:jekyllbootstrap/theme-the-program.git'
184 |     ") unless FileTest.directory?(packaged_theme_path)
185 |     
186 |     manifest = verify_manifest(packaged_theme_path)
187 |     
188 |     # Get relative paths to packaged theme files
189 |     # Exclude directories as they'll be recursively created. Exclude meta-data files.
190 |     packaged_theme_files = []
191 |     FileUtils.cd(packaged_theme_path) {
192 |       Dir.glob("**/*.*") { |f| 
193 |         next if ( FileTest.directory?(f) || f =~ /^(manifest|readme|packager)/i )
194 |         packaged_theme_files << f 
195 |       }
196 |     }
197 |     
198 |     # Mirror each file into the framework making sure to prompt if already exists.
199 |     packaged_theme_files.each do |filename|
200 |       file_install_path = File.join(JB::Path.base, filename)
201 |       if File.exist? file_install_path and ask("#{file_install_path} already exists. Do you want to overwrite?", ['y', 'n']) == 'n'
202 |         next
203 |       else
204 |         mkdir_p File.dirname(file_install_path)
205 |         cp_r File.join(packaged_theme_path, filename), file_install_path
206 |       end
207 |     end
208 |     
209 |     puts "=> #{name} theme has been installed!"
210 |     puts "=> ---"
211 |     if ask("=> Want to switch themes now?", ['y', 'n']) == 'y'
212 |       system("rake switch_theme name='#{name}'")
213 |     end
214 |   end
215 | 
216 |   # Public: Package a theme using the theme packager.
217 |   # The theme must be structured using valid JB API.
218 |   # In other words packaging is essentially the reverse of installing.
219 |   #
220 |   # name - String, Required name of the theme you want to package.
221 |   #        
222 |   # Examples
223 |   #
224 |   #   rake theme:package name="twitter"
225 |   #
226 |   # Returns Success/failure messages.
227 |   desc "Package theme"
228 |   task :package do
229 |     name = ENV["name"].to_s.downcase
230 |     theme_path = JB::Path.build(:themes, :node => name)
231 |     asset_path = JB::Path.build(:theme_assets, :node => name)
232 | 
233 |     abort("rake aborted: name cannot be blank") if name.empty?
234 |     abort("rake aborted: '#{theme_path}' directory not found.") unless FileTest.directory?(theme_path)
235 |     abort("rake aborted: '#{asset_path}' directory not found.") unless FileTest.directory?(asset_path)
236 |     
237 |     ## Mirror theme's template directory (_includes)
238 |     packaged_theme_path = JB::Path.build(:themes, :root => JB::Path.build(:theme_packages, :node => name))
239 |     mkdir_p packaged_theme_path
240 |     cp_r theme_path, packaged_theme_path
241 |     
242 |     ## Mirror theme's asset directory
243 |     packaged_theme_assets_path = JB::Path.build(:theme_assets, :root => JB::Path.build(:theme_packages, :node => name))
244 |     mkdir_p packaged_theme_assets_path
245 |     cp_r asset_path, packaged_theme_assets_path
246 | 
247 |     ## Log packager version
248 |     packager = {"packager" => {"version" => CONFIG["theme_package_version"].to_s } }
249 |     open(JB::Path.build(:theme_packages, :node => "#{name}/packager.yml"), "w") do |page|
250 |       page.puts packager.to_yaml
251 |     end
252 |     
253 |     puts "=> '#{name}' theme is packaged and available at: #{JB::Path.build(:theme_packages, :node => name)}"
254 |   end
255 |   
256 | end # end namespace :theme
257 | 
# Internal: Download and process a theme from a git url.
# Notice we don't know the name of the theme until we look it up in the manifest.
# So the repository is cloned into a "_tmp" package directory first and the
# directory is renamed once the name is known.
#
# url - String, Required url to git repository.
#
# Aborts when the clone fails, when the manifest declares no theme name, or
# when a package of that name exists and the user declines to replace it.
#
# Returns theme manifest hash
def theme_from_git_url(url)
  tmp_path = JB::Path.build(:theme_packages, :node => "_tmp")
  # Argument-list form of system: the url reaches git as a single argument and
  # is never parsed by a shell; "--" stops git from reading it as an option.
  abort("rake aborted: system call to git clone failed") unless system("git", "clone", "--", url, tmp_path)
  manifest = verify_manifest(tmp_path)

  # Without a name, the target path below would be the theme packages
  # directory itself, and replacing it would delete every installed package.
  theme_name = manifest.is_a?(Hash) ? manifest["name"].to_s : ""
  if theme_name.empty?
    remove_dir(tmp_path)
    abort("rake aborted: manifest.yml must declare a theme name")
  end

  new_path = JB::Path.build(:theme_packages, :node => theme_name)
  if File.exist?(new_path) && ask("=> #{new_path} theme package already exists. Override?", ['y', 'n']) == 'n'
    remove_dir(tmp_path)
    abort("rake aborted: '#{theme_name}' already exists as theme package.")
  end

  remove_dir(new_path) if File.exist?(new_path)
  mv(tmp_path, new_path)
  manifest
end
279 | 
# Internal: Process theme package manifest file.
#
# theme_path - String, Required. File path to theme package.
#
# Aborts when the package contains no manifest.yml file. The check runs before
# the file is opened, so a missing manifest produces the abort message rather
# than an Errno::ENOENT from File.open.
#
# Returns theme manifest hash
def verify_manifest(theme_path)
  manifest_path = File.join(theme_path, "manifest.yml")
  abort("rake aborted: repo must contain valid manifest.yml") unless File.file?(manifest_path)
  # NOTE(review): the manifest can come from a freshly cloned repository;
  # consider YAML.safe_load if manifests never need custom types.
  # The block form closes the file even when parsing raises.
  File.open(manifest_path) { |manifest_file| YAML.load(manifest_file) }
end
293 | 
# Internal: Prompt the user on the console.
#
# message       - String shown to the user.
# valid_options - Array of accepted answers, or nil/false to accept anything.
#                 When given, the options are appended to the message
#                 (e.g. "[y/n]") and the question is repeated until the
#                 answer is one of them.
#
# Returns the answer read by get_stdin.
def ask(message, valid_options)
  return get_stdin(message) unless valid_options

  prompt = "#{message} #{valid_options.to_s.gsub(/"/, '').gsub(/, /,'/')} "
  reply = nil
  reply = get_stdin(prompt) until valid_options.include?(reply)
  reply
end
302 | 
# Internal: Print a prompt and read one line from standard input.
#
# message - String printed (without a trailing newline) before reading.
#
# Aborts when standard input is at end-of-file (for example a closed or
# redirected STDIN), where gets returns nil and there is no answer to read.
#
# Returns the line read, without its trailing line terminator.
def get_stdin(message)
  print message
  input = STDIN.gets
  abort("rake aborted: no input available on STDIN") if input.nil?
  input.chomp
end
307 | 
# Load every custom rake script found in the _rake directory.
Dir.glob('_rake/*.rake').each do |rake_script|
  load rake_script
end
310 | 


--------------------------------------------------------------------------------
/docs/ReadMe.md:
--------------------------------------------------------------------------------
  1 | 
  2 | Steps to run the code:
  3 | - git clone https://github.com/omar-florez/scratch_mlp/
  4 | - python scratch_mlp/scratch_mlp.py
  5 | 
  6 | >A **neural network** is a clever arrangement of linear and non-linear modules. When we choose and connect them wisely,
  7 | we have a powerful tool to approximate any mathematical function. For example one that **separates classes with a non-linear
  8 | decision boundary**.
  9 | 
 10 | A topic that is not always explained in depth, despite its intuitive and modular nature, is the
 11 | **backpropagation technique** responsible for updating trainable parameters. Let’s build a neural network from scratch
 12 | to see the internal functioning of a neural network using **LEGO pieces as a modular analogy**, one brick at a time.
 13 | 
 14 | Code implementing this can be found in this repository: [https://github.com/omar-florez/scratch_mlp](https://github.com/omar-florez/scratch_mlp)
 15 | 
 16 | ## Neural Networks as a Composition of Pieces
 17 | 
 18 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/overview.png "Logo Title Text 1")
 19 | 
 20 | The above figure depicts some of the Math used for training a neural network. We will make sense of this during this article.
 21 | The reader may find it interesting that a neural network is a stack of modules with different purposes:
 22 | 
 23 | - **Input X** feeds a neural network with raw data, which is stored in a matrix in which observations are rows and dimensions are columns
 24 | - **Weights W1** map input X to the first hidden layer h1. Weights W1 then work as a linear kernel
 25 | - A **Sigmoid function** prevents numbers in the hidden layer from falling out of range by scaling them to 0-1. The result is an **array of
 26 | neural activations** h1 = Sigmoid(XW1)
 27 | 
 28 | At this point these operations only compute a **general linear system**, which doesn’t have the capacity to model non-linear interactions.
 29 | This changes when we stack one more layer, adding depth to this modular structure. The deeper the network, the more subtle non-linear
 30 | interactions we can learn and more complex problems we can solve, which may explain in part the rise of deep neural models.
 31 | 
 32 | ## Why should I read this?
 33 | 
 34 | >If you understand the internal parts of a neural network, you will quickly know **what to change first** when things don't work
 35 | and define a strategy to **test invariants** and **expected behaviors** that you know are part of the algorithm. This will also
 36 | be helpful when you want to **create new capabilities that are not currently implemented in the ML library** you are using.
 37 | 
 38 | **Because debugging machine learning models is a complex task**. From experience, mathematical models don't
 39 |  work as expected on the first try. They may give you low accuracy for new data, take a long time to train or use too much memory,
 40 |  return a large number of false negatives or NaN predictions, etc. Let me show some cases when knowing how the algorithm works
 41 |  can come in handy:
 42 | 
 43 |  - If it **takes too much time to train**, it may be a good idea to increase the size of a minibatch to reduce the variance
 44 |  in the observations and thus help the algorithm converge
 45 |  - If you observe **NaN predictions**, the algorithm may have received large gradients producing numeric overflow. Think of
 46 |  this as consecutive matrix multiplications that explode after many iterations. Decreasing the learning rate will have the
 47 |  effect of scaling down these values. Reducing the number of layers will decrease the number of multiplications. And clipping
 48 |  gradients will control this problem explicitly
 49 | 
 50 | ## Concrete Example: Learning the XOR Function
 51 | 
 52 | >Let's open the blackbox. We will build now a neural network from scratch that learns the **XOR function**.
 53 | The choice of this **non-linear function** is by no means random chance. Without backpropagation it would be hard to learn
 54 | to separate classes with a **straight line**.
 55 | 
 56 | To illustrate this important concept, note below how a straight line cannot
 57 | separate 0s and 1s, the outputs of the XOR function. **Real life problems are also non-linearly separable**.
 58 | 
 59 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/nonlinear_xor.png "Logo Title Text 1")
 60 | 
 61 | The topology of the network is simple:
 62 | - **Input X** is a two dimensional vector
 63 | - **Weights W1** is a 2x3 matrix with randomly initialized values
 64 | - **Hidden layer h1** consists of three neurons. Each neuron receives as input a weighted sum of observations, this is the inner product
 65 | highlighted in green in the below figure: **z1 = [x1, x2][w1, w2]**
 66 | - **Weights W2** is a 3x2 matrix with randomly initialized values and
 67 | - **Output layer h2** consists of two neurons since the XOR function returns either 0 (y1=[0,1]) or 1 (y2 = [1,0])
 68 | 
 69 | More visually:
 70 | 
 71 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/overview2.png "Logo Title Text 1")
 72 | 
 73 | Let's now train the model. In our simple example the trainable parameters are weights, but be aware that current
 74 | research is exploring more types of parameters to be optimized. For example shortcuts between layers, regularized distributions, topologies,
 75 | residual, learning rates, etc.
 76 | 
 77 | **Backpropagation** is a method to update the weights towards the direction (**gradient**) that minimizes a predefined error metric known as **Loss function**
 78 | given a batch of labeled observations. This algorithm has been repeatedly rediscovered and is a special case of a more general technique called
 79 | [automatic differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation) in reverse accumulation mode.
 80 | 
 81 | ### Network Initialization
 82 | 
 83 | >Let's **initialize the network weights** with random numbers.
 84 | 
 85 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/initialized_network.png "Logo Title Text 1"){:width="1300px"}
 86 | 
 87 | ### Forward Step:
 88 | 
 89 | >The goal of this step is to **forward propagate** the input X to each layer of the network until computing a vector in
 90 | the output layer h2.
 91 | 
 92 | This is how it happens:
 93 | - Linearly map input data X using weights W1 as a kernel:
 94 | 
 95 | 
 96 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/z1.png){:width="500px"}
 97 | 
 98 | - Scale this weighted sum z1 with a Sigmoid function to get values of the first hidden layer h1. **Note that the original
 99 | 2D vector is now mapped to a 3D space**.
100 | 
101 | 
102 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/h1.png){:width="400px"}
103 | 
104 | - A similar process takes place for the second layer h2. Let's compute first the **weighted sum** z2 of the
105 | first hidden layer, which is now input data.
106 | 
107 | 
108 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/z2.png){:width="500px"}
109 | 
110 | - And then compute their Sigmoid activation function. This vector [0.37166596 0.45414264] represents the **log probability**
111 | or **predicted vector** computed by the network given input X.
112 | 
113 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/h2.png){:width="300px"}
114 | 
115 | ### Computing the Total Loss
116 | 
117 | >Also known as "actual minus predicted", the goal of the loss function is to **quantify the distance between the predicted
118 |  vector h2 and the actual label provided by humans y**.
119 | 
120 | Note that the Loss function contains a **regularization component** that penalizes large weight values as in a Ridge
121 | regression. In other words, large squared weights values will increase the Loss function, **an error metric we indeed want to minimize**.
122 | 
123 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/loss.png){:width="500px"}
124 | 
125 | ### Backward step:
126 | >The goal of this step is to **update the weights of the neural network** in a direction that minimizes its Loss function.
127 | As we will see, this is a **recursive algorithm**, which can reuse gradients previously computed and heavily relies on
128 | **differentiable functions**. Since these updates reduce the loss function, a network ‘learns’ to approximate the label
129 | of observations with known classes. A property called **generalization**.
130 | 
131 | This step runs in the **reverse order** of the forward step. It first computes the partial derivative of the loss function
132 | with respect to the weights of the output layer (dLoss/dW2) and then the hidden layer (dLoss/dW1). Let's explain
133 | in detail each one.
134 | 
135 | #### dLoss/dW2:
136 | 
137 | The chain rule says that we can decompose the computation of gradients of a neural network into **differentiable pieces**:
138 | 
139 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/chain_w2.png){:width="500px"}
140 | 
141 | As a memory helper, these are the **function definitions** used above and their **first derivatives**:
142 | 
143 | | Function       |  First derivative |
144 | |------------------------------------------------------------ |------------------------------------------------------------|
145 | |Loss = (y-h2)^2     | dLoss/dh2 = -(y-h2) |
146 | |h2 = Sigmoid(z2) | dh2/dz2 = h2(1-h2) |
147 | |z2 = h1W2 | dz2/dW2 = h1 |
148 | |z2 = h1W2 | dz2/dh1 = W2 |
149 | 
150 | 
151 | More visually, we aim to update the weights W2 (in blue) in the figure below. In order to do that, we need to compute
152 | three **partial derivatives along the chain**.
153 | 
154 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/update_w2.png){:width="500px"}
155 | 
156 | Plugging values into these partial derivatives allows us to compute gradients with respect to weights W2 as follows.
157 | 
158 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/chain_w2_detailed.png){:width="600px"}
159 | 
160 | The result is a 3x2 matrix dLoss/dW2, which will update the original W2 values in a direction that minimizes the Loss function.
161 | 
162 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/chain_w2_numbers.png){:width="700px"}
163 | 
164 | #### dLoss/dW1:
165 | 
166 | Computing the **chain rule** for updating the weights of the first hidden layer W1 exhibits the possibility of **reusing existing
167 | computations**.
168 | 
169 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/chain_w1.png){:width="500px"}
170 | 
171 | More visually, the **path from the output layer to the weights W1** touches partial derivatives already computed in **later
172 | layers**.
173 | 
174 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/update_w1.png){:width="500px"}
175 | 
176 | For example, partial derivatives dLoss/dh2 and dh2/dz2 have been already computed as a dependency for learning weights
177 | of the output layer dLoss/dW2 in the previous section.
178 | 
179 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/chain_w1_numbers.png){:width="700px"}
180 | 
181 | Placing all derivatives together, we can execute the **chain rule** again to update the weights of the hidden layer W1:
182 | 
183 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/chain_w1_numbers_final.png){:width="700px"}
184 | 
185 | Finally, we assign the new values of the weights and have completed one iteration of training the network.
186 | 
187 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/copy_values.png){:width="150px"}
188 | 
189 | ### Implementation
190 | 
191 | Let's translate the above mathematical equations to code only using [Numpy](http://www.numpy.org/) as our **linear algebra engine**.
192 | Neural networks are trained in a loop in which each iteration presents already **calibrated input data** to the network.
193 | In this small example, let's just consider the entire dataset in each iteration. The computations of **Forward step**,
194 | **Loss**, and **Backwards step** lead to good generalization since we update the **trainable parameters** (matrices w1 and
195 | w2 in the code) with their corresponding **gradients** (matrices dL_dw1 and dL_dw2) in every cycle.
196 | Code is stored in this repository: [https://github.com/omar-florez/scratch_mlp](https://github.com/omar-florez/scratch_mlp)
197 | 
198 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/code.png)
199 | 
200 | ### Let's Run This!
201 | 
202 | See below **some neural networks** trained to approximate the **XOR function** over many iterations.
203 | 
204 | **Left plot:** Accuracy. **Central plot:** Learned decision boundary. **Right plot:** Loss function.
205 | 
206 | First let's see how a neural network with **3 neurons** in the hidden layer has small capacity. This model learns to separate 2 classes
207 | with a **simple decision boundary** that starts being a straight line but then shows a non-linear behavior.
208 | The loss function in the right plot nicely gets low as training continues.
209 | 
210 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/all_3neurons_lr_0.003_reg_0.0.gif)
211 | 
212 | Having **50 neurons** in the hidden layer notably increases the model's power to learn more **complex decision boundaries**.
213 | This can produce not only more accurate results but also **exploding gradients**, a notable problem when training neural networks.
214 | This happens when very large gradients multiply weights during backpropagation and thus generate large updated weights.
215 | This is the reason why the **Loss value suddenly increases** during the last steps of the training (step > 90).
216 | The **regularization component** of the Loss function computes the **squared values** of weights that are already very large (sum(W^2)/2N).
217 | 
218 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/all_50neurons_lr_0.003_reg_0.0001.gif)
219 | 
220 | This problem can be avoided by **reducing the learning rate** as you can see below. Or by implementing a policy that reduces
221 | the learning rate over time. Or by enforcing a stronger regularization, maybe L1 instead of L2.
222 | **Exploding** and **vanishing gradients** are interesting phenomena and we will devote an entire analysis to them later.
223 | 
224 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/all_50neurons_lr_0.003_reg_0.000001.gif)
225 | 
226 | 


--------------------------------------------------------------------------------
/docs/_config.yml:
--------------------------------------------------------------------------------
  1 | # This is the default format.
  2 | # For more see: https://github.com/mojombo/jekyll/wiki/Permalinks
  3 | permalink: /:categories/:year/:month/:day/:title
  4 | 
  5 | exclude: [".rvmrc", ".rbenv-version", "ReadMe.md", "Rakefile", "changelog.md", "License.md"]
  6 | highlighter: rouge
  7 | 
  8 | # Themes are encouraged to use these universal variables
  9 | # so be sure to set them if your theme uses them.
 10 | #
 11 | title : Omar U. Florez
 12 | author :
 13 |   name : Omar U. Florez
 14 |   email : omar.florez@aggiemail.usu.edu
 15 |   github : omar-florez
 16 |   twitter : OmarUFlorez
 17 |   feedburner : nil
 18 | 
 19 | # NOTE: If replacing this next line with your own URL, you likely want "https://" not "http://"
 20 | production_url : https://omar-florez.github.io/scratch_mlp
 21 | 
 22 | # Tell Github to use the kramdown markdown interpreter
 23 | # (see https://help.github.com/articles/migrating-your-pages-site-from-maruku)
 24 | markdown: kramdown
 25 | 
 26 | # All Jekyll-Bootstrap specific configurations are namespaced into this hash
 27 | #
 28 | JB :
 29 |   version : 0.3.0
 30 | 
 31 |   # All links will be namespaced by BASE_PATH if defined.
 32 |   # Links in your website should always be prefixed with {{BASE_PATH}}
 33 |   # however this value will be dynamically changed depending on your deployment situation.
 34 |   #
 35 |   # CNAME (http://yourcustomdomain.com)
 36 |   #   DO NOT SET BASE_PATH
 37 |   #   (urls will be prefixed with "/" and work relatively)
 38 |   #
 39 |   # GitHub Pages (http://username.github.io)
 40 |   #   DO NOT SET BASE_PATH
 41 |   #   (urls will be prefixed with "/" and work relatively)
 42 |   #
 43 |   # GitHub Project Pages (http://username.github.io/project-name)
 44 |   #
 45 |   #   A GitHub Project site exists in the `gh-pages` branch of one of your repositories.
 46 |   #  REQUIRED! Set BASE_PATH to: http://username.github.io/project-name
 47 |   #
 48 |   # CAUTION:
 49 |   #   - When in Localhost, your site will run from root "/" regardless of BASE_PATH
 50 |   #   - Only the following values are falsy: ["", null, false]
 51 |   #   - When setting BASE_PATH it must be a valid url.
 52 |   #     This means always setting the protocol (http|https) or prefixing with "/"
 53 |   #
 54 |   # NOTE: If replacing this next line with your own URL, you likely want "https://" not "http://"
 55 |   BASE_PATH : https://omar-florez.github.io/scratch_mlp
 56 | 
 57 |   # By default, the asset_path is automatically defined relative to BASE_PATH plus the enabled theme.
 58 |   # ex: [BASE_PATH]/assets/themes/[THEME-NAME]
 59 |   #
 60 |   # Override this by defining an absolute path to assets here.
 61 |   # ex:
 62 |   #   http://s3.amazonaws.com/yoursite/themes/watermelon
 63 |   #   /assets
 64 |   #
 65 |   # ASSET_PATH : http://kbroman.org/simple_site/assets/themes/twitter
 66 | 
 67 |   # These paths are to the main pages Jekyll-Bootstrap ships with.
 68 |   # Some JB helpers refer to these paths; change them here if needed.
 69 |   # NOTE: use YAML null to leave a path unset; a bare `nil` is parsed as the string "nil".
 70 |   archive_path: null
 71 |   categories_path : null
 72 |   tags_path : null
 73 |   atom_path : null
 74 |   rss_path : null
 75 | 
 76 |   # Settings for comments helper
 77 |   # Set 'provider' to the comment provider you want to use.
 78 |   # Set 'provider' to false to turn commenting off globally.
 79 |   #
 80 |   comments :
 81 |     provider : false
 82 | 
 83 |   # Settings for analytics helper
 84 |   # Set 'provider' to the analytics provider you want to use.
 85 |   # Set 'provider' to false to turn analytics off globally.
 86 |   #
 87 |   analytics :
 88 |     provider : false
 89 | 
 90 |   # Settings for sharing helper.
 91 |   # Sharing is for things like tweet, plusone, like, reddit buttons etc.
 92 |   # Set 'provider' to the sharing provider you want to use.
 93 |   # Set 'provider' to false to turn sharing off globally.
 94 |   #
 95 |   sharing :
 96 |     provider : true
 97 | 
 98 |   # Settings for all other include helpers can be defined by creating
 99 |   # a hash with key named for the given helper. ex:
100 |   #
101 |   #   pages_list :
102 |   #     provider : "custom"
103 |   #
104 |   # Setting any helper's provider to 'custom' will bypass the helper code
105 |   # and include your custom code. Your custom file must be defined at:
106 |   #   ./_includes/custom/[HELPER]
107 |   # where [HELPER] is the name of the helper you are overriding.
108 | 
109 | theme: jekyll-theme-leap-day


--------------------------------------------------------------------------------
/docs/_includes/JB/analytics:
--------------------------------------------------------------------------------
 1 | {% if site.safe and site.JB.analytics.provider and page.JB.analytics != false %}
 2 | 
 3 | {% case site.JB.analytics.provider %}
 4 | {% when "google" %}
 5 |   {% include JB/analytics-providers/google %}
 6 | {% when "getclicky" %}
 7 |   {% include JB/analytics-providers/getclicky %}
 8 | {% when "mixpanel" %}
 9 |   {% include JB/analytics-providers/mixpanel %}
10 | {% when "custom" %}
11 |   {% include custom/analytics %}
12 | {% endcase %}
13 | 
14 | {% endif %}


--------------------------------------------------------------------------------
/docs/_includes/JB/analytics-providers/getclicky:
--------------------------------------------------------------------------------
 1 | <script type="text/javascript">
 2 | var clicky_site_ids = clicky_site_ids || [];
 3 | clicky_site_ids.push({{ site.JB.analytics.getclicky.site_id }});
 4 | (function() {
 5 |   var s = document.createElement('script');
 6 |   s.type = 'text/javascript';
 7 |   s.async = true;
 8 |   s.src = '//static.getclicky.com/js';
 9 |   ( document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0] ).appendChild( s );
10 | })();
11 | </script>
12 | <noscript><p><img alt="Clicky" width="1" height="1" src="//in.getclicky.com/{{ site.JB.analytics.getclicky.site_id }}ns.gif" /></p></noscript>
13 | 


--------------------------------------------------------------------------------
/docs/_includes/JB/analytics-providers/google:
--------------------------------------------------------------------------------
 1 | <script type="text/javascript">
 2 |   var _gaq = _gaq || [];
 3 |   _gaq.push(['_setAccount', '{{ site.JB.analytics.google.tracking_id }}']);
 4 |   _gaq.push(['_trackPageview']);
 5 | 
 6 |   (function() {
 7 |     var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
 8 |     ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
 9 |     var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
10 |   })();
11 | </script>


--------------------------------------------------------------------------------
/docs/_includes/JB/analytics-providers/mixpanel:
--------------------------------------------------------------------------------
 1 | <script type="text/javascript">
 2 |     var mpq = [];
 3 |     mpq.push(["init", "{{ site.JB.analytics.mixpanel.token}}"]);
 4 |     (function(){var b,a,e,d,c;b=document.createElement("script");b.type="text/javascript";
 5 |     b.async=true;b.src=(document.location.protocol==="https:"?"https:":"http:")+
 6 |     "//api.mixpanel.com/site_media/js/api/mixpanel.js";a=document.getElementsByTagName("script")[0];
 7 |     a.parentNode.insertBefore(b,a);e=function(f){return function(){mpq.push(
 8 |     [f].concat(Array.prototype.slice.call(arguments,0)))}};d=["init","track","track_links",
 9 |     "track_forms","register","register_once","identify","name_tag","set_config"];for(c=0;c<
10 |     d.length;c++){mpq[d[c]]=e(d[c])}})();
11 | </script>


--------------------------------------------------------------------------------
/docs/_includes/JB/categories_list:
--------------------------------------------------------------------------------
 1 | {% comment %}<!--
 2 | The categories_list include is a listing helper for categories.
 3 | Usage:
 4 |   1) assign the 'categories_list' variable to a valid array of tags.
 5 |   2) include JB/categories_list
 6 |   example:
 7 |     <ul>
 8 |   	  {% assign categories_list = site.categories %}  
 9 |   	  {% include JB/categories_list %}
10 |   	</ul>
11 |   
12 |   Notes: 
13 |     Categories can be either a Hash of Category objects (hashes) or an Array of category-names (strings).
14 |     The encapsulating 'if' statement checks whether categories_list is a Hash or Array.
15 |     site.categories is a Hash while page.categories is an array.
16 |     
17 |   This helper can be seen in use at: ../_layouts/default.html
18 | -->{% endcomment %}
19 | 
20 | {% if site.JB.categories_list.provider == "custom" %}
21 |   {% include custom/categories_list %}
22 | {% else %}
23 |   {% if categories_list.first[0] == null %}
24 |     {% for category in categories_list %} 
25 |     	<li><a href="{{ BASE_PATH }}{{ site.JB.categories_path }}#{{ category }}-ref">
26 |     		{{ category | join: "/" }} <span>{{ site.categories[category].size }}</span>
27 |     	</a></li>
28 |     {% endfor %}
29 |   {% else %}
30 |     {% for category in categories_list %} 
31 |     	<li><a href="{{ BASE_PATH }}{{ site.JB.categories_path }}#{{ category[0] }}-ref">
32 |     		{{ category[0] | join: "/" }} <span>{{ category[1].size }}</span>
33 |     	</a></li>
34 |     {% endfor %}
35 |   {% endif %}
36 | {% endif %}
37 | {% assign categories_list = nil %}


--------------------------------------------------------------------------------
/docs/_includes/JB/comments:
--------------------------------------------------------------------------------
 1 | {% if site.JB.comments.provider and page.comments != false %}
 2 | 
 3 | {% case site.JB.comments.provider %}
 4 | {% when "disqus" %}
 5 |   {% include JB/comments-providers/disqus %}
 6 | {% when "livefyre" %}
 7 |   {% include JB/comments-providers/livefyre %}
 8 | {% when "intensedebate" %}
 9 |   {% include JB/comments-providers/intensedebate %}
10 | {% when "facebook" %}
11 |   {% include JB/comments-providers/facebook %}
12 | {% when "custom" %}
13 |   {% include custom/comments %}
14 | {% endcase %}
15 | 
16 | {% endif %}


--------------------------------------------------------------------------------
/docs/_includes/JB/comments-providers/disqus:
--------------------------------------------------------------------------------
 1 | <div id="disqus_thread"></div>
 2 | <script type="text/javascript">
 3 |     {% if site.safe == false %}var disqus_developer = 1;{% endif %}
 4 |     var disqus_shortname = '{{ site.JB.comments.disqus.short_name }}'; // required: replace example with your forum shortname
 5 |     {% if page.wordpress_id %}var disqus_identifier = '{{page.wordpress_id}} {{site.production_url}}/?p={{page.wordpress_id}}';{% endif %}
 6 |     /* * * DON'T EDIT BELOW THIS LINE * * */
 7 |     (function() {
 8 |         var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
 9 |         dsq.src = 'https://' + disqus_shortname + '.disqus.com/embed.js'; // https so the embed is not blocked as mixed content on https pages
10 |         (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
11 |     })();
12 | </script>
13 | <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
14 | <a href="http://disqus.com" class="dsq-brlink">blog comments powered by <span class="logo-disqus">Disqus</span></a>
15 | 


--------------------------------------------------------------------------------
/docs/_includes/JB/comments-providers/facebook:
--------------------------------------------------------------------------------
1 | <div id="fb-root"></div>
2 | <script>(function(d, s, id) {
3 |   var js, fjs = d.getElementsByTagName(s)[0];
4 |   if (d.getElementById(id)) return;
5 |   js = d.createElement(s); js.id = id;
6 |   js.src = "//connect.facebook.net/en_US/all.js#xfbml=1&appId={{ site.JB.comments.facebook.appid }}";
7 |   fjs.parentNode.insertBefore(js, fjs);
8 | }(document, 'script', 'facebook-jssdk'));</script>
9 | <div class="fb-comments" data-href="{{ site.production_url }}" data-num-posts="{{ site.JB.comments.facebook.num_posts }}" data-width="{{ site.JB.comments.facebook.width }}" data-colorscheme="{{ site.JB.comments.facebook.colorscheme }}"></div>


--------------------------------------------------------------------------------
/docs/_includes/JB/comments-providers/intensedebate:
--------------------------------------------------------------------------------
1 | <script>
2 | var idcomments_acct = '{{ site.JB.comments.intensedebate.account }}';
3 | var idcomments_post_id;
4 | var idcomments_post_url;
5 | </script>
6 | <script type="text/javascript" src="http://www.intensedebate.com/js/genericLinkWrapperV2.js"></script>
7 | 


--------------------------------------------------------------------------------
/docs/_includes/JB/comments-providers/livefyre:
--------------------------------------------------------------------------------
1 | <script type='text/javascript' src='http://zor.livefyre.com/wjs/v1.0/javascripts/livefyre_init.js'></script>
2 | <script type='text/javascript'>
3 |     var fyre = LF({
4 |         site_id: {{ site.JB.comments.livefyre.site_id }}
5 |     });
6 | </script>


--------------------------------------------------------------------------------
/docs/_includes/JB/liquid_raw:
--------------------------------------------------------------------------------
 1 | {% comment%}<!--
 2 | The liquid_raw helper is a way to display raw liquid code, as opposed to parsing it.
 3 | Normally you'd use Liquid's built in 'raw' tag. 
 4 | The problem is GitHub Jekyll does not support the current Liquid release.
 5 | GitHub Jekyll supports the deprecated 'literal' tag.
 6 | Using one will break the other if you plan to deploy to GitHub pages.
 7 |   see: https://github.com/mojombo/jekyll/issues/425
 8 | 
 9 | Since I don't want to mess with Liquid versions, I'll just rewrite the way I 
10 | intend to give liquid examples. It's not an elegant by any means:
11 | 
12 | Usage: 
13 |   1) Define a 'text' variable with the block of liquid code you intend to display.
14 |   2) Pass the text variable to include JB/liquid_raw
15 | 
16 |   example:
17 |   {% capture text %}|.% for tag in tags_list %.|
18 |     <li><a href="|.{ site.var.tags_path }.||.{ tag[0] }.|-ref">|.{ tag[0] }.| <span>|.{tag[1].size}.|</span></a></li>
19 |   |.% endfor %.|
20 | 
21 |   |.% assign tags_list = null %.|{% endcapture %}    
22 |   {% include JB/liquid_raw %}
23 |   
24 |   As seen here, you must use "|." and ".|" as opening and closing brackets.
25 | -->{% endcomment%}
26 | 
27 | {% if site.JB.liquid_raw.provider == "custom" %}
28 |   {% include custom/liquid_raw %}
29 | {% else %}
30 |   <pre><code>{{text | replace:"|.", "&#123;" | replace:".|", "&#125;" | replace:">", "&gt;" | replace:"<", "&lt;" }}</code></pre>
31 | {% endif %}
32 | {% assign text = nil %}


--------------------------------------------------------------------------------
/docs/_includes/JB/pages_list:
--------------------------------------------------------------------------------
 1 | {% comment %}<!--
 2 | The pages_list include is a listing helper.
 3 | Usage:
 4 |   1) assign the 'pages_list' variable to a valid array of pages or posts.
 5 |   2) include JB/pages_list
 6 |   example:
 7 |     <ul>
 8 |   	  {% assign pages_list = site.pages %}  
 9 |   	  {% include JB/pages_list %}
10 |   	</ul>
11 |   	
12 |   Grouping: (optional): 
13 |   	assign the 'group' variable to constrain the list to only pages/posts
14 |   	in the given group. Note you must define the group manually in the page/post
15 |   	meta-data to use this feature.
16 |   	Grouping is mainly helpful for non-post pages.
17 |   	If you want to group posts, it's easier/better to tag them, then pass the tagged posts array.
18 |   	i.e. site.tags.cool_tag (this returns an array of posts tagged: cool_tag)
19 |   	
20 |   This helper can be seen in use at: ../_layouts/default.html
21 | -->{% endcomment %}
22 | 
23 | {% if site.JB.pages_list.provider == "custom" %}
24 |   {% include custom/pages_list %}
25 | {% else %}
26 |   {% for node in pages_list %}
27 |     {% if node.title != null %}
28 |       {% if group == null or group == node.group %}
29 |       	{% if page.url == node.url %}
30 |       	<li class="active"><a href="{{ BASE_PATH }}{{node.url}}" class="active">{{node.title}}</a></li>
31 |       	{% else %}
32 |       	<li><a href="{{ BASE_PATH }}{{node.url}}">{{node.title}}</a></li>
33 |       	{% endif %}
34 |       {% endif %}
35 |     {% endif %}
36 |   {% endfor %}
37 | {% endif %}
38 | {% assign pages_list = nil %}
39 | {% assign group = nil %}


--------------------------------------------------------------------------------
/docs/_includes/JB/posts_collate:
--------------------------------------------------------------------------------
 1 | {% comment %}<!--
 2 | Collate_posts helper. Collated posts by year and month.
 3 | Usage:
 4 |   1) assign the 'posts_collate' variable to a valid array of posts.
 5 |   2) include JB/posts_collate
 6 |   example:
 7 |     {% assign posts_collate = site.posts %}
 8 |     {% include JB/posts_collate %}
 9 | 
10 |   Ordering:
11 |     Posts are displayed in reverse chronological order.
12 |     For normal chronological order:
13 |       1) Change the for loop to this:
14 |         => 'for post in site.posts reversed'
15 |       2) Next make sure to change 'post.previous.date' to:
16 |         => 'post.next.date'
17 |         
18 | -->{% endcomment %}
19 | 
20 | {% if site.JB.posts_collate.provider == "custom" %}
21 |   {% include custom/posts_collate %}
22 | {% else %}
23 |   {% for post in posts_collate  %}
24 |     {% capture this_year %}{{ post.date | date: "%Y" }}{% endcapture %}
25 |     {% capture this_month %}{{ post.date | date: "%B" }}{% endcapture %}
26 |     {% capture next_year %}{{ post.previous.date | date: "%Y" }}{% endcapture %}
27 |     {% capture next_month %}{{ post.previous.date | date: "%B" }}{% endcapture %}
28 |   
29 |     {% if forloop.first %}
30 |       <h2>{{this_year}}</h2>
31 |       <h3>{{this_month}}</h3>
32 |       <ul>
33 |     {% endif %}
34 |   
35 |     <li><span>{{ post.date | date: "%B %e, %Y" }}</span> &raquo; <a href="{{ BASE_PATH }}{{ post.url }}">{{ post.title }}</a></li>
36 |   
37 |     {% if forloop.last %}
38 |       </ul>
39 |     {% else %}
40 |       {% if this_year != next_year %}
41 |         </ul>
42 |         <h2>{{next_year}}</h2>
43 |         <h3>{{next_month}}</h3>
44 |         <ul>
45 |       {% else %}    
46 |         {% if this_month != next_month %}
47 |           </ul>
48 |           <h3>{{next_month}}</h3>
49 |           <ul>
50 |         {% endif %}
51 |       {% endif %}
52 |     {% endif %}
53 |   {% endfor %}
54 | {% endif %}
55 | {% assign posts_collate = nil %}


--------------------------------------------------------------------------------
/docs/_includes/JB/setup:
--------------------------------------------------------------------------------
 1 | {% capture jbcache %}
 2 |   <!--
 3 |   - Dynamically set liquid variables for working with URLs/paths
 4 |   -->
 5 |   {% if site.JB.setup.provider == "custom" %}
 6 |     {% include custom/setup %}
 7 |   {% else %}
 8 |     {% if site.safe and site.JB.BASE_PATH and site.JB.BASE_PATH != '' %}
 9 |       {% assign BASE_PATH = site.JB.BASE_PATH %}
10 |       {% assign HOME_PATH = site.JB.BASE_PATH %}
11 |     {% else %}
12 |       {% assign BASE_PATH = nil %}
13 |       {% assign HOME_PATH = "/" %}
14 |     {% endif %}
15 | 
16 |     {% if site.JB.ASSET_PATH %}
17 |       {% assign ASSET_PATH = site.JB.ASSET_PATH %}
18 |     {% else %}
19 |       {% capture ASSET_PATH %}{{ BASE_PATH }}/assets/themes/{{ layout.theme.name }}{% endcapture %}
20 |     {% endif %}
21 |   {% endif %}
22 | {% endcapture %}{% assign jbcache = nil %}
23 | 


--------------------------------------------------------------------------------
/docs/_includes/JB/sharing:
--------------------------------------------------------------------------------
1 | {% if site.safe and site.JB.sharing.provider and page.JB.sharing != false %}
2 | 
3 | {% case site.JB.sharing.provider %}
4 | {% when "custom" %}
5 |   {% include custom/sharing %}
6 | {% endcase %}
7 | 
8 | {% endif %}


--------------------------------------------------------------------------------
/docs/_includes/JB/tags_list:
--------------------------------------------------------------------------------
 1 | {% comment %}<!--
 2 | The tags_list include is a listing helper for tags.
 3 | Usage:
 4 |   1) assign the 'tags_list' variable to a valid array of tags.
 5 |   2) include JB/tags_list
 6 |   example:
 7 |     <ul>
 8 |   	  {% assign tags_list = site.tags %}  
 9 |   	  {% include JB/tags_list %}
10 |   	</ul>
11 |   
12 |   Notes: 
13 |     Tags can be either a Hash of tag objects (hashes) or an Array of tag-names (strings).
14 |     The encapsulating 'if' statement checks whether tags_list is a Hash or Array.
15 |     site.tags is a Hash while page.tags is an array.
16 |     
17 |   This helper can be seen in use at: ../_layouts/default.html
18 | -->{% endcomment %}
19 | 
20 | {% if site.JB.tags_list.provider == "custom" %}
21 |   {% include custom/tags_list %}
22 | {% else %}
23 |   {% if tags_list.first[0] == null %}
24 |     {% for tag in tags_list %} 
25 |     	<li><a href="{{ BASE_PATH }}{{ site.JB.tags_path }}#{{ tag }}-ref">{{ tag }} <span>{{ site.tags[tag].size }}</span></a></li>
26 |     {% endfor %}
27 |   {% else %}
28 |     {% for tag in tags_list %} 
29 |     	<li><a href="{{ BASE_PATH }}{{ site.JB.tags_path }}#{{ tag[0] }}-ref">{{ tag[0] }} <span>{{ tag[1].size }}</span></a></li>
30 |     {% endfor %}
31 |   {% endif %}
32 | {% endif %}
33 | {% assign tags_list = nil %}
34 | 


--------------------------------------------------------------------------------
/docs/_includes/head.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |     <meta charset="UTF-8">
 5 |     <title>Title</title>
 6 | </head>
 7 | <body>
 8 | 
 9 | 
10 | <script type="text/x-mathjax-config">
11 | MathJax.Hub.Config({
12 |   tex2jax: {
13 |     skipTags: ['script', 'noscript', 'style', 'textarea', 'pre'], // tags whose contents tex2jax must not process
14 |     inlineMath: [['$','$']] // use $...$ as the inline-math delimiters
15 |   }
16 | });
17 | </script>
18 | </body>
19 | </html>


--------------------------------------------------------------------------------
/docs/_includes/themes/twitter/default.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <meta charset="utf-8">
 5 |     <title>{{ page.title }}</title>
 6 |     {% if page.description %}<meta name="description" content="{{ page.description }}">{% endif %}
 7 |     <meta name="author" content="{{ site.author.name }}">
 8 | 
 9 |     <!-- Enable responsive viewport -->
10 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
11 | 
12 |     <!-- Le HTML5 shim, for IE6-8 support of HTML elements -->
13 |     <!--[if lt IE 9]>
14 |       <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
15 |     <![endif]-->
16 | 
17 |     <!-- Le styles -->
18 |     <link href="{{ ASSET_PATH }}/bootstrap/css/bootstrap.2.2.2.min.css" rel="stylesheet">
19 |     <link href="{{ ASSET_PATH }}/css/style.css?body=1" rel="stylesheet" type="text/css" media="all">
20 |     <link href="{{ ASSET_PATH }}/css/kbroman.css" rel="stylesheet" type="text/css" media="all">
21 | 
22 |     <!-- Le fav and touch icons -->
23 | 
24 |     <!-- atom & rss feed -->
25 |     <link href="{{ BASE_PATH }}{{ site.JB.atom_path }}" type="application/atom+xml" rel="alternate" title="Sitewide ATOM Feed">
26 |     <link href="{{ BASE_PATH }}{{ site.JB.rss_path }}" type="application/rss+xml" rel="alternate" title="Sitewide RSS Feed">
27 | 
28 |   </head>
29 | 
30 |   <body>
31 |     <div class="navbar">
32 |       <div class="navbar-inner">
33 |         <div class="container-narrow">
34 |           <a class="brand" href="{{ HOME_PATH }}">{{ site.title }}</a>
35 |         </div>
36 |       </div>
37 |     </div>
38 | 
39 |     <div class="container-narrow">
40 | 
41 |       <div class="content">
42 |         {{ content }}
43 |       </div>
44 |       <hr>
45 |       <footer>
46 |         <p><small>
47 |   <!-- start of Karl's footer; modify this part -->
48 |           <a href="https://creativecommons.org/publicdomain/zero/1.0/"><img src="https://i.creativecommons.org/p/zero/1.0/88x31.png" alt="CC0"/></a> &nbsp;
49 |           <a href="https://www.linkedin.com/in/omar-u-florez-35338015">Omar U. Florez</a>
50 |   <!-- end of Karl's footer; modify this part -->
51 |         </small></p>
52 |       </footer>
53 | 
54 |     </div>
55 | 
56 |     {% include JB/analytics %}
57 |   </body>
58 | </html>
59 | 


--------------------------------------------------------------------------------
/docs/_includes/themes/twitter/page.html:
--------------------------------------------------------------------------------
 1 | <div class="page-header">
 2 |   <h2>{{ page.title }} {% if page.tagline %} <small>{{ page.tagline }}</small>{% endif %}</h2>
 3 | </div>
 4 | 
 5 | <div class="row-fluid">
 6 |   <div class="span12">
 7 |     {{ content }}
 8 |   </div>
 9 | </div>
10 | 


--------------------------------------------------------------------------------
/docs/_includes/themes/twitter/post.html:
--------------------------------------------------------------------------------
 1 | <div class="page-header">
 2 |   <h1>{{ page.title }} {% if page.tagline %}<small>{{page.tagline}}</small>{% endif %}</h1>
 3 | </div>
 4 | 
 5 | <div class="row-fluid post-full">
 6 |   <div class="span12">
 7 |     <div class="date">
 8 |       <span>{{ page.date | date_to_long_string }}</span>
 9 |     </div>
10 |     <div class="content">
11 |       {{ content }}
12 |     </div>
13 | 
14 |   {% unless page.categories == empty %}
15 |     <ul class="tag_box inline">
16 |       <li><i class="icon-folder-open"></i></li>
17 |       {% assign categories_list = page.categories %}
18 |       {% include JB/categories_list %}
19 |     </ul>
20 |   {% endunless %}  
21 | 
22 |   {% unless page.tags == empty %}
23 |     <ul class="tag_box inline">
24 |       <li><i class="icon-tags"></i></li>
25 |       {% assign tags_list = page.tags %}
26 |       {% include JB/tags_list %}
27 |     </ul>
28 |   {% endunless %}  
29 | 
30 |     <hr>
31 |     <div class="pagination">
32 |       <ul>
33 |       {% if page.previous %}
34 |         <li class="prev"><a href="{{ BASE_PATH }}{{ page.previous.url }}" title="{{ page.previous.title }}">&larr; Previous</a></li>
35 |       {% else %}
36 |         <li class="prev disabled"><a>&larr; Previous</a></li>
37 |       {% endif %}
38 |         <li><a href="{{ BASE_PATH }}{{ site.JB.archive_path }}">Archive</a></li>
39 |       {% if page.next %}
40 |         <li class="next"><a href="{{ BASE_PATH }}{{ page.next.url }}" title="{{ page.next.title }}">Next &rarr;</a></li>
41 |       {% else %}
42 |         <li class="next disabled"><a>Next &rarr;</a></li>
43 |       {% endif %}
44 |       </ul>
45 |     </div>
46 |     <hr>
47 |     {% include JB/comments %}
48 |   </div>
49 | </div>
50 | 


--------------------------------------------------------------------------------
/docs/_includes/themes/twitter/settings.yml:
--------------------------------------------------------------------------------
1 | theme :
2 |   name : twitter


--------------------------------------------------------------------------------
/docs/_layouts/default.html:
--------------------------------------------------------------------------------
1 | ---
2 | theme :
3 |   name : twitter
4 | ---
5 | {% include JB/setup %}
6 | {% include themes/twitter/default.html %}
7 | 


--------------------------------------------------------------------------------
/docs/_layouts/page.html:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | ---
4 | {% include JB/setup %}
5 | {% include themes/twitter/page.html %}
6 | 


--------------------------------------------------------------------------------
/docs/_layouts/post.html:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | ---
4 | {% include JB/setup %}
5 | {% include themes/twitter/post.html %}
6 | 


--------------------------------------------------------------------------------
/docs/_plugins/debug.rb:
--------------------------------------------------------------------------------
 1 | # A simple way to inspect liquid template variables.
 2 | # Usage:
 3 | #  Can be used anywhere liquid syntax is parsed (templates, includes, posts/pages)
 4 | #  {{ site | debug }}
 5 | #  {{ site.posts | debug }}
 6 | #
 7 | require 'pp'
 8 | module Jekyll
 9 |   # Override #inspect on Post and Page. Per the original author, the default
10 |   # inspect wraps these pseudo post/page objects in < >, in which case
11 |   # the output is taken for HTML tags and hidden from view.
12 |   # The replacements below return plain strings without angle brackets.
13 |   class Post
14 |     def inspect
15 |       "#Jekyll:Post @id=#{self.id.inspect}"
16 |     end
17 |   end
18 |   
19 |   class Page
20 |     def inspect
21 |       "#Jekyll:Page @name=#{self.name.inspect}"
22 |     end
23 |   end
24 |   
25 | end # Jekyll
26 |   
27 | module Jekyll
28 |   module DebugFilter
29 |     # Liquid filter: returns obj's class and pretty_inspect text wrapped in <pre> tags; also prints pretty_inspect to stdout when `stdout` is truthy. NOTE(review): the returned text is not HTML-escaped.
30 |     def debug(obj, stdout=false)
31 |       puts obj.pretty_inspect if stdout
32 |       "<pre>#{obj.class}\n#{obj.pretty_inspect}</pre>"
33 |     end
34 | 
35 |   end # DebugFilter
36 | end # Jekyll
37 | 
38 | Liquid::Template.register_filter(Jekyll::DebugFilter)


--------------------------------------------------------------------------------
/docs/assets/all_3neurons_lr_0.003_reg_0.0.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/all_3neurons_lr_0.003_reg_0.0.gif


--------------------------------------------------------------------------------
/docs/assets/all_50neurons_lr_0.003_reg_0.000001.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/all_50neurons_lr_0.003_reg_0.000001.gif


--------------------------------------------------------------------------------
/docs/assets/all_50neurons_lr_0.003_reg_0.0001.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/all_50neurons_lr_0.003_reg_0.0001.gif


--------------------------------------------------------------------------------
/docs/assets/chain_w1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/chain_w1.png


--------------------------------------------------------------------------------
/docs/assets/chain_w1_numbers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/chain_w1_numbers.png


--------------------------------------------------------------------------------
/docs/assets/chain_w1_numbers_final.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/chain_w1_numbers_final.png


--------------------------------------------------------------------------------
/docs/assets/chain_w2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/chain_w2.png


--------------------------------------------------------------------------------
/docs/assets/chain_w2_detailed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/chain_w2_detailed.png


--------------------------------------------------------------------------------
/docs/assets/chain_w2_numbers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/chain_w2_numbers.png


--------------------------------------------------------------------------------
/docs/assets/code.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/code.png


--------------------------------------------------------------------------------
/docs/assets/copy_values.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/copy_values.png


--------------------------------------------------------------------------------
/docs/assets/example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/example.png


--------------------------------------------------------------------------------
/docs/assets/forward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/forward.png


--------------------------------------------------------------------------------
/docs/assets/h1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/h1.png


--------------------------------------------------------------------------------
/docs/assets/h2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/h2.png


--------------------------------------------------------------------------------
/docs/assets/initialized_network.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/initialized_network.png


--------------------------------------------------------------------------------
/docs/assets/loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/loss.png


--------------------------------------------------------------------------------
/docs/assets/nonlinear_xor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/nonlinear_xor.png


--------------------------------------------------------------------------------
/docs/assets/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/overview.png


--------------------------------------------------------------------------------
/docs/assets/overview2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/overview2.png


--------------------------------------------------------------------------------
/docs/assets/themes/twitter/bootstrap/img/glyphicons-halflings-white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/themes/twitter/bootstrap/img/glyphicons-halflings-white.png


--------------------------------------------------------------------------------
/docs/assets/themes/twitter/bootstrap/img/glyphicons-halflings.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/themes/twitter/bootstrap/img/glyphicons-halflings.png


--------------------------------------------------------------------------------
/docs/assets/themes/twitter/css/kbroman.css:
--------------------------------------------------------------------------------
 1 | code {
 2 |   padding: 0;
 3 |   font-size: 90%;
 4 |   color: black;
 5 |   background-color: white;
 6 |   border: 0px solid white;
 7 | }
 8 | 
 9 | a code {
10 |   color: #08c;
11 | }


--------------------------------------------------------------------------------
/docs/assets/themes/twitter/css/style.css:
--------------------------------------------------------------------------------
 1 | /* Custom container */
 2 | .container-narrow {
 3 |   margin: 0 auto;
 4 |   max-width: 700px; }
 5 | 
 6 | .container-narrow > hr {
 7 |   margin: 30px 0; }
 8 | 
 9 | .navbar .nav {
10 |   float: right; }
11 | 
12 | /* posts index */
13 | .post > h3.title {
14 |   position: relative;
15 |   padding-top: 10px; }
16 | 
17 | .post > h3.title span.date {
18 |   position: absolute;
19 |   right: 0;
20 |   font-size: 0.9em; }
21 | 
22 | .post > .more {
23 |   margin: 10px 0;
24 |   text-align: left; }
25 | 
26 | /* post-full*/
27 | .post-full .date {
28 |   margin-bottom: 20px;
29 |   font-weight: bold; }
30 | 
31 | /* tag_box */
32 | .tag_box {
33 |   list-style: none;
34 |   margin: 0;
35 |   overflow: hidden; }
36 | 
37 | .tag_box li {
38 |   line-height: 28px; }
39 | 
40 | .tag_box li i {
41 |   opacity: 0.9; }
42 | 
43 | .tag_box.inline li {
44 |   float: left; }
45 | 
46 | .tag_box a {
47 |   padding: 3px 6px;
48 |   margin: 2px;
49 |   background: #eee;
50 |   color: #555;
51 |   border-radius: 3px;
52 |   text-decoration: none;
53 |   border: 1px dashed #cccccc; }
54 | 
55 | .tag_box a span {
56 |   vertical-align: super;
57 |   font-size: 0.8em; }
58 | 
59 | .tag_box a:hover {
60 |   background-color: #e5e5e5; }
61 | 
62 | .tag_box a.active {
63 |   background: #57A957;
64 |   border: 1px solid #4c964d;
65 |   color: #FFF; }


--------------------------------------------------------------------------------
/docs/assets/update_w1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/update_w1.png


--------------------------------------------------------------------------------
/docs/assets/update_w2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/update_w2.png


--------------------------------------------------------------------------------
/docs/assets/z1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/z1.png


--------------------------------------------------------------------------------
/docs/assets/z2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/docs/assets/z2.png


--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | layout: page
  3 | title: One LEGO at a time&#58; Explaining the Math of How Neural Networks Learn
  4 | tagline:
  5 | description: Tutorial on back-propagation
  6 | ---
  7 | 
  8 | >A **neural network** is a clever arrangement of linear and non-linear modules. When we choose and connect them wisely,
  9 | we have a powerful tool to approximate any mathematical function. For example one that **separates classes with a non-linear
 10 | decision boundary**.
 11 | 
 12 | A topic that is not always explained in depth, despite its intuitive and modular nature, is the
 13 | **backpropagation technique** responsible for updating trainable parameters. Let’s build a neural network from scratch
 14 | to see the internal functioning of a neural network using **LEGO pieces as a modular analogy**, one brick at a time.
 15 | 
 16 | Code implementing this can be found in this repository: [https://github.com/omar-florez/scratch_mlp](https://github.com/omar-florez/scratch_mlp)
 17 | 
 18 | ## Neural Networks as a Composition of Pieces
 19 | 
 20 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/overview.png "Logo Title Text 1")
 21 | 
 22 | The above figure depicts some of the Math used for training a neural network. We will make sense of this during this article.
 23 | The reader may find it interesting that a neural network is a stack of modules with different purposes:
 24 | 
 25 | - **Input X** feeds a neural network with raw data, which is stored in a matrix in which observations are rows and dimensions are columns
 26 | - **Weights W1** maps input X to the first hidden layer h1. Weights W1 works then as a linear kernel
 27 | - A **Sigmoid function** prevents numbers in the hidden layer from falling out of range by scaling them to 0-1. The result is an **array of
 28 | neural activations** h1 = Sigmoid(WX)
 29 | 
 30 | At this point these operations only compute a **general linear system**, which doesn’t have the capacity to model non-linear interactions.
 31 | This changes when we stack one more layer, adding depth to this modular structure. The deeper the network, the more subtle non-linear
 32 | interactions we can learn and more complex problems we can solve, which may explain in part the rise of deep neural models.
 33 | 
 34 | ## Why should I read this?
 35 | 
 36 | >If you understand the internal parts of a neural network, you will quickly know **what to change first** when things don't work
 37 | and define a strategy to **test invariants** and **expected behaviors** that you know are part of the algorithm. This will also
 38 | be helpful when you want to **create new capabilities that are not currently implemented in the ML library** you are using.
 39 | 
 40 | **Because debugging machine learning models is a complex task**. From experience, mathematical models don't
 41 |  work as expected on the first try. They may give you low accuracy for new data, take a long time to train or use too much memory,
 42 |  return a large number of false negatives or NaN predictions, etc. Let me show some cases when knowing how the algorithm works
 43 |  can come in handy:
 44 | 
 45 |  - If it **takes too much time to train**, it may be a good idea to increase the size of a minibatch to reduce the variance
 46 |  in the observations and thus to help the algorithm to converge
 47 |  - If you observe **NaN predictions**, the algorithm may have received large gradients producing numeric overflow. Think of
 48 |  this as consecutive matrix multiplications that explode after many iterations. Decreasing the learning rate will have the
 49 |  effect of scaling down these values. Reducing the number of layers will decrease the number of multiplications. And clipping
 50 |  gradients will control this problem explicitly
 51 | 
 52 | ## Concrete Example: Learning the XOR Function
 53 | 
 54 | >Let's open the blackbox. We will now build a neural network from scratch that learns the **XOR function**.
 55 | The choice of this **non-linear function** is by no means random chance. Without backpropagation it would be hard to learn
 56 | to separate classes with a **straight line**.
 57 | 
 58 | To illustrate this important concept, note below how a straight line cannot
 59 | separate 0s and 1s, the outputs of the XOR function. **Real life problems are also non-linearly separable**.
 60 | 
 61 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/nonlinear_xor.png "Logo Title Text 1")
 62 | 
 63 | The topology of the network is simple:
 64 | - **Input X** is a two dimensional vector
 65 | - **Weights W1** is a 2x3 matrix with randomly initialized values
 66 | - **Hidden layer h1** consists of three neurons. Each neuron receives as input a weighted sum of observations, this is the inner product
 67 | highlighted in green in the below figure: **z1 = [x1, x2][w1, w2]**
 68 | - **Weights W2** is a 3x2 matrix with randomly initialized values and
 69 | - **Output layer h2** consists of two neurons since the XOR function returns either 0 (y1=[0,1]) or 1 (y2 = [1,0])
 70 | 
 71 | More visually:
 72 | 
 73 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/overview2.png "Logo Title Text 1")
 74 | 
 75 | Let's now train the model. In our simple example the trainable parameters are weights, but be aware that current
 76 | research is exploring more types of parameters to be optimized. For example shortcuts between layers, regularized distributions, topologies,
 77 | residual, learning rates, etc.
 78 | 
 79 | **Backpropagation** is a method to update the weights towards the direction (**gradient**) that minimizes a predefined error metric known as **Loss function**
 80 | given a batch of labeled observations. This algorithm has been repeatedly rediscovered and is a special case of a more general technique called
 81 | [automatic differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation) in reverse accumulation mode.
 82 | 
 83 | ### Network Initialization
 84 | 
 85 | >Let's **initialize the network weights** with random numbers.
 86 | 
 87 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/initialized_network.png "Logo Title Text 1"){:width="1300px"}
 88 | 
 89 | ### Forward Step:
 90 | 
 91 | >The goal of this step is to **forward propagate** the input X to each layer of the network until computing a vector in
 92 | the output layer h2.
 93 | 
 94 | This is how it happens:
 95 | - Linearly map input data X using weights W1 as a kernel:
 96 | 
 97 | 
 98 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/z1.png){:width="500px"}
 99 | 
100 | - Scale this weighted sum z1 with a Sigmoid function to get values of the first hidden layer h1. **Note that the original
101 | 2D vector is now mapped to a 3D space**.
102 | 
103 | 
104 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/h1.png){:width="400px"}
105 | 
106 | - A similar process takes place for the second layer h2. Let's compute first the **weighted sum** z2 of the
107 | first hidden layer, which is now input data.
108 | 
109 | 
110 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/z2.png){:width="500px"}
111 | 
112 | - And then compute their Sigmoid activation function. This vector [0.37166596 0.45414264] represents the **log probability**
113 | or **predicted vector** computed by the network given input X.
114 | 
115 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/h2.png){:width="300px"}
116 | 
117 | ### Computing the Total Loss
118 | 
119 | >Also known as "actual minus predicted", the goal of the loss function is to **quantify the distance between the predicted
120 |  vector h2 and the actual label provided by humans y**.
121 | 
122 | Note that the Loss function contains a **regularization component** that penalizes large weight values as in a Ridge
123 | regression. In other words, large squared weights values will increase the Loss function, **an error metric we indeed want to minimize**.
124 | 
125 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/loss.png){:width="500px"}
126 | 
127 | ### Backward step:
128 | >The goal of this step is to **update the weights of the neural network** in a direction that minimizes its Loss function.
129 | As we will see, this is a **recursive algorithm**, which can reuse gradients previously computed and heavily relies on
130 | **differentiable functions**. Since these updates reduce the loss function, a network ‘learns’ to approximate the label
131 | of observations with known classes. A property called **generalization**.
132 | 
133 | This step goes in **backward order** relative to the forward step. It first computes the partial derivative of the loss function
134 | with respect to the weights of the output layer (dLoss/dW2) and then the hidden layer (dLoss/dW1). Let's explain
135 | in detail each one.
136 | 
137 | #### dLoss/dW2:
138 | 
139 | The chain rule says that we can decompose the computation of gradients of a neural network into **differentiable pieces**:
140 | 
141 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/chain_w2.png){:width="500px"}
142 | 
143 | As a memory helper, these are the **function definitions** used above and their **first derivatives**:
144 | 
145 | | Function       |  First derivative |
146 | |------------------------------------------------------------ |------------------------------------------------------------|
147 | |Loss = (y-h2)^2     | dLoss/dh2 = -(y-h2) |
148 | |h2 = Sigmoid(z2) | dh2/dz2 = h2(1-h2) |
149 | |z2 = h1W2 | dz2/dW2 = h1 |
150 | |z2 = h1W2 | dz2/dh1 = W2 |
151 | 
152 | 
153 | More visually, we aim to update the weights W2 (in blue) in the figure below. In order to do that, we need to compute
154 | three **partial derivatives along the chain**.
155 | 
156 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/update_w2.png){:width="500px"}
157 | 
158 | Plugging values into these partial derivatives allows us to compute gradients with respect to weights W2 as follows.
159 | 
160 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/chain_w2_detailed.png){:width="600px"}
161 | 
162 | The result is a 3x2 matrix dLoss/dW2, which will update the original W2 values in a direction that minimizes the Loss function.
163 | 
164 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/chain_w2_numbers.png){:width="700px"}
165 | 
166 | #### dLoss/dW1:
167 | 
168 | Computing the **chain rule** for updating the weights of the first hidden layer W1 exhibits the possibility of **reusing existing
169 | computations**.
170 | 
171 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/chain_w1.png){:width="500px"}
172 | 
173 | More visually, the **path from the output layer to the weights W1** touches partial derivatives already computed in **later
174 | layers**.
175 | 
176 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/update_w1.png){:width="500px"}
177 | 
178 | For example, partial derivatives dLoss/dh2 and dh2/dz2 have already been computed as a dependency for learning weights
179 | of the output layer dLoss/dW2 in the previous section.
180 | 
181 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/chain_w1_numbers.png){:width="700px"}
182 | 
183 | Placing all derivatives together, we can execute the **chain rule** again to update the weights of the hidden layer W1:
184 | 
185 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/chain_w1_numbers_final.png){:width="700px"}
186 | 
187 | Finally, we assign the new values of the weights and have completed one iteration of training the network.
188 | 
189 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/copy_values.png){:width="150px"}
190 | 
191 | ### Implementation
192 | 
193 | Let's translate the above mathematical equations to code only using [Numpy](http://www.numpy.org/) as our **linear algebra engine**.
194 | Neural networks are trained in a loop in which each iteration presents already **calibrated input data** to the network.
195 | In this small example, let's just consider the entire dataset in each iteration. The computations of **Forward step**,
196 | **Loss**, and **Backwards step** lead to good generalization since we update the **trainable parameters** (matrices w1 and
197 | w2 in the code) with their corresponding **gradients** (matrices dL_dw1 and dL_dw2) in every cycle.
198 | Code is stored in this repository: [https://github.com/omar-florez/scratch_mlp](https://github.com/omar-florez/scratch_mlp)
199 | 
200 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/code.png)
201 | 
202 | ### Let's Run This!
203 | 
204 | See below **some neural networks** trained to approximate the **XOR function** over many iterations.
205 | 
206 | **Left plot:** Accuracy. **Central plot:** Learned decision boundary. **Right plot:** Loss function.
207 | 
208 | First let's see how a neural network with **3 neurons** in the hidden layer has small capacity. This model learns to separate 2 classes
209 | with a **simple decision boundary** that starts being a straight line but then shows a non-linear behavior.
210 | The loss function in the right plot nicely gets low as training continues.
211 | 
212 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/all_3neurons_lr_0.003_reg_0.0.gif)
213 | 
214 | Having **50 neurons** in the hidden layer notably increases the model's power to learn more **complex decision boundaries**.
215 | This could not only produce more accurate results, but also **exploding gradients**, a notable problem when training neural networks.
216 | This happens when very large gradients multiply weights during backpropagation and thus generate large updated weights.
217 | This is the reason why the **Loss value suddenly increases** during the last steps of the training (step > 90).
218 | The **regularization component** of the Loss function computes the **squared values** of weights that are already very large (sum(W^2)/2N).
219 | 
220 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/all_50neurons_lr_0.003_reg_0.0001.gif)
221 | 
222 | This problem can be avoided by **reducing the learning rate** as you can see below. Or by implementing a policy that reduces
223 | the learning rate over time. Or by enforcing a stronger regularization, maybe L1 instead of L2.
224 | **Exploding** and **vanishing gradients** are interesting phenomena and we will devote an entire analysis to them later.
225 | 
226 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/all_50neurons_lr_0.003_reg_0.000001.gif)
227 | 
228 | 


--------------------------------------------------------------------------------
/docs/index_es.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | layout: page
  3 | title: Un LEGO a la vez&#58; Explicando la Matemática de como las Redes Neuronales Aprenden
  4 | tagline:
  5 | description: Tutorial de retro-alimentación
  6 | ---
  7 | 
  8 | >Una **red neuronal** es una composición inteligente de módulos lineales y no lineales. Cuando los escogemos sabiamente, tenemos una herramienta muy poderosa para optimizar cualquier función matemática. Por ejemplo una que **separe clases con un límite de decisión no lineal**.
  9 | 
 10 | Un tópico que no es siempre explicado en detalle, a pesar de su naturaleza intuitiva y modular, es el **algoritmo de retro-alimentación** (backpropagation algorithm),
 11 | responsable de actualizar parámetros entrenables en la red. Construyamos una red neuronal desde cero para ver el funcionamiento interno de una red neuronal usando **piezas de LEGO como una analogía**, un bloque a la vez.
 12 | 
 13 | El código que implementa estos conceptos puede ser encontrado en el siguiente repositorio: [https://github.com/omar-florez/scratch_mlp](https://github.com/omar-florez/scratch_mlp)
 14 | 
 15 | ## Las Redes Neuronales  como una Composición de Piezas
 16 | 
 17 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/overview.png "Logo Title Text 1")
 18 | 
 19 | La figura de arriba muestra algo de la matemática usada para entrenar una red neuronal. Le daremos sentido a esto a lo largo del artículo.
 20 | El lector puede encontrar interesante que una red neuronal es una pila de módulos con diferentes propósitos:
 21 | 
 22 | - **Entrada X** alimenta la red neuronal con datos sin procesar, los cuales se almacenan en una matriz en la cual las observaciones son filas y las dimensiones son columnas
 23 | - **Pesos W1** proyectan entrada X a la primera capa escondida  h1. Pesos W1 trabajan entonces como un kernel lineal
 24 | - Una **función Sigmoid** que previene los números de la capa escondida de salir del rango 0-1. El resultado es un **array de activaciones neuronales** h1 = Sigmoid(WX)
 25 | 
 26 | Hasta este punto estas operaciones solo calculan  un **sistema general lineal**, el cual no tiene la capacidad de modelar interacciones no lineales.
 27 | Esto cambia cuando ponemos otro elemento en la pila, añadiendo profundidad a la estructura modular. Mientras más profunda sea la red, más interacciones no-lineales podremos aprender y problemas más complejos podremos resolver, lo cual puede explicar en parte la popularidad de redes neuronales.
 28 | 
 29 | ## ¿Por qué debería leer esto?
 30 | 
 31 | >Si uno entiende las partes internas de una red neuronal, es más fácil saber **qué cambiar primero** cuando el algoritmo no funcione como es esperado y permite definir una estrategia para **probar invariantes** y **comportamientos esperados** que uno sabe que son parte del algoritmo. Esto también es útil cuando el lector quiere **crear nuevos algoritmos que actualmente no están implementados en la librería de Machine Learning de preferencia**.
 32 | 
 33 | **Porque hacer debugging de modelos de aprendizaje de maquina es una tarea compleja**. Por experiencia,  modelos matemáticos no funcionan como son esperados al primer intento. A veces estos pueden darte una exactitud baja para datos nuevos, tomar mucho tiempo de entrenamiento o mucha memoria RAM, devolver una gran cantidad de falsos negativos o valores NaN (Not a Number), etc. Déjame mostrarte algunos casos donde saber como el algoritmo funciona puede ser útil:
 34 | 
 35 |  - Si **toma mucho tiempo para entrenar**, es quizás una buena idea incrementar el tamaño del mini-batch o array de observaciones que alimentan a la red neuronal para reducir la varianza en las observaciones y así ayudar al algoritmo a converger
 36 |  - Si se observa **valores NaN**, el algoritmo ha recibido gradientes con valores muy altos produciendo desborde de memoria RAM. Piensa esto como una secuencia de multiplicaciones de matrices que explotan después de varias iteraciones. Reducir la velocidad de aprendizaje tendrá el efecto de escalar estos valores. Reduciendo el numero de capas reducirá el numero de multiplicaciones. Y poniendo una cota superior a los gradientes (clipping gradients) controlara este problema explícitamente
 37 | 
 38 | ## Un Ejemplo Concreto: Aprendiendo la Función XOR
 39 | 
 40 | >Abramos la caja negra. Construiremos a continuación una red neuronal desde cero que aprende la **función XOR**.
 41 | La elección de esta **función no lineal** no es por casualidad. Sin backpropagation sería difícil aprender a separar clases con una **línea recta**.
 42 | 
 43 | Para ilustrar este importante concepto, note a continuación cómo una línea recta no puede separar 0s y 1s, las salidas de la función XOR. **Los problemas reales también son linealmente no separables**.
 44 | 
 45 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/nonlinear_xor.png "Logo Title Text 1")
 46 | 
 47 | La topología de la red es simple:
 48 | - **Entrada X** es un vector de dos dimensiones
 49 | - **Pesos W1** son una matriz de 2x3 dimensiones con valores inicializados de forma aleatoria
 50 | - **Capa escondida h1** consiste de 3 neuronas. Cada neurona recibe como entrada la suma de sus observaciones escaladas por sus pesos, este es el producto punto resaltado en verde en la figura de abajo: **z1 = [x1, x2][w1, w2]**
 51 | - **Pesos W2** son una matriz de 3x2 con valores inicializados de forma aleatoria y
 52 | - **Capa de salida h2** consiste de 2 neuronas ya que la función  XOR retorna 0 (y1=[0,1]) o 1 (y2 = [1,0])
 53 | 
 54 | Mas visualmente:
 55 | 
 56 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/overview2.png "Logo Title Text 1")
 57 | 
 58 | Entrenemos ahora el modelo. En nuestro ejemplo los valores entrenables son los pesos, pero tenga en cuenta que la investigación actual esta explorando nuevos tipos de parámetros a ser optimizados. Por ejemplo, atajos entre capas, distribuciones estables en las capas, topologías, velocidades de aprendizaje, etc.
 59 | 
 60 | **Backpropagation** es un método para actualizar los pesos en la dirección (**gradiente**) que minimiza una métrica de error predefinida conocida como  **función Loss**
 61 | dado un conjunto de observaciones etiquetadas. Este algoritmo ha sido repetidamente redescubierto y  es un caso especial de una técnica mas general llamada [diferenciación automática](https://en.wikipedia.org/wiki/Automatic_differentiation) en modo acumulativo reverso.
 62 | 
 63 | ### Inicialización de la Red
 64 | 
 65 | >Inicialicemos **los pesos de la red** con valores aleatorios.
 66 | 
 67 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/initialized_network.png "Logo Title Text 1"){:width="1300px"}
 68 | 
 69 | ### Propagación hacia Adelante:
 70 | 
 71 | >El objetivo de este paso es **propagar hacia delante** la entrada X a cada capa de la red hasta calcular un vector en la capa de salida h2.
 72 | 
 73 | Es así como sucede:
 74 | - Se proyecta linealmente la entrada X usando pesos W1 a manera de kernel:
 75 | 
 76 | 
 77 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/z1.png){:width="500px"}
 78 | 
 79 | - Se escala esta suma z1 con una función Sigmoid para obtener valores de la primera capa escondida. **Note que el vector original de 2D ha sido proyectado ahora a 3D**.
 80 | 
 81 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/h1.png){:width="400px"}
 82 | 
 83 | - Un proceso similar toma lugar para la segunda capa h2. Calculemos primero la **suma** z2 de la primera capa escondida, la cual es ahora un vector de entrada.
 84 | 
 85 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/z2.png){:width="500px"}
 86 | 
 87 | - Y luego calculemos su activación Sigmoid. Este vector [0.37166596 0.45414264] representa el **logaritmo de la probabilidad**
 88 | o **vector predicho** calculado por la red dados los datos de entrada X.
 89 | 
 90 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/h2.png){:width="300px"}
 91 | 
 92 | ### Calculando el Error Total
 93 | 
 94 | >También conocido como "valor real menos predicho", el objetivo de la función Loss es **cuantificar la distancia entre el vector predicho h2 y la etiqueta real proveída por un ser humano, y**.
 95 | 
 96 | Note que la función Loss contiene un **componente de regularización** que penaliza valores de los pesos muy altos a manera de una regresión L2. En otras palabras, grandes valores cuadrados de los pesos incrementarán la función Loss, **una métrica de error que en realidad queremos reducir**.
 97 | 
 98 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/loss.png){:width="500px"}
 99 | 
100 | ### Propagación hacia Atrás:
101 | >El objetivo de este paso es **actualizar los pesos de la red neuronal** en una dirección que minimiza la función Loss.
102 | Como veremos más adelante, este es un **algoritmo recursivo**, el cual reutiliza gradientes previamente calculadas y se basa plenamente en
103 | **funciones diferenciables**. Ya que estas actualizaciones reducen la función Loss, una red ‘aprende’ a aproximar las etiquetas de nuevas observaciones. Una propiedad llamada **generalización**.
104 | 
105 | Este paso va en **orden inverso** al de la propagación hacia adelante. Calcula la primera derivada de la función Loss con respecto a los pesos de la red neuronal de la capa de salida (dLoss/dW2) y luego los de la capa escondida (dLoss/dW1). Expliquemos en detalle cada uno.
106 | 
107 | #### dLoss/dW2:
108 | 
109 | La regla de la cadena dice que podemos descomponer el cálculo de gradientes de una red neuronal en **funciones diferenciables**:
110 | 
111 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/chain_w2.png){:width="500px"}
112 | 
113 | Aquí están las **definiciones de funciones** usadas arriba y sus **primeras derivadas**:
114 | 
115 | | Función        |  Primera derivada |
116 | |------------------------------------------------------------ |------------------------------------------------------------|
117 | |Loss = (y-h2)^2 / 2     | dLoss/dh2 = -(y-h2) |
118 | |h2 = Sigmoid(z2) | dh2/dz2 = h2(1-h2) |
119 | |z2 = h1W2 | dz2/dW2 = h1 |
120 | |z2 = h1W2 | dz2/dh1 = W2 |
121 | 
122 | 
123 | Más visualmente, queremos actualizar los pesos W2 (en azul) en la figura de abajo. Para eso necesitamos calcular tres **derivadas parciales a lo largo de la cadena**.
124 | 
125 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/update_w2.png){:width="500px"}
126 | 
127 | Insertar esos valores en esas derivadas parciales nos permite calcular los gradientes con respecto a los pesos W2 como sigue.
128 | 
129 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/chain_w2_detailed.png){:width="600px"}
130 | 
131 | El resultado es una matriz de 3x2 llamada dLoss/dW2, la cual actualizará los valores originales de W2 en una dirección que minimiza la función Loss.
132 | 
133 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/chain_w2_numbers.png){:width="700px"}
134 | 
135 | #### dLoss/dW1:
136 | 
137 | Calculando la **regla de la cadena** para actualizar los pesos de la primera capa escondida W1 exhibe la posibilidad de  **reutilizar cálculos existentes**.
138 | 
139 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/chain_w1.png){:width="500px"}
140 | 
141 | Más visualmente, el **camino desde la capa de salida hasta los pesos W1** toca derivadas parciales ya calculadas en capas superiores.
142 | 
143 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/update_w1.png){:width="500px"}
144 | 
145 | Por ejemplo, las derivadas parciales dLoss/dh2 y dh2/dz2 ya han sido calculadas como una dependencia para aprender los pesos de la capa de salida dLoss/dW2 en la sección anterior.
146 | 
147 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/chain_w1_numbers.png){:width="700px"}
148 | 
149 | Ubicando todas las derivadas juntas, podemos ejecutar la **regla de la cadena** de nuevo para actualizar los pesos de la capa escondida W1:
150 | 
151 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/chain_w1_numbers_final.png){:width="700px"}
152 | 
153 | Finalmente, asignamos los nuevos valores de los pesos y hemos completado una iteración del entrenamiento de la red neuronal!
154 | 
155 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/copy_values.png){:width="150px"}
156 | 
157 | ### Implementación
158 | 
159 | Traduzcamos las ecuaciones matemáticas de arriba en código solamente utilizando [Numpy](http://www.numpy.org/) como nuestro **motor de álgebra lineal**.
160 | Las redes neuronales son entrenadas en un loop en el cual cada iteración presenta **datos de entrada ya calibrados** a la red.
161 | En este pequeño ejemplo, consideremos todo el dataset en cada iteración. Los cálculos del paso de **Propagación hacia adelante**,
162 | **Loss**, y **Propagación hacia atrás** conducen a obtener una buena generalización ya que actualizaremos los **parámetros entrenables** (matrices W1 y W2 en el código) con sus correspondientes **gradientes** (matrices dL_dw1 y dL_dw2) en cada ciclo.
163 | El código es almacenado en este repositorio: [https://github.com/omar-florez/scratch_mlp](https://github.com/omar-florez/scratch_mlp)
164 | 
165 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/code.png)
166 | 
167 | ### Ejecutemos Esto!
168 | 
169 | Mire abajo **algunas redes neuronales** entrenadas para aproximar la **función XOR** en múltiples iteraciones.
170 | 
171 | **Izquierda:** Exactitud. **Centro:** Borde de decisión aprendido. **Derecha:** Función Loss.
172 | 
173 | Primero veamos cómo una red neuronal con **3 neuronas** en la capa escondida tiene una pequeña capacidad. Este modelo aprende a separar dos clases con un **simple borde de decisión** que empieza como una línea recta, pero luego muestra un comportamiento no lineal.
174 | La función Loss en la derecha suavemente se reduce mientras el proceso de aprendizaje ocurre.
175 | 
176 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/all_3neurons_lr_0.003_reg_0.0.gif)
177 | 
178 | Tener **50 neuronas** en la capa escondida incrementa notablemente el poder del modelo para aprender **bordes de decisión más complejos**.
179 | Esto podría no solo producir resultados más exactos, sino también **explotar las gradientes**, un problema notable cuando se entrenan redes neuronales.
180 | Esto sucede cuando gradientes muy grandes multiplican pesos durante la propagación hacia atrás y así generan pesos actualizados muy grandes.
181 | Esta es la razón por la que **valores de la función Loss repentinamente se incrementan** durante los últimos pasos del entrenamiento (step > 90).
182 | El **componente de regularización** de la función Loss calcula los **valores cuadrados** de los pesos que ya tienen valores muy altos (sum(W^2)/2N).
183 | 
184 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/all_50neurons_lr_0.003_reg_0.0001.gif)
185 | 
186 | Este problema puede ser evitado **reduciendo la velocidad de aprendizaje** como puede ver abajo. O implementando una política que reduzca la velocidad de aprendizaje con el tiempo. O imponiendo una regularización más fuerte, quizás L1 en vez de L2.
187 | Gradientes que **explotan** y se **desvanecen** son fenómenos interesantes y haremos un análisis detallado de eso más adelante.
188 | 
189 | ![alt text](https://raw.githubusercontent.com/omar-florez/scratch_mlp/master/docs/assets/all_50neurons_lr_0.003_reg_0.000001.gif)
190 | 
191 | 
192 | 


--------------------------------------------------------------------------------
/docs/pages/independent_site.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | layout: page
  3 | title: Making an independent website
  4 | description: How to make an independent website with GitHub Pages.
  5 | ---
  6 | 
  7 | This is what to do if you just want a website. (This page is a bit
  8 | long, but it's really not that much work.)
  9 | 
 10 | ### First things
 11 | 
 12 | Start by cloning
 13 | [the repository for the present site](https://github.com/kbroman/simple_site). (Or,
 14 | alternatively, fork it and then clone your own version.)
 15 | 
 16 |     git clone https://github.com/kbroman/simple_site
 17 | 
 18 | Then change the name of that directory to something meaningful.
 19 | 
 20 |     mv simple_site something_meaningful
 21 | 
 22 | (Of course, don't use `something_meaningful` but rather
 23 | _something meaningful_.)
 24 | 
 25 | Now change into that directory and remove the `.git` directory
 26 | (because you don't want the history of _my_ repository).
 27 | 
 28 |     cd something_meaningful
 29 |     rm -r .git
 30 | 
 31 | Now make it a git repository again.
 32 | 
 33 |     git init
 34 | 
 35 | ### Things not to change
 36 | 
 37 | You'll need to keep the following files and directories largely unchanged.
 38 | 
 39 |     Rakefile
 40 |     _includes
 41 |     _layouts
 42 |     _plugins
 43 |     assets/themes
 44 | 
 45 | We _will_ change one file within `_includes/`; see below.
 46 | 
 47 | ### Edit the `_config.yml` file
 48 | 
 49 | The
 50 | [`_config.yml`](https://github.com/kbroman/simple_site/blob/gh-pages/_config.yml)
 51 | file contains a bunch of configuration information. You'll want to
 52 | edit this file to replace my information with your information.
 53 | 
 54 | Perhaps edit the
 55 | [line with `exclude:`](https://github.com/kbroman/simple_site/blob/gh-pages/_config.yml#L5)
 56 | if you've named `License.md` and/or `ReadMe.md` differently. (I've
 57 | edited this line a bit, here.)
 58 | 
 59 |     exclude: [..., "ReadMe.md", "Rakefile", "License.md"]
 60 | 
 61 | Edit the
 62 | [lines about the site name and author](https://github.com/kbroman/simple_site/blob/gh-pages/_config.yml#L11-L17).
 63 | 
 64 |     title : simple site
 65 |     author :
 66 |       name : Karl Broman
 67 |       email : kbroman@gmail.com
 68 |       github : kbroman
 69 |       twitter : kwbroman
 70 |       feedburner : nil
 71 | 
 72 | Edit the
 73 | [`production_url` line](https://github.com/kbroman/simple_site/blob/gh-pages/_config.yml#L19)
 74 | by replacing `kbroman` with _your_ github user name, and replace
 75 | `simple_site` with the name that your repository will have on github
 76 | (`something_meaningful`?).
 77 | 
 78 |     production_url : https://kbroman.github.io/simple_site
 79 | 
 80 | Note that the `https` (vs `http`) is important here; see
 81 | &ldquo;[Securing your github pages site with https](https://help.github.com/articles/securing-your-github-pages-site-with-https/).&rdquo;
 82 | (I need to use `http` because my site uses the custom domain
 83 | `kbroman.org`, but you likely need `https`.)
 84 | 
 85 | Replace the
 86 | [`BASE_PATH` line](https://github.com/kbroman/simple_site/blob/gh-pages/_config.yml#L52)
 87 | with the same url.
 88 | 
 89 |     BASE_PATH : https://kbroman.github.io/simple_site
 90 | 
 91 | There's also an
 92 | [`ASSET_PATH` line](https://github.com/kbroman/simple_site/blob/gh-pages/_config.yml#L62),
 93 | but you can leave that commented-out (with the `#` symbol at the beginning).
 94 | 
 95 | Note that for the `BASE_PATH`, I actually have
 96 | `http://kbroman.org/` in place of `https://kbroman.github.io/`. I set up
 97 | a
 98 | [custom domain](https://help.github.com/articles/setting-up-a-custom-domain-with-github-pages),
 99 | which involved a series of emails with a DNS provider. I
100 | don't totally understand how it works, and I'm not _entirely_ sure
101 | that I've done it right. But if you want to have a custom domain, take
102 | a look at
103 | [that GitHub help page](https://help.github.com/articles/setting-up-a-custom-domain-with-github-pages).
104 | 
105 | ### Edit `_includes/themes/twitter/default.html`
106 | 
107 | The
108 | [`_includes/themes/twitter/default.html`](https://github.com/kbroman/simple_site/blob/gh-pages/_includes/themes/twitter/default.html)
109 | file defines how a basic page will look on your site. In particular,
110 | it contains a bit of html code for a footer, if you want one.
111 | 
112 | Find the
113 | [footer for my site](https://github.com/kbroman/simple_site/blob/gh-pages/_includes/themes/twitter/default.html#L47-L50)
114 | and remove it or edit it to suit. This is the only bit of html you'll
115 | have to deal with.
116 | 
117 |     <!-- start of Karl's footer; modify this part -->
118 |         <a href="https://creativecommons.org/publicdomain/zero/1.0/">  ...
119 |         <a href="http://kbroman.org">Karl Broman</a>
120 |     <!-- end of Karl's footer; modify this part -->
121 | 
122 | ### Edit or remove the Markdown files
123 | 
124 | Edit the
125 | [`index.md`](https://raw.githubusercontent.com/kbroman/simple_site/gh-pages/index.md)
126 | file, which will become the main page for your site.
127 | 
128 | First, edit the initial chunk with a different title and tagline. Feel
129 | free to just delete the tagline.
130 | 
131 |     ---
132 |     layout: page
133 |     title: simple site
134 |     tagline: Easy websites with GitHub Pages
135 |     ---
136 | 
137 | Now edit (or, for now, just remove) the rest of the file.
138 | 
139 | Now go into the [`pages/`](https://github.com/kbroman/simple_site/blob/gh-pages/pages) directory and remove or rename and modify
140 | all of the Markdown files in there.
141 | 
142 | Note that when you link to any of these Markdown-based pages, you'll
143 | want to use a `.html` extension rather than `.md`. For example, look
144 | at the
145 | [main page](https://raw.githubusercontent.com/kbroman/simple_site/gh-pages/index.md)
146 | for this site; the links in the bullet points for the various pages
147 | look like this:
148 | 
149 |     - [Overview](pages/overview.html)
150 |     - [Making an independent website](pages/independent_site.html)
151 |     - [Making a personal site](pages/user_site.html)
152 |     - [Making a site for a project](pages/project_site.html)
153 |     - [Making a jekyll-free site](pages/nojekyll.html)
154 |     - [Testing your site locally](pages/local_test.html)
155 |     - [Resources](pages/resources.html)
156 | 
157 | ### Commit all of these changes.
158 | 
159 | At the start, we'd removed the `.git/` subdirectory (with the history
160 | of _my_ repository) and then used `git init` to make it a new git
161 | repository.
162 | 
163 | Now you want to add and commit all of the files, as modified.
164 | 
165 |     git add .
166 |     git commit -m "Initial commit"
167 | 
168 | Then change the name of the master branch to `gh-pages`.
169 | 
170 |     git branch -m master gh-pages
171 | 
172 | ### Push everything to GitHub
173 | 
174 | Now go back to GitHub and create a new repository, called something
175 | meaningful. (I'll again pretend that it's explicitly
176 | `something_meaningful`.)
177 | 
178 | Then go back to the command line and push your repository to
179 | [GitHub](https://github.com).
180 | 
181 |     git remote add origin git@github.com:username/something_meaningful
182 | 
183 | Replace `username` with your GitHub user name and
184 | `something_meaningful` with the name of your repository. And you might
185 | want to use the `https://` construction instead, if you're not using ssh.
186 | 
187 |     git remote add origin https://github.com/username/something_meaningful
188 | 
189 | Finally, push everything to GitHub.
190 | 
191 |     git push -u origin gh-pages
192 | 
193 | Note that we're using `gh-pages` and not `master` here, as we want
194 | this stuff in a `gh-pages` branch.
195 | 
196 | ### Check whether it worked
197 | 
198 | Go to `https://username.github.io/something_meaningful` and cross your
199 | fingers that it worked. (Really, _I_ should be crossing my fingers.)
200 | 
201 | ### Up next
202 | 
203 | Now go to [making a personal site](user_site.html).
204 | 


--------------------------------------------------------------------------------
/docs/pages/local_test.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: page
 3 | title: Testing your site locally
 4 | description: How to test your GitHub Pages site locally.
 5 | ---
 6 | 
 7 | To test your site locally, you'll need
 8 | 
 9 | - [ruby](https://www.ruby-lang.org/en/)
10 | - the [github-pages](https://github.com/github/pages-gem) gem
11 | 
12 | ### Installing ruby
13 | 
14 | There are
15 | [lots of different ways to install ruby](https://www.ruby-lang.org/en/installation/).
16 | 
17 | 
18 | In Mac OS X, older versions of ruby will already be installed.  But I
19 | use the [Ruby Version Manager (RVM)](https://rvm.io/) to have a more
20 | recent version.  You could also use [Homebrew](https://brew.sh/).
21 | 
22 | In Windows, use [RubyInstaller](https://rubyinstaller.org/). (In most
23 | of this tutorial, I've assumed you're using a Mac or some flavor of
24 | Unix. It's possible that none of this was usable for Windows
25 | folks. Sorry!)
26 | 
27 | 
28 | ### Installing the github-pages gem
29 | 
30 | Run the following command:
31 | 
32 |     gem install github-pages
33 | 
34 | This will install the `github-pages` gem and all dependencies
35 | (including [jekyll](https://jekyllrb.com/)).
36 | 
37 | Later, to update the gem, type:
38 | 
39 |     gem update github-pages
40 | 
41 | 
42 | ### Testing your site locally
43 | 
44 | To construct and test your site locally, go into the directory and
45 | type
46 | 
47 |     jekyll build
48 | 
49 | This will create (or modify) a `_site/` directory, containing
50 | everything from `assets/`, and then the `index.md` and all
51 | `pages/*.md` files, converted to html. (So there'll be
52 | `_site/index.html` and the various `_site/pages/*.html`.)
53 | 
54 | Type the following in order to &ldquo;serve&rdquo; the site.
55 | This will first run `build`, and so it does _not_ need to be
56 | preceded by `jekyll build`.
57 | 
58 |     jekyll serve
59 | 
60 | Now open your browser and go to <http://localhost:4000>
61 | 


--------------------------------------------------------------------------------
/docs/pages/nojekyll.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: page
 3 | title: Making a jekyll-free site
 4 | description: Making a GitHub Pages site without Jekyll.
 5 | ---
 6 | 
 7 | If you want a plain website without all of this
 8 | [Jekyll](https://jekyllrb.com/)/[Markdown](https://daringfireball.net/projects/markdown/)
 9 | business, you just need to fill the repository with plain html and css
10 | and avoid directory names that start with underscores.
11 | 
12 | If you want some directories with underscores, just include a file
13 | named `.nojekyll`. You might just include that anyway.  See the
14 | [source for my kbroman.github.io](https://github.com/kbroman/kbroman.github.io)
15 | site.
16 | 
17 | I recommend using [Jekyll](https://jekyllrb.com/) and
18 | [Markdown](https://daringfireball.net/projects/markdown/),
19 | though. Markdown is just so much nicer than html, to write and
20 | maintain.
21 | 
22 | ### Up next
23 | 
24 | Now go to [testing your site locally](local_test.html).
25 | 


--------------------------------------------------------------------------------
/docs/pages/overview.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | layout: page
  3 | title: Overview of GitHub Pages
  4 | description: Overview of construction of a website with GitHub Pages
  5 | ---
  6 | 
  7 | The present site is simple, with a style derived from
  8 | [JekyllBootstrap](https://jekyllbootstrap.com/) and
  9 | [Twitter Bootstrap](https://getbootstrap.com) with a particular
 10 | theme. I'll explain how to create a site with exactly this style. If
 11 | you want something else, try the
 12 | [GitHub Pages](https://pages.github.com) automatic site generator, or
 13 | look at the [resources page](resources.html).
 14 | 
 15 | These GitHub Pages sites are constructed by having a `gh-pages` branch
 16 | of a GitHub repository, with specific files laid out in a specific
 17 | way. To see the structure of such a repository, look at the
 18 | [repository for the present site](https://github.com/kbroman/simple_site).
 19 | 
 20 |     _includes/
 21 |     _layouts/
 22 |     _plugins/
 23 |     assets/
 24 |     pages/
 25 |     .gitignore
 26 |     License.md
 27 |     Rakefile
 28 |     ReadMe.md
 29 |     _config.yml
 30 |     index.md
 31 | 
 32 | The directories beginning with an underscore contain materials
 33 | defining the basic layout and style for the site. If you
 34 | [build the site locally](local_test.html) (for testing
 35 | purposes), there will also be a `_site/` directory containing the
 36 | actual site (with
 37 | [Markdown](https://daringfireball.net/projects/markdown/) files
 38 | converted to html). You don't want the `_site/` directory in your
 39 | repository, so include that in the `.gitignore` file.
 40 | 
 41 | The
 42 | [`assets/`](https://github.com/kbroman/simple_site/tree/gh-pages/assets)
 43 | directory contains any non-Markdown materials for the site (e.g.,
 44 | images or example code). These files won't be touched in the
 45 | conversion but will be just copied over as-is.
 46 | 
 47 | The
 48 | [`pages/`](https://github.com/kbroman/simple_site/tree/gh-pages/pages)
 49 | directory contains
 50 | [Markdown](https://daringfireball.net/projects/markdown/) files that
 51 | will become html pages on your site.
 52 | 
 53 | The
 54 | [`_config.yml`](https://github.com/kbroman/simple_site/blob/gh-pages/_config.yml)
 55 | file contains all sorts of configuration parameters (some of which
 56 | you'll need to modify). The [`Rakefile`](https://github.com/kbroman/simple_site/blob/gh-pages/Rakefile) contains instructions for
 57 | the conversion; you won't modify this file.
 58 | 
 59 | It's best to always include
 60 | [`License.md`](https://github.com/kbroman/simple_site/tree/gh-pages/License.md)
 61 | and
 62 | [`ReadMe.md`](https://github.com/kbroman/simple_site/tree/gh-pages/ReadMe.md)
 63 | files. But you wouldn't need these to be placed on the website; they'd
 64 | just be viewed in the repository on [GitHub](https://github.com). The
 65 | [`_config.yml`](https://github.com/kbroman/simple_site/tree/gh-pages/_config.yml)
 66 | file contains
 67 | [a line sort of like the following](https://github.com/kbroman/simple_site/blob/gh-pages/_config.yml#L5)
 68 | (but listing a few more files), indicating files to _not_ move to the
 69 | final site.
 70 | 
 71 |     exclude: ["ReadMe.md", "Rakefile", "License.md"]
 72 | 
 73 | Finally,
 74 | [`index.md`](https://raw.githubusercontent.com/kbroman/simple_site/gh-pages/index.md)
 75 | is the Markdown version of the main page for your site.
 76 | 
 77 | The
 78 | [`index.md`](https://raw.githubusercontent.com/kbroman/simple_site/gh-pages/index.md)
 79 | file and the Markdown files in
 80 | [`pages/`](https://github.com/kbroman/simple_site/blob/gh-pages/pages)
 81 | (e.g.,
 82 | [the present page](https://raw.githubusercontent.com/kbroman/simple_site/gh-pages/pages/overview.md))
 83 | have a header with a particular form:
 84 | 
 85 |     ---
 86 |     layout: page
 87 |     title: simple site
 88 |     tagline: Easy websites with GitHub Pages
 89 |     description: Minimal tutorial on making a simple website with GitHub Pages
 90 |     ---
 91 | 
 92 | In the conversion of the site from Markdown to html, this bit says
 93 | that the current file is to be converted with the &ldquo;page&rdquo;
 94 | layout, and gives the title and the (optional) &ldquo;tagline.&rdquo;
 95 | The "`description:`" part gets converted into
 96 | `<meta name="description" content="Minimal tutorial on...">`
 97 | which, in principle, may be used in the results of google searches.
 98 | 
 99 | The rest is basically plain Markdown, though the present site is
100 | configured to use [kramdown](https://kramdown.gettalong.org/) as the
101 | Markdown converter (via
102 | [this line in the `_config.yml` file](https://github.com/kbroman/simple_site/blob/gh-pages/_config.yml#L23)).
103 | Read about the [kramdown syntax](https://kramdown.gettalong.org/syntax.html)
104 | or just look at the
105 | [quick reference](https://kramdown.gettalong.org/quickref.html).
106 | 
107 | Now go to the page about [how to make an independent website](independent_site.html).
108 | 


--------------------------------------------------------------------------------
/docs/pages/project_site.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: page
 3 | title: Making a project site
 4 | description: Using GitHub Pages to make a webpage for a GitHub-hosted project
 5 | ---
 6 | 
 7 | If you want to make a website for a GitHub-hosted project, as I've
 8 | done for my [R/qtlcharts package](http://kbroman.org/qtlcharts),
 9 | you follow my
10 | [instructions for making an independent site](independent_site.html),
11 | with just a few modifications.
12 | 
13 | Go to your local repository and create and switch to an
14 | &ldquo;orphan&rdquo; `gh-pages` branch. (It's an &ldquo;orphan&rdquo;
15 | branch because it won't contain the whole history of your project.)
16 | 
17 | 
18 |     cd my_repo
19 |     git checkout --orphan gh-pages
20 | 
21 | Remove _everything_.
22 | 
23 |     git rm -rf .
24 | 
25 | Now go back one directory and clone
26 | [the present repository](https://github.com/kbroman/simple_site).
27 | 
28 |     cd ..
29 |     git clone https://github.com/kbroman/simple_site
30 | 
31 | Change into that directory and remove the `.git/` directory.
32 | 
33 |     cd simple_site
34 |     \rm -rf .git
35 | 
36 | Move all of the stuff from that directory into _your_ repository
37 | (in the new and empty `gh-pages` branch).
38 | 
39 |     cd ../my_repo
40 |     cp -r ../simple_site/. .
41 | 
42 | Edit everything [as before](independent_site.html).
43 | Commit everything and push the `gh-pages` branch to github.
44 | 
45 |     git add .
46 |     git commit -m "Initial commit of web site"
47 |     git push origin gh-pages
48 | 
49 | Now you'll switch back-and-forth between the `gh-pages` branch (to
50 | edit your website) and the `master` or other branches (to edit your
51 | project).
52 | 
53 | Personally, I'll clone a separate copy of my repository, one directory
54 | up, called `Web/`, that is sitting in the `gh-pages` branch. Then
55 | rather than using `git checkout` to switch between the code and the
56 | web, I switch from one directory to another.
57 | 
58 | ### Up next
59 | 
60 | Now go to [making a jekyll-free site](nojekyll.html) or
61 | [testing your site locally](local_test.html).
62 | 


--------------------------------------------------------------------------------
/docs/pages/resources.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: page
 3 | title: Resources
 4 | description: Resources related to GitHub Pages websites
 5 | ---
 6 | 
 7 | This is a minimal tutorial to get you started. There are lots of
 8 | other resources.
 9 | 
10 | - [GitHub Pages](https://pages.github.com)
11 | - [Jekyll](https://jekyllrb.com)
12 | - [JekyllBootstrap](https://jekyllbootstrap.com)
13 | - [Twitter Bootstrap](https://getbootstrap.com)
14 | - [Markdown](https://daringfireball.net/projects/markdown)
15 | - [kramdown](https://kramdown.gettalong.org)
16 | 
17 | - [Get started with GitHub Pages (plus bonus Jekyll)](https://24ways.org/2013/get-started-with-github-pages/)
18 | - [Using GitHub Pages to host your website](https://bcreativeweb.blogspot.com/2013/08/using-github-pages-to-host-your-website.html)
19 | - [A guide to using GitHub Pages](https://www.thinkful.com/learn/a-guide-to-using-github-pages/)
20 | - [Using GitHub to power a web project: How and why](https://audreywatters.com/2013/07/07/how-to-run-your-site-on-github/)
21 | - [The power and potential of GitHub Pages](https://konklone.com/post/the-power-and-potential-of-github-pages)
22 | - [GitHub Pages for projects (blog post)](http://blog.aquinzi.com/gh-pages-project/)
23 | - [Using R Markdown, Jekyll & GitHub for a website](http://jason.bryer.org/posts/2012-12-10/Markdown_Jekyll_R_for_Blogging.html)
24 | - [knitr, github, and a new phase for the lab notebook](http://carlboettiger.info/2012/03/21/knitr-github-and-a-new-phase-for-the-lab-notebook.html)
25 | 


--------------------------------------------------------------------------------
/docs/pages/user_site.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: page
 3 | title: Making a personal site
 4 | description: How to make a personal web page with GitHub Pages
 5 | ---
 6 | 
 7 | Your [GitHub Pages](https://pages.github.com) sites will appear at
 8 | 
 9 |     https://username.github.io/some_site
10 | 
11 | Of course, this will be with _your_ GitHub user name and with the
12 | names of your GitHub repositories.
13 | 
14 | I'd recommend putting _something_ at `https://username.github.io`,
15 | since people might look there. (When I started with GitHub Pages, I
16 | thought you were _required_ to have such a site, but either they've
17 | changed things or I'm just mistaken; you don't _need_ this anymore.)
18 | 
19 | You create one of these sites in much the same way as you
20 | [create an independent GitHub Pages site](independent_site.html). The only
21 | real differences are
22 | 
23 | - The repository needs to be called `username.github.io`
24 | - The site sits in the `master` branch rather than the `gh-pages` branch.
25 | 
26 | _My_ personal site, [kbroman.github.io](https://kbroman.github.io)
27 | (which shows up as [kbroman.org](http://kbroman.org); see the
28 | [GitHub help page on setting up a custom domain](https://help.github.com/articles/setting-up-a-custom-domain-with-github-pages))
29 | is minimal and is actually written in straight html rather than
30 | [Markdown](https://daringfireball.net/projects/markdown/). If you
31 | want, you could just make an edited version of my site:
32 | 
33 | - Clone my
34 |   [kbroman.github.io repository](https://github.com/kbroman/kbroman.github.io)
35 | - Remove the `.git` directory
36 | - Edit `index.html`, `404.html`, `README.md`, and `License.md`
37 | - Use `git init`, `git add`, `git commit`
38 | - Create a new repository on GitHub named `username.github.io`
39 | - Go back to the command line and do `git remote add` and
40 |   `git push -u origin master`
41 | 
42 | Alternatively, you could use the procedure I described for
43 | [making an independent website](independent_site.html). The only thing
44 | you do differently is to use the `master` branch rather than a
45 | `gh-pages` branch.
46 | 
47 | A final note: the `404.html` file will serve as the &ldquo;page not
48 | found&rdquo; page for _all_ of your GitHub Pages (that is, if you
49 | _want_ a personalized 404 page).
50 | 
51 | ### Up next
52 | 
53 | Now go to [making a project site](project_site.html).
54 | 


--------------------------------------------------------------------------------
/scratch_mlp.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # @Author: Omar U. Florez
  3 | # @Date:   October 28, 2017
  4 | 
  5 | '''
  6 | Train a feed forward neural network using only numpy Math library. This contains step by step explanation of the 
  7 | learning process of the network. 
  8 | '''
  9 | 
 10 | import numpy as np
 11 | import ipdb
 12 | from scratch_mlp import utils
# Runs at import time: deletes and recreates the plot output folders so every
# training run starts with empty 'accuracy', 'boundary' and 'loss' directories.
utils.reset_folders()
 14 | 
 15 | def load_XOR_data(N=300):
 16 |     rng = np.random.RandomState(0)
 17 |     X = rng.randn(N, 2)
 18 |     y = np.array(np.logical_xor(X[:, 0] > 0, X[:, 1] > 0), dtype=int)
 19 |     y = np.expand_dims(y, 1)
 20 |     y_hot_encoded = []
 21 | 
 22 |     for x in y:
 23 |         if x == 0:
 24 |             y_hot_encoded.append([1,0])
 25 |         else:
 26 |             y_hot_encoded.append([0, 1])
 27 |     return X, np.array(y_hot_encoded)
 28 | 
 29 | def sigmoid(z, first_derivative=False):
 30 |     if first_derivative:
 31 |         return z*(1.0-z)
 32 |     return 1.0/(1.0+np.exp(-z))
 33 | 
 34 | def tanh(z, first_derivative=True):
 35 |     if first_derivative:
 36 |         return (1.0-z*z)
 37 |     return (1.0-np.exp(-z))/(1.0+np.exp(-z))
 38 | 
 39 | def inference(data, weights):
 40 |     h1 = sigmoid(np.matmul(data, weights[0]))
 41 |     logits = np.matmul(h1, weights[1])
 42 |     probs = np.exp(logits)/np.sum(np.exp(logits), axis=1, keepdims=True)
 43 |     return np.argmax(probs, axis=1)
 44 | 
 45 | def run():
 46 |     #size of minibatch: int(X.shape[0])
 47 |     N = 50
 48 |     X, y = load_XOR_data(N=300)
 49 |     input_dim = int(X.shape[1])
 50 |     hidden_dim = 10
 51 |     output_dim = 2
 52 |     num_epochs = 1000000
 53 |     learning_rate= 1e-3
 54 |     reg_coeff = 1e-6
 55 |     losses = []
 56 |     accuracies=[]
 57 | 
 58 |     #---------------------------------------------------------------------------------------------------------------
 59 |     # Initialize weights:
 60 |     np.random.seed(2017)
 61 |     w1 = 2.0*np.random.random((input_dim, hidden_dim))-1.0      #w0=(2,hidden_dim)
 62 |     w2 = 2.0*np.random.random((hidden_dim, output_dim))-1.0     #w1=(hidden_dim,2)
 63 | 
 64 |     #Calibratring variances with 1/sqrt(fan_in)
 65 |     w1 /= np.sqrt(input_dim)
 66 |     w2 /= np.sqrt(hidden_dim)
 67 |     for i in range(num_epochs):
 68 | 
 69 |         index = np.arange(X.shape[0])[:N]
 70 |         #is want to shuffle indices: np.random.shuffle(index)
 71 | 
 72 |         #---------------------------------------------------------------------------------------------------------------
 73 |         # Forward step:
 74 |         h1 = sigmoid(np.matmul(X[index], w1))                   #(N, 3)
 75 |         logits = sigmoid(np.matmul(h1, w2))                     #(N, 2)
 76 |         probs = np.exp(logits)/np.sum(np.exp(logits), axis=1, keepdims=True)
 77 |         h2 = logits
 78 | 
 79 |         #---------------------------------------------------------------------------------------------------------------
 80 |         # Definition of Loss function: mean squared error plus Ridge regularization
 81 |         L = np.square(y[index]-h2).sum()/(2*N) + reg_coeff*(np.square(w1).sum()+np.square(w2).sum())/(2*N)
 82 | 
 83 |         losses.append([i,L])
 84 | 
 85 |         #---------------------------------------------------------------------------------------------------------------
 86 |         # Backward step: Error = W_l e_l+1 f'_l
 87 |         #       dL/dw2 = dL/dh2 * dh2/dz2 * dz2/dw2
 88 |         dL_dh2 = -(y[index] - h2)                               #(N, 2)
 89 |         dh2_dz2 = sigmoid(h2, first_derivative=True)            #(N, 2)
 90 |         dz2_dw2 = h1                                            #(N, hidden_dim)
 91 |         #Gradient for weight2:   (hidden_dim,N)x(N,2)*(N,2)
 92 |         dL_dw2 = dz2_dw2.T.dot(dL_dh2*dh2_dz2) + reg_coeff*np.square(w2).sum()
 93 | 
 94 |         #dL/dw1 = dL/dh1 * dh1/dz1 * dz1/dw1
 95 |         #       dL/dh1 = dL/dz2 * dz2/dh1
 96 |         #       dL/dz2 = dL/dh2 * dh2/dz2
 97 |         dL_dz2 = dL_dh2 * dh2_dz2                               #(N, 2)
 98 |         dz2_dh1 = w2                                            #z2 = h1*w2
 99 |         dL_dh1 =  dL_dz2.dot(dz2_dh1.T)                         #(N,2)x(2, hidden_dim)=(N, hidden_dim)
100 |         dh1_dz1 = sigmoid(h1, first_derivative=True)            #(N,hidden_dim)
101 |         dz1_dw1 = X[index]                                      #(N,2)
102 |         #Gradient for weight1:  (2,N)x((N,hidden_dim)*(N,hidden_dim))
103 |         dL_dw1 = dz1_dw1.T.dot(dL_dh1*dh1_dz1) + reg_coeff*np.square(w1).sum()
104 | 
105 |         #weight updates:
106 |         w2 += -learning_rate*dL_dw2
107 |         w1 += -learning_rate*dL_dw1
108 |         if True: #(i+1)%1000==0:
109 |             y_pred = inference(X, [w1, w2])
110 |             y_actual = np.argmax(y, axis=1)
111 |             accuracy = np.sum(np.equal(y_pred,y_actual))/len(y_actual)
112 |             accuracies.append([i, accuracy])
113 | 
114 |         if (i+1)% 10000 == 0:
115 |             print('Epoch %d\tLoss: %f Average L1 error: %f Accuracy: %f' %(i, L, np.mean(np.abs(dL_dh2)), accuracy))
116 |             save_filepath = './scratch_mlp/plots/boundary/image_%d.png'%i
117 |             text = 'Batch #: %d    Accuracy: %.2f    Loss value: %.2f'%(i, accuracy, L)
118 |             utils.plot_decision_boundary(X, y_actual, lambda x: inference(x, [w1, w2]),
119 |                                          save_filepath=save_filepath, text = text)
120 |             save_filepath = './scratch_mlp/plots/loss/image_%d.png' % i
121 |             utils.plot_function(losses, save_filepath=save_filepath, ylabel='Loss', title='Loss estimation')
122 |             save_filepath = './scratch_mlp/plots/accuracy/image_%d.png' % i
123 |             utils.plot_function(accuracies, save_filepath=save_filepath, ylabel='Accuracy', title='Accuracy estimation')
124 | 
125 | run()


--------------------------------------------------------------------------------
/slides/2017_Summer_School_LACCI.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/omar-florez/scratch_mlp/133c565e7e386b9852aa5f89c99273078594e7a7/slides/2017_Summer_School_LACCI.pdf


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | # Loading Plotting Utilities
  2 | import matplotlib.pyplot as plt
  3 | import matplotlib.gridspec as gridspec
  4 | 
  5 | import numpy as np
  6 | import os
  7 | import imageio
  8 | import shutil
  9 | 
 10 | def plot_xor():
 11 |     xx, yy = np.meshgrid(np.linspace(-3, 3, 50), np.linspace(-3, 3, 50))
 12 |     rng = np.random.RandomState(0)
 13 |     X = rng.randn(300, 2)
 14 |     y = np.array(np.logical_xor(X[:, 0] > 0, X[:, 1] > 0), dtype=int)
 15 | 
 16 |     gs = gridspec.GridSpec(2, 2)
 17 |     fig = plt.figure(figsize=(10, 8))
 18 | 
 19 |     ax = plt.subplot(gs[0, 0])
 20 |     plt.plot(X[np.where(y == 0), 0], X[np.where(y == 0), 1], 'ro')
 21 |     plt.plot(X[np.where(y == 1), 0], X[np.where(y == 1), 1], 'bo')
 22 |     plt.title('XOR')
 23 |     plt.show()
 24 | 
 25 | def plot_decision_boundary(X, y_actual, inference, save_filepath=None, text=None):
 26 |     # Set min and max values and give it some padding
 27 |     x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
 28 |     y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
 29 |     h = 0.01
 30 |     # Generate a grid of points with distance h between them
 31 |     xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
 32 |     # Predict the function value for the whole gid
 33 |     zz = inference(np.c_[xx.ravel(), yy.ravel()])
 34 |     zz = zz.reshape(xx.shape)
 35 | 
 36 |     # Plot the contour and training examples
 37 |     plt.figure()
 38 |     plt.contourf(xx, yy, zz, cmap=plt.cm.Paired)
 39 |     plt.scatter(X[:, 0], X[:, 1], c=y_actual, cmap=plt.cm.Spectral)
 40 |     plt.xlabel('X[0]')
 41 |     plt.ylabel('X[1]')
 42 | 
 43 |     if text:
 44 |         #plt.xlim(2, 2)
 45 |         #plt.ylim(0, 4)
 46 |         plt.text(-3.2, 3.3, text, fontsize=14)
 47 |     if save_filepath == None:
 48 |         plt.show()
 49 |     else:
 50 |         plt.savefig(save_filepath)
 51 |     plt.close()
 52 | 
 53 | def plot_function(losses, save_filepath=None, ylabel=None, title=None):
 54 |     plt.figure()
 55 |     t = [x[0] for x in losses]
 56 |     loss = [x[1] for x in losses]
 57 | 
 58 |     plt.figure()
 59 |     plt.plot(t, loss, 'b')
 60 |     plt.xlabel('Batch #')
 61 |     plt.ylabel(ylabel if ylabel else '')
 62 |     if title:
 63 |         plt.title(title)
 64 | 
 65 |     if save_filepath == None:
 66 |         plt.show()
 67 |     else:
 68 |         plt.savefig(save_filepath)
 69 |     plt.close()
 70 | 
 71 | def make_gif(input_folder, save_filepath):
 72 |     episode_frames = []
 73 |     time_per_step = 0.25
 74 |     for root, _, files in os.walk(input_folder):
 75 |         file_paths = [os.path.join(root, file) for file in files]
 76 |         #sorted by modified time
 77 |         file_paths = sorted(file_paths, key=lambda x: os.path.getmtime(x))
 78 |         episode_frames = [imageio.imread(file_path) for file_path in file_paths if file_path.endswith('.png')]
 79 |     episode_frames = np.array(episode_frames)
 80 |     imageio.mimsave(save_filepath, episode_frames, duration=time_per_step)
 81 | 
 82 | def make_all_gif(input_folder, save_filepath):
 83 |     time_per_step = 0.25
 84 |     for root, _, files in os.walk(os.path.join(input_folder, 'accuracy')):
 85 |         file_paths = [os.path.join(root, file) for file in files]
 86 |         #sorted by modified time
 87 |         file_paths = sorted(file_paths, key=lambda x: os.path.getmtime(x))
 88 |     file_names = [os.path.basename(file) for file in file_paths]
 89 | 
 90 |     episode_frames_accuracy = [imageio.imread(os.path.join(input_folder, 'accuracy',file_name)) for file_name in
 91 |                                file_names if file_name.endswith('.png')]
 92 |     episode_frames_boundary = [imageio.imread(os.path.join(input_folder, 'boundary', file_name)) for file_name in
 93 |                                file_names if file_name.endswith('.png')]
 94 |     episode_frames_loss = [imageio.imread(os.path.join(input_folder, 'loss', file_name)) for file_name in
 95 |                            file_names if file_name.endswith('.png')]
 96 | 
 97 |     assert(len(episode_frames_accuracy)==len(episode_frames_boundary)==len(episode_frames_loss))
 98 | 
 99 |     episode_frames = []
100 |     for i in range(len(episode_frames_accuracy)):
101 |         plt.figure()
102 |         fig, axes = plt.subplots(1, 3, figsize=(20,5))
103 |         #fig.subplots_adjust(hspace=1, wspace=1)
104 | 
105 |         ax = axes.flat[0]
106 |         ax.imshow(episode_frames_accuracy[i], interpolation='none')
107 |         ax.set_axis_off()
108 |         ax.set_aspect('equal')
109 | 
110 |         ax = axes.flat[1]
111 |         ax.imshow(episode_frames_boundary[i], interpolation='none')
112 |         ax.set_axis_off()
113 |         ax.set_aspect('equal')
114 | 
115 |         ax = axes.flat[2]
116 |         ax.imshow(episode_frames_loss[i], interpolation='none')
117 |         ax.set_axis_off()
118 |         ax.set_aspect('equal')
119 | 
120 |         fig.tight_layout()
121 |         plt.suptitle('Step = %d' %i, fontsize=18)
122 |         plt.axis('off')
123 |         plt.savefig(os.path.join(input_folder, 'all', 'image_%d.png'%i), dpi = 200)
124 |         plt.close()
125 | 
126 |         image = imageio.imread(os.path.join(input_folder, 'all', 'image_%d.png'%i))
127 |         episode_frames.append(image)
128 | 
129 |     episode_frames = np.array(episode_frames)
130 |     imageio.mimsave(save_filepath, episode_frames, duration=time_per_step)
131 | 
def reset_folders():
    '''
    Recreate empty 'accuracy', 'boundary' and 'loss' folders under
    ./scratch_mlp/plots (relative to the current working directory).

    Existing folders are deleted together with their contents. The plots folder and
    any missing parent folder are created when needed.
    '''
    plots_root = './scratch_mlp/plots'
    # makedirs also creates a missing './scratch_mlp' parent, where os.mkdir would
    # raise, and exist_ok makes the call a no-op when the folder is already there.
    os.makedirs(plots_root, exist_ok=True)
    for name in ['accuracy', 'boundary', 'loss']:
        folder = os.path.join(plots_root, name)
        if os.path.exists(folder):
            shutil.rmtree(folder)
        os.mkdir(folder)
140 | 
if __name__ == '__main__':
    # Individual GIFs, one per plot type (currently disabled):
    #make_gif('./scratch_mlp/plots/boundary/', './scratch_mlp/plots/gif/boundary.gif')
    #make_gif('./scratch_mlp/plots/loss/', './scratch_mlp/plots/gif/loss.gif')
    #make_gif('./scratch_mlp/plots/accuracy/', './scratch_mlp/plots/gif/accuracy.gif')

    # Combined GIF with the accuracy, boundary and loss plots side by side.
    make_all_gif('./scratch_mlp/plots/', './scratch_mlp/plots/gif/all.gif')


--------------------------------------------------------------------------------