├── genetic ├── zombies │ ├── .gitignore │ ├── demo │ │ ├── webapp │ │ │ ├── public │ │ │ │ ├── images │ │ │ │ │ ├── boy.png │ │ │ │ │ ├── girl.png │ │ │ │ │ ├── bullet.png │ │ │ │ │ └── zombie.png │ │ │ │ ├── js │ │ │ │ │ ├── utils.js │ │ │ │ │ └── index.js │ │ │ │ └── css │ │ │ │ │ └── general.css │ │ │ └── index.html │ │ ├── README.md │ │ ├── package.json │ │ ├── app.yaml │ │ └── app.js │ ├── README.md │ ├── DEVELOPMENT.md │ ├── CONTRIBUTE.md │ ├── typedoc.json │ ├── package.json │ ├── tsconfig.json │ ├── webpack.config.js │ └── src │ │ └── index.ts └── introduction │ ├── simple_genetic_exemple.html │ └── js │ └── simple_genetic_example.js ├── images ├── algorithm_q_learning.png ├── q_learning_loss_with_target.png ├── optimal_action_value_function.png └── q_function_loss_without_target.png ├── README.md ├── rl ├── demo_gym.py ├── test_human.py ├── q_learning.py ├── dqn_use.py ├── grid_world_q_learning.py ├── sticks.py ├── GridWorld.ipynb ├── tictactoe.py ├── q_learning_nn.html ├── dqn.py ├── q_learning_visu.html ├── policy_gradient.py ├── policy_gradient_continuous_actions.py └── actor_critic.py ├── meta-learning ├── Neural Turing Machine.ipynb └── Meta Learning.ipynb ├── metacar └── index.html ├── tensorflow-js └── teachable_machine.html └── tensorflow ├── Eager Execution.ipynb └── TensorFlow MNIST tutorial.ipynb /genetic/zombies/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/* 2 | demo/dist/* 3 | demo/node_modules/* 4 | dist/* 5 | -------------------------------------------------------------------------------- /images/algorithm_q_learning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thibo73800/aihub/HEAD/images/algorithm_q_learning.png -------------------------------------------------------------------------------- /images/q_learning_loss_with_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thibo73800/aihub/HEAD/images/q_learning_loss_with_target.png -------------------------------------------------------------------------------- /images/optimal_action_value_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thibo73800/aihub/HEAD/images/optimal_action_value_function.png -------------------------------------------------------------------------------- /images/q_function_loss_without_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thibo73800/aihub/HEAD/images/q_function_loss_without_target.png -------------------------------------------------------------------------------- /genetic/zombies/demo/webapp/public/images/boy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thibo73800/aihub/HEAD/genetic/zombies/demo/webapp/public/images/boy.png -------------------------------------------------------------------------------- /genetic/zombies/demo/webapp/public/images/girl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thibo73800/aihub/HEAD/genetic/zombies/demo/webapp/public/images/girl.png -------------------------------------------------------------------------------- /genetic/zombies/demo/webapp/public/images/bullet.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/thibo73800/aihub/HEAD/genetic/zombies/demo/webapp/public/images/bullet.png -------------------------------------------------------------------------------- /genetic/zombies/demo/webapp/public/images/zombie.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thibo73800/aihub/HEAD/genetic/zombies/demo/webapp/public/images/zombie.png -------------------------------------------------------------------------------- /genetic/zombies/demo/README.md: -------------------------------------------------------------------------------- 1 | # Metacar: Demo 2 | 3 | This is the demo website of the metacar project. You can use it as an example to learn how to used the environement. 4 | 5 | ## Install 6 | 7 | ``` 8 | npm install 9 | npm start 10 | ``` 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # aihub 2 | 3 | I use this repository for my Youtube channel where I share videos about Artificial Intelligence. The repository includes Machine Learning, Deep Learning, and Reinforcement learning's code. 4 | -------------------------------------------------------------------------------- /genetic/zombies/README.md: -------------------------------------------------------------------------------- 1 | ### Install 2 | 3 | ``` 4 | npm install 5 | ``` 6 | 7 | ### Develop 8 | 9 | ``` 10 | npm run watch 11 | ``` 12 | 13 | ### Build 14 | 15 | ``` 16 | npm run build 17 | ``` 18 | 19 | ### Run demo 20 | 21 | ``` 22 | cd demo 23 | npm run start 24 | ``` 25 | -------------------------------------------------------------------------------- /genetic/zombies/DEVELOPMENT.md: -------------------------------------------------------------------------------- 1 | # Metacar: Development 2 | 3 | ### Install 4 | 5 | ``` 6 | npm install 7 | ``` 8 | 9 | ### Develop 10 | 11 | ``` 12 | npm run watch 13 | ``` 14 | 15 | ### Build 16 | 17 | ``` 18 | npm run build 19 | ``` 20 | 21 | ### Build the docs 22 | 23 | ``` 24 | npm run docs 25 | ``` 26 | -------------------------------------------------------------------------------- /rl/demo_gym.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import time 3 | 4 | env = gym.make('Breakout-v0') 5 | env.reset() 6 | 7 | for _ in range(1000): 8 | env.render() 9 | observation, reward, done, info = env.step(env.action_space.sample()) 10 | if reward != 0: 11 | print("Reward", reward) 12 | time.sleep(0.1) 13 | if done: 14 | break 15 | -------------------------------------------------------------------------------- /genetic/zombies/demo/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "metacar-demo", 3 | "version": "0.0.1", 4 | "description": "", 5 | "main": "app.js", 6 | "scripts": { 7 | "start": "node app.js" 8 | }, 9 | "author": "Thibault Neveu", 10 | "license": "ISC", 11 | "dependencies": { 12 | "cors": "^2.8.4", 13 | "express": "^4.16.3", 14 | "pixi.js": "^4.8.1" 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /genetic/zombies/CONTRIBUTE.md: -------------------------------------------------------------------------------- 1 | # Metacar: Contribute 2 | 3 | If you want to be part of the project, whether to implement features in 
the environment or demonstrate algorithms, feel free to join the [slack channel](https://join.slack.com/t/metacar/shared_invite/enQtMzgyODI4NDMzMDc0LTY1MjIwNzk1MTAzOTBiZjJlOGUwM2YyYjA3MzBmNjQyNjUyMDZkOGNkYmU0MmUyYzUzNGRhNGJhZDE1M2EzNzM) to ask questions and talk about all your fantastic ideas! 4 | -------------------------------------------------------------------------------- /genetic/zombies/typedoc.json: -------------------------------------------------------------------------------- 1 | { 2 | "mode": "modules", 3 | "out": "demo/docs", 4 | "src": "src/index.ts", 5 | "theme": "default", 6 | "ignoreCompilerErrors": "true", 7 | "experimentalDecorators": "true", 8 | "emitDecoratorMetadata": "true", 9 | "target": "ES5", 10 | "moduleResolution": "node", 11 | "preserveConstEnums": "true", 12 | "stripInternal": "true", 13 | "suppressExcessPropertyErrors": "true", 14 | "suppressImplicitAnyIndexErrors": "true", 15 | "module": "commonjs", 16 | "hideGenerator": true, 17 | "excludePrivate": true 18 | } -------------------------------------------------------------------------------- /meta-learning/Neural Turing Machine.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Neural Turing Machine" 8 | ] 9 | } 10 | ], 11 | "metadata": { 12 | "kernelspec": { 13 | "display_name": "Python 3", 14 | "language": "python", 15 | "name": "python3" 16 | }, 17 | "language_info": { 18 | "codemirror_mode": { 19 | "name": "ipython", 20 | "version": 3 21 | }, 22 | "file_extension": ".py", 23 | "mimetype": "text/x-python", 24 | "name": "python", 25 | "nbconvert_exporter": "python", 26 | "pygments_lexer": "ipython3", 27 | "version": "3.5.2" 28 | } 29 | }, 30 | "nbformat": 4, 31 | "nbformat_minor": 2 32 | } 33 | -------------------------------------------------------------------------------- /genetic/zombies/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "zombie", 3 | "description": "A reinforcement learning environment for self-driving cars in the browser.", 4 | "version": "0.1.1", 5 | "main": "dist/geneticzombie.min.js", 6 | "author": "Thibault Neveu", 7 | "license": "MIT", 8 | "devDependencies": { 9 | "ts-loader": "^4.3.1", 10 | "typedoc": "^0.11.1", 11 | "typescript": "^2.9.1", 12 | "webpack": "^4.10.2", 13 | "webpack-cli": "^3.0.2" 14 | }, 15 | "dependencies": { 16 | "@types/pixi.js": "^4.7.5", 17 | "pixi.js": "^4.8.0" 18 | }, 19 | "scripts": { 20 | "build": "./node_modules/.bin/webpack-cli --mode production", 21 | "build-dev": "./node_modules/.bin/webpack-cli --mode development", 22 | "watch": "./node_modules/.bin/webpack-cli --watch --mode development", 23 | "docs": "./node_modules/.bin/typedoc --options typedoc.json" 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /genetic/zombies/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "ES2015", 4 | "moduleResolution": "node", 5 | "noImplicitAny": true, 6 | "sourceMap": true, 7 | "removeComments": true, 8 | "preserveConstEnums": true, 9 | "declaration": true, 10 | "target": "es5", 11 | "lib": ["es2015", "dom"], 12 | "outDir": "./dist-es6", 13 | "noUnusedLocals": false, 14 | "noImplicitReturns": true, 15 | "noImplicitThis": true, 16 | "alwaysStrict": true, 17 | "noUnusedParameters": false, 18 | "pretty": true, 19 | 
"noFallthroughCasesInSwitch": true, 20 | "allowUnreachableCode": false, 21 | "experimentalDecorators": true 22 | }, 23 | "include": [ 24 | "src/**/*" 25 | ], 26 | "exclude": [ 27 | "node_modules", 28 | "demo" 29 | ] 30 | } 31 | -------------------------------------------------------------------------------- /genetic/zombies/demo/app.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2017, Google, Inc. 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | # [START app_yaml] 15 | runtime: nodejs 16 | env: flex 17 | 18 | skip_files: 19 | - yarn.lock 20 | 21 | # This sample incurs costs to run on the App Engine flexible environment. 22 | # The settings below are to reduce costs during testing and are not appropriate 23 | # for production use. For more information, see: 24 | # https://cloud.google.com/appengine/docs/flexible/nodejs/configuring-your-app-with-app-yaml 25 | manual_scaling: 26 | instances: 1 27 | resources: 28 | cpu: 1 29 | memory_gb: 0.5 30 | disk_size_gb: 10 31 | 32 | # [END app_yaml] 33 | -------------------------------------------------------------------------------- /genetic/zombies/webpack.config.js: -------------------------------------------------------------------------------- 1 | const path = require('path'); 2 | var webpack = require('webpack') 3 | 4 | var config = { 5 | entry: './src/index', 6 | module: { 7 | rules: [ 8 | { 9 | test: /\.ts$/, 10 | use: 'ts-loader', 11 | } 12 | ], 13 | }, 14 | resolve: { 15 | extensions: [ 16 | '.ts', 17 | ], 18 | }, 19 | externals: [ 20 | // Don't bundle pixi.js, assume it'll be included in the HTML via a script 21 | // tag, and made available in the global variable PIXI. 22 | {"pixi.js": "PIXI"} 23 | ] 24 | }; 25 | 26 | var packageConfig = Object.assign({}, config, { 27 | output: { 28 | filename: 'geneticzombie.min.js', 29 | path: path.resolve(__dirname, './dist'), 30 | library: 'geneticzombie', 31 | libraryTarget: 'window', 32 | libraryExport: 'default' 33 | } 34 | }); 35 | 36 | var demoConfig = Object.assign({}, config,{ 37 | output: { 38 | filename: 'geneticzombie.min.js', 39 | path: path.resolve(__dirname, './demo/dist'), 40 | library: 'geneticzombie', 41 | libraryTarget: 'window', 42 | libraryExport: 'default' 43 | } 44 | }); 45 | 46 | module.exports = [ 47 | packageConfig, demoConfig, 48 | ]; 49 | -------------------------------------------------------------------------------- /genetic/zombies/demo/webapp/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Genetic-Zombie: A survival experiment based on genetic algorithm 6 | 7 | 8 | 9 | 10 | 12 | 13 | 14 | 15 |
Genetic-Zombie
A survival experiment based on genetic algorithm
34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /genetic/introduction/simple_genetic_exemple.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 33 | Simple Genetic Exemple 34 | 35 | 36 | 37 |
  1.
  2.
  3.
  4.
  5.
  6.
  7.
  8.

  1.
  2.
  3.
  4.
54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /genetic/zombies/demo/app.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var fs = require('fs'); 4 | const express = require('express'); 5 | var path = require("path"); 6 | var cors = require('cors'); 7 | 8 | function fromDir(startPath,filter){ 9 | var files_list = []; 10 | var files=fs.readdirSync(startPath); 11 | for(var i=0;i=0) { 15 | files_list.push(filename); 16 | }; 17 | }; 18 | return files_list; 19 | } 20 | 21 | function get_path(file){ 22 | return path.join(path.join(__dirname, "webapp/"), file); 23 | } 24 | 25 | const app = express(); 26 | app.use(cors()); 27 | 28 | app.use("/dist", express.static(path.join(__dirname, "dist/"))); 29 | app.use("/public", express.static(path.join(__dirname, "webapp/public/"))); 30 | app.use("/docs", express.static(path.join(__dirname, "docs/"))); 31 | 32 | app.get('/', (req, res) => { 33 | res.sendFile(get_path("index.html")); 34 | }); 35 | 36 | /** 37 | * Create all HTML routes 38 | * **/ 39 | const files = fromDir(path.join(__dirname, "webapp/"),'.html'); 40 | files.forEach(file => { 41 | let route = file.split("/"); 42 | route = route[route.length - 1]; 43 | 44 | console.log("Open route:", route, file); 45 | app.get('/'+route, (req, res) => { 46 | res.sendFile(file); 47 | }); 48 | }); 49 | 50 | // Start the server 51 | const PORT = process.env.PORT || 3000; 52 | app.listen(PORT, () => { 53 | console.log(`App listening on port ${PORT}`); 54 | console.log('Press Ctrl+C to quit.'); 55 | }); 56 | -------------------------------------------------------------------------------- /rl/test_human.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from __future__ import division, print_function 4 | 5 | import sys 6 | import numpy 7 | import gym 8 | import time 9 | from optparse import OptionParser 10 | 11 | import gym_minigrid 12 | 13 | def main(): 14 | # Load the gym environment 15 | env = gym.make("MiniGrid-Empty-6x6-v0") 16 | 17 | env.reset() 18 | renderer = env.render('human') 19 | 20 | def keyDownCb(keyName): 21 | if keyName == 'BACKSPACE': 22 | resetEnv() 23 | return 24 | 25 | if keyName == 'ESCAPE': 26 | sys.exit(0) 27 | 28 | action = 0 29 | 30 | if keyName == 'LEFT': 31 | action = env.actions.left 32 | elif keyName == 'RIGHT': 33 | action = env.actions.right 34 | elif keyName == 'UP': 35 | action = env.actions.forward 36 | 37 | elif keyName == 'SPACE': 38 | action = env.actions.toggle 39 | elif keyName == 'PAGE_UP': 40 | action = env.actions.pickup 41 | elif keyName == 'PAGE_DOWN': 42 | action = env.actions.drop 43 | 44 | elif keyName == 'RETURN': 45 | action = env.actions.done 46 | 47 | else: 48 | print("unknown key %s" % keyName) 49 | return 50 | 51 | obs, reward, done, info = env.step(action) 52 | 53 | print('step=%s, reward=%.2f' % (env.step_count, reward)) 54 | 55 | if done: 56 | print('done!') 57 | resetEnv() 58 | 59 | renderer.window.setKeyDownCb(keyDownCb) 60 | 61 | while True: 62 | env.render('human') 63 | time.sleep(0.01) 64 | 65 | # If the window was closed 66 | if renderer.window == None: 67 | break 68 | 69 | if __name__ == "__main__": 70 | main() 71 | -------------------------------------------------------------------------------- /metacar/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Metacar: Documentation 6 
| 7 | 8 | 9 | 10 |
11 | 12 | 13 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /rl/q_learning.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import numpy as np 4 | import gym 5 | import gym_minigrid 6 | import time 7 | 8 | def state_to_key(obs): 9 | return str(obs["image"].tolist()+[obs["direction"]]).strip() 10 | 11 | def update_Q(Q, s, sp, a, r, done): 12 | if s not in Q: 13 | Q[s] = np.array([0., 0., 0., 0.]) 14 | if sp not in Q: 15 | Q[sp] = np.array([0., 0., 0., 0.]) 16 | 17 | ap = np.argmax(Q[sp]) 18 | if not done: 19 | Q[s][a] = Q[s][a] + 0.01*(r + 0.99*Q[sp][ap] - Q[s][a]) 20 | else: 21 | Q[s][a] = Q[s][a] + 0.01*(r - Q[s][a]) 22 | 23 | def create_state_if_not_exist(Q, s): 24 | if s not in Q: 25 | Q[s] = np.array([0., 0., 0., 0.]) 26 | 27 | def main(): 28 | 29 | Q = {} 30 | 31 | env = gym.make("MiniGrid-Empty-6x6-v0") 32 | eps = 0.01 33 | 34 | for epoch in range(100): 35 | 36 | s = env.reset() 37 | s = state_to_key(s) 38 | done = False 39 | 40 | while not done: 41 | 42 | if np.random.rand() < eps: 43 | a = np.random.randint(0, 4) 44 | else: 45 | create_state_if_not_exist(Q, s) 46 | a = np.argmax(Q[s]) 47 | 48 | sp, r, done, info = env.step(a) 49 | sp = state_to_key(sp) 50 | 51 | update_Q(Q, s, sp, a, r, done) 52 | 53 | s = sp 54 | 55 | print("eps", eps) 56 | eps = max(0.1, eps*0.99) 57 | 58 | 59 | for epoch in range(100): 60 | s = env.reset() 61 | s = state_to_key(s) 62 | done = False 63 | 64 | while not done: 65 | create_state_if_not_exist(Q, s) 66 | a = np.argmax(Q[s]) 67 | sp, r, done, info = env.step(a) 68 | sp = state_to_key(sp) 69 | s = sp 70 | env.render() 71 | time.sleep(0.1) 72 | print("r", r) 73 | 74 | if __name__ == "__main__": 75 | main() 76 | -------------------------------------------------------------------------------- /genetic/zombies/demo/webapp/public/js/utils.js: -------------------------------------------------------------------------------- 1 | function update_genome_to_display(genomed, genome){ 2 | genomed = [ 3 | genomed[0]+1, 4 | genomed[1]+genome[0], 5 | genomed[2]+genome[1], 6 | genomed[3]+genome[2], 7 | genomed[4]+genome[3] 8 | ] 9 | return genomed; 10 | } 11 | 12 | function display_genome(genome){ 13 | console.log(chart_genome); 14 | genome = [ 15 | genome[0]/30, // Scale between 0 and 1 16 | genome[1]/genome[0], // Take the mean 17 | genome[2]/genome[0], // Take the mean 18 | genome[3]/genome[0], // Take the mean 19 | genome[4]/genome[0] // Take the mean 20 | ] 21 | chart_genome.data.datasets.forEach((dataset) => { 22 | dataset.data = genome; 23 | }); 24 | chart_genome.update(); 25 | } 26 | 27 | var ctx = document.getElementById('genomeChart').getContext('2d'); 28 | var chart_genome = new Chart(ctx, { 29 | type: 'bar', 30 | data: { 31 | labels: ['Size', 'Gen1 (Speed)', 'Gen2 (Perception)', 'Gen3 (Accuracy)', 'Gen4 (bullets)'], 32 | datasets: [{ 33 | label: 'Genome', 34 | labelColor: 'rgba(0, 99, 132, 0.2)', 35 | data: [], 36 | backgroundColor: [ 37 | 'rgba(255, 99, 132, 0.2)', 38 | 'rgba(54, 162, 235, 0.2)', 39 | 'rgba(255, 206, 86, 0.2)', 40 | 'rgba(75, 192, 192, 0.2)', 41 | 'rgba(153, 102, 255, 0.2)' 42 | ], 43 | borderColor: [ 44 | 'rgba(255, 99, 132, 1)', 45 | 'rgba(54, 162, 235, 1)', 46 | 'rgba(255, 206, 86, 1)', 47 | 'rgba(75, 192, 192, 1)', 48 | 'rgba(153, 102, 255, 1)' 49 | ], 50 | borderWidth: 1 51 | }] 52 | }, 53 | options: { 54 | responsive:false, 55 | maintainAspectRatio: false, 56 | scales: { 57 | yAxes: [{ 58 | ticks: { 59 | 
beginAtZero: true 60 | } 61 | }] 62 | } 63 | } 64 | }); 65 | -------------------------------------------------------------------------------- /rl/dqn_use.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import gym 4 | import time 5 | import os 6 | 7 | class StateProcessor(): 8 | 9 | def __init__(self): 10 | with tf.variable_scope("process"): 11 | self.input_state = tf.placeholder(shape=[210, 160, 3], dtype=tf.uint8, name="input_process") 12 | self.output = tf.image.rgb_to_grayscale(self.input_state) 13 | self.output = tf.image.crop_to_bounding_box(self.output, 34, 0, 160, 160) 14 | self.output = tf.image.resize_images( 15 | self.output, [84, 84], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) 16 | self.output = tf.squeeze(self.output) 17 | 18 | def process(self, sess, state): 19 | return sess.run(self.output, { self.input_state: state }) 20 | 21 | tf.reset_default_graph() 22 | 23 | checkpoint = os.path.join("./checkpoints", "model") 24 | new_saver = tf.train.import_meta_graph(checkpoint + '.meta') 25 | 26 | with tf.Session() as sess: 27 | # Restore variables from disk. 28 | 29 | 30 | new_saver.restore(sess, checkpoint) 31 | 32 | 33 | input_data = tf.get_default_graph().get_tensor_by_name('dqn/X:0') 34 | input_process = tf.get_default_graph().get_tensor_by_name('process/input_process:0') 35 | probs = tf.get_default_graph().get_tensor_by_name('dqn/predictions:0') 36 | 37 | state_processor = StateProcessor() 38 | 39 | env = gym.make('Breakout-v0') 40 | env.reset() 41 | 42 | state = env.reset() 43 | state = state_processor.process(sess, state) 44 | state = np.stack([state] * 4, axis=2) 45 | 46 | for _ in range(1000): 47 | import time 48 | time.sleep(0.05) 49 | env.render() 50 | p = sess.run(probs, feed_dict={input_data: [state]})[0] 51 | action = np.argmax(p) 52 | next_state, reward, done, info = env.step(action) 53 | 54 | # Add this action to the stack of images 55 | next_state = state_processor.process(sess, next_state) 56 | next_state = np.append(state[:,:,1:], np.expand_dims(next_state, 2), axis=2) 57 | 58 | if reward != 0: 59 | print("Reward", reward) 60 | 61 | state = next_state 62 | 63 | if done: 64 | break 65 | -------------------------------------------------------------------------------- /genetic/zombies/demo/webapp/public/js/index.js: -------------------------------------------------------------------------------- 1 | /* 2 | Create the initial population in the environment 3 | */ 4 | function createPopulation(env) { 5 | // Create individuals with one specific or random genome 6 | for (let i = 0; i < 15; i++){ 7 | //env.createHuman([Math.random(), Math.random(), Math.random(), Math.random()]); 8 | env.createHuman([0.5, 0.5, 0.5, 0.5]); 9 | } 10 | // Create individuals with one specific or random genome 11 | for (let i = 0; i < 15; i++){ 12 | //env.createHuman([Math.random(), Math.random(), Math.random(), Math.random()]); 13 | env.createHuman([0.5, 0.5, 0.5, 0.5]); 14 | } 15 | } 16 | 17 | /* 18 | Method used to select amoung the last population the best 19 | individuals to create a new population 20 | */ 21 | function selection(env){ 22 | let genomed = [0, 0, 0, 0, 0]; 23 | 24 | // Compute the total fitness of all individuals 25 | let total_fitness = 0; 26 | for (let p in env.last_population){ 27 | total_fitness += env.last_population[p].lifeduration; 28 | } 29 | 30 | // Sort individual by their best fitness 31 | env.last_population.sort((i1, i2) => { 32 | i1.prob = i1.lifeduration / 
total_fitness; 33 | i2.prob = i2.lifeduration / total_fitness; 34 | return i2.lifeduration - i1.lifeduration; 35 | }); 36 | 37 | for (let i = 0; i<30; i++){ 38 | 39 | let rd = Math.random(); 40 | let lastprop = 0; 41 | 42 | for (let p in env.last_population){ 43 | let h = env.last_population[p]; 44 | if (rd >= lastprop && rd < h.prob + lastprop){ 45 | //if (h.genome[3] == 0.75){ 46 | genomed = update_genome_to_display(genomed, h.genome); 47 | //} 48 | env.createHuman(env.last_population[p].genome); 49 | break; 50 | } 51 | lastprop += env.last_population[p].prob; 52 | } 53 | 54 | } 55 | 56 | display_genome(genomed); 57 | } 58 | 59 | function step(env, delta){ 60 | // Retrieve the desired speed 61 | let speed = document.getElementById("speed").value; 62 | // Step in the environment with the given speed 63 | for (let i = 0; i { 76 | step(env); 77 | }); 78 | } 79 | 80 | function init(env){ 81 | env.reset(); 82 | createPopulation(env); 83 | step(env); 84 | } 85 | 86 | var env = new geneticzombie.env("canvas_container", {nbbullets : 10}); 87 | env.init(init, null); 88 | -------------------------------------------------------------------------------- /rl/grid_world_q_learning.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from random import randint 3 | import random 4 | 5 | class EnvGrid(object): 6 | """ 7 | docstring forEnvGrid. 8 | """ 9 | def __init__(self): 10 | super(EnvGrid, self).__init__() 11 | 12 | self.grid = [ 13 | [0, 0, 1], 14 | [0, -1, 0], 15 | [0, 0, 0] 16 | ] 17 | # Starting position 18 | self.y = 2 19 | self.x = 0 20 | 21 | self.actions = [ 22 | [-1, 0], # Up 23 | [1, 0], #Down 24 | [0, -1], # Left 25 | [0, 1] # Right 26 | ] 27 | 28 | def reset(self): 29 | """ 30 | Reset world 31 | """ 32 | self.y = 2 33 | self.x = 0 34 | return (self.y*3+self.x+1) 35 | 36 | def step(self, action): 37 | """ 38 | Action: 0, 1, 2, 3 39 | """ 40 | self.y = max(0, min(self.y + self.actions[action][0],2)) 41 | self.x = max(0, min(self.x + self.actions[action][1],2)) 42 | 43 | return (self.y*3+self.x+1) , self.grid[self.y][self.x] 44 | 45 | def show(self): 46 | """ 47 | Show the grid 48 | """ 49 | print("---------------------") 50 | y = 0 51 | for line in self.grid: 52 | x = 0 53 | for pt in line: 54 | print("%s\t" % (pt if y != self.y or x != self.x else "X"), end="") 55 | x += 1 56 | y += 1 57 | print("") 58 | 59 | def is_finished(self): 60 | return self.grid[self.y][self.x] == 1 61 | 62 | def take_action(st, Q, eps): 63 | # Take an action 64 | if random.uniform(0, 1) < eps: 65 | action = randint(0, 3) 66 | else: # Or greedy action 67 | action = np.argmax(Q[st]) 68 | return action 69 | 70 | if __name__ == '__main__': 71 | env = EnvGrid() 72 | st = env.reset() 73 | 74 | Q = [ 75 | [0, 0, 0, 0], 76 | [0, 0, 0, 0], 77 | [0, 0, 0, 0], 78 | [0, 0, 0, 0], 79 | [0, 0, 0, 0], 80 | [0, 0, 0, 0], 81 | [0, 0, 0, 0], 82 | [0, 0, 0, 0], 83 | [0, 0, 0, 0], 84 | [0, 0, 0, 0] 85 | ] 86 | 87 | for _ in range(100): 88 | # Reset the game 89 | st = env.reset() 90 | while not env.is_finished(): 91 | #env.show() 92 | #at = int(input("$>")) 93 | at = take_action(st, Q, 0.4) 94 | 95 | stp1, r = env.step(at) 96 | #print("s", stp1) 97 | #print("r", r) 98 | 99 | # Update Q function 100 | atp1 = take_action(stp1, Q, 0.0) 101 | Q[st][at] = Q[st][at] + 0.1*(r + 0.9*Q[stp1][atp1] - Q[st][at]) 102 | 103 | st = stp1 104 | 105 | for s in range(1, 10): 106 | print(s, Q[s]) 107 | -------------------------------------------------------------------------------- 
/genetic/introduction/js/simple_genetic_example.js: -------------------------------------------------------------------------------- 1 | function individual_to_string(individual){ 2 | let st = ""; 3 | for (let i=0; i<8; i++){ 4 | st += individual[i].toString(); 5 | } 6 | return st; 7 | } 8 | 9 | function display_pop(pop){ 10 | // Display each individual 11 | for (let p=0; p<8; p++){ 12 | // Display the individual on the screen 13 | let st = individual_to_string(pop[p]); 14 | document.getElementById("ind_" + (p + 1)).innerHTML = st; 15 | } 16 | } 17 | 18 | function display_subpop(subpop){ 19 | // Display each individual 20 | for (let p=0; p<4; p++){ 21 | // Display the individual on the screen 22 | let st = individual_to_string(subpop[p]); 23 | document.getElementById("sel_" + (p + 1)).innerHTML = st; 24 | } 25 | } 26 | 27 | function compute_fitness(individual){ 28 | score = 0; 29 | for (let i=0; i<8; i++){ 30 | score += individual[i]; 31 | } 32 | return score; 33 | } 34 | 35 | function cross_over(i1, i2){ 36 | let individual = [ 37 | i1[0], 38 | i1[1], 39 | i1[2], 40 | i1[3], 41 | i2[4], 42 | i2[5], 43 | i2[6], 44 | i2[7] 45 | ]; 46 | return individual; 47 | } 48 | 49 | function generate_individual(){ 50 | let individual = []; 51 | for (let i=0; i<8; i++){ 52 | individual.push(Math.random() > 0.5 ? 1 : 0); 53 | } 54 | return individual; 55 | } 56 | 57 | function generate_population(){ 58 | // Init empty population 59 | let pop = [] 60 | for (let p=0; p<8; p++){ 61 | // Randomly generate one individual with random genes 62 | pop.push(generate_individual()); 63 | } 64 | return pop; 65 | } 66 | 67 | let pop = generate_population(); 68 | console.log(pop); 69 | display_pop(pop); 70 | //var n_pop = []; 71 | 72 | document.getElementById("sort").addEventListener("click", () => { 73 | // Sort function 74 | pop.sort((i1, i2) => { 75 | return compute_fitness(i2) - compute_fitness(i1); 76 | }); 77 | console.log(pop); 78 | display_pop(pop); 79 | }); 80 | 81 | document.getElementById("selection").addEventListener("click", () => { 82 | n_pop = []; 83 | n_pop.push(pop[0]); 84 | n_pop.push(cross_over(pop[0], pop[1])); 85 | n_pop.push(cross_over(pop[0], pop[2])); 86 | n_pop.push(cross_over(pop[1], pop[2])); 87 | display_subpop(n_pop); 88 | }); 89 | 90 | document.getElementById("mutate").addEventListener("click", () => { 91 | for (let i=0; i<4; i++){ 92 | let individual = n_pop[i]; 93 | let gene_to_mutate = Math.floor(Math.random()*individual.length); 94 | if (individual[gene_to_mutate] == 0) { 95 | individual[gene_to_mutate] = 1; 96 | } 97 | else { 98 | individual[gene_to_mutate] = 0; 99 | } 100 | display_subpop(n_pop); 101 | } 102 | }); 103 | 104 | document.getElementById("fillpop").addEventListener("click", () => { 105 | pop = [] 106 | pop.push(n_pop[0]); 107 | pop.push(n_pop[1]); 108 | pop.push(n_pop[2]); 109 | pop.push(n_pop[3]); 110 | 111 | pop.push(generate_individual()); 112 | pop.push(generate_individual()); 113 | pop.push(generate_individual()); 114 | pop.push(generate_individual()); 115 | 116 | display_pop(pop); 117 | 118 | document.getElementById("sel_1").innerHTML = ""; 119 | document.getElementById("sel_2").innerHTML = ""; 120 | document.getElementById("sel_3").innerHTML = ""; 121 | document.getElementById("sel_4").innerHTML = ""; 122 | }); 123 | -------------------------------------------------------------------------------- /genetic/zombies/demo/webapp/public/css/general.css: -------------------------------------------------------------------------------- 1 | body{ 2 | background: #030303; 3 
| font-family: 'Tajawal', sans-serif; 4 | padding: 0; 5 | margin: 0; 6 | padding-bottom: 100px; 7 | padding-top: 70px; 8 | } 9 | 10 | #genomeChart { 11 | display: block; 12 | float: left; 13 | width: 500px; 14 | height: 500px; 15 | } 16 | 17 | canvas { 18 | display: block; 19 | float: left; 20 | } 21 | 22 | header{ 23 | width: 100%; 24 | height: 70px; 25 | background: #383838; 26 | position: fixed; 27 | top:0px; 28 | left: 0px; 29 | } 30 | 31 | .header_container{ 32 | width: 100%; 33 | max-width: 800px; 34 | height: 70px; 35 | margin: auto; 36 | overflow: hidden; 37 | } 38 | 39 | .header_container h1{ 40 | line-height: 45px; 41 | color: white; 42 | float: left; 43 | font-weight: 100; 44 | } 45 | 46 | .header_container a, .header_container a:link{ 47 | color: white; 48 | text-decoration: none; 49 | } 50 | 51 | .header_container a{ 52 | float: right; 53 | } 54 | 55 | .header_container img{ 56 | width: 50px; 57 | margin-top: 10px; 58 | } 59 | 60 | .canvas_container{ 61 | background-color: #000000; 62 | height: auto; 63 | margin-bottom: 50px; 64 | width: 1200px; 65 | height: 800px; 66 | float: left; 67 | margin-left: 100px; 68 | } 69 | 70 | .canvas_container .canvas{ 71 | text-align: center; 72 | margin: auto; 73 | } 74 | 75 | .canvas_container .canvas canvas{ 76 | margin-bottom: -40px; 77 | -webkit-box-shadow: -1px 2px 20px -2px rgba(0,0,0,0.5); 78 | -moz-box-shadow: -1px 2px 20px -2px rgba(0,0,0,0.5); 79 | box-shadow: -1px 2px 20px -2px rgba(0,0,0,0.5); 80 | } 81 | 82 | .body_container{ 83 | width: 100%; 84 | max-width: 800px; 85 | min-height: 200px; 86 | margin: auto; 87 | } 88 | 89 | .body_container h2{ 90 | text-align: center; 91 | font-size: 50px; 92 | font-weight: 100; 93 | color: #cacaca; 94 | margin-top: 0px; 95 | margin-bottom: 0px; 96 | } 97 | 98 | .body_container h3{ 99 | margin-top: 0px; 100 | margin-bottom: 0px; 101 | text-align: center; 102 | font-size: 30px; 103 | font-weight: 100; 104 | color: #cacaca; 105 | } 106 | 107 | .body_container pre{ 108 | background: #383838; 109 | color: white; 110 | padding: 10px; 111 | font-size: 20px; 112 | } 113 | 114 | .body_container p{ 115 | font-size: 16px; 116 | } 117 | 118 | .level_link_box{ 119 | width: 100%; 120 | background: white; 121 | } 122 | 123 | .level_link_box h4, .level_link_box a{ 124 | margin-bottom: 0px; 125 | text-align: center; 126 | font-size: 22px; 127 | font-weight: 100; 128 | color: #484848; 129 | text-decoration: none; 130 | } 131 | 132 | .level_link_box p{ 133 | padding: 10px; 134 | font-size: 18px; 135 | } 136 | 137 | .level_link_box p a, .level_link_box p a:link{ 138 | color: #ed4532; 139 | text-decoration: none; 140 | font-weight: bold; 141 | } 142 | 143 | .floatleft { 144 | float: left; 145 | margin-right: 20px; 146 | } 147 | 148 | .floatright { 149 | float: right; 150 | margin-left: 20px; 151 | } 152 | 153 | .metacar_buttons_container{ 154 | text-align: center; 155 | margin: auto; 156 | margin-top: 60px; 157 | max-width: 800px; 158 | } 159 | 160 | .metacar_buttons_container button{ 161 | display: inline-block; 162 | padding: 10px; 163 | border: none; 164 | color: white; 165 | cursor: pointer; 166 | text-align: center; 167 | margin: auto; 168 | margin-right: 5px; 169 | border-radius:5px; 170 | -moz-border-radius:5px; 171 | -webkit-border-radius:5px; 172 | margin-bottom: 5px; 173 | background: #383838; 174 | } 175 | 176 | .metacar_img{ 177 | background: #71944c; 178 | display: inline-block; 179 | padding: 10px; 180 | margin: 5px; 181 | -moz-border-radius:5px; 182 | -webkit-border-radius:5px; 183 | margin-bottom: 
5px; 184 | } 185 | -------------------------------------------------------------------------------- /rl/sticks.py: -------------------------------------------------------------------------------- 1 | from random import randint 2 | import random 3 | import numpy as np 4 | 5 | class StickGame(object): 6 | """ 7 | StickGame. 8 | """ 9 | 10 | def __init__(self, nb): 11 | # @nb Number of stick to play with 12 | super(StickGame, self).__init__() 13 | self.original_nb = nb 14 | self.nb = nb 15 | 16 | def is_finished(self): 17 | # Check if the game is over @return Boolean 18 | if self.nb <= 0: 19 | return True 20 | return False 21 | 22 | def reset(self): 23 | # Reset the state of the game 24 | self.nb = self.original_nb 25 | return self.nb 26 | 27 | def display(self): 28 | # Display the state of the game 29 | print ("| " * self.nb) 30 | 31 | def step(self, action): 32 | # @action either 1, 2 or 3. Take an action into the environement 33 | self.nb -= action 34 | if self.nb <= 0: 35 | return None, -1 36 | else: 37 | return self.nb, 0 38 | 39 | class StickPlayer(object): 40 | """ 41 | Stick Player 42 | """ 43 | 44 | def __init__(self, is_human, size, trainable=True): 45 | # @nb Number of stick to play with 46 | super(StickPlayer, self).__init__() 47 | self.is_human = is_human 48 | self.history = [] 49 | self.V = {} 50 | for s in range(1, size+1): 51 | self.V[s] = 0. 52 | self.win_nb = 0. 53 | self.lose_nb = 0. 54 | self.rewards = [] 55 | self.eps = 0.99 56 | self.trainable = trainable 57 | 58 | def reset_stat(self): 59 | # Reset stat 60 | self.win_nb = 0 61 | self.lose_nb = 0 62 | self.rewards = [] 63 | 64 | def greedy_step(self, state): 65 | # Greedy step 66 | actions = [1, 2, 3] 67 | vmin = None 68 | vi = None 69 | for i in range(0, 3): 70 | a = actions[i] 71 | if state - a > 0 and (vmin is None or vmin > self.V[state - a]): 72 | vmin = self.V[state - a] 73 | vi = i 74 | return actions[vi if vi is not None else 1] 75 | 76 | def play(self, state): 77 | # PLay given the @state (int) 78 | if self.is_human is False: 79 | # Take random action 80 | if random.uniform(0, 1) < self.eps: 81 | action = randint(1, 3) 82 | else: # Or greedy action 83 | action = self.greedy_step(state) 84 | else: 85 | action = int(input("$>")) 86 | return action 87 | 88 | def add_transition(self, n_tuple): 89 | # Add one transition to the history: tuple (s, a , r, s') 90 | self.history.append(n_tuple) 91 | s, a, r, sp = n_tuple 92 | self.rewards.append(r) 93 | 94 | def train(self): 95 | if not self.trainable or self.is_human is True: 96 | return 97 | 98 | # Update the value function if this player is not human 99 | for transition in reversed(self.history): 100 | s, a, r, sp = transition 101 | if r == 0: 102 | self.V[s] = self.V[s] + 0.001*(self.V[sp] - self.V[s]) 103 | else: 104 | self.V[s] = self.V[s] + 0.001*(r - self.V[s]) 105 | 106 | self.history = [] 107 | 108 | def play(game, p1, p2, train=True): 109 | state = game.reset() 110 | players = [p1, p2] 111 | random.shuffle(players) 112 | p = 0 113 | while game.is_finished() is False: 114 | 115 | if players[p%2].is_human: 116 | game.display() 117 | 118 | action = players[p%2].play(state) 119 | n_state, reward = game.step(action) 120 | 121 | # Game is over. Ass stat 122 | if (reward != 0): 123 | # Update stat of the current player 124 | players[p%2].lose_nb += 1. if reward == -1 else 0 125 | players[p%2].win_nb += 1. if reward == 1 else 0 126 | # Update stat of the other player 127 | players[(p+1)%2].lose_nb += 1. if reward == 1 else 0 128 | players[(p+1)%2].win_nb += 1. 
if reward == -1 else 0 129 | 130 | # Add the reversed reward and the new state to the other player 131 | if p != 0: 132 | s, a, r, sp = players[(p+1)%2].history[-1] 133 | players[(p+1)%2].history[-1] = (s, a, reward * -1, n_state) 134 | 135 | players[p%2].add_transition((state, action, reward, None)) 136 | 137 | state = n_state 138 | p += 1 139 | 140 | if train: 141 | p1.train() 142 | p2.train() 143 | 144 | if __name__ == '__main__': 145 | game = StickGame(12) 146 | 147 | # PLayers to train 148 | p1 = StickPlayer(is_human=False, size=12, trainable=True) 149 | p2 = StickPlayer(is_human=False, size=12, trainable=True) 150 | # Human player and random player 151 | human = StickPlayer(is_human=True, size=12, trainable=False) 152 | random_player = StickPlayer(is_human=False, size=12, trainable=False) 153 | 154 | # Train the agent 155 | for i in range(0, 10000): 156 | if i % 10 == 0: 157 | p1.eps = max(p1.eps*0.996, 0.05) 158 | p2.eps = max(p2.eps*0.996, 0.05) 159 | play(game, p1, p2) 160 | p1.reset_stat() 161 | 162 | # Display the value function 163 | for key in p1.V: 164 | print(key, p1.V[key]) 165 | print("--------------------------") 166 | 167 | # Play agains a random player 168 | for _ in range(0, 1000): 169 | play(game, p1, random_player, train=False) 170 | print("p1 win rate", p1.win_nb/(p1.win_nb + p1.lose_nb)) 171 | print("p1 win mean", np.mean(p1.rewards)) 172 | 173 | # Play agains us 174 | while True: 175 | play(game, p1, human, train=False) 176 | -------------------------------------------------------------------------------- /tensorflow-js/teachable_machine.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
// Webcam
20 | 21 | 22 | 23 | 168 | 169 | 170 | -------------------------------------------------------------------------------- /rl/GridWorld.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Install the environement" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "# $> git clone https://github.com/maximecb/gym-minigrid.git\n", 17 | "# $> pip3 install -e gym-minigrid\n", 18 | "# $> pip instal gym" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "## Import libs and usefull methods" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "import gym\n", 35 | "import gym_minigrid\n", 36 | "\n", 37 | "import time\n", 38 | "import numpy as np\n", 39 | "\n", 40 | "from q_learning import state_to_key, update_Q, create_state_if_not_exist" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "## Demonstration of the environement" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 3, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "Q = {}\n", 57 | "env = gym.make(\"MiniGrid-Empty-6x6-v0\")\n", 58 | "\n", 59 | "for epoch in range(100):\n", 60 | " s = env.reset()\n", 61 | " s = state_to_key(s)\n", 62 | " done = False\n", 63 | "\n", 64 | " while not done:\n", 65 | " create_state_if_not_exist(Q, s)\n", 66 | " sp, r, done, info = env.step(np.random.randint(0, 4))\n", 67 | " sp = state_to_key(sp)\n", 68 | " s = sp\n", 69 | " env.render()" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 7, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "60\n", 82 | "[0. 0. 0. 0.]\n", 83 | "[0. 0. 0. 0.]\n", 84 | "[0. 0. 0. 0.]\n", 85 | "[0. 0. 0. 0.]\n", 86 | "[0. 0. 0. 0.]\n", 87 | "[0. 0. 0. 0.]\n", 88 | "[0. 0. 0. 0.]\n", 89 | "[0. 0. 0. 0.]\n", 90 | "[0. 0. 0. 0.]\n", 91 | "[0. 0. 0. 0.]\n", 92 | "[0. 0. 0. 0.]\n", 93 | "[0. 0. 0. 0.]\n", 94 | "[0. 0. 0. 0.]\n", 95 | "[0. 0. 0. 0.]\n", 96 | "[0. 0. 0. 0.]\n", 97 | "[0. 0. 0. 0.]\n", 98 | "[0. 0. 0. 0.]\n", 99 | "[0. 0. 0. 0.]\n", 100 | "[0. 0. 0. 0.]\n", 101 | "[0. 0. 0. 0.]\n", 102 | "[0. 0. 0. 0.]\n", 103 | "[0. 0. 0. 0.]\n", 104 | "[0. 0. 0. 0.]\n", 105 | "[0. 0. 0. 0.]\n", 106 | "[0. 0. 0. 0.]\n", 107 | "[0. 0. 0. 0.]\n", 108 | "[0. 0. 0. 0.]\n", 109 | "[0. 0. 0. 0.]\n", 110 | "[0. 0. 0. 0.]\n", 111 | "[0. 0. 0. 0.]\n", 112 | "[0. 0. 0. 0.]\n", 113 | "[0. 0. 0. 0.]\n", 114 | "[0. 0. 0. 0.]\n", 115 | "[0. 0. 0. 0.]\n", 116 | "[0. 0. 0. 0.]\n", 117 | "[0. 0. 0. 0.]\n", 118 | "[0. 0. 0. 0.]\n", 119 | "[0. 0. 0. 0.]\n", 120 | "[0. 0. 0. 0.]\n", 121 | "[0. 0. 0. 0.]\n", 122 | "[0. 0. 0. 0.]\n", 123 | "[0. 0. 0. 0.]\n", 124 | "[0. 0. 0. 0.]\n", 125 | "[0. 0. 0. 0.]\n", 126 | "[0. 0. 0. 0.]\n", 127 | "[0. 0. 0. 0.]\n", 128 | "[0. 0. 0. 0.]\n", 129 | "[0. 0. 0. 0.]\n", 130 | "[0. 0. 0. 0.]\n", 131 | "[0. 0. 0. 0.]\n", 132 | "[0. 0. 0. 0.]\n", 133 | "[0. 0. 0. 0.]\n", 134 | "[0. 0. 0. 0.]\n", 135 | "[0. 0. 0. 0.]\n", 136 | "[0. 0. 0. 0.]\n", 137 | "[0. 0. 0. 0.]\n", 138 | "[0. 0. 0. 0.]\n", 139 | "[0. 0. 0. 0.]\n", 140 | "[0. 0. 0. 0.]\n", 141 | "[0. 0. 0. 
0.]\n" 142 | ] 143 | } 144 | ], 145 | "source": [ 146 | "print(len(Q))\n", 147 | "for state in Q:\n", 148 | " print(Q[state])" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "# Methods you can use" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 11, 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "name": "stdout", 165 | "output_type": "stream", 166 | "text": [ 167 | "\n", 168 | "\n", 169 | "Actions for state s [0. 0. 0. 0.]\n" 170 | ] 171 | } 172 | ], 173 | "source": [ 174 | "Q = {}\n", 175 | "env = gym.make(\"MiniGrid-Empty-6x6-v0\")\n", 176 | "s = env.reset()\n", 177 | "print(type(s))\n", 178 | "s = state_to_key(s)\n", 179 | "print(type(s))\n", 180 | "# Create the state in the Q table if this state doesn't exist yet\n", 181 | "create_state_if_not_exist(Q, s)\n", 182 | "print(\"Actions for state s\", Q[s])\n", 183 | "\n", 184 | "# This method can be used to update the Q Table\n", 185 | "update_Q(Q=Q, s=s, sp=s, a=0, r=0, done=False)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "## Train the agent" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 14, 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "name": "stdout", 202 | "output_type": "stream", 203 | "text": [ 204 | "\r", 205 | " epoch= 0\n", 206 | "\r", 207 | " epoch= 1\n" 208 | ] 209 | } 210 | ], 211 | "source": [ 212 | "Q = {}\n", 213 | "env = gym.make(\"MiniGrid-Empty-6x6-v0\")\n", 214 | "\n", 215 | "epochs_nb = 2\n", 216 | "for epoch in range(epochs_nb):\n", 217 | " s = env.reset()\n", 218 | " s = state_to_key(s)\n", 219 | " done = False\n", 220 | "\n", 221 | " print(\"\\r epoch=\", epoch)\n", 222 | " \n", 223 | " while not done:\n", 224 | " create_state_if_not_exist(Q, s)\n", 225 | " # TODO\n", 226 | " # Take an action here with epsilon greedy instead of randint\n", 227 | " sp, r, done, info = env.step(np.random.randint(0, 4))\n", 228 | " sp = state_to_key(sp)\n", 229 | " # TODO\n", 230 | " # Call the update method\n", 231 | " s = sp" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "## Check the result" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 15, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [ 247 | "s = env.reset()\n", 248 | "s = state_to_key(s)\n", 249 | "done = False\n", 250 | "\n", 251 | "while not done:\n", 252 | " create_state_if_not_exist(Q, s)\n", 253 | " sp, r, done, info = env.step(np.random.randint(0, 4))\n", 254 | " sp = state_to_key(sp)\n", 255 | " s = sp\n", 256 | " env.render()" 257 | ] 258 | } 259 | ], 260 | "metadata": { 261 | "kernelspec": { 262 | "display_name": "Python 3", 263 | "language": "python", 264 | "name": "python3" 265 | }, 266 | "language_info": { 267 | "codemirror_mode": { 268 | "name": "ipython", 269 | "version": 3 270 | }, 271 | "file_extension": ".py", 272 | "mimetype": "text/x-python", 273 | "name": "python", 274 | "nbconvert_exporter": "python", 275 | "pygments_lexer": "ipython3", 276 | "version": "3.6.7" 277 | } 278 | }, 279 | "nbformat": 4, 280 | "nbformat_minor": 2 281 | } 282 | -------------------------------------------------------------------------------- /rl/tictactoe.py: -------------------------------------------------------------------------------- 1 | from random import randint 2 | import random 3 | import numpy as np 4 | 5 | class TicTacToeGame(object): 6 | """ 7 | StickGame. 
8 | """ 9 | 10 | def __init__(self): 11 | # @nb Number of stick to play with 12 | super(TicTacToeGame, self).__init__() 13 | self.state = [0, 0, 0, 0, 0, 0, 0, 0, 0] 14 | self.finished = False 15 | 16 | def is_finished(self): 17 | # Check if the game is over @return Boolean 18 | return self.finished 19 | 20 | def reset(self): 21 | # Reset the state of the game 22 | self.state = [0, 0, 0, 0, 0, 0, 0, 0, 0] 23 | self.finished = False 24 | return self.state_to_nb(self.state) 25 | 26 | def display(self): 27 | # Display the state of the game 28 | print("State id:%s"%self.state_to_nb(self.state)) 29 | display = "%s %s %s \n%s %s %s \n%s %s %s" % tuple(self.state) 30 | print(display.replace("1", "O").replace("2", "X").replace("0", "-")) 31 | 32 | def next_actions(self): 33 | # Return the next possible actions 34 | actions = [] 35 | for p in range(0, 9): 36 | if self.state[p] == 0: 37 | actions.append(p) 38 | return actions 39 | 40 | def next_possible_state(self, action, p): 41 | # Return the next possible actions 42 | state = [v for v in self.state] 43 | state[action] = p 44 | return state 45 | 46 | def state_to_nb(self, state): 47 | return str(state) 48 | i = 0 49 | nb = 0 50 | for p in state: 51 | nb += p*3**i 52 | i += 1 53 | return nb 54 | 55 | def step(self, action, p): 56 | self.state[action] = p 57 | st = self.state 58 | n_actions = self.next_actions() 59 | if len(n_actions) == 0: 60 | self.finished = True 61 | if (st[0] == p and st[1] == p and st[2] == p) or \ 62 | (st[3] == p and st[4] == p and st[5] == p) or \ 63 | (st[6] == p and st[7] == p and st[8] == p) or \ 64 | (st[0] == p and st[3] == p and st[6] == p) or \ 65 | (st[1] == p and st[4] == p and st[7] == p) or \ 66 | (st[2] == p and st[5] == p and st[8] == p) or \ 67 | (st[0] == p and st[4] == p and st[8] == p) or \ 68 | (st[2] == p and st[4] == p and st[6] == p): 69 | self.finished = True 70 | return self.state_to_nb(self.state), 1 71 | else: 72 | return self.state_to_nb(self.state), 0 73 | 74 | class StickPlayer(object): 75 | """ 76 | Stick Player 77 | """ 78 | 79 | def __init__(self, is_human, p, trainable=True): 80 | # @nb Number of stick to play with 81 | super(StickPlayer, self).__init__() 82 | self.is_human = is_human 83 | self.history = [] 84 | self.V = {} 85 | self.p = p 86 | self.win_nb = 0. 87 | self.lose_nb = 0. 
88 | self.rewards = [] 89 | self.eps = 0.99 90 | self.trainable = trainable 91 | 92 | def reset_stat(self): 93 | # Reset stat 94 | self.win_nb = 0 95 | self.lose_nb = 0 96 | self.rewards = [] 97 | 98 | def greedy_step(self, state, game, display_value=False): 99 | # Greedy step 100 | actions = game.next_actions() 101 | vboard = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] 102 | vmax = None 103 | vi = None 104 | for i in range(0, len(actions)): 105 | a = actions[i] 106 | n_state = game.next_possible_state(a, self.p) 107 | nb = game.state_to_nb(n_state) 108 | 109 | if nb not in self.V: 110 | self.V[nb] = 0 111 | 112 | vboard[a] = self.V[nb] 113 | if vmax is None or vmax < self.V[nb]: 114 | vmax = self.V[nb] 115 | vi = i 116 | 117 | if display_value: 118 | display = "%.2f %.2f %.2f \n%.2f %.2f %.2f \n%.2f %.2f %.2f" % tuple(vboard) 119 | print(display) 120 | 121 | return actions[vi] 122 | 123 | def play(self, state, game, display_value=False): 124 | # PLay given the @state (int) 125 | if self.is_human is False: 126 | # Take random action 127 | if random.uniform(0, 1) < self.eps: 128 | action = random.choice(game.next_actions()) 129 | else: # Or greedy action 130 | action = self.greedy_step(state, game, display_value) 131 | else: 132 | action = int(input("$>")) 133 | return action 134 | 135 | def add_transition(self, n_tuple): 136 | # Add one transition to the history: tuple (s, a , r, s') 137 | self.history.append(n_tuple) 138 | 139 | def train(self, display_value): 140 | if not self.trainable or self.is_human is True: 141 | return 142 | 143 | # Update the value function if this player is not human 144 | f = 0 145 | for transition in reversed(self.history): 146 | s, r, sp = transition 147 | 148 | if s not in self.V: 149 | self.V[s] = 0 150 | if sp not in self.V: 151 | self.V[sp] = 0 152 | 153 | if f == 0: 154 | # For the last element, we train it toward the reward 155 | self.V[sp] = self.V[sp] + 0.01*(r - self.V[sp]) 156 | self.V[s] = self.V[s] + 0.01*(self.V[sp] - self.V[s]) 157 | f += 1 158 | 159 | 160 | self.history = [] 161 | 162 | def play(game, p1, p2, train=True, display_value=False): 163 | state = game.reset() 164 | players = [p1, p2] 165 | random.shuffle(players) 166 | p = 0 167 | while game.is_finished() is False: 168 | 169 | if players[0].is_human or players[1].is_human: 170 | game.display() 171 | 172 | if state not in players[p%2].V: 173 | players[p%2].V[state] = 0 174 | action = players[p%2].play(state, game, display_value) 175 | n_state, reward = game.step(action, players[p%2].p) 176 | 177 | # Game is over. 
Ass stat 178 | if (reward == 1): 179 | players[p%2].win_nb += 1 180 | 181 | if display_value: 182 | print("reward", reward) 183 | 184 | players[p%2].add_transition((state, reward, n_state)) 185 | players[(p+1)%2].add_transition((state, reward * -1, n_state)) 186 | 187 | state = n_state 188 | p += 1 189 | 190 | if train: 191 | p1.train(display_value=display_value) 192 | p2.train(display_value=display_value) 193 | 194 | if __name__ == '__main__': 195 | game = TicTacToeGame() 196 | 197 | # Players to train 198 | p1 = StickPlayer(is_human=False, p=1, trainable=True) 199 | p2 = StickPlayer(is_human=False, p=2, trainable=True) 200 | # Human player and random player 201 | human = StickPlayer(is_human=True, p=2, trainable=False) 202 | random_player = StickPlayer(is_human=False, p=2, trainable=False) 203 | 204 | # Train the agent 205 | for i in range(0, 100000): 206 | if i % 10 == 0: 207 | p1.eps = max(0.05, p1.eps*0.999) 208 | p2.eps = max(0.05, p2.eps*0.999) 209 | if i % 1000 == 0: 210 | p1.reset_stat() 211 | # Play agains a random player 212 | for _ in range(0, 100): 213 | play(game, p1, random_player, train=False) 214 | print("eps=%sp1 win rate=%s" % (p1.eps, p1.win_nb/100.)) 215 | 216 | play(game, p1, p2) 217 | 218 | p1.eps = 0.0 219 | p1.reset_stat() 220 | # Play agains a random player 221 | for _ in range(0, 10000): 222 | play(game, p1, random_player, train=False) 223 | print("eps=%sp1 win rate=%s" % (p1.eps, p1.win_nb/10000.)) 224 | 225 | # Play agains us 226 | while True: 227 | play(game, p1, human, train=True, display_value=True) 228 | -------------------------------------------------------------------------------- /rl/q_learning_nn.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Metacar: Documentation 6 | 7 | 8 | 9 | 10 | 11 | 12 |
13 | 14 | 15 | 204 | 205 | 206 | 207 | -------------------------------------------------------------------------------- /rl/dqn.py: -------------------------------------------------------------------------------- 1 | from collections import deque, namedtuple 2 | from gym.wrappers import Monitor 3 | import matplotlib.pyplot as plt 4 | import tensorflow as tf 5 | import numpy as np 6 | import itertools 7 | import random 8 | import gym 9 | import os 10 | import sys 11 | 12 | VALID_ACTIONS = [0, 1, 2, 3] 13 | 14 | 15 | from tensorflow.python.client import device_lib 16 | 17 | def get_available_gpus(): 18 | local_device_protos = device_lib.list_local_devices() 19 | return [x.name for x in local_device_protos if x.device_type == 'GPU'] 20 | get_available_gpus() 21 | 22 | 23 | class StateProcessor(): 24 | 25 | def __init__(self): 26 | with tf.variable_scope("process"): 27 | self.input_state = tf.placeholder(shape=[210, 160, 3], dtype=tf.uint8, name="input_process") 28 | self.output = tf.image.rgb_to_grayscale(self.input_state) 29 | self.output = tf.image.crop_to_bounding_box(self.output, 34, 0, 160, 160) 30 | self.output = tf.image.resize_images( 31 | self.output, [84, 84], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) 32 | self.output = tf.squeeze(self.output) 33 | 34 | def process(self, sess, state): 35 | return sess.run(self.output, { self.input_state: state }) 36 | 37 | import gym 38 | import time 39 | 40 | env = gym.make('Breakout-v0') 41 | env.reset() 42 | 43 | for _ in range(1000): 44 | observation, reward, done, info = env.step(env.action_space.sample()) 45 | if done: 46 | break 47 | 48 | class DQN(): 49 | 50 | def __init__(self, scope): 51 | self.scope = scope 52 | with tf.variable_scope(self.scope): 53 | self._build_model() 54 | 55 | def _build_model(self): 56 | # 4 Last frames of the game 57 | self.X_pl = tf.placeholder(shape=[None, 84, 84, 4], dtype=tf.uint8, name="X") 58 | # The TD target value 59 | self.y_pl = tf.placeholder(shape=[None], dtype=tf.float32, name="y") 60 | # Integer id of which action was selected 61 | self.actions_pl = tf.placeholder(shape=[None], dtype=tf.int32, name="actions") 62 | 63 | # Rescale the image 64 | X = tf.to_float(self.X_pl) / 255.0 65 | # Get the batch size 66 | batch_size = tf.shape(self.X_pl)[0] 67 | 68 | # Three convolutional layers 69 | conv1 = tf.layers.conv2d(X, 32, 8, 4, activation=tf.nn.relu) 70 | conv2 = tf.layers.conv2d(conv1, 64, 4, 2, activation=tf.nn.relu) 71 | conv3 = tf.layers.conv2d(conv2, 64, 3, 1, activation=tf.nn.relu) 72 | 73 | # Fully connected layers 74 | flattened = tf.contrib.layers.flatten(conv3) 75 | fc1 = tf.layers.dense(flattened, 512, activation=tf.nn.relu) 76 | self.predictions = tf.layers.dense(fc1, len(VALID_ACTIONS)) 77 | tf.identity(self.predictions, name="predictions") 78 | 79 | # Get the predictions for the chosen actions only 80 | gather_indices = tf.range(batch_size) * tf.shape(self.predictions)[1] + self.actions_pl 81 | self.action_predictions = tf.gather(tf.reshape(self.predictions, [-1]), gather_indices) 82 | 83 | # Calculate the loss 84 | self.losses = tf.squared_difference(self.y_pl, self.action_predictions) 85 | self.loss = tf.reduce_mean(self.losses) 86 | 87 | # Optimizer Parameters from original paper 88 | self.optimizer = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6) 89 | self.train_op = self.optimizer.minimize(self.loss) 90 | 91 | def predict(self, sess, s): 92 | return sess.run(self.predictions, { self.X_pl: s }) 93 | 94 | def update(self, sess, s, a, y): 95 | feed_dict = { self.X_pl: s, 
self.y_pl: y, self.actions_pl: a } 96 | ops = [self.train_op, self.loss] 97 | _, loss = sess.run(ops, feed_dict) 98 | return loss 99 | 100 | def copy_model_parameters(sess, estimator1, estimator2): 101 | e1_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator1.scope)] 102 | e1_params = sorted(e1_params, key=lambda v: v.name) 103 | e2_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator2.scope)] 104 | e2_params = sorted(e2_params, key=lambda v: v.name) 105 | 106 | update_ops = [] 107 | for e1_v, e2_v in zip(e1_params, e2_params): 108 | op = e2_v.assign(e1_v) 109 | update_ops.append(op) 110 | 111 | sess.run(update_ops) 112 | 113 | tf.reset_default_graph() 114 | 115 | 116 | # DQN 117 | dqn = DQN(scope="dqn") 118 | # DQN target 119 | target_dqn = DQN(scope="target_dqn") 120 | 121 | 122 | # State processor 123 | state_processor = StateProcessor() 124 | 125 | num_episodes = 10000 126 | 127 | replay_memory_size = 250000 128 | replay_memory_init_size = 50000 129 | 130 | update_target_estimator_every = 10000 131 | 132 | epsilon_start = 1.0 133 | epsilon_end = 0.1 134 | 135 | 136 | epsilon_decay_steps = 500000 137 | discount_factor = 0.99 138 | batch_size = 32 139 | 140 | def make_epsilon_greedy_policy(estimator, nA): 141 | def policy_fn(sess, observation, epsilon): 142 | A = np.ones(nA, dtype=float) * epsilon / nA 143 | q_values = estimator.predict(sess, np.expand_dims(observation, 0))[0] 144 | best_action = np.argmax(q_values) 145 | A[best_action] += (1.0 - epsilon) 146 | return A 147 | return policy_fn 148 | 149 | #saver = tf.train.Saver() 150 | start_i_episode = 0 151 | opti_step = -1 152 | 153 | # The replay memory 154 | replay_memory = [] 155 | 156 | 157 | 158 | with tf.Session() as sess: 159 | sess.run(tf.global_variables_initializer()) 160 | 161 | Transition = namedtuple("Transition", ["state", "action", "reward", "next_state", "done"]) 162 | 163 | 164 | # Used to save the model 165 | checkpoint_dir = os.path.join("./", "checkpoints") 166 | checkpoint_path = os.path.join(checkpoint_dir, "model") 167 | 168 | if not os.path.exists(checkpoint_dir): 169 | os.makedirs(checkpoint_dir) 170 | 171 | saver = tf.train.Saver() 172 | # Load a previous checkpoint if we find one 173 | latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir) 174 | 175 | # Epsilon decay 176 | epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps) 177 | 178 | # Policy 179 | policy = make_epsilon_greedy_policy(dqn, len(VALID_ACTIONS)) 180 | 181 | epi_reward = [] 182 | best_epi_reward = 0 183 | 184 | for i_episode in range(start_i_episode, num_episodes): 185 | # Reset the environment 186 | state = env.reset() 187 | state = state_processor.process(sess, state) 188 | state = np.stack([state] * 4, axis=2) 189 | loss = None 190 | done = False 191 | r_sum = 0 192 | mean_epi_reward = np.mean(epi_reward) 193 | if best_epi_reward < mean_epi_reward: 194 | best_epi_reward = mean_epi_reward 195 | saver.save(tf.get_default_session(), checkpoint_path) 196 | 197 | len_replay_memory = len(replay_memory) 198 | while not done: 199 | # Get the epsilon for this step 200 | epsilon = epsilons[min(opti_step+1, epsilon_decay_steps-1)] 201 | 202 | 203 | # Update the target network 204 | if opti_step % update_target_estimator_every == 0: 205 | copy_model_parameters(sess, dqn, target_dqn) 206 | 207 | print("\r Epsilon ({}) ReplayMemorySize : ({}) rSum: ({}) best_epi_reward: ({}) OptiStep ({}) @ Episode {}/{}, loss: {}".format(epsilon, len_replay_memory, mean_epi_reward, 
best_epi_reward, opti_step, i_episode + 1, num_episodes, loss), end="") 208 | sys.stdout.flush() 209 | 210 | 211 | # Select an action with eps-greedy 212 | action_probs = policy(sess, state, epsilon) 213 | action = np.random.choice(np.arange(len(action_probs)), p=action_probs) 214 | 215 | # Step in the env with this action 216 | next_state, reward, done, _ = env.step(VALID_ACTIONS[action]) 217 | r_sum += reward 218 | 219 | # Add this action to the stack of images 220 | next_state = state_processor.process(sess, next_state) 221 | next_state = np.append(state[:,:,1:], np.expand_dims(next_state, 2), axis=2) 222 | 223 | # If our replay memory is full, pop the first element 224 | if len(replay_memory) == replay_memory_size: 225 | replay_memory.pop(0) 226 | 227 | 228 | # Save transition to replay memory 229 | replay_memory.append(Transition(state, action, reward, next_state, done)) 230 | 231 | if len_replay_memory > replay_memory_init_size: 232 | # Sample a minibatch from the replay memory 233 | samples = random.sample(replay_memory, batch_size) 234 | states_batch, action_batch, reward_batch, next_states_batch, done_batch = map(np.array, zip(*samples)) 235 | 236 | # We compute the next q value with 237 | q_values_next_target = target_dqn.predict(sess, next_states_batch) 238 | t_best_actions = np.argmax(q_values_next_target, axis=1) 239 | targets_batch = reward_batch + np.invert(done_batch).astype(np.float32) * discount_factor * q_values_next_target[np.arange(batch_size), t_best_actions] 240 | 241 | # Perform gradient descent update 242 | states_batch = np.array(states_batch) 243 | loss = dqn.update(sess, states_batch, action_batch, targets_batch) 244 | 245 | opti_step += 1 246 | 247 | state = next_state 248 | if done: 249 | break 250 | 251 | epi_reward.append(r_sum) 252 | if len(epi_reward) > 100: 253 | epi_reward = epi_reward[1:] 254 | -------------------------------------------------------------------------------- /tensorflow/Eager Execution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import tensorflow as tf\n", 10 | "tf.enable_eager_execution()" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "name": "stdout", 20 | "output_type": "stream", 21 | "text": [ 22 | "hello, [[4.]]\n" 23 | ] 24 | } 25 | ], 26 | "source": [ 27 | "tf.executing_eagerly() # => True\n", 28 | "\n", 29 | "x = [[2.]]\n", 30 | "m = tf.matmul(x, x)\n", 31 | "print(\"hello, {}\".format(m)) # => \"hello, [[4.]]\"" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "name": "stdout", 41 | "output_type": "stream", 42 | "text": [ 43 | "tf.Tensor(\n", 44 | "[[1 2]\n", 45 | " [3 4]], shape=(2, 2), dtype=int32)\n", 46 | "tf.Tensor(\n", 47 | "[[2 3]\n", 48 | " [4 5]], shape=(2, 2), dtype=int32)\n", 49 | "tf.Tensor(\n", 50 | "[[ 2 6]\n", 51 | " [12 20]], shape=(2, 2), dtype=int32)\n", 52 | "[[ 2 6]\n", 53 | " [12 20]]\n", 54 | "[[1 2]\n", 55 | " [3 4]]\n" 56 | ] 57 | } 58 | ], 59 | "source": [ 60 | "a = tf.constant([[1, 2],\n", 61 | " [3, 4]])\n", 62 | "print(a)\n", 63 | "# => tf.Tensor([[1 2]\n", 64 | "# [3 4]], shape=(2, 2), dtype=int32)\n", 65 | "\n", 66 | "# Broadcasting support\n", 67 | "b = tf.add(a, 1)\n", 68 | "print(b)\n", 69 | "# => tf.Tensor([[2 3]\n", 70 | "# [4 5]], shape=(2, 2), dtype=int32)\n", 
71 | "\n", 72 | "# Operator overloading is supported\n", 73 | "print(a * b)\n", 74 | "# => tf.Tensor([[ 2 6]\n", 75 | "# [12 20]], shape=(2, 2), dtype=int32)\n", 76 | "\n", 77 | "# Use NumPy values\n", 78 | "import numpy as np\n", 79 | "\n", 80 | "c = np.multiply(a, b)\n", 81 | "print(c)\n", 82 | "# => [[ 2 6]\n", 83 | "# [12 20]]\n", 84 | "\n", 85 | "# Obtain numpy value from a tensor:\n", 86 | "print(a.numpy())\n", 87 | "# => [[1 2]\n", 88 | "# [3 4]]" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "## Eager training" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 10, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "name": "stdout", 105 | "output_type": "stream", 106 | "text": [ 107 | "tf.Tensor([[2.]], shape=(1, 1), dtype=float32)\n" 108 | ] 109 | } 110 | ], 111 | "source": [ 112 | "tfe = tf.contrib.eager\n", 113 | "w = tfe.Variable([[1.0]])\n", 114 | "\n", 115 | "with tf.GradientTape() as tape:\n", 116 | " loss = w * w\n", 117 | "\n", 118 | "grad = tape.gradient(loss, w)\n", 119 | "print(grad) # => tf.Tensor([[ 2.]], shape=(1, 1), dtype=float32)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "Here's an example of tf.GradientTape that records forward-pass operations to train a simple model:" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 12, 132 | "metadata": {}, 133 | "outputs": [ 134 | { 135 | "name": "stdout", 136 | "output_type": "stream", 137 | "text": [ 138 | "Initial loss: 69.297\n", 139 | "Loss at step 000: 66.579\n", 140 | "Loss at step 020: 30.117\n", 141 | "Loss at step 040: 13.941\n", 142 | "Loss at step 060: 6.764\n", 143 | "Loss at step 080: 3.579\n", 144 | "Loss at step 100: 2.167\n", 145 | "Loss at step 120: 1.540\n", 146 | "Loss at step 140: 1.261\n", 147 | "Loss at step 160: 1.138\n", 148 | "Loss at step 180: 1.083\n", 149 | "Final loss: 1.060\n", 150 | "W = 3.0000877380371094, B = 2.153679609298706\n" 151 | ] 152 | } 153 | ], 154 | "source": [ 155 | "# A toy dataset of points around 3 * x + 2\n", 156 | "NUM_EXAMPLES = 1000\n", 157 | "training_inputs = tf.random_normal([NUM_EXAMPLES])\n", 158 | "noise = tf.random_normal([NUM_EXAMPLES])\n", 159 | "training_outputs = training_inputs * 3 + 2 + noise\n", 160 | "\n", 161 | "def prediction(input, weight, bias):\n", 162 | " return input * weight + bias\n", 163 | "\n", 164 | "# A loss function using mean-squared error\n", 165 | "def loss(weights, biases):\n", 166 | " error = prediction(training_inputs, weights, biases) - training_outputs\n", 167 | " return tf.reduce_mean(tf.square(error))\n", 168 | "\n", 169 | "# Return the derivative of loss with respect to weight and bias\n", 170 | "def grad(weights, biases):\n", 171 | " with tf.GradientTape() as tape:\n", 172 | " loss_value = loss(weights, biases)\n", 173 | " return tape.gradient(loss_value, [weights, biases])\n", 174 | "\n", 175 | "train_steps = 200\n", 176 | "learning_rate = 0.01\n", 177 | "# Start with arbitrary values for W and B on the same batch of data\n", 178 | "W = tfe.Variable(5.)\n", 179 | "B = tfe.Variable(10.)\n", 180 | "\n", 181 | "print(\"Initial loss: {:.3f}\".format(loss(W, B)))\n", 182 | "\n", 183 | "for i in range(train_steps):\n", 184 | " dW, dB = grad(W, B)\n", 185 | " W.assign_sub(dW * learning_rate)\n", 186 | " B.assign_sub(dB * learning_rate)\n", 187 | " if i % 20 == 0:\n", 188 | " print(\"Loss at step {:03d}: {:.3f}\".format(i, loss(W, B)))\n", 189 | "\n", 190 | "print(\"Final loss: 
{:.3f}\".format(loss(W, B)))\n", 191 | "print(\"W = {}, B = {}\".format(W.numpy(), B.numpy()))" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "### Variables and optimizers" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 20, 204 | "metadata": {}, 205 | "outputs": [ 206 | { 207 | "name": "stdout", 208 | "output_type": "stream", 209 | "text": [ 210 | "Initial loss: 68.871\n", 211 | "Loss at step 000: 66.188\n", 212 | "Loss at step 020: 30.088\n", 213 | "Loss at step 040: 13.975\n", 214 | "Loss at step 060: 6.783\n", 215 | "Loss at step 080: 3.573\n", 216 | "Loss at step 100: 2.140\n", 217 | "Loss at step 120: 1.500\n", 218 | "Loss at step 140: 1.215\n", 219 | "Loss at step 160: 1.087\n", 220 | "Loss at step 180: 1.030\n", 221 | "Loss at step 200: 1.005\n", 222 | "Loss at step 220: 0.993\n", 223 | "Loss at step 240: 0.988\n", 224 | "Loss at step 260: 0.986\n", 225 | "Loss at step 280: 0.985\n", 226 | "Final loss: 0.985\n", 227 | "W = 3.0072834491729736, B = 2.017597198486328\n" 228 | ] 229 | } 230 | ], 231 | "source": [ 232 | "class Model(tf.keras.Model):\n", 233 | " def __init__(self):\n", 234 | " super(Model, self).__init__()\n", 235 | " self.W = tfe.Variable(5., name='weight')\n", 236 | " self.B = tfe.Variable(10., name='bias')\n", 237 | " def call(self, inputs):\n", 238 | " return inputs * self.W + self.B\n", 239 | "\n", 240 | "# A toy dataset of points around 3 * x + 2\n", 241 | "NUM_EXAMPLES = 2000\n", 242 | "training_inputs = tf.random_normal([NUM_EXAMPLES])\n", 243 | "noise = tf.random_normal([NUM_EXAMPLES])\n", 244 | "training_outputs = training_inputs * 3 + 2 + noise\n", 245 | "\n", 246 | "# The loss function to be optimized\n", 247 | "def loss(model, inputs, targets):\n", 248 | " error = model(inputs) - targets\n", 249 | " return tf.reduce_mean(tf.square(error))\n", 250 | "\n", 251 | "def grad(model, inputs, targets):\n", 252 | " with tf.GradientTape() as tape:\n", 253 | " loss_value = loss(model, inputs, targets)\n", 254 | " return tape.gradient(loss_value, [model.W, model.B])\n", 255 | "\n", 256 | "# Define:\n", 257 | "# 1. A model.\n", 258 | "# 2. Derivatives of a loss function with respect to model parameters.\n", 259 | "# 3. 
A strategy for updating the variables based on the derivatives.\n", 260 | "model = Model()\n", 261 | "optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)\n", 262 | "\n", 263 | "print(\"Initial loss: {:.3f}\".format(loss(model, training_inputs, training_outputs)))\n", 264 | "\n", 265 | "# Training loop\n", 266 | "for i in range(300):\n", 267 | " grads = grad(model, training_inputs, training_outputs)\n", 268 | " optimizer.apply_gradients(zip(grads, [model.W, model.B]),\n", 269 | " global_step=tf.train.get_or_create_global_step())\n", 270 | " #optimizer.minimize(lambda: loss(model, training_inputs, training_outputs))\n", 271 | " if i % 20 == 0:\n", 272 | " print(\"Loss at step {:03d}: {:.3f}\".format(i, loss(model, training_inputs, training_outputs)))\n", 273 | "\n", 274 | "print(\"Final loss: {:.3f}\".format(loss(model, training_inputs, training_outputs)))\n", 275 | "print(\"W = {}, B = {}\".format(model.W.numpy(), model.B.numpy()))" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": null, 281 | "metadata": {}, 282 | "outputs": [], 283 | "source": [] 284 | } 285 | ], 286 | "metadata": { 287 | "kernelspec": { 288 | "display_name": "Python 3", 289 | "language": "python", 290 | "name": "python3" 291 | }, 292 | "language_info": { 293 | "codemirror_mode": { 294 | "name": "ipython", 295 | "version": 3 296 | }, 297 | "file_extension": ".py", 298 | "mimetype": "text/x-python", 299 | "name": "python", 300 | "nbconvert_exporter": "python", 301 | "pygments_lexer": "ipython3", 302 | "version": "3.5.2" 303 | } 304 | }, 305 | "nbformat": 4, 306 | "nbformat_minor": 2 307 | } 308 | -------------------------------------------------------------------------------- /rl/q_learning_visu.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 98 | 99 | 100 | 101 |
[q_learning_visu.html: Q-learning visualization page whose HTML markup and inline script were stripped during extraction. The recoverable content is a grid of nine state panels (1-9), each listing four action values (←, →, ↑, ↓) initialized to 0.]
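Since only the rendered values of that page survive, here is a minimal, self-contained sketch of the tabular Q-learning update that a visualization like this typically animates. The 3x3 layout, the goal position, and the hyperparameters below are illustrative assumptions; they are not taken from the stripped page or from the repository's q_learning.py.

```python
# Minimal tabular Q-learning sketch on a 3x3 grid (9 states, 4 actions), matching the
# layout the stripped page displays. Goal cell and hyperparameters are assumptions.
import random

ACTIONS = ["left", "right", "up", "down"]
GOAL = 8                                   # bottom-right cell, assumed terminal (+1 reward)
ALPHA, GAMMA = 0.1, 0.9
eps = 1.0                                  # exploration rate, decayed per episode

# Q-table initialized to 0, as shown on the page
Q = {s: {a: 0.0 for a in ACTIONS} for s in range(9)}

def step(s, a):
    """Move on the 3x3 grid; bumping into a wall leaves the position unchanged."""
    y, x = divmod(s, 3)
    if a == "left":  x = max(x - 1, 0)
    if a == "right": x = min(x + 1, 2)
    if a == "up":    y = max(y - 1, 0)
    if a == "down":  y = min(y + 1, 2)
    s2 = y * 3 + x
    return s2, (1.0 if s2 == GOAL else 0.0), s2 == GOAL

for episode in range(500):
    s = 0
    for _ in range(100):                   # step cap keeps early random episodes short
        # Epsilon-greedy action selection
        a = random.choice(ACTIONS) if random.random() < eps else max(Q[s], key=Q[s].get)
        s2, r, done = step(s, a)
        # Q-learning update: Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
        target = r + (0.0 if done else GAMMA * max(Q[s2].values()))
        Q[s][a] += ALPHA * (target - Q[s][a])
        s = s2
        if done:
            break
    eps = max(0.05, eps * 0.99)

print(Q[0])                                # learned action values for the start state
```

Running the script prints the learned action values of the start state; on the page above, each of the nine panels would display the corresponding row of this Q-table.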
344 | 345 | 438 | 439 | 440 | -------------------------------------------------------------------------------- /rl/policy_gradient.py: -------------------------------------------------------------------------------- 1 | from random import randint 2 | import tensorflow as tf 3 | import numpy as np 4 | import scipy.signal 5 | 6 | """ 7 | Exemple of the Policy Gradient Algorithm 8 | """ 9 | 10 | class Buffer: 11 | 12 | def __init__(self, obs_dim, act_dim, size, gamma=0.99, lam=0.95): 13 | self.obs_buf = np.zeros(Buffer.combined_shape(size, obs_dim), dtype=np.float32) 14 | # Actions buffer 15 | self.act_buf = np.zeros(size, dtype=np.float32) 16 | # Advantages buffer 17 | self.adv_buf = np.zeros(size, dtype=np.float32) 18 | # Rewards buffer 19 | self.rew_buf = np.zeros(size, dtype=np.float32) 20 | # Log probability of action a with the policy 21 | self.logp_buf = np.zeros(size, dtype=np.float32) 22 | # Gamma and lam to compute the advantage 23 | self.gamma, self.lam = gamma, lam 24 | # ptr: Position to insert the next tuple 25 | # path_start_idx Posittion of the current trajectory 26 | # max_size Max size of the buffer 27 | self.ptr, self.path_start_idx, self.max_size = 0, 0, size 28 | 29 | @staticmethod 30 | def discount_cumsum(x, discount): 31 | """ 32 | x = [x0, x1, x2] 33 | output: [x0 + discount * x1 + discount^2 * x2, x1 + discount * x2, x2] 34 | """ 35 | return scipy.signal.lfilter([1], [1, float(-discount)], x[::-1], axis=0)[::-1] 36 | 37 | @staticmethod 38 | def combined_shape(length, shape=None): 39 | if shape is None: 40 | return (length,) 41 | return (length, shape) if np.isscalar(shape) else (length, *shape) 42 | 43 | def store(self, obs, act, rew, logp): 44 | """ 45 | Append one timestep of agent-environment interaction to the buffer. 46 | """ 47 | assert self.ptr < self.max_size 48 | self.obs_buf[self.ptr] = obs 49 | self.act_buf[self.ptr] = act 50 | self.rew_buf[self.ptr] = rew 51 | self.logp_buf[self.ptr] = logp 52 | self.ptr += 1 53 | 54 | def finish_path(self, last_val=0): 55 | # Select the path 56 | path_slice = slice(self.path_start_idx, self.ptr) 57 | # Append the last_val to the trajectory 58 | rews = np.append(self.rew_buf[path_slice], last_val) 59 | # Advantage 60 | self.adv_buf[path_slice] = Buffer.discount_cumsum(rews, self.gamma)[:-1] 61 | self.path_start_idx = self.ptr 62 | 63 | def get(self): 64 | assert self.ptr == self.max_size # buffer has to be full before you can get 65 | self.ptr, self.path_start_idx = 0, 0 66 | # the next two lines implement the advantage normalization trick 67 | # Normalize the Advantage 68 | self.adv_buf = (self.adv_buf - np.mean(self.adv_buf)) / np.std(self.adv_buf) 69 | return self.obs_buf, self.act_buf, self.adv_buf, self.logp_buf 70 | 71 | 72 | class PolicyGradient(object): 73 | """ 74 | Implementation of Policy gradient algorithm 75 | """ 76 | def __init__(self, input_space, action_space, pi_lr, buffer_size, seed): 77 | super(PolicyGradient, self).__init__() 78 | 79 | # Stored the spaces 80 | self.input_space = input_space 81 | self.action_space = action_space 82 | self.seed = seed 83 | # NET Buffer defined above 84 | self.buffer = Buffer( 85 | obs_dim=input_space, 86 | act_dim=action_space, 87 | size=buffer_size 88 | ) 89 | # Learning rate of the policy network 90 | self.pi_lr = pi_lr 91 | # The tensorflow session (set later) 92 | self.sess = None 93 | # Apply a random seed on tensorflow and numpy 94 | tf.set_random_seed(42) 95 | np.random.seed(42) 96 | 97 | def compile(self): 98 | """ 99 | Compile the model 100 | """ 101 | # tf_map: 
Input: Input state 102 | # tf_adv: Input: Advantage 103 | self.tf_map, self.tf_a, self.tf_adv = PolicyGradient.inputs( 104 | map_space=self.input_space, 105 | action_space=self.action_space 106 | ) 107 | # mu_op: Used to get the exploited prediction of the model 108 | # pi_op: Used to get the prediction of the model 109 | # logp_a_op: Used to get the log likelihood of taking action a with the current policy 110 | # logp_pi_op: Used to get the log likelihood of the predicted action @pi_op 111 | # log_std: Used to get the currently used log_std 112 | self.mu_op, self.pi_op, self.logp_a_op, self.logp_pi_op = PolicyGradient.mlp( 113 | tf_map=self.tf_map, 114 | tf_a=self.tf_a, 115 | action_space=self.action_space, 116 | seed=self.seed 117 | ) 118 | # Error 119 | self.pi_loss = PolicyGradient.net_objectives( 120 | tf_adv=self.tf_adv, 121 | logp_a_op=self.logp_a_op 122 | ) 123 | # Optimization 124 | self.train_pi = tf.train.AdamOptimizer(learning_rate=self.pi_lr).minimize(self.pi_loss) 125 | # Entropy 126 | self.approx_ent = tf.reduce_mean(-self.logp_a_op) 127 | 128 | 129 | def set_sess(self, sess): 130 | # Set the tensorflow used to run this model 131 | self.sess = sess 132 | 133 | def step(self, states): 134 | # Take actions given the states 135 | # Return mu (policy without exploration), pi (policy with the current exploration) and 136 | # the log probability of the action chossen by pi 137 | mu, pi, logp_pi = self.sess.run([self.mu_op, self.pi_op, self.logp_pi_op], feed_dict={ 138 | self.tf_map: states 139 | }) 140 | return mu, pi, logp_pi 141 | 142 | def store(self, obs, act, rew, logp): 143 | # Store the observation, action, reward and the log probability of the action 144 | # into the buffer 145 | self.buffer.store(obs, act, rew, logp) 146 | 147 | def finish_path(self, last_val=0): 148 | self.buffer.finish_path(last_val=last_val) 149 | 150 | def train(self, additional_infos={}): 151 | # Get buffer 152 | obs_buf, act_buf, adv_buf, logp_last_buf = self.buffer.get() 153 | # Train the model 154 | pi_loss_list = [] 155 | entropy_list = [] 156 | 157 | for step in range(5): 158 | _, entropy, pi_loss = self.sess.run([self.train_pi, self.approx_ent, self.pi_loss], feed_dict= { 159 | self.tf_map: obs_buf, 160 | self.tf_a:act_buf, 161 | self.tf_adv: adv_buf 162 | }) 163 | 164 | pi_loss_list.append(pi_loss) 165 | entropy_list.append(entropy) 166 | 167 | print("Entropy : %s, Loss: %s" % (np.mean(entropy_list), np.mean(pi_loss_list)), end="\r") 168 | 169 | 170 | @staticmethod 171 | def gaussian_likelihood(x, mu, log_std): 172 | # Compute the gaussian likelihood of x with a normal gaussian distribution of mean @mu 173 | # and a std @log_std 174 | pre_sum = -0.5 * (((x-mu)/(tf.exp(log_std)+1e-8))**2 + 2*log_std + np.log(2*np.pi)) 175 | return tf.reduce_sum(pre_sum, axis=1) 176 | 177 | @staticmethod 178 | def inputs(map_space, action_space): 179 | """ 180 | @map_space Tuple of the space. Ex (size,) 181 | @action_space Tuple describing the action space. 
Ex (size,) 182 | """ 183 | # Map of the game 184 | tf_map = tf.placeholder(tf.float32, shape=(None, *map_space), name="tf_map") 185 | # Possible actions (Should be two: x,y for the beacon game) 186 | tf_a = tf.placeholder(tf.int32, shape=(None,), name="tf_a") 187 | # Advantage 188 | tf_adv = tf.placeholder(tf.float32, shape=(None,), name="tf_adv") 189 | return tf_map, tf_a, tf_adv 190 | 191 | @staticmethod 192 | def mlp(tf_map, tf_a, action_space, seed=None): 193 | if seed is not None: 194 | tf.random.set_random_seed(seed) 195 | 196 | # Expand the dimension of the input 197 | tf_map_expand = tf.expand_dims(tf_map, axis=3) 198 | 199 | flatten = tf.layers.flatten(tf_map_expand) 200 | hidden = tf.layers.dense(flatten, units=256, activation=tf.nn.relu) 201 | spacial_action_logits = tf.layers.dense(hidden, units=action_space, activation=None) 202 | 203 | # Add take the log of the softmax 204 | logp_all = tf.nn.log_softmax(spacial_action_logits) 205 | # Take random actions according to the logits (Exploration) 206 | pi_op = tf.squeeze(tf.multinomial(spacial_action_logits,1), axis=1) 207 | mu = tf.argmax(spacial_action_logits, axis=1) 208 | 209 | # Gives log probability, according to the policy, of taking actions @a in states @x 210 | logp_a_op = tf.reduce_sum(tf.one_hot(tf_a, depth=action_space) * logp_all, axis=1) 211 | # Gives log probability, according to the policy, of the action sampled by pi. 212 | logp_pi_op = tf.reduce_sum(tf.one_hot(pi_op, depth=action_space) * logp_all, axis=1) 213 | 214 | return mu, pi_op, logp_a_op, logp_pi_op 215 | 216 | @staticmethod 217 | def net_objectives(logp_a_op, tf_adv, clip_ratio=0.2): 218 | """ 219 | @v_op: Predicted value function 220 | @tf_tv: Expected advantage 221 | @logp_a_op: Log likelihood of taking action under the current policy 222 | @tf_logp_old_pi: Log likelihood of the last policy 223 | @tf_adv: Advantage input 224 | """ 225 | pi_loss = -tf.reduce_mean(logp_a_op*tf_adv) 226 | return pi_loss 227 | 228 | class GridWorld(object): 229 | """ 230 | docstring for GridWorld. 
231 | """ 232 | def __init__(self): 233 | super(GridWorld, self).__init__() 234 | 235 | self.rewards = [ 236 | [0, 0, 0, 0, -1, 0, 0], 237 | [0, -1, -1, 0, -1, 0, 0], 238 | [0, -1, -1, 1, -1, 0, 0], 239 | [0, -1, -1, 0, -1, 0, 0], 240 | [0, 0, 0, 0, 0, 0, 0], 241 | [0, 0, 0, 0, 0, 0, 0], 242 | [0, 0, 0, 0, 0, 0, 0], 243 | ] 244 | self.position = [6, 6] # y, x 245 | 246 | def gen_state(self): 247 | # Generate a state given the current position of the agent 248 | state = np.zeros((7, 7)) 249 | state[self.position[0]][self.position[1]] = 1 250 | return state 251 | 252 | def step(self, action): 253 | if action == 0: # Top 254 | self.position = [(self.position[0] - 1) % 7, self.position[1]] 255 | elif action == 1: # Left 256 | self.position = [self.position[0], (self.position[1] - 1) % 7] 257 | elif action == 2: # Right 258 | self.position = [self.position[0], (self.position[1] + 1) % 7] 259 | elif action == 3: # Down 260 | self.position = [(self.position[0] + 1) % 7, self.position[1]] 261 | 262 | reward = self.rewards[self.position[0]][self.position[1]] 263 | done = False if reward == 0 else True 264 | state = self.gen_state() 265 | if done: # The agent is dead, reset the game 266 | self.position = [6, 6] 267 | return state, reward, done 268 | 269 | def display(self): 270 | y = 0 271 | print("="*14) 272 | for line in self.rewards: 273 | x = 0 274 | for case in line: 275 | if case == -1: 276 | c = "0" 277 | elif (y == self.position[0] and x == self.position[1]): 278 | c = "A" 279 | elif case == 1: 280 | c = "T" 281 | else: 282 | c = "-" 283 | print(c, end=" ") 284 | x += 1 285 | y += 1 286 | print() 287 | 288 | def main(): 289 | grid = GridWorld() 290 | buffer_size = 1000 291 | 292 | # Create the NET class 293 | agent = PolicyGradient( 294 | input_space=(7, 7), 295 | action_space=4, 296 | pi_lr=0.001, 297 | buffer_size=buffer_size, 298 | seed=42 299 | ) 300 | agent.compile() 301 | # Init Session 302 | sess = tf.Session() 303 | # Init variables 304 | sess.run(tf.global_variables_initializer()) 305 | # Set the session 306 | agent.set_sess(sess) 307 | 308 | rewards = [] 309 | 310 | b = 0 311 | 312 | for epoch in range(10000): 313 | 314 | done = False 315 | state = grid.gen_state() 316 | 317 | while not done: 318 | _, pi, logpi = agent.step([state]) 319 | n_state, reward, done = grid.step(pi[0]) 320 | agent.store(state, pi[0], reward, logpi) 321 | b += 1 322 | 323 | state = n_state 324 | 325 | if done: 326 | agent.finish_path(reward) 327 | rewards.append(reward) 328 | if len(rewards) > 1000: 329 | rewards.pop(0) 330 | if b == buffer_size: 331 | if not done: 332 | agent.finish_path(0) 333 | agent.train() 334 | b = 0 335 | 336 | if epoch % 1000 == 0: 337 | print("Rewards mean:%s" % np.mean(rewards)) 338 | 339 | for epoch in range(10): 340 | import time 341 | print("=========================TEST=================================") 342 | done = False 343 | state = grid.gen_state() 344 | 345 | while not done: 346 | time.sleep(1) 347 | _, pi, logpi = agent.step([state]) 348 | n_state, _, done = grid.step(pi[0]) 349 | grid.display() 350 | state = n_state 351 | print("reward=>", reward) 352 | 353 | if __name__ == '__main__': 354 | main() 355 | -------------------------------------------------------------------------------- /rl/policy_gradient_continuous_actions.py: -------------------------------------------------------------------------------- 1 | from random import randint 2 | import tensorflow as tf 3 | import numpy as np 4 | import scipy.signal 5 | 6 | """ 7 | Exemple of the Policy Gradient Algorithm 8 
| """ 9 | 10 | class Buffer: 11 | 12 | def __init__(self, obs_dim, act_dim, size, gamma=0.99, lam=0.95): 13 | self.obs_buf = np.zeros(Buffer.combined_shape(size, obs_dim), dtype=np.float32) 14 | # Actions buffer 15 | self.act_buf = np.zeros((size, act_dim), dtype=np.float32) 16 | # Advantages buffer 17 | self.adv_buf = np.zeros(size, dtype=np.float32) 18 | # Rewards buffer 19 | self.rew_buf = np.zeros(size, dtype=np.float32) 20 | # Log probability of action a with the policy 21 | self.logp_buf = np.zeros(size, dtype=np.float32) 22 | # Gamma and lam to compute the advantage 23 | self.gamma, self.lam = gamma, lam 24 | # ptr: Position to insert the next tuple 25 | # path_start_idx Posittion of the current trajectory 26 | # max_size Max size of the buffer 27 | self.ptr, self.path_start_idx, self.max_size = 0, 0, size 28 | 29 | @staticmethod 30 | def discount_cumsum(x, discount): 31 | """ 32 | x = [x0, x1, x2] 33 | output: [x0 + discount * x1 + discount^2 * x2, x1 + discount * x2, x2] 34 | """ 35 | return scipy.signal.lfilter([1], [1, float(-discount)], x[::-1], axis=0)[::-1] 36 | 37 | @staticmethod 38 | def combined_shape(length, shape=None): 39 | if shape is None: 40 | return (length,) 41 | return (length, shape) if np.isscalar(shape) else (length, *shape) 42 | 43 | def store(self, obs, act, rew, logp): 44 | """ 45 | Append one timestep of agent-environment interaction to the buffer. 46 | """ 47 | assert self.ptr < self.max_size 48 | self.obs_buf[self.ptr] = obs 49 | self.act_buf[self.ptr] = act 50 | self.rew_buf[self.ptr] = rew 51 | self.logp_buf[self.ptr] = logp 52 | self.ptr += 1 53 | 54 | def finish_path(self, last_val=0): 55 | # Select the path 56 | path_slice = slice(self.path_start_idx, self.ptr) 57 | # Append the last_val to the trajectory 58 | rews = np.append(self.rew_buf[path_slice], last_val) 59 | # Advantage 60 | self.adv_buf[path_slice] = Buffer.discount_cumsum(rews, self.gamma)[:-1] 61 | self.path_start_idx = self.ptr 62 | 63 | def get(self): 64 | assert self.ptr == self.max_size # buffer has to be full before you can get 65 | self.ptr, self.path_start_idx = 0, 0 66 | # the next two lines implement the advantage normalization trick 67 | # Normalize the Advantage 68 | if np.std(self.adv_buf) != 0: 69 | self.adv_buf = (self.adv_buf - np.mean(self.adv_buf)) / np.std(self.adv_buf) 70 | return self.obs_buf, self.act_buf, self.adv_buf, self.logp_buf 71 | 72 | 73 | class PolicyGradient(object): 74 | """ 75 | Implementation of Policy gradient algorithm 76 | """ 77 | def __init__(self, input_space, action_space, pi_lr, buffer_size, seed): 78 | super(PolicyGradient, self).__init__() 79 | 80 | # Stored the spaces 81 | self.input_space = input_space 82 | self.action_space = action_space 83 | self.seed = seed 84 | # NET Buffer defined above 85 | self.buffer = Buffer( 86 | obs_dim=input_space, 87 | act_dim=action_space, 88 | size=buffer_size 89 | ) 90 | # Learning rate of the policy network 91 | self.pi_lr = pi_lr 92 | # The tensorflow session (set later) 93 | self.sess = None 94 | # Apply a random seed on tensorflow and numpy 95 | tf.set_random_seed(42) 96 | np.random.seed(42) 97 | 98 | def compile(self): 99 | """ 100 | Compile the model 101 | """ 102 | # tf_map: Input: Input state 103 | # tf_adv: Input: Advantage 104 | self.tf_map, self.tf_a, self.tf_adv = PolicyGradient.inputs( 105 | map_space=self.input_space, 106 | action_space=self.action_space 107 | ) 108 | # mu_op: Used to get the exploited prediction of the model 109 | # pi_op: Used to get the prediction of the model 110 | # 
logp_a_op: Used to get the log likelihood of taking action a with the current policy 111 | # logp_pi_op: Used to get the log likelihood of the predicted action @pi_op 112 | # log_std: Used to get the currently used log_std 113 | self.mu_op, self.pi_op, self.logp_a_op, self.logp_pi_op, self.std = PolicyGradient.mlp( 114 | tf_map=self.tf_map, 115 | tf_a=self.tf_a, 116 | action_space=self.action_space, 117 | seed=self.seed 118 | ) 119 | # Error 120 | self.pi_loss = PolicyGradient.net_objectives( 121 | tf_adv=self.tf_adv, 122 | logp_a_op=self.logp_a_op 123 | ) 124 | # Optimization 125 | self.train_pi = tf.train.AdamOptimizer(learning_rate=self.pi_lr).minimize(self.pi_loss) 126 | # Entropy 127 | self.approx_ent = tf.reduce_mean(-self.logp_a_op) 128 | 129 | 130 | def set_sess(self, sess): 131 | # Set the tensorflow used to run this model 132 | self.sess = sess 133 | 134 | def step(self, states): 135 | # Take actions given the states 136 | # Return mu (policy without exploration), pi (policy with the current exploration) and 137 | # the log probability of the action chossen by pi 138 | mu, pi, logp_pi = self.sess.run([self.mu_op, self.pi_op, self.logp_pi_op], feed_dict={ 139 | self.tf_map: states 140 | }) 141 | return mu, pi, logp_pi 142 | 143 | def store(self, obs, act, rew, logp): 144 | # Store the observation, action, reward and the log probability of the action 145 | # into the buffer 146 | self.buffer.store(obs, act, rew, logp) 147 | 148 | def finish_path(self, last_val=0): 149 | self.buffer.finish_path(last_val=last_val) 150 | 151 | def train(self, additional_infos={}): 152 | # Get buffer 153 | obs_buf, act_buf, adv_buf, logp_last_buf = self.buffer.get() 154 | # Train the model 155 | pi_loss_list = [] 156 | entropy_list = [] 157 | 158 | import time 159 | t = time.time() 160 | 161 | for step in range(5): 162 | _, entropy, pi_loss, std = self.sess.run([self.train_pi, self.approx_ent, self.pi_loss, self.std], feed_dict= { 163 | self.tf_map: obs_buf, 164 | self.tf_a:act_buf, 165 | self.tf_adv: adv_buf 166 | }) 167 | 168 | pi_loss_list.append(pi_loss) 169 | entropy_list.append(entropy) 170 | 171 | print("Std: %.2f, Entropy : %.2f" % (std[0], np.mean(entropy_list)), end="\r") 172 | 173 | 174 | @staticmethod 175 | def gaussian_likelihood(x, mu, log_std): 176 | # Compute the gaussian likelihood of x with a normal gaussian distribution of mean @mu 177 | # and a std @log_std 178 | pre_sum = -0.5 * (((x-mu)/(tf.exp(log_std)+1e-8))**2 + 2*log_std + np.log(2*np.pi)) 179 | return tf.reduce_sum(pre_sum, axis=1) 180 | 181 | @staticmethod 182 | def inputs(map_space, action_space): 183 | """ 184 | @map_space Tuple of the space. Ex (size,) 185 | @action_space Tuple describing the action space. 
Ex (size,) 186 | """ 187 | # Map of the game 188 | tf_map = tf.placeholder(tf.float32, shape=(None, *map_space), name="tf_map") 189 | # Possible actions (Should be two: x,y for the beacon game) 190 | tf_a = tf.placeholder(tf.float32, shape=(None, action_space), name="tf_a") 191 | # Advantage 192 | tf_adv = tf.placeholder(tf.float32, shape=(None,), name="tf_adv") 193 | return tf_map, tf_a, tf_adv 194 | 195 | @staticmethod 196 | def gaussian_likelihood(x, mu, log_std): 197 | # Compute the gaussian likelihood of x with a normal gaussian distribution of mean @mu 198 | # and a std @log_std 199 | pre_sum = -0.5 * (((x-mu)/(tf.exp(log_std)+1e-8))**2 + 2*log_std + np.log(2*np.pi)) 200 | return tf.reduce_sum(pre_sum, axis=1) 201 | #pre_sum = (1.*tf.exp((-(x-mu)**2)/(2*std**2)))/tf.sqrt(2*3.14*std**2) 202 | #retun 203 | 204 | @staticmethod 205 | def mlp(tf_map, tf_a, action_space, seed=None): 206 | if seed is not None: 207 | tf.random.set_random_seed(seed) 208 | 209 | # Expand the dimension of the input 210 | tf_map_expand = tf.expand_dims(tf_map, axis=3) 211 | 212 | flatten = tf.layers.flatten(tf_map_expand) 213 | hidden = tf.layers.dense(flatten, units=256, activation=tf.nn.relu) 214 | mu = tf.layers.dense(hidden, units=action_space, activation=tf.nn.relu) 215 | 216 | log_std = tf.get_variable(name='log_std', initializer=-0.5*np.ones(action_space, dtype=np.float32)) 217 | std = tf.exp(log_std) 218 | pi_op = mu + tf.random_normal(tf.shape(mu)) * std 219 | 220 | # Gives log probability, according to the policy, of taking actions @a in states @x 221 | logp_a_op = PolicyGradient.gaussian_likelihood(tf_a, mu, log_std) 222 | # Gives log probability, according to the policy, of the action sampled by pi. 223 | logp_pi_op = PolicyGradient.gaussian_likelihood(pi_op, mu, log_std) 224 | 225 | return mu, pi_op, logp_a_op, logp_pi_op, std 226 | 227 | @staticmethod 228 | def net_objectives(logp_a_op, tf_adv, clip_ratio=0.2): 229 | """ 230 | @v_op: Predicted value function 231 | @tf_tv: Expected advantage 232 | @logp_a_op: Log likelihood of taking action under the current policy 233 | @tf_logp_old_pi: Log likelihood of the last policy 234 | @tf_adv: Advantage input 235 | """ 236 | pi_loss = -tf.reduce_mean(logp_a_op*tf_adv) 237 | return pi_loss 238 | 239 | class GridWorld(object): 240 | """ 241 | docstring for GridWorld. 
242 | """ 243 | def __init__(self): 244 | super(GridWorld, self).__init__() 245 | 246 | self.rewards = [ 247 | [0, 0, 0, 0, -1, 0, 0], 248 | [0, -1, -1, 0, -1, 0, 0], 249 | [0, -1, -1, 1, -1, 0, 0], 250 | [0, -1, -1, 0, -1, 0, 0], 251 | [0, 0, 0, 0, 0, 0, 0], 252 | [0, 0, 0, 0, 0, 0, 0], 253 | [0, 0, 0, 0, 0, 0, 0], 254 | ] 255 | self.position = [6, 6] # y, x 256 | 257 | def gen_state(self): 258 | # Generate a state given the current position of the agent 259 | state = np.zeros((7, 7)) 260 | state[self.position[0]][self.position[1]] = 1 261 | return state 262 | 263 | def step(self, y, x): 264 | # y and x coordinates 265 | 266 | # Move in the direction of the "click" 267 | self.position = [ 268 | self.position[0] + min(1, max(-1, (y - self.position[0]))), 269 | self.position[1] + min(1, max(-1, (x - self.position[1]))) 270 | ] 271 | reward = self.rewards[self.position[0]][self.position[1]] 272 | done = False if reward == 0 else True 273 | state = self.gen_state() 274 | if done: # The agent is dead, reset the game 275 | self.position = [6, 6] 276 | return state, reward, done 277 | 278 | def display(self): 279 | y = 0 280 | print("="*14) 281 | for line in self.rewards: 282 | x = 0 283 | for case in line: 284 | if case == -1: 285 | c = "0" 286 | elif (y == self.position[0] and x == self.position[1]): 287 | c = "A" 288 | elif case == 1: 289 | c = "T" 290 | else: 291 | c = "-" 292 | print(c, end=" ") 293 | x += 1 294 | y += 1 295 | print() 296 | 297 | def main(): 298 | grid = GridWorld() 299 | buffer_size = 1000 300 | 301 | # Create the NET class 302 | agent = PolicyGradient( 303 | input_space=(7, 7), 304 | action_space=2, 305 | pi_lr=0.001, 306 | buffer_size=buffer_size, 307 | seed=42 308 | ) 309 | agent.compile() 310 | # Init Session 311 | sess = tf.Session() 312 | # Init variables 313 | sess.run(tf.global_variables_initializer()) 314 | # Set the session 315 | agent.set_sess(sess) 316 | 317 | rewards = [] 318 | 319 | b = 0 320 | display = False 321 | 322 | for epoch in range(100000): 323 | 324 | done = False 325 | state = grid.gen_state() 326 | 327 | s = 0 328 | while not done and s < 20: 329 | s += 1 330 | _, pi, logpi = agent.step([state]) 331 | 332 | y = max(0, min(6, int(round((pi[0][0]+1.) / 2*6)))) 333 | x = max(0, min(6, int(round((pi[0][1]+1.) / 2*6)))) 334 | 335 | if display: 336 | import time 337 | time.sleep(0.1) 338 | grid.display() 339 | 340 | n_state, reward, done = grid.step(y, x) 341 | agent.store(state, pi[0], -0.1 if reward == 0 else reward, logpi) 342 | b += 1 343 | 344 | state = n_state 345 | 346 | if done: 347 | agent.finish_path(reward) 348 | rewards.append(reward) 349 | if len(rewards) > 1000: 350 | rewards.pop(0) 351 | if b == buffer_size: 352 | if not done: 353 | agent.finish_path(0) 354 | agent.train() 355 | b = 0 356 | 357 | if epoch % 1000 == 0: 358 | print("\nEpoch: %s Rewards mean:%s" % (epoch, np.mean(rewards))) 359 | 360 | for epoch in range(10): 361 | import time 362 | print("=========================TEST=================================") 363 | done = False 364 | state = grid.gen_state() 365 | 366 | while not done: 367 | time.sleep(1) 368 | mu, pi, logpi = agent.step([state]) 369 | 370 | y = max(0, min(6, int(round((pi[0][0]+1.) / 2*6)))) 371 | x = max(0, min(6, int(round((pi[0][1]+1.) 
/ 2*6)))) 372 | 373 | n_state, _, done = grid.step(y, x) 374 | grid.display() 375 | state = n_state 376 | print("reward=>", reward) 377 | 378 | if __name__ == '__main__': 379 | main() 380 | -------------------------------------------------------------------------------- /genetic/zombies/src/index.ts: -------------------------------------------------------------------------------- 1 | 2 | function mod(x: number, n: number): number { 3 | return (x % n + n) % n; 4 | } 5 | 6 | export interface Zombie { 7 | sprite: PIXI.Sprite; 8 | vector: number[]; 9 | follow: boolean; 10 | speed: number; 11 | perception: number; 12 | } 13 | 14 | export interface Human { 15 | sprite: PIXI.Sprite; 16 | vector: number[]; 17 | followdist: number; 18 | speed: number; 19 | perception: number; 20 | ammunition: number; 21 | shot_accuracy: number; 22 | genome: number[]; 23 | lifeduration: number; 24 | } 25 | 26 | export interface Bullet { 27 | sprite: PIXI.Sprite; 28 | } 29 | 30 | export interface WorldConf { 31 | [key:string]: any, 32 | nbzombie: number; 33 | nbhuman: number; 34 | } 35 | 36 | export interface Phenotype { 37 | speed: number, 38 | perception: number, 39 | shot_accuracy: number, 40 | ammunition: number 41 | } 42 | 43 | export interface Position { 44 | y: number; 45 | x: number; 46 | } 47 | 48 | class GeneticZombie { 49 | 50 | private canvasId: string; 51 | protected app: PIXI.Application = null; // The pixi app. 52 | protected sprite: PIXI.Sprite = null; 53 | protected zombies: Zombie[] = []; 54 | protected bullets: Bullet[] = []; 55 | protected humans: Human[] = []; 56 | protected humansToConvert: string[] = []; 57 | protected zombieToKill: string[] = []; 58 | protected sounds: Position[] = []; 59 | protected isplaying: boolean = true; 60 | protected population: Human[] = []; 61 | protected last_population: Human[] = []; 62 | protected stepnb: number = 0; 63 | protected callback: any = null; 64 | 65 | protected width: number = 1200; 66 | protected height: number = 800; 67 | 68 | protected config: WorldConf = { 69 | nbzombie: 50, 70 | nbhuman: 50, 71 | nbbullets: 10, 72 | }; 73 | 74 | constructor(canvasId: string, config: WorldConf) { 75 | if (!canvasId){ 76 | console.error("You must specify the canvasId"); 77 | } 78 | 79 | for (let key in this.config){ 80 | if (config && key in config){ 81 | this.config[key] = config[key]; 82 | } 83 | } 84 | 85 | this.canvasId = canvasId;//Create a Pixi Application 86 | this.app = new PIXI.Application({ 87 | width: this.width, 88 | height: this.height, 89 | backgroundColor: 0x002628 90 | }); 91 | //Add the canvas that Pixi automatically created for you to the HTML document 92 | document.getElementById(this.canvasId).appendChild(this.app.view); 93 | 94 | this.app.ticker.add(delta => { 95 | if (this.callback){ 96 | this.callback(this, delta); 97 | } 98 | }); 99 | } 100 | 101 | protected vectorrand(vector: number []){ 102 | let a = Math.random(); 103 | let b = 1 - a; 104 | vector[0] = vector[0] * a; 105 | vector[1] = vector[1] * b; 106 | return vector; 107 | } 108 | 109 | public run(){ 110 | this.isplaying = true; 111 | } 112 | 113 | public stop(){ 114 | this.isplaying = false; 115 | } 116 | 117 | /* 118 | Step in the environment 119 | */ 120 | public step(delta: number){ 121 | this.stepnb += 1; 122 | 123 | // Track the human that are going to be catch by zombies 124 | this.humansToConvert = []; 125 | // Step all the zombies 126 | let n_zombies: Zombie[] = []; 127 | for (let i in this.zombies) { 128 | this.stepZombie(this.zombies[i], i); 129 | if 
(this.zombieToKill.indexOf(i) == -1){ 130 | n_zombies.push(this.zombies[i]); 131 | } 132 | } 133 | this.zombies = n_zombies; 134 | if (this.zombies.length == 0){ 135 | let zombie = this.createZombie(); 136 | } 137 | // Track the zombie that are going to be kill 138 | this.zombieToKill = []; 139 | // Step all the humans 140 | this.sounds = []; 141 | let n_humans: Human[] = []; 142 | for (let i in this.humans) { 143 | this.stepHuman(this.humans[i], i); 144 | if (this.humansToConvert.indexOf(i) == -1){ 145 | this.humans[i].lifeduration = this.stepnb; 146 | n_humans.push(this.humans[i]) 147 | } 148 | else { 149 | let zombie = this.createZombie(); 150 | zombie.sprite.position.x = this.humans[i].sprite.position.x; 151 | zombie.sprite.position.y = this.humans[i].sprite.position.y; 152 | this.humans[i].sprite.visible = false; 153 | } 154 | } 155 | this.humans = n_humans; 156 | } 157 | 158 | /* 159 | Move a particular Zombie 160 | */ 161 | public stepZombie(z: Zombie, hid: string){ 162 | for (let s in this.sounds){ 163 | let sound = this.sounds[s]; 164 | let dist = Math.sqrt((z.sprite.position.x-sound.x)**2 + (z.sprite.position.y-sound.y)**2); 165 | if (dist < 200){ 166 | z.vector[0] = sound.x <= z.sprite.position.x ? -1 : 1; 167 | z.vector[1] = sound.y <= z.sprite.position.y ? -1 : 1; 168 | } 169 | } 170 | for (let i in this.humans) { 171 | let h = this.humans[i]; 172 | 173 | let dist = Math.sqrt((h.sprite.position.x-z.sprite.position.x)**2 + (h.sprite.position.y-z.sprite.position.y)**2); 174 | 175 | if (dist < 10){ 176 | if (this.humansToConvert.indexOf(i) == -1){ 177 | this.humansToConvert.push(i); 178 | this.app.stage.removeChild(h.sprite); 179 | } 180 | } 181 | // The speed of the human increase the perception of the zombie 182 | // because of the sound 183 | let factornoise = h.speed > 1.0 ? (h.speed-1.0)*50 : 0.0; 184 | if (dist < z.perception * factornoise){ 185 | z.vector[0] = h.sprite.position.x <= z.sprite.position.x ? -1 : 1; 186 | z.vector[1] = h.sprite.position.y <= z.sprite.position.y ? -1 : 1; 187 | //z.vector = this.vectorrand(z.vector); 188 | } 189 | } 190 | z.sprite.position.x = mod(z.sprite.position.x + (z.speed*z.vector[0]) , this.width); 191 | z.sprite.position.y = mod(z.sprite.position.y + (z.speed*z.vector[1]), this.height); 192 | } 193 | 194 | /* 195 | Move a particular human 196 | */ 197 | public stepHuman(h: Human, hid: string): void { 198 | 199 | h.followdist = 10000; 200 | 201 | let n_bullets: Bullet[] = []; 202 | let bullet_to_add = 0; 203 | for (let b in this.bullets) { 204 | let bullet = this.bullets[b]; 205 | let dist = Math.sqrt((h.sprite.position.x-bullet.sprite.position.x)**2 + (h.sprite.position.y-bullet.sprite.position.y)**2); 206 | if (dist < h.perception){ 207 | h.vector[0] = h.sprite.position.x <= bullet.sprite.position.x ? 1 : -1; 208 | h.vector[1] = h.sprite.position.y <= bullet.sprite.position.y ? 
1 : -1; 209 | h.vector = this.vectorrand(h.vector); 210 | } 211 | // Take the bullets 212 | if (dist < 10){ 213 | bullet.sprite.visible = false; 214 | this.app.stage.removeChild(bullet.sprite); 215 | h.ammunition += 5; 216 | bullet_to_add += 1; 217 | } else{ 218 | n_bullets.push(bullet); 219 | } 220 | } 221 | this.bullets = n_bullets; 222 | for (let b=0; b < bullet_to_add; b++){ 223 | this.createBullet(); 224 | } 225 | 226 | for (let i in this.zombies) { 227 | let z = this.zombies[i]; 228 | 229 | let dist = Math.sqrt((h.sprite.position.x-z.sprite.position.x)**2 + (h.sprite.position.y-z.sprite.position.y)**2); 230 | 231 | if (dist < h.perception && h.followdist+10 > dist){ 232 | if (h.ammunition > 0){ // Try to shot the Zombie 233 | h.ammunition = h.ammunition - 1; 234 | this.sounds.push({ 235 | y: h.sprite.position.y, 236 | x: h.sprite.position.x 237 | }); 238 | if (Math.random() < h.shot_accuracy){ 239 | this.zombieToKill.push(i); 240 | this.app.stage.removeChild(z.sprite); 241 | } 242 | } 243 | h.vector[0] = h.sprite.position.x <= z.sprite.position.x ? -1 : 1; 244 | h.vector[1] = h.sprite.position.y <= z.sprite.position.y ? -1 : 1; 245 | h.vector = this.vectorrand(h.vector); 246 | h.followdist = dist; 247 | } 248 | } 249 | 250 | h.sprite.position.x = mod(h.sprite.position.x + (h.speed*h.vector[0]), this.width); 251 | h.sprite.position.y = mod(h.sprite.position.y + (h.speed*h.vector[1]), this.height); 252 | } 253 | 254 | public emptyHumans(){ 255 | for (let h in this.humans){ 256 | this.humans[h].sprite.visible = false; 257 | this.app.stage.removeChild(this.humans[h].sprite); 258 | } 259 | this.humans = []; 260 | this.last_population = this.population; 261 | this.population = []; 262 | } 263 | 264 | public emptyZomvies(){ 265 | for (let z in this.zombies){ 266 | this.zombies[z].sprite.visible = false; 267 | this.app.stage.removeChild(this.zombies[z].sprite); 268 | } 269 | this.zombies = []; 270 | } 271 | 272 | public createZombies(){ 273 | console.log("create zombies", this.config.nbzombie - this.zombies.length); 274 | for (let i = 0; i < this.config.nbzombie - this.zombies.length; i++) { 275 | this.createZombie(); 276 | } 277 | } 278 | 279 | public reset() { 280 | this.stepnb = 0; 281 | 282 | for (let b in this.bullets){ 283 | this.bullets[b].sprite.visible = false; 284 | this.app.stage.removeChild(this.bullets[b].sprite); 285 | } 286 | this.bullets = []; 287 | for (let z in this.zombies){ 288 | this.zombies[z].sprite.visible = false; 289 | this.app.stage.removeChild(this.zombies[z].sprite); 290 | } 291 | this.zombies = []; 292 | for (let h in this.humans){ 293 | this.humans[h].sprite.visible = false; 294 | this.app.stage.removeChild(this.humans[h].sprite); 295 | } 296 | this.humans = []; 297 | this.population = []; 298 | for (let i = 0; i < this.config.nbbullets; i++) { 299 | this.createBullet(); 300 | } 301 | for (let i = 0; i < this.config.nbzombie; i++) { 302 | this.createZombie(); 303 | } 304 | } 305 | 306 | public init(oninit: any, callback: any): void { 307 | PIXI.loader 308 | .add("/public/images/boy.png") 309 | .add("/public/images/girl.png") 310 | .add("/public/images/zombie.png") 311 | .add("/public/images/bullet.png") 312 | .load(() => { 313 | 314 | for (let i = 0; i < this.config.nbbullets; i++) { 315 | this.createBullet(); 316 | } 317 | 318 | for (let i = 0; i < this.config.nbzombie; i++) { 319 | this.createZombie(); 320 | } 321 | //for (let i = 0; i < this.config.nbhuman; i++) { 322 | // this.createHuman(); 323 | //} 324 | 325 | if (oninit){ 326 | oninit(this); 327 | } 328 
| 329 | this.callback = callback; 330 | 331 | }); 332 | } 333 | 334 | /** 335 | * Create a new Zombie 336 | */ 337 | public createZombie(): Zombie { 338 | let sprite = new PIXI.Sprite(PIXI.loader.resources["/public/images/zombie.png"].texture); 339 | sprite.scale.x = 0.05; 340 | sprite.scale.y = 0.05; 341 | // Position of the Zombie 342 | sprite.position.x = Math.floor(Math.random() * this.width); 343 | sprite.position.y = Math.floor(Math.random() * this.height); 344 | this.app.stage.addChild(sprite); 345 | 346 | let vector: number[] = [ 347 | Math.random() > 0.75 ? -1 : 1, 348 | Math.random() > 0.25 ? -1 : 1 349 | ] 350 | vector = this.vectorrand(vector); 351 | 352 | let zombie: Zombie = { 353 | sprite: sprite, 354 | vector: vector, 355 | follow: false, 356 | speed: 0.8, 357 | perception: 30.0 358 | } 359 | this.zombies.push(zombie); 360 | return zombie; 361 | } 362 | 363 | 364 | /** 365 | * Create a Bullet 366 | */ 367 | public createBullet(): Bullet { 368 | let sprite = new PIXI.Sprite(PIXI.loader.resources["/public/images/bullet.png"].texture); 369 | sprite.scale.x = 0.03; 370 | sprite.scale.y = 0.03; 371 | // Position of the Zombie 372 | sprite.position.x = Math.floor(Math.random() * this.width); 373 | sprite.position.y = Math.floor(Math.random() * this.height); 374 | this.app.stage.addChild(sprite); 375 | 376 | let bullet: Bullet = { 377 | sprite: sprite 378 | } 379 | 380 | this.bullets.push(bullet); 381 | return bullet; 382 | } 383 | 384 | /* 385 | Genome to phenotype 386 | */ 387 | protected genomeToPhenotype(genome: number[]): Phenotype { 388 | let phenotype: Phenotype = { 389 | speed: 1.0 + (0.4*genome[0]-0.2), 390 | perception: 50 + (300*genome[1]-150), 391 | shot_accuracy: 0.3 + (0.6*genome[2]-0.3), 392 | ammunition: 4 + (20*genome[3]-10), 393 | }; 394 | return phenotype; 395 | } 396 | 397 | /** 398 | * Create a new Human 399 | */ 400 | public createHuman(genome: number[]): void { 401 | let phenotype = this.genomeToPhenotype(genome); 402 | let sprite = new PIXI.Sprite(PIXI.loader.resources[ 403 | Math.random() > 0.5 ? "/public/images/boy.png" : "/public/images/girl.png" 404 | ].texture); 405 | sprite.scale.x = 0.05; 406 | sprite.scale.y = 0.05; 407 | // Position of the Zombie 408 | sprite.position.x = Math.floor(Math.random() * this.width); 409 | sprite.position.y = Math.floor(Math.random() * this.height); 410 | this.app.stage.addChild(sprite); 411 | 412 | let vector: number[] = [ 413 | Math.random() > 0.5 ? -1 : 1, 414 | Math.random() > 0.5 ? 
-1 : 1 415 | ] 416 | vector = this.vectorrand(vector); 417 | 418 | let n_human = { 419 | sprite: sprite, 420 | vector: vector, 421 | followdist: 10000, 422 | speed: phenotype.speed, 423 | perception: phenotype.perception, 424 | ammunition: phenotype.ammunition, 425 | shot_accuracy: phenotype.shot_accuracy, 426 | genome: genome, 427 | lifeduration: 0 428 | }; 429 | this.humans.push(n_human); 430 | this.population.push(n_human); 431 | } 432 | 433 | } 434 | 435 | const geneticzombie = { 436 | env: GeneticZombie 437 | } 438 | 439 | export default geneticzombie; 440 | -------------------------------------------------------------------------------- /rl/actor_critic.py: -------------------------------------------------------------------------------- 1 | from random import randint 2 | import tensorflow as tf 3 | import numpy as np 4 | import scipy.signal 5 | 6 | """ 7 | Exemple of the Policy Gradient Algorithm 8 | """ 9 | 10 | class Buffer: 11 | 12 | def __init__(self, obs_dim, act_dim, size, gamma=0.99, lam=0.95): 13 | self.obs_buf = np.zeros(Buffer.combined_shape(size, obs_dim), dtype=np.float32) 14 | # Actions buffer 15 | self.act_buf = np.zeros(size, dtype=np.float32) 16 | # Advantages buffer 17 | self.adv_buf = np.zeros(size, dtype=np.float32) 18 | # Value function buffer 19 | self.val_buf = np.zeros(size, dtype=np.float32) 20 | # Rewards buffer 21 | self.rew_buf = np.zeros(size, dtype=np.float32) 22 | # Log probability of action a with the policy 23 | self.logp_buf = np.zeros(size, dtype=np.float32) 24 | # Gamma and lam to compute the advantage 25 | self.gamma, self.lam = gamma, lam 26 | # Rreturn buffer (used to train the value fucntion) 27 | self.ret_buf = np.zeros(size, dtype=np.float32) 28 | # ptr: Position to insert the next tuple 29 | # path_start_idx Posittion of the current trajectory 30 | # max_size Max size of the buffer 31 | self.ptr, self.path_start_idx, self.max_size = 0, 0, size 32 | 33 | @staticmethod 34 | def discount_cumsum(x, discount): 35 | """ 36 | x = [x0, x1, x2] 37 | output: [x0 + discount * x1 + discount^2 * x2, x1 + discount * x2, x2] 38 | """ 39 | return scipy.signal.lfilter([1], [1, float(-discount)], x[::-1], axis=0)[::-1] 40 | 41 | @staticmethod 42 | def combined_shape(length, shape=None): 43 | if shape is None: 44 | return (length,) 45 | return (length, shape) if np.isscalar(shape) else (length, *shape) 46 | 47 | def store(self, obs, act, rew, logp, val): 48 | # Append one timestep of agent-environment interaction to the buffer. 
49 | assert self.ptr < self.max_size 50 | self.obs_buf[self.ptr] = obs 51 | self.act_buf[self.ptr] = act 52 | self.rew_buf[self.ptr] = rew 53 | self.logp_buf[self.ptr] = logp 54 | self.val_buf[self.ptr] = val 55 | self.ptr += 1 56 | 57 | def finish_path(self, last_val=0): 58 | # Select the path 59 | path_slice = slice(self.path_start_idx, self.ptr) 60 | # Append the last_val to the trajectory 61 | rews = np.append(self.rew_buf[path_slice], last_val) 62 | # Append the last value to the value function 63 | vals = np.append(self.val_buf[path_slice], last_val) 64 | # Deltas = r + y*v(s') - v(s) 65 | deltas = rews[:-1] + self.gamma * vals[1:] - vals[:-1] 66 | self.adv_buf[path_slice] = Buffer.discount_cumsum(deltas, self.gamma) 67 | # Advantage 68 | self.ret_buf[path_slice] = Buffer.discount_cumsum(rews, self.gamma)[:-1] 69 | self.path_start_idx = self.ptr 70 | 71 | def get(self): 72 | assert self.ptr == self.max_size # buffer has to be full before you can get 73 | self.ptr, self.path_start_idx = 0, 0 74 | # the next two lines implement the advantage normalization trick 75 | # Normalize the Advantage 76 | if np.std(self.adv_buf) != 0: 77 | self.adv_buf = (self.adv_buf - np.mean(self.adv_buf)) / np.std(self.adv_buf) 78 | return self.obs_buf, self.act_buf, self.adv_buf, self.logp_buf, self.ret_buf 79 | 80 | 81 | class ActorCritic(object): 82 | """ 83 | Implementation of Policy gradient algorithm 84 | """ 85 | def __init__(self, input_space, action_space, pi_lr, v_lr, buffer_size, seed): 86 | super(ActorCritic, self).__init__() 87 | 88 | # Stored the spaces 89 | self.input_space = input_space 90 | self.action_space = action_space 91 | self.seed = seed 92 | # NET Buffer defined above 93 | self.buffer = Buffer( 94 | obs_dim=input_space, 95 | act_dim=action_space, 96 | size=buffer_size 97 | ) 98 | # Learning rate of the policy network and the value network 99 | self.pi_lr = pi_lr 100 | self.v_lr = v_lr 101 | # The tensorflow session (set later) 102 | self.sess = None 103 | # Apply a random seed on tensorflow and numpy 104 | tf.set_random_seed(42) 105 | np.random.seed(42) 106 | 107 | def compile(self): 108 | """ 109 | Compile the model 110 | """ 111 | # tf_a : Input: Chosen action 112 | # tf_map: Input: Input state 113 | # tf_tv: Input: Target value function 114 | # tf_adv: Input: Advantage 115 | self.tf_map, self.tf_a, self.tf_adv, self.tf_tv = ActorCritic.inputs( 116 | map_space=self.input_space, 117 | action_space=self.action_space 118 | ) 119 | # mu_op: Used to get the exploited prediction of the model 120 | # pi_op: Used to get the prediction of the model 121 | # logp_a_op: Used to get the log likelihood of taking action a with the current policy 122 | # logp_pi_op: Used to get the log likelihood of the predicted action @pi_op 123 | # v_op: Used to get the value function of the given state 124 | self.mu_op, self.pi_op, self.logp_a_op, self.logp_pi_op, self.v_op = ActorCritic.mlp( 125 | tf_map=self.tf_map, 126 | tf_a=self.tf_a, 127 | action_space=self.action_space, 128 | seed=self.seed 129 | ) 130 | # Error 131 | self.pi_loss, self.v_loss = ActorCritic.net_objectives( 132 | tf_adv=self.tf_adv, 133 | logp_a_op=self.logp_a_op, 134 | v_op=self.v_op, 135 | tf_tv=self.tf_tv 136 | ) 137 | # Optimization 138 | self.train_pi = tf.train.AdamOptimizer(learning_rate=self.pi_lr).minimize(self.pi_loss) 139 | self.train_v = tf.train.AdamOptimizer(learning_rate=self.v_lr).minimize(self.v_loss) 140 | # Entropy 141 | self.approx_ent = tf.reduce_mean(-self.logp_a_op) 142 | 143 | 144 | def set_sess(self, sess): 145 
| # Set the tensorflow session used to run this model 146 | self.sess = sess 147 | 148 | def step(self, states): 149 | # Take actions given the states 150 | # Return mu (the greedy policy without exploration), pi (an action sampled from the current policy) and 151 | # the log probability of the action chosen by pi, along with the value estimate v 152 | mu, pi, logp_pi, v = self.sess.run([self.mu_op, self.pi_op, self.logp_pi_op, self.v_op], feed_dict={ 153 | self.tf_map: states 154 | }) 155 | return mu, pi, logp_pi, v 156 | 157 | def store(self, obs, act, rew, logp, val): 158 | # Store the observation, action, reward, log probability of the action and state value 159 | # into the buffer 160 | self.buffer.store(obs, act, rew, logp, val) 161 | 162 | def finish_path(self, last_val=0): 163 | self.buffer.finish_path(last_val=last_val) 164 | 165 | def train(self, additional_infos={}): 166 | # Get the content of the buffer 167 | obs_buf, act_buf, adv_buf, logp_last_buf, ret_buf = self.buffer.get() 168 | # Train the model 169 | pi_loss_list = [] 170 | entropy_list = [] 171 | v_loss_list = [] 172 | 173 | for step in range(5): 174 | _, entropy, pi_loss = self.sess.run([self.train_pi, self.approx_ent, self.pi_loss], feed_dict={ 175 | self.tf_map: obs_buf, 176 | self.tf_a: act_buf, 177 | self.tf_adv: adv_buf 178 | }) 179 | entropy_list.append(entropy) 180 | pi_loss_list.append(pi_loss) 181 | 182 | 183 | for step in range(5): 184 | _, v_loss = self.sess.run([self.train_v, self.v_loss], feed_dict={ 185 | self.tf_map: obs_buf, 186 | self.tf_tv: ret_buf, 187 | }) 188 | 189 | v_loss_list.append(v_loss) 190 | 191 | print("Entropy : %s" % (np.mean(entropy_list)), end="\r") 192 | 193 | 194 | @staticmethod 195 | def gaussian_likelihood(x, mu, log_std): 196 | # Compute the gaussian likelihood of x under a Gaussian distribution with mean @mu 197 | # and log standard deviation @log_std 198 | pre_sum = -0.5 * (((x-mu)/(tf.exp(log_std)+1e-8))**2 + 2*log_std + np.log(2*np.pi)) 199 | return tf.reduce_sum(pre_sum, axis=1) 200 | 201 | @staticmethod 202 | def inputs(map_space, action_space): 203 | """ 204 | @map_space Tuple describing the observation space. Ex (size,) 205 | @action_space Tuple describing the action space.
Ex (size,) 206 | """ 207 | # Map of the game 208 | tf_map = tf.placeholder(tf.float32, shape=(None, *map_space), name="tf_map") 209 | # Chosen action: a single discrete index per step 210 | tf_a = tf.placeholder(tf.int32, shape=(None,), name="tf_a") 211 | # Advantage 212 | tf_adv = tf.placeholder(tf.float32, shape=(None,), name="tf_adv") 213 | # Target value 214 | tf_tv = tf.placeholder(tf.float32, shape=(None,), name="tf_tv") 215 | return tf_map, tf_a, tf_adv, tf_tv 216 | 217 | @staticmethod 218 | def mlp(tf_map, tf_a, action_space, seed=None): 219 | if seed is not None: 220 | tf.random.set_random_seed(seed) 221 | 222 | # Expand the dimension of the input 223 | tf_map_expand = tf.expand_dims(tf_map, axis=3) 224 | 225 | flatten = tf.layers.flatten(tf_map_expand) 226 | hidden = tf.layers.dense(flatten, units=256, activation=tf.nn.relu) 227 | 228 | # Policy logits (one per action) 229 | action_logits = tf.layers.dense(hidden, units=action_space, activation=None) 230 | # Value function head 231 | v_op = tf.layers.dense(hidden, units=1, activation=None) 232 | 233 | # Take the log-softmax of the logits 234 | logp_all = tf.nn.log_softmax(action_logits) 235 | # Sample a random action according to the logits (exploration) 236 | pi_op = tf.squeeze(tf.multinomial(action_logits, 1), axis=1) 237 | mu = tf.argmax(action_logits, axis=1) 238 | 239 | # Gives log probability, according to the policy, of taking actions @a in states @x 240 | logp_a_op = tf.reduce_sum(tf.one_hot(tf_a, depth=action_space) * logp_all, axis=1) 241 | # Gives log probability, according to the policy, of the action sampled by pi. 242 | logp_pi_op = tf.reduce_sum(tf.one_hot(pi_op, depth=action_space) * logp_all, axis=1) 243 | 244 | return mu, pi_op, logp_a_op, logp_pi_op, v_op 245 | 246 | @staticmethod 247 | def net_objectives(logp_a_op, tf_adv, v_op, tf_tv, clip_ratio=0.2): 248 | """ 249 | @v_op: Predicted value function 250 | @tf_tv: Target value (rewards-to-go) 251 | @logp_a_op: Log likelihood of taking action under the current policy 252 | @clip_ratio: Unused in this simple objective 253 | @tf_adv: Advantage input 254 | """ 255 | pi_loss = -tf.reduce_mean(logp_a_op*tf_adv) 256 | v_loss = tf.reduce_mean((tf_tv - v_op)**2) 257 | return pi_loss, v_loss 258 | 259 | class GridWorld(object): 260 | """ 261 | A 7x7 grid world: -1 cells end the episode with a penalty, the +1 cell is the goal.
262 | """ 263 | def __init__(self): 264 | super(GridWorld, self).__init__() 265 | 266 | self.rewards = [ 267 | [0, 0, 0, 0, -1, 0, 0], 268 | [0, -1, -1, 0, -1, 0, 0], 269 | [0, -1, -1, 1, -1, 0, 0], 270 | [0, -1, -1, 0, -1, 0, 0], 271 | [0, 0, 0, 0, 0, 0, 0], 272 | [0, 0, 0, 0, 0, 0, 0], 273 | [0, 0, 0, 0, 0, 0, 0], 274 | ] 275 | self.position = [6, 6] # y, x 276 | 277 | def gen_state(self): 278 | # Generate a state given the current position of the agent 279 | state = np.zeros((7, 7)) 280 | state[self.position[0]][self.position[1]] = 1 281 | return state 282 | 283 | def step(self, action): 284 | if action == 0: # Top 285 | self.position = [(self.position[0] - 1) % 7, self.position[1]] 286 | elif action == 1: # Left 287 | self.position = [self.position[0], (self.position[1] - 1) % 7] 288 | elif action == 2: # Right 289 | self.position = [self.position[0], (self.position[1] + 1) % 7] 290 | elif action == 3: # Down 291 | self.position = [(self.position[0] + 1) % 7, self.position[1]] 292 | 293 | reward = self.rewards[self.position[0]][self.position[1]] 294 | done = False if reward == 0 else True 295 | state = self.gen_state() 296 | if done: # The agent is dead, reset the game 297 | self.position = [6, 6] 298 | return state, reward, done 299 | 300 | def display(self): 301 | y = 0 302 | print("="*14) 303 | for line in self.rewards: 304 | x = 0 305 | for case in line: 306 | if case == -1: 307 | c = "0" 308 | elif (y == self.position[0] and x == self.position[1]): 309 | c = "A" 310 | elif case == 1: 311 | c = "T" 312 | else: 313 | c = "-" 314 | print(c, end=" ") 315 | x += 1 316 | y += 1 317 | print() 318 | 319 | def main(): 320 | grid = GridWorld() 321 | buffer_size = 1000 322 | 323 | # Create the NET class 324 | agent = ActorCritic( 325 | input_space=(7, 7), 326 | action_space=4, 327 | pi_lr=0.001, 328 | v_lr=0.001, 329 | buffer_size=buffer_size, 330 | seed=42 331 | ) 332 | agent.compile() 333 | # Init Session 334 | sess = tf.Session() 335 | # Init variables 336 | sess.run(tf.global_variables_initializer()) 337 | # Set the session 338 | agent.set_sess(sess) 339 | 340 | rewards = [] 341 | 342 | b = 0 343 | 344 | for epoch in range(10000): 345 | 346 | done = False 347 | state = grid.gen_state() 348 | 349 | while not done: 350 | _, pi, logpi, v = agent.step([state]) 351 | n_state, reward, done = grid.step(pi[0]) 352 | agent.store(state, pi[0], reward, logpi, v) 353 | b += 1 354 | 355 | state = n_state 356 | 357 | if done: 358 | agent.finish_path(reward) 359 | rewards.append(reward) 360 | if len(rewards) > 1000: 361 | rewards.pop(0) 362 | if b == buffer_size: 363 | if not done: 364 | # Bootstrap the last value 365 | agent.finish_path(v) 366 | agent.train() 367 | b = 0 368 | 369 | if epoch % 1000 == 0: 370 | print("Rewards mean:%s" % np.mean(rewards)) 371 | 372 | for epoch in range(10): 373 | import time 374 | print("=========================TEST=================================") 375 | done = False 376 | state = grid.gen_state() 377 | 378 | while not done: 379 | time.sleep(1) 380 | _, pi, logpi, v = agent.step([state]) 381 | n_state, _, done = grid.step(pi[0]) 382 | print("v", v) 383 | grid.display() 384 | state = n_state 385 | print("reward=>", reward) 386 | 387 | if __name__ == '__main__': 388 | main() 389 | -------------------------------------------------------------------------------- /meta-learning/Meta Learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | 
"## Classification Demo" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import matplotlib.pyplot as plt\n", 18 | "from scipy.spatial.distance import cdist\n", 19 | "import copy\n", 20 | "import sys\n", 21 | "import os\n", 22 | "\n", 23 | "DATASET = \"/home/thibault/work/datasets/omniglot/python/\"" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## One-shot classification demo with Modified Hausdorff Distance" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "Running this demo should lead to a result of 38.8 percent errors. M.-P. Dubuisson, A. K. Jain (1994). A modified hausdorff distance for object matching. International Conference on Pattern Recognition, pp. 566-568. ** Models should be trained on images in 'images_background' directory to avoid using images and alphabets used in the one-shot evaluation **" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "def mod_hausdorff_distance(itemA, itemB):\n", 47 | " \"\"\"\n", 48 | " Modified Hausdorff Distance\n", 49 | "\n", 50 | " Input:\n", 51 | " itemA : [n x 2] coordinates of \"inked\" pixels\n", 52 | " itemB : [m x 2] coordinates of \"inked\" pixels\n", 53 | " M.-P. Dubuisson, A. K. Jain (1994). A modified hausdorff distance for object matching.\n", 54 | " International Conference on Pattern Recognition, pp. 566-568.\n", 55 | " \"\"\"\n", 56 | " D = cdist(itemA,itemB)\n", 57 | " mindist_A = D.min(axis=1)\n", 58 | " mindist_B = D.min(axis=0)\n", 59 | " mean_A = np.mean(mindist_A)\n", 60 | " mean_B = np.mean(mindist_B)\n", 61 | " return max(mean_A,mean_B)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 23, 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "data": { 71 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAQUAAAD8CAYAAAB+fLH0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAADnhJREFUeJzt3V2oZXd5x/HvrzNGa6Tm7RDiTOxMMShBsJGDjaQUMUrTVEwuRBLEDjIwN7bGF9CkvZDeKYiagkgHo06LRG0MTQiipGOk9KJTz2gwL2PMNDZmhsQcqdFiLzT49GKvwfM/OafnzF77Ze053w8czl5rr733M/+Z/Naz/mvtlVQVknTG78y7AEnDYihIahgKkhqGgqSGoSCpYShIahgKkhpTCYUk1yV5LMnJJLdO4zMkTUcmffFSkl3AD4G3AqeA7wA3V9WjE/0gSVOxewrv+QbgZFU9AZDky8ANwKahcMkll9S+ffumUIqkM44fP/7TqlraartphMIe4Kk1y6eAP1q/UZJDwCGAV77ylaysrEyhFElnJHlyO9vNbaKxqg5X1XJVLS8tbRlekmZkGqFwGrh8zfLebp2kBTCNUPgOcEWS/UnOA24C7p3C50iagonPKVTV80n+EvgmsAv4fFU9MunPkTQd05hopKq+Dnx9Gu8tabq8olFSw1CQ1DAUJDUMBUkNQ0FSw1CQ1DAUJDUMBUkNQ0FSw1CQ1DAUJDUMBUkNQ0FSw1CQ1DAUJDUMBUkNQ0FSw1CQ1DAUJDUMBUkNQ0FSw1CQ1DAUJDUMBUkNQ0FSw1CQ1DAUJDUMBUkNQ0FSw1CQ1DAUJDUMBUmNsUMhyeVJHkjyaJJHktzSrb8oyf1JHu9+Xzi5ciVNW59O4XngQ1V1JXA18N4kVwK3Aker6grgaLcsaUGMHQpV9XRVfbd7/D/ACWAPcANwpNvsCHBj3yIlzc5E5hSS7AOuAo4Bl1bV091TzwCXTuIzJM1G71BI8jLga8D7q+oXa5+rqgJqk9cdSrKSZGV1dbVvGZImpFcoJHkRo0D4UlXd3a3+SZLLuucvA57d6LVVdbiqlqtqeWlpqU8Zkiaoz9mHAHcAJ6rqk2ueuhc40D0+ANwzfnmSZm13j9deA7wbeCjJg926vwY+Bnw1yUHgSeCd/UqUNEtjh0JV/RuQTZ6+dtz3lTRfXtEoqWEoSGoYCpIahoKkhqEgqWEoSGoYCpIahoKkhqEgqWEoSGoYCpIahoKkRp9vSUoAjL5FPx2j+/RoluwUJDXsFPQC09zzn631tdg5TJ+dgqSGncION6SuYDvsHKbPTkFSw05hh1i0jmC7zvy57Bgmx05BUsNQkNQwFHROSHLOHiLNmqEgqeFEoyZuEpN+4+71nXjsz05BUsNOQVuax153/Weebeewdnu7hrNjpyCpYaegFxjinvVMTZ5hmD47BUkNO4UdYoh7/3GM0zF4RuLs2ClIahgKWkhV5Z5/SgwFSY3ecwpJdgErwOmqeluS/cCXgYuB48C7q+pXfT9nJ1iUmfVF3UM7t7A9k+gUbgFOrFn+OPCpqnoV8DPg4AQ+Q9KM9AqFJHuBPwc+1y0HeDNwV7fJEeDGPp9xLjjzDb6tfhbFotZ9xqLWPSt9O4VPAx8GftMtXww8V1XPd8ungD0bvTDJoSQrSVZWV1d7liFpUsYOhSRvA56tquPjvL6qDlfVclUtLy0tjVvGIJwrHcC45vnn7HMWYqf8/ZytPhON1wBvT3I98BLg94DbgQuS7O66hb3A6f5lSpqVsTuFqrqtqvZW1T7gJuBbVfUu4AHgHd1mB4B7elc5MDutE9iueY6LHcPkTOM6hY8AH0xyktEcwx1T+AxJUzKR7z5U1beBb3ePnwDeMIn3nQf3GJMzj/9xi9+m7M8rGiU1/JZkZ2h7liFeddd3jGZ5N6S177/dur3iccROQVLDTmGGFn0PNMnjdffKw2WnIKlhKEhqePgwRedqa9z39usbvXaaY3W2hz07/dDGTkFSw06hh526J1lvEhOQO33vPCR2CpIadgpjcG+2sUnMNUyzY/AS6O2xU5DUMBQ0NYt+G/ad+pVqQ0FSw1AYwyLsQYZU4zgdw5Dq32kMBUkNzz70MIRz61vtTTd7fh41j/N15mnW4RWOG7NTkNSwU+gs2jnsRamzr522lx4COwVJDUNhHWfKZ2MI1zAMoYYhMhQkNQwFaZt2SkdoKEhqGAqbcG5BO5WhIKnhdQoL6myvq3CWfXOLdo3KtNkpSGrYKWxhKNfrb8YOQJNmpyCpYShMgWchtMgMBUmNXqGQ5IIkdyX5QZITSd6Y5KIk9yd5vPt94aSKnTevlddO0LdTuB34RlW9BngdcAK4FThaVVcAR7tlSQti7FBI8nLgT4A7AKrqV1X1HHADcKTb7AhwY98iJc1On05hP7AKfCHJ95J8Lsn5wKVV9XS3zTPApX2LXFROOGoR9QmF3cDrgc9W1VXAL1l3qFCjA/AND8KTHEqykmRldXW1RxmSJqlPKJwCTlXVsW75LkYh8ZMklwF0v5/d6MVVdbiqlqtqeWlpqUcZkiZp7FCoqmeAp5K8ult1LfAocC9woFt3ALinV4WSZqrvZc5/BXwpyXnAE8B7GAXNV5McBJ4E3tnzMwbHW4SPx/mVxdArFKrqQWB5g6eu7fO+kubHL0TN0E7tGOwQFouXOUtq2Cn0MO7NOXZKxzCJDuFcH6MhslOQ1LBTmAA7hslzTObHTkFSw05Bg2KHMH92CpIadgoT5K3CW9sZDzuD4bFTkNSwU5iCcb8bsdF7nAvOpT/LTmCnIKlhKExRnxu9etcmzYuhIKnhnMLAbdYteJw+OXZkLTsFSQ07hRmYxvULZ/tedhbaLjsFSQ07hRlav7ee5bGscxO/Ne6475SxslOQ1DAUJDU8fJijIXyB6lz5spKnFSfHTkFSw05hAIbQMWxkaPXMyyJ1TJNgpyCpYacwIP/fHsm9tmbFTkFSw05hQZztca2dxfh22hzCenYKkhp2CueorfZ2dhK/tdM7g/XsFCQ17BR2qHP9TId7//H16hSSfCDJI0keTnJnkpck2Z/kWJKTSb6S5LxJFStp+sYOhSR7gPcBy1X1WmAXcBPwceBTVfUq4GfAwUkUqtk5c8PZRf7R+PrOKewGfjfJbuClwNPAm4G7uuePADf2/AxJMzR2KFTVaeATwI8ZhcHPgePAc1X1fLfZKWBP3yIlzU6fw4cLgRuA/cArgPOB687i9YeSrCRZWV1dHbcMSRPW5/DhLcCPqmq1qn4N3A1cA1zQHU4A7AVOb/TiqjpcVctVtby0tNSjDEmT1CcUfgxcneSlGZ3DuhZ4FHgAeEe3zQHgnn4lSpqlPnMKxxhNKH4XeKh7r8PAR4APJjkJXAzcMYE6Jc1Ir4uXquqjwEfXrX4CeEOf95U0P17mLKlhKEhqGAqSGoaCpIahIKlhKEhqGAqSGoaCpIahIKlhKEhqGAqSGoaCpIahIKlhKEhqGAqSGoaCpIahIKlhKEhqGAqSGoaCpIahIKlhKEhqGA
qSGoaCpIahIKlhKEhqGAqSGoaCpIahIKlhKEhqGAqSGoaCpIahIKmxZSgk+XySZ5M8vGbdRUnuT/J49/vCbn2S/F2Sk0m+n+T10yxe0uRtp1P4InDdunW3Aker6grgaLcM8GfAFd3PIeCzkylT0qxsGQpV9a/Af69bfQNwpHt8BLhxzfp/qJF/By5IctmkipU0fePOKVxaVU93j58BLu0e7wGeWrPdqW7dCyQ5lGQlycrq6uqYZUiatN4TjVVVQI3xusNVtVxVy0tLS33LkDQh44bCT84cFnS/n+3WnwYuX7Pd3m6dpAUxbijcCxzoHh8A7lmz/i+6sxBXAz9fc5ghaQHs3mqDJHcCbwIuSXIK+CjwMeCrSQ4CTwLv7Db/OnA9cBL4X+A9U6hZ0hRtGQpVdfMmT127wbYFvLdvUZLmxysaJTUMBUkNQ0FSw1CQ1MhobnDORSSrwC+Bn867lm24hOHXaY2Tswh1brfG36+qLa8UHEQoACRZqarledexlUWo0xonZxHqnHSNHj5IahgKkhpDCoXD8y5gmxahTmucnEWoc6I1DmZOQdIwDKlTkDQAgwiFJNcleay7t+OtW79i+pJcnuSBJI8meSTJLd36De9POedadyX5XpL7uuX9SY514/mVJOcNoMYLktyV5AdJTiR549DGMskHur/rh5PcmeQlQxjLWd8nde6hkGQX8BlG93e8Erg5yZXzrQqA54EPVdWVwNXAe7u6Nrs/5TzdApxYs/xx4FNV9SrgZ8DBuVTVuh34RlW9Bngdo3oHM5ZJ9gDvA5ar6rXALuAmhjGWX2SW90mtqrn+AG8Evrlm+TbgtnnXtUGd9wBvBR4DLuvWXQY8Nue69nb/KN4M3AeE0YUsuzca3znV+HLgR3RzWGvWD2Ys+e2tBC9i9O3h+4A/HcpYAvuAh7caO+DvgZs32m67P3PvFDiL+zrOS5J9wFXAMTa/P+W8fBr4MPCbbvli4Lmqer5bHsJ47gdWgS90hzmfS3I+AxrLqjoNfAL4MfA08HPgOMMbyzN63yd1M0MIhUFL8jLga8D7q+oXa5+rURTP7fRNkrcBz1bV8XnVsE27gdcDn62qqxhd0t4cKgxgLC9kdDfy/cArgPN5Ycs+SJMeuyGEwmDv65jkRYwC4UtVdXe3erP7U87DNcDbk/wX8GVGhxC3M7q1/pkb6AxhPE8Bp6rqWLd8F6OQGNJYvgX4UVWtVtWvgbsZje/QxvKMqd0ndQih8B3gim6W9zxGkzv3zrkmkgS4AzhRVZ9c89Rm96ecuaq6rar2VtU+RuP2rap6F/AA8I5us7nWCFBVzwBPJXl1t+pa4FEGNJaMDhuuTvLS7u/+TI2DGss1pnef1HlN7KybRLke+CHwn8DfzLuerqY/ZtSSfR94sPu5ntEx+1HgceBfgIvmXWtX75uA+7rHfwD8B6N7Zf4T8OIB1PeHwEo3nv8MXDi0sQT+FvgB8DDwj8CLhzCWwJ2M5jl+zajrOrjZ2DGaaP5M99/SQ4zOppzV53lFo6TGEA4fJA2IoSCpYShIahgKkhqGgqSGoSCpYShIahgKkhr/ByMg6OKiVzxNAAAAAElFTkSuQmCC\n", 72 | "text/plain": [ 73 | "
" 74 | ] 75 | }, 76 | "metadata": {}, 77 | "output_type": "display_data" 78 | }, 79 | { 80 | "data": { 81 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAQUAAAD8CAYAAAB+fLH0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAADelJREFUeJzt3W+MZXV9x/H3p7uiFVMBmRDche42Eg0xsZgJxdA0RjSl1AgPjIGYdmM22Se24p9EoX1g+kwTo9LEmG5E3TYEtUgKIUaDK6bpg26ZFaLAimyxyBKQMQVs7INC/PbBPdPOb5xhZu65f2fer2Qy95x77r3f/c3u53zP75w9k6pCklb81rQLkDRbDAVJDUNBUsNQkNQwFCQ1DAVJDUNBUmMsoZDk6iSPJjmd5KZxfIak8cioL15Ksgf4CfAu4AxwP3BDVT0y0g+SNBZ7x/CelwOnq+pxgCRfA64FNgyF888/vw4cODCGUiStOHny5C+qamGz7cYRCvuAJ1ctnwH+YO1GSY4ARwAuvvhilpaWxlCKpBVJntjKdlObaKyqo1W1WFWLCwubhpekCRlHKDwFXLRqeX+3TtIcGEco3A9ckuRgkrOA64G7x/A5ksZg5HMKVfVSkr8AvgPsAb5cVQ+P+nMkjcc4Jhqpqm8B3xrHe0saL69olNQwFCQ1DAVJDUNBUsNQkNQwFCQ1DAVJDUNBUsNQkNQwFCQ1DAVJDUNBUsNQkNQwFCQ1DAVJDUNBUmMsN1nR5CXZ0naj/j0f2nnsFCQ1DAVJDUNBUsNQkNQwFCQ1PPsw57Z61kHaKjsFSQ1DQTtCErumETEUJDV2/ZzCdvYu83w14DzX/nLW/vxWlnfqn3cS7BQkNXZ9pzCvPH5+eavHx65he+wUJDXsFLZhHo9X56nW7dhOpzSPP7dpslOQ1Bg6FJJclOS+JI8keTjJjd3685Lcm+Sx7vu5oytXu53XI4xfn07hJeBjVXUpcAXwwSSXAjcBx6vqEuB4tyxpTgwdClX1dFX9oHv8X8ApYB9wLXCs2+wYcF3fIiVNzkgmGpMcAC4DTgAXVNXT3VPPABeM4jNmiRNXk+chw+T0nmhM8hrgm8CHq+qXq5+rwb+adf/lJDmSZCnJ0vLyct8yJI1Ir1BI8goGgXBbVd3Zrf55kgu75y8Enl3vtVV1tKoWq2pxYWGhTxm9VJV7/Bk2iolFf8bb0+fsQ4BbgVNV9dlVT90NHOoeHwLuGr48SZPWZ07hSuDPgB8lebBb91fAp4BvJDkMPAG8r1+Jk7GyJ/GimOly7mD6hg6FqvoXYKOf4FXDvq+k6fIy5zWG6Rg0vHGNs93b8LzMWVLDTmEE1u7t3Eutz+5rPtgpSGrYKYzBbv1lr3YCO4OdgqSGncIGJnEWYl72rPNSJ+y87msa7BQkNewUNrF6zzNPe8wV81gz/OYef17/HPPITkFSw05hG9x7jY9zAbPDTkFSw06hB/+fxPDsDGaXnYKkhp3CCAy719upHYZdwHyzU5DUsFOYomH2qNPsLuwAdgc7BUkNO4U5s90zHu7dtV12CpIahoKkhqEgqWEoSGoYCpIahoKkhqEgqWEoSGoYCpIahoKkhqEgqWEoSGoYCpIahoKkRu9QSLInyQNJ7umWDyY5keR0kq8nOat/mZImZRSdwo3AqVXLnwY+V1VvAJ4DDo/gMyRNSK9QSLIf+FPgS91ygHcAd3SbHAOu6/MZkiarb6fweeDjwK+75dcBz1fVS93yGWDfei9MciTJUpKl5eXlnmVIGpWhQyHJu4Fnq+rkMK+vqqNVtVhViwsLC8OWIWnE+tyj8UrgPUmuAV4F/A5wC3BOkr1dt7AfeKp/mZImZehOoapurqr9VXUAuB74XlW9H7gPeG+32SHgrt5VSpqYcVyn8Ango0lOM5hjuHUMnyFpTEZyi/eq+j7w/e7x48Dlo3hfSZPnFY2SGoaCpIahIKlhKEhqGAqSGoaCpIahIKlhKEhqGAqSGoaCpIahIKlhKEhqGAqSGoaCpIahIKlhKEhqGAqSGoaCpIahIKlhKEhqGAqSGoaCpMZIbvGu2TX4nb9QVSN/z7VG+RmaHjsFSQ07BW1qo85go+3sGOabnYKkhp3CnFrZG291L74d43hPzQ87BUkNO4Vdzq5Aa9kpSGrYKexS4+gQPOuwM9gpSGr0CoUk5yS5I8mPk5xK8rYk5yW5N8lj3fdzR1Ws+ksysi6hqpov7Qx9O4VbgG9X1ZuAtwCngJuA41V1CXC8W5Y0J4YOhSSvBf4IuBWgqv6nqp4HrgWOdZsdA67rW6SkyenTKRwEloGvJHkgyZeSnA1cUFVPd9s8A1zQt0j1N47DBu1MfUJhL/BW4ItVdRnwK9YcKtTgb866f3uSHEmylGRpeXm5RxmSRqlPKJwBzlTViW75DgYh8fMkFwJ0359d78VVdbSqFqtqcWFhoUcZGpe1E4l2CLvD0KFQVc8ATyZ5Y7fqKuAR4G7gULfuEHBXrwolTVTfi5f+ErgtyVnA48AHGATNN5IcBp4A3tfzMzRhdgO7W69QqKoHgcV1nrqqz/tKmh4vc9b/sUMQeJmzpDUMhTnnGYHWKK/H2K0MBUkNQ0FzwY5ocgwFSQ3PPuxy7n21lp2CpIadgnak1Wcg7Ia2x05BUsNOYYfY7i+Hmde95zh/CY4G7BQkNewUdpjN9qTz2iFocuwUJDXsFHaotR3Dbu4QHIPtsVOQ1LBT2OF26t7RsxDjY6cgqWEoSGoYCpIahoKkhqGgXcNbtW2NoSCpYShornmbttEzFCQ1DAXtOs4tvDxDQVLDUNCOMMzcgh3D+gwFSQ1DQVLDUJDUMBS0o3jdQn+9QiHJR5I8nOShJLcneVWSg0lOJDmd5OtJzhpVsZLGb+hQSLIP+BCwWFVvBvYA1wOfBj5XVW8AngMOj6JQaTtWOobVXxs9p1bfw4e9wG8n2Qu8GngaeAdwR/f8MeC6np8haYKGDoWqegr4DPAzBmHwAnASeL6qXuo2OwPs61ukNAp2BlvT5/DhXOBa4CDweuBs4OptvP5IkqUkS8vLy8OWIWnE+hw+vBP4aVUtV9WLwJ3AlcA53eEEwH7gqfVeXFVHq2qxqhYXFhZ6lCFplPqEws+AK5K8OoNrRa8CHgHuA97bbXMIuKtfiZImqc+cwgkGE4o/AH7UvddR4BPAR5OcBl4H3DqCOiVNSK/f+1BVnwQ+uWb148Dlfd5X0vR4RaOkhqEgqWEoSGoYCpIahoKkhqEgqWEoSGoYCpIahoKkhqEgqWEoSGoYCpIahoKkhqEgqWEoSGoYCpIahoKkhqEgqWEoSGoYCpIahoKkhqEgqWEoSGoYCpIahoKkhqEgqWEoSGoYCpIahoKkhqEgqWEoSGoYCpIahoKkxqahkOTLSZ5N8tCqdeclu
TfJY933c7v1SfK3SU4n+WGSt46zeEmjt5VO4avA1WvW3QQcr6pLgOPdMsCfAJd0X0eAL46mTEmTsmkoVNU/A/+5ZvW1wLHu8THgulXr/74G/hU4J8mFoypW0vgNO6dwQVU93T1+Brige7wPeHLVdme6db8hyZEkS0mWlpeXhyxD0qj1nmisqgJqiNcdrarFqlpcWFjoW4akERk2FH6+cljQfX+2W/8UcNGq7fZ36yTNiWFD4W7gUPf4EHDXqvV/3p2FuAJ4YdVhhqQ5sHezDZLcDrwdOD/JGeCTwKeAbyQ5DDwBvK/b/FvANcBp4L+BD4yhZkljtGkoVNUNGzx11TrbFvDBvkVJmh6vaJTUMBQkNQwFSQ1DQVIjg7nBKReRLAO/An4x7Vq24Hxmv05rHJ15qHOrNf5uVW16peBMhAJAkqWqWpx2HZuZhzqtcXTmoc5R1+jhg6SGoSCpMUuhcHTaBWzRPNRpjaMzD3WOtMaZmVOQNBtmqVOQNANmIhSSXJ3k0e7ejjdt/orxS3JRkvuSPJLk4SQ3duvXvT/llGvdk+SBJPd0yweTnOjG8+tJzpqBGs9JckeSHyc5leRtszaWST7S/awfSnJ7klfNwlhO+j6pUw+FJHuALzC4v+OlwA1JLp1uVQC8BHysqi4FrgA+2NW10f0pp+lG4NSq5U8Dn6uqNwDPAYenUlXrFuDbVfUm4C0M6p2ZsUyyD/gQsFhVbwb2ANczG2P5VSZ5n9SqmuoX8DbgO6uWbwZunnZd69R5F/Au4FHgwm7dhcCjU65rf/eX4h3APUAYXMiyd73xnVKNrwV+SjeHtWr9zIwl/38rwfMY/O/he4A/npWxBA4AD202dsDfATest91Wv6beKbCN+zpOS5IDwGXACTa+P+W0fB74OPDrbvl1wPNV9VK3PAvjeRBYBr7SHeZ8KcnZzNBYVtVTwGeAnwFPAy8AJ5m9sVzR+z6pG5mFUJhpSV4DfBP4cFX9cvVzNYjiqZ2+SfJu4NmqOjmtGrZoL/BW4ItVdRmDS9qbQ4UZGMtzGdyN/CDweuBsfrNln0mjHrtZCIWZva9jklcwCITbqurObvVG96echiuB9yT5D+BrDA4hbmFwa/2VG+jMwnieAc5U1Ylu+Q4GITFLY/lO4KdVtVxVLwJ3MhjfWRvLFWO7T+oshML9wCXdLO9ZDCZ37p5yTSQJcCtwqqo+u+qpje5POXFVdXNV7a+qAwzG7XtV9X7gPuC93WZTrRGgqp4Bnkzyxm7VVcAjzNBYMjhsuCLJq7uf/UqNMzWWq4zvPqnTmthZM4lyDfAT4N+Bv552PV1Nf8igJfsh8GD3dQ2DY/bjwGPAd4Hzpl1rV+/bgXu6x78H/BuDe2X+I/DKGajv94Glbjz/CTh31sYS+Bvgx8BDwD8Ar5yFsQRuZzDP8SKDruvwRmPHYKL5C92/pR8xOJuyrc/zikZJjVk4fJA0QwwFSQ1DQVLDUJDUMBQkNQwFSQ1DQVLDUJDU+F8RThXf3CWT1wAAAABJRU5ErkJggg==\n", 82 | "text/plain": [ 83 | "
" 84 | ] 85 | }, 86 | "metadata": {}, 87 | "output_type": "display_data" 88 | }, 89 | { 90 | "name": "stdout", 91 | "output_type": "stream", 92 | "text": [ 93 | "Distance 0.0\n", 94 | "Distance 1.9464946780762098\n" 95 | ] 96 | } 97 | ], 98 | "source": [ 99 | "def load_img_as_points(fn, show=False, inked=False): \n", 100 | " \"\"\"\n", 101 | " Load image file and return coordinates of 'inked' pixels in the binary image \n", 102 | " Output:\n", 103 | " D : [n x 2] rows are coordinates\n", 104 | " \"\"\"\n", 105 | " I = plt.imread(fn)\n", 106 | " I = np.array(I,dtype=bool)\n", 107 | "\n", 108 | " if inked:\n", 109 | " I = np.logical_not(I)\n", 110 | " (row,col) = I.nonzero()\n", 111 | " I = I[row.min():row.max(),col.min():col.max()]\n", 112 | " I = np.logical_not(I)\n", 113 | "\n", 114 | " if show:\n", 115 | " plt.imshow(I, cmap=\"gray\")\n", 116 | " plt.show()\n", 117 | "\n", 118 | " return I\n", 119 | "\n", 120 | "\n", 121 | "def test_load_img_as_points():\n", 122 | " img1 = load_img_as_points(os.path.join(DATASET, \"one-shot-classification/run01/training/class01.png\"))\n", 123 | " plt.imshow(img1, cmap=\"gray\")\n", 124 | " plt.show()\n", 125 | " \n", 126 | " img2 = load_img_as_points(os.path.join(DATASET, \"one-shot-classification/run01/training/class02.png\"))\n", 127 | " plt.imshow(img2, cmap=\"gray\")\n", 128 | " plt.show() \n", 129 | " \n", 130 | " print(\"Distance\", mod_hausdorff_distance(img1, img1))\n", 131 | " print(\"Distance\", mod_hausdorff_distance(img1, img2))\n", 132 | "\n", 133 | "test_load_img_as_points()" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 52, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "def classification_run(folder,f_load,f_cost,ftype='cost'):\n", 143 | " # Compute error rate for one run of one-shot classification\n", 144 | " #\n", 145 | " # Input\n", 146 | " # folder : contains images for a run of one-shot classification\n", 147 | " # f_load : itemA = f_load('file.png') should read in the image file and process it\n", 148 | " # f_cost : f_cost(itemA,itemB) should compute similarity between two images, using output of f_load\n", 149 | " # ftype : 'cost' if small values from f_cost mean more similar, or 'score' if large values are more similar\n", 150 | " #\n", 151 | " # Output\n", 152 | " # perror : percent errors (0 to 100% error)\n", 153 | " #\n", 154 | " fname_label = 'class_labels.txt' # where class labels are stored for each run\n", 155 | " assert ((ftype=='cost') | (ftype=='score'))\n", 156 | "\n", 157 | " base_folder = os.path.join(DATASET, \"one-shot-classification\")\n", 158 | " sub_folder = os.path.join(base_folder, folder)\n", 159 | " \n", 160 | " # get file names\n", 161 | " with open(os.path.join(sub_folder, fname_label), \"r\") as f:\n", 162 | " content = f.read().splitlines()\n", 163 | " \n", 164 | " pairs = [line.split() for line in content]\n", 165 | " test_files = [os.path.join(base_folder, pair[0]) for pair in pairs]\n", 166 | " train_files = [os.path.join(base_folder, pair[1]) for pair in pairs]\n", 167 | " answers_files = copy.copy(train_files)\n", 168 | " test_files.sort()\n", 169 | " train_files.sort()\n", 170 | " ntrain = len(train_files)\n", 171 | " ntest = len(test_files)\n", 172 | " \n", 173 | " # load the images (and, if needed, extract features)\n", 174 | " train_items = [f_load(f) for f in train_files]\n", 175 | " test_items = [f_load(f) for f in test_files ]\n", 176 | "\n", 177 | " # compute cost matrix\n", 178 | " costM = np.zeros((ntest,ntrain),float)\n", 179 | " for i 
in range(ntest):\n", 180 | " for c in range(ntrain):\n", 181 | " costM[i,c] = f_cost(test_items[i],train_items[c])\n", 182 | "\n", 183 | " if ftype == 'cost':\n", 184 | " YHAT = np.argmin(costM,axis=1)\n", 185 | " elif ftype == 'score':\n", 186 | " YHAT = np.argmax(costM,axis=1)\n", 187 | " else:\n", 188 | " assert False\n", 189 | " \n", 190 | " # compute the error rate\n", 191 | " correct = 0.0\n", 192 | " for i in range(ntest):\n", 193 | " if train_files[YHAT[i]] == answers_files[i]:\n", 194 | " correct += 1.0\n", 195 | " pcorrect = 100 * correct / ntest\n", 196 | " perror = 100 - pcorrect\n", 197 | "\n", 198 | " return perror" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "## Percentage of error with a simple dist" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 57, 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "name": "stdout", 215 | "output_type": "stream", 216 | "text": [ 217 | "Percentage of error 81.25\n" 218 | ] 219 | } 220 | ], 221 | "source": [ 222 | "def run_one_shot_classificaion_with_hausdorff_distance():\n", 223 | " nrun = 20 # number of classification runs\n", 224 | " fname_label = 'class_labels.txt' # where class labels are stored for each run\n", 225 | " \n", 226 | " perror = np.zeros(nrun)\n", 227 | " for r in range(1, nrun+1):\n", 228 | " # Add 0 for number from 0 to 9 (2 -> 02)\n", 229 | " rs = '0' + str(r) if len(str(r)) == 1 else str(r)\n", 230 | " perror[r-1] = classification_run('run'+rs, load_img_as_points, mod_hausdorff_distance, 'cost')\n", 231 | " \n", 232 | " print(\"Percentage of error\", np.mean(perror))\n", 233 | "\n", 234 | "run_one_shot_classificaion_with_hausdorff_distance()" 235 | ] 236 | } 237 | ], 238 | "metadata": { 239 | "kernelspec": { 240 | "display_name": "Python 3", 241 | "language": "python", 242 | "name": "python3" 243 | }, 244 | "language_info": { 245 | "codemirror_mode": { 246 | "name": "ipython", 247 | "version": 3 248 | }, 249 | "file_extension": ".py", 250 | "mimetype": "text/x-python", 251 | "name": "python", 252 | "nbconvert_exporter": "python", 253 | "pygments_lexer": "ipython3", 254 | "version": "3.5.2" 255 | } 256 | }, 257 | "nbformat": 4, 258 | "nbformat_minor": 2 259 | } 260 | -------------------------------------------------------------------------------- /tensorflow/TensorFlow MNIST tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# TensorFlow Demo: MNIST for ML Beginners\n", 8 | "Before start using this, please select `Cell` - `All Output` - `Clear` to clear the old results. 
See [TensorFlow Tutorial](https://www.tensorflow.org/versions/master/tutorials/mnist/beginners/index.html) for details of the tutorial.\n", 9 | "\n", 10 | "# Loading MNIST training data" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "name": "stdout", 20 | "output_type": "stream", 21 | "text": [ 22 | "WARNING:tensorflow:From :7: read_data_sets (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n", 23 | "Instructions for updating:\n", 24 | "Please use alternatives such as official/mnist/dataset.py from tensorflow/models.\n", 25 | "WARNING:tensorflow:From /home/thibault/work/env/gpu/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:260: maybe_download (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.\n", 26 | "Instructions for updating:\n", 27 | "Please write your own downloading logic.\n", 28 | "WARNING:tensorflow:From /home/thibault/work/env/gpu/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:262: extract_images (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n", 29 | "Instructions for updating:\n", 30 | "Please use tf.data to implement this functionality.\n", 31 | "Extracting MNIST_data/train-images-idx3-ubyte.gz\n", 32 | "WARNING:tensorflow:From /home/thibault/work/env/gpu/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:267: extract_labels (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n", 33 | "Instructions for updating:\n", 34 | "Please use tf.data to implement this functionality.\n", 35 | "Extracting MNIST_data/train-labels-idx1-ubyte.gz\n", 36 | "WARNING:tensorflow:From /home/thibault/work/env/gpu/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:110: dense_to_one_hot (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n", 37 | "Instructions for updating:\n", 38 | "Please use tf.one_hot on tensors.\n", 39 | "Extracting MNIST_data/t10k-images-idx3-ubyte.gz\n", 40 | "Extracting MNIST_data/t10k-labels-idx1-ubyte.gz\n", 41 | "WARNING:tensorflow:From /home/thibault/work/env/gpu/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:290: DataSet.__init__ (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n", 42 | "Instructions for updating:\n", 43 | "Please use alternatives such as official/mnist/dataset.py from tensorflow/models.\n" 44 | ] 45 | } 46 | ], 47 | "source": [ 48 | "# import matplotlib\n", 49 | "import matplotlib.pyplot as plt\n", 50 | "%matplotlib inline\n", 51 | "\n", 52 | "# import MNIST data\n", 53 | "from tensorflow.examples.tutorials.mnist import input_data\n", 54 | "mnist = input_data.read_data_sets(\"MNIST_data/\", one_hot=True)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## Training Images\n", 62 | "![mnist.train.xs](https://www.tensorflow.org/versions/master/images/mnist-train-xs.png)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 2, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/plain": [ 73 | "(55000, 784)" 74 | ] 75 | }, 76 | 
"execution_count": 2, 77 | "metadata": {}, 78 | "output_type": "execute_result" 79 | } 80 | ], 81 | "source": [ 82 | "# check MNIST training images matrix shape\n", 83 | "mnist.train.images.shape" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 8, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "data": { 93 | "text/plain": [ 94 | "array([[0. , 0. , 0. , 0. , 0. ,\n", 95 | " 0. , 0. , 0. , 0. , 0. ,\n", 96 | " 0. , 0. , 0. , 0. , 0. ,\n", 97 | " 0. , 0. , 0. , 0. , 0. ,\n", 98 | " 0. , 0. , 0. , 0. , 0. ,\n", 99 | " 0. , 0. , 0. ],\n", 100 | " [0. , 0. , 0. , 0. , 0. ,\n", 101 | " 0. , 0. , 0. , 0. , 0. ,\n", 102 | " 0. , 0. , 0. , 0. , 0. ,\n", 103 | " 0. , 0. , 0. , 0. , 0. ,\n", 104 | " 0. , 0. , 0. , 0. , 0. ,\n", 105 | " 0. , 0. , 0. ],\n", 106 | " [0. , 0. , 0. , 0. , 0. ,\n", 107 | " 0. , 0. , 0. , 0. , 0. ,\n", 108 | " 0. , 0. , 0. , 0. , 0. ,\n", 109 | " 0. , 0. , 0. , 0. , 0. ,\n", 110 | " 0. , 0. , 0. , 0. , 0. ,\n", 111 | " 0. , 0. , 0. ],\n", 112 | " [0. , 0. , 0. , 0. , 0. ,\n", 113 | " 0. , 0. , 0. , 0. , 0. ,\n", 114 | " 0. , 0. , 0. , 0. , 0. ,\n", 115 | " 0. , 0. , 0. , 0. , 0. ,\n", 116 | " 0. , 0. , 0. , 0. , 0. ,\n", 117 | " 0. , 0. , 0. ],\n", 118 | " [0. , 0. , 0. , 0. , 0. ,\n", 119 | " 0. , 0. , 0. , 0. , 0. ,\n", 120 | " 0. , 0. , 0. , 0. , 0. ,\n", 121 | " 0.49411768, 0.8352942 , 0.13333334, 0. , 0. ,\n", 122 | " 0. , 0. , 0. , 0. , 0. ,\n", 123 | " 0. , 0. , 0. ],\n", 124 | " [0. , 0. , 0. , 0. , 0. ,\n", 125 | " 0. , 0. , 0. , 0. , 0. ,\n", 126 | " 0. , 0. , 0. , 0. , 0.07450981,\n", 127 | " 0.89019614, 0.9960785 , 0.32941177, 0. , 0. ,\n", 128 | " 0. , 0. , 0. , 0. , 0. ,\n", 129 | " 0. , 0. , 0. ],\n", 130 | " [0. , 0. , 0. , 0. , 0. ,\n", 131 | " 0. , 0. , 0. , 0. , 0. ,\n", 132 | " 0. , 0. , 0. , 0. , 0.44705886,\n", 133 | " 0.9960785 , 0.9960785 , 0.32941177, 0. , 0. ,\n", 134 | " 0. , 0. , 0. , 0. , 0. ,\n", 135 | " 0. , 0. , 0. ],\n", 136 | " [0. , 0. , 0. , 0. , 0. ,\n", 137 | " 0. , 0. , 0. , 0. , 0. ,\n", 138 | " 0. , 0. , 0. , 0. , 0.69803923,\n", 139 | " 0.9960785 , 0.9960785 , 0.32941177, 0. , 0. ,\n", 140 | " 0. , 0. , 0. , 0. , 0. ,\n", 141 | " 0. , 0. , 0. ],\n", 142 | " [0. , 0. , 0. , 0. , 0. ,\n", 143 | " 0. , 0. , 0. , 0. , 0. ,\n", 144 | " 0. , 0. , 0. , 0.23529413, 0.92549026,\n", 145 | " 0.9960785 , 0.9960785 , 0.32941177, 0. , 0. ,\n", 146 | " 0. , 0. , 0. , 0. , 0. ,\n", 147 | " 0. , 0. , 0. ],\n", 148 | " [0. , 0. , 0. , 0. , 0. ,\n", 149 | " 0. , 0. , 0. , 0. , 0. ,\n", 150 | " 0. , 0. , 0. , 0.30588236, 0.9960785 ,\n", 151 | " 0.9960785 , 0.9960785 , 0.32941177, 0. , 0. ,\n", 152 | " 0. , 0. , 0. , 0. , 0. ,\n", 153 | " 0. , 0. , 0. ],\n", 154 | " [0. , 0. , 0. , 0. , 0. ,\n", 155 | " 0. , 0. , 0. , 0. , 0. ,\n", 156 | " 0. , 0. , 0. , 0.30588236, 0.9960785 ,\n", 157 | " 0.9960785 , 0.9058824 , 0.21960786, 0. , 0. ,\n", 158 | " 0. , 0. , 0. , 0. , 0. ,\n", 159 | " 0. , 0. , 0. ],\n", 160 | " [0. , 0. , 0. , 0. , 0. ,\n", 161 | " 0. , 0. , 0. , 0. , 0. ,\n", 162 | " 0. , 0. , 0. , 0.7490196 , 0.9960785 ,\n", 163 | " 0.9960785 , 0.7254902 , 0. , 0. , 0. ,\n", 164 | " 0. , 0. , 0. , 0. , 0. ,\n", 165 | " 0. , 0. , 0. ],\n", 166 | " [0. , 0. , 0. , 0. , 0. ,\n", 167 | " 0. , 0. , 0. , 0. , 0. ,\n", 168 | " 0. , 0. , 0. , 0.91372555, 0.9960785 ,\n", 169 | " 0.9960785 , 0.41176474, 0. , 0. , 0. ,\n", 170 | " 0. , 0. , 0. , 0. , 0. ,\n", 171 | " 0. , 0. , 0. ],\n", 172 | " [0. , 0. , 0. , 0. , 0. ,\n", 173 | " 0. , 0. , 0. , 0. , 0. ,\n", 174 | " 0. , 0. , 0. 
, 0.91372555, 0.9960785 ,\n", 175 | " 0.9960785 , 0.11764707, 0. , 0. , 0. ,\n", 176 | " 0. , 0. , 0. , 0. , 0. ,\n", 177 | " 0. , 0. , 0. ],\n", 178 | " [0. , 0. , 0. , 0. , 0. ,\n", 179 | " 0. , 0. , 0. , 0. , 0. ,\n", 180 | " 0. , 0. , 0.3647059 , 0.9725491 , 0.9960785 ,\n", 181 | " 0.95294124, 0.10588236, 0. , 0. , 0. ,\n", 182 | " 0. , 0. , 0. , 0. , 0. ,\n", 183 | " 0. , 0. , 0. ],\n", 184 | " [0. , 0. , 0. , 0. , 0. ,\n", 185 | " 0. , 0. , 0. , 0. , 0. ,\n", 186 | " 0. , 0.0627451 , 0.75294125, 0.9960785 , 0.9960785 ,\n", 187 | " 0.50980395, 0. , 0. , 0. , 0. ,\n", 188 | " 0. , 0. , 0. , 0. , 0. ,\n", 189 | " 0. , 0. , 0. ],\n", 190 | " [0. , 0. , 0. , 0. , 0. ,\n", 191 | " 0. , 0. , 0. , 0. , 0. ,\n", 192 | " 0. , 0.1254902 , 0.9960785 , 0.9960785 , 0.9725491 ,\n", 193 | " 0.37254903, 0. , 0. , 0. , 0. ,\n", 194 | " 0. , 0. , 0. , 0. , 0. ,\n", 195 | " 0. , 0. , 0. ],\n", 196 | " [0. , 0. , 0. , 0. , 0. ,\n", 197 | " 0. , 0. , 0. , 0. , 0. ,\n", 198 | " 0. , 0.16078432, 0.9960785 , 0.9960785 , 0.9058824 ,\n", 199 | " 0. , 0. , 0. , 0. , 0. ,\n", 200 | " 0. , 0. , 0. , 0. , 0. ,\n", 201 | " 0. , 0. , 0. ],\n", 202 | " [0. , 0. , 0. , 0. , 0. ,\n", 203 | " 0. , 0. , 0. , 0. , 0. ,\n", 204 | " 0. , 0.7294118 , 0.9960785 , 0.9960785 , 0.627451 ,\n", 205 | " 0. , 0. , 0. , 0. , 0. ,\n", 206 | " 0. , 0. , 0. , 0. , 0. ,\n", 207 | " 0. , 0. , 0. ],\n", 208 | " [0. , 0. , 0. , 0. , 0. ,\n", 209 | " 0. , 0. , 0. , 0. , 0. ,\n", 210 | " 0.08235294, 0.7960785 , 0.9960785 , 0.7372549 , 0.04705883,\n", 211 | " 0. , 0. , 0. , 0. , 0. ,\n", 212 | " 0. , 0. , 0. , 0. , 0. ,\n", 213 | " 0. , 0. , 0. ],\n", 214 | " [0. , 0. , 0. , 0. , 0. ,\n", 215 | " 0. , 0. , 0. , 0. , 0. ,\n", 216 | " 0.3372549 , 0.9960785 , 1. , 0.7019608 , 0.03137255,\n", 217 | " 0.01568628, 0. , 0. , 0. , 0. ,\n", 218 | " 0. , 0. , 0. , 0. , 0. ,\n", 219 | " 0. , 0. , 0. ],\n", 220 | " [0. , 0. , 0. , 0. , 0. ,\n", 221 | " 0. , 0. , 0. , 0. , 0. ,\n", 222 | " 0.3372549 , 0.9960785 , 0.9960785 , 0.9960785 , 0.9960785 ,\n", 223 | " 0.50980395, 0. , 0. , 0. , 0. ,\n", 224 | " 0. , 0. , 0. , 0. , 0. ,\n", 225 | " 0. , 0. , 0. ],\n", 226 | " [0. , 0. , 0. , 0. , 0. ,\n", 227 | " 0. , 0. , 0. , 0. , 0. ,\n", 228 | " 0.26666668, 0.94117653, 0.9960785 , 0.9960785 , 0.9803922 ,\n", 229 | " 0.4039216 , 0. , 0. , 0. , 0. ,\n", 230 | " 0. , 0. , 0. , 0. , 0. ,\n", 231 | " 0. , 0. , 0. ],\n", 232 | " [0. , 0. , 0. , 0. , 0. ,\n", 233 | " 0. , 0. , 0. , 0. , 0. ,\n", 234 | " 0. , 0.36078432, 1. , 0.8117648 , 0.35686275,\n", 235 | " 0. , 0. , 0. , 0. , 0. ,\n", 236 | " 0. , 0. , 0. , 0. , 0. ,\n", 237 | " 0. , 0. , 0. ],\n", 238 | " [0. , 0. , 0. , 0. , 0. ,\n", 239 | " 0. , 0. , 0. , 0. , 0. ,\n", 240 | " 0. , 0. , 0. , 0. , 0. ,\n", 241 | " 0. , 0. , 0. , 0. , 0. ,\n", 242 | " 0. , 0. , 0. , 0. , 0. ,\n", 243 | " 0. , 0. , 0. ],\n", 244 | " [0. , 0. , 0. , 0. , 0. ,\n", 245 | " 0. , 0. , 0. , 0. , 0. ,\n", 246 | " 0. , 0. , 0. , 0. , 0. ,\n", 247 | " 0. , 0. , 0. , 0. , 0. ,\n", 248 | " 0. , 0. , 0. , 0. , 0. ,\n", 249 | " 0. , 0. , 0. ],\n", 250 | " [0. , 0. , 0. , 0. , 0. ,\n", 251 | " 0. , 0. , 0. , 0. , 0. ,\n", 252 | " 0. , 0. , 0. , 0. , 0. ,\n", 253 | " 0. , 0. , 0. , 0. , 0. ,\n", 254 | " 0. , 0. , 0. , 0. , 0. ,\n", 255 | " 0. , 0. , 0. ],\n", 256 | " [0. , 0. , 0. , 0. , 0. ,\n", 257 | " 0. , 0. , 0. , 0. , 0. ,\n", 258 | " 0. , 0. , 0. , 0. , 0. ,\n", 259 | " 0. , 0. , 0. , 0. , 0. ,\n", 260 | " 0. , 0. , 0. , 0. , 0. ,\n", 261 | " 0. , 0. , 0. 
]], dtype=float32)" 262 | ] 263 | }, 264 | "execution_count": 8, 265 | "metadata": {}, 266 | "output_type": "execute_result" 267 | } 268 | ], 269 | "source": [ 270 | "# check MNIST training images matrix data\n", 271 | "sample_img = mnist.train.images[6].reshape(28, 28)\n", 272 | "sample_img" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 9, 278 | "metadata": {}, 279 | "outputs": [ 280 | { 281 | "data": { 282 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAADFRJREFUeJzt3V2IXOUdx/HfL9qKmIDRTJdotZsWKSxC0zIEIWpS+4KVQiyINhclBWm8aLBCLyr2ol5FLbUlQqmkGppKa1toY3KhtWlokIIUV0l9rTWVDSbE7ARfqviSav692JOyjTtnx5lz5szm//3AsGfOc+acPyf72+fMeSbzOCIEIJ9FTRcAoBmEH0iK8ANJEX4gKcIPJEX4gaQIP5AU4QeSIvxAUqcP82DLli2L8fHxYR4SSGVqakpHjx51L9sOFH7bV0raIuk0SfdExO1l24+Pj2tycnKQQwIo0W63e96278t+26dJ+qmkr0iakLTe9kS/+wMwXIO8518laX9EvBgRxyT9RtK6asoCULdBwn++pJdmPT9YrPs/tjfanrQ92el0BjgcgCrVfrc/IrZGRDsi2q1Wq+7DAejRIOE/JOmCWc8/XqwDsAAMEv7HJF1ke4Xtj0r6uqRd1ZQFoG59D/VFxHu2N0l6WDNDfdsi4pnKKgNQq4HG+SPiQUkPVlQLgCHi471AUoQfSIrwA0kRfiApwg8kRfiBpAg/kBThB5Ii/EBShB9IivADSRF+ICnCDyRF+IGkCD+QFOEHkiL8QFKEH0iK8ANJEX4gKcIPJDXUKbpx6rn77rtL2++5556ubbt37y597dKlS/uqCb2h5weSIvxAUoQfSIrwA0kRfiApwg8kRfiBpAYa57c9JekNSe9Lei8i2lUUhdHx9ttvl7bfcccdpe0HDhzo2vb888+XvvaSSy4pbcdgqviQz+cj4mgF+wEwRFz2A0kNGv6Q9Cfbj9veWEVBAIZj0Mv+SyPikO2PSdpt+x8R8cjsDYo/Chsl6cILLxzwcACqMlDPHxGHip/TknZIWjXHNlsjoh0R7VarNcjhAFSo7/DbPsv2khPLkr4s6emqCgNQr0Eu+8ck7bB9Yj+/jog/VlIVgNr1Hf6IeFHSZyqsBSPogQceKG0vG8fHaGOoD0iK8ANJEX4gKcIPJEX4gaQIP5AUX92NUg8//HDTJaAm9PxAUoQfSIrwA0kRfiApwg8kRfiBpAg/kBTj/Mnt37+/tP2hhx4aUiUYNnp+ICnCDyRF+IGkCD+QFOEHkiL8QFKEH0iKcf7kXn/99dL2TqczpEowbPT8QFKEH0iK8ANJEX4gKcIPJEX4gaQIP5DUvOP8trdJ+qqk6Yi4uFh3jqTfShqXNCXp2oh4tb4ysVCdd955XdvGxsaGWAlO1kvP/wtJV5607mZJeyLiIkl7iucAFpB5wx8Rj0h65aTV6yRtL5a3S7q64roA1Kzf9/xjEXG4WH5ZEtdvwAIz8A2/iAhJ0a3d9kbbk7Yn+Zw4MDr6Df8R28slqfg53W3DiNgaEe2IaLdarT4PB6Bq/YZ/l6QNxfIGSTurKQfAsMwbftv3S3pU0qdtH7R9vaTbJX3J9guSvlg8B7CAzDvOHxHruzR9oeJa0IDNmzfXuv/LLrusa9uKFStqPTbK8Qk/ICnCDyRF+IGkCD+QFOEHkiL8QFJ8dXdyjz76aK37X7++20gxmkbPDyRF+IGkCD+QFOEHkiL8QFKEH0iK8ANJMc6PWq1Zs6bpEtAFPT+QFOEHkiL8QFKEH0iK8ANJEX4gKcIPJMU4/ylu7969pe2vvjrYzOqLFy8ubV+0iP5lVPEvAyRF+IGkCD+QFOEHkiL8QFKEH0iK8ANJzTvOb3ubpK9Kmo6Ii4t1t0r6lqROsdktEfFgXUWi3DvvvNO1bcuWLaWvfffddwc69nxTfC9ZsmSg/aM+vfT8v5B05RzrfxIRK4sHwQcWmHnDHxGPSHplCLUAGKJB3vNvsv2k7W22l1ZWEYCh6Df8P5P0KUkrJR2WdGe3DW1vtD1pe7LT6XTbDMCQ9RX+iDgSEe9HxHFJP5e0qmTbrRHRjoh2q9Xqt04AFesr/LaXz3r6NUlPV1MOgGHpZajvfklrJS2zfVDSDySttb1SUkiaknRDjTUCqMG84Y+IuSZYv7eGWtCnsnH+nTt3DrTvM844o7R9YmJioP2jOXzCD0iK8ANJEX4gKcIPJEX4gaQIP5AUX919Cjh27Fht+z733HNL26+44orajo160fMDSRF+ICnCDyRF+IGkCD+QFOEHkiL8QFKM858Cbrzxxtr2fd1119W2bzSLnh9IivADSRF+ICnCDyRF+IGkCD+QFOEHkmKcfwF46623Stunp6f73vc111xT2n7bbbf1vW+MNnp+ICnCDyRF+IGkCD+QFOEHkiL8QFKEH0hq3nF+2xdI+qWkMUkhaWtEbLF9jqTfShqXNCXp2oh4tb5S89q3b19p+969e7u2RUTpa88888zS9kWLyvuH48ePD/R6NKeXf5n3JH03IiYkXSLp27YnJN0saU9EXCRpT/EcwAIxb/gj4nBEPFEsvyHpOUnnS1onaXux2XZJV9dVJIDqfahrMtvjkj4r6W+SxiLicNH0smbeFgBYIHoOv+3Fkn4v6aaI+Pfstph5Yznnm0vbG21P2p7sdDoDFQugOj2F3/ZHNBP8X0XEH4rVR2wvL9qXS5rzf5dExNaIaEdEu9VqVVEzgArMG37blnSvpOci4sezmnZJ2lAsb5C0s/ryANSll//Su1rSNyQ9ZfvEmNMtkm6X9Dvb10s6IOnaekrEIGb+dnd33333DdR+1113lbZv2rSptB3NmTf8EfFXSd1+g75QbTkAhoVPYABJEX4gKcIPJEX4gaQIP5AU4QeS4qu7F4Czzz677/bXXnttoGOffnr5r8j4+PhA+0dz6PmBpAg/kBThB5Ii/EBShB9IivADSRF+ICnG+ReAiYmJ0vYdO3Z0bVu7dm3pa1evXl3avnnz5tL2yy+/vLQdo4ueH0iK8ANJEX4gKcIPJEX4gaQIP5AU4QeSYpz/FLBmzZqubfNN0Y286PmBpAg/
kBThB5Ii/EBShB9IivADSRF+IKl5w2/7Att/sf2s7Wdsf6dYf6vtQ7b3FY+r6i8XQFV6+ZDPe5K+GxFP2F4i6XHbu4u2n0TEj+orD0Bd5g1/RByWdLhYfsP2c5LOr7swAPX6UO/5bY9L+qykvxWrNtl+0vY220u7vGaj7Unbk51OZ6BiAVSn5/DbXizp95Juioh/S/qZpE9JWqmZK4M753pdRGyNiHZEtFutVgUlA6hCT+G3/RHNBP9XEfEHSYqIIxHxfkQcl/RzSavqKxNA1Xq5229J90p6LiJ+PGv98lmbfU3S09WXB6AuvdztXy3pG5Kesr2vWHeLpPW2V0oKSVOSbqilQgC16OVu/18leY6mB6svB8Cw8Ak/ICnCDyRF+IGkCD+QFOEHkiL8QFKEH0iK8ANJEX4gKcIPJEX4gaQIP5AU4QeSIvxAUh7mFM62O5IOzFq1TNLRoRXw4YxqbaNal0Rt/aqytk9ERE/flzfU8H/g4PZkRLQbK6DEqNY2qnVJ1Navpmrjsh9IivADSTUd/q0NH7/MqNY2qnVJ1NavRmpr9D0/gOY03fMDaEgj4bd9pe3nbe+3fXMTNXRje8r2U8XMw5MN17LN9rTtp2etO8f2btsvFD/nnCatodpGYubmkpmlGz13ozbj9dAv+22fJumfkr4k6aCkxyStj4hnh1pIF7anJLUjovExYduXS3pT0i8j4uJi3Q8lvRIRtxd/OJdGxPdGpLZbJb3Z9MzNxYQyy2fPLC3paknfVIPnrqSua9XAeWui518laX9EvBgRxyT9RtK6BuoYeRHxiKRXTlq9TtL2Ynm7Zn55hq5LbSMhIg5HxBPF8huSTsws3ei5K6mrEU2E/3xJL816flCjNeV3SPqT7cdtb2y6mDmMFdOmS9LLksaaLGYO887cPEwnzSw9Mueunxmvq8YNvw+6NCI+J+krkr5dXN6OpJh5zzZKwzU9zdw8LHPMLP0/TZ67fme8rloT4T8k6YJZzz9erBsJEXGo+DktaYdGb/bhIycmSS1+Tjdcz/+M0szNc80srRE4d6M043UT4X9M0kW2V9j+qKSvS9rVQB0fYPus4kaMbJ8l6csavdmHd0naUCxvkLSzwVr+z6jM3NxtZmk1fO5GbsbriBj6Q9JVmrnj/y9J32+ihi51fVLS34vHM03XJul+zVwG/kcz90aul3SupD2SXpD0Z0nnjFBt90l6StKTmgna8oZqu1Qzl/RPStpXPK5q+tyV1NXIeeMTfkBS3PADkiL8QFKEH0iK8ANJEX4gKcIPJEX4gaQIP5DUfwHXgNMxwvvjUAAAAABJRU5ErkJggg==\n", 283 | "text/plain": [ 284 | "
" 285 | ] 286 | }, 287 | "metadata": {}, 288 | "output_type": "display_data" 289 | } 290 | ], 291 | "source": [ 292 | "# plot the image\n", 293 | "plt.imshow(sample_img).set_cmap('Greys')" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "## Training Labels\n", 301 | "![mnist.train.ys](https://www.tensorflow.org/versions/master/images/mnist-train-ys.png)" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": 6, 307 | "metadata": {}, 308 | "outputs": [ 309 | { 310 | "data": { 311 | "text/plain": [ 312 | "(55000, 10)" 313 | ] 314 | }, 315 | "execution_count": 6, 316 | "metadata": {}, 317 | "output_type": "execute_result" 318 | } 319 | ], 320 | "source": [ 321 | "# check MNIST labels shape\n", 322 | "mnist.train.labels.shape" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": 10, 328 | "metadata": {}, 329 | "outputs": [ 330 | { 331 | "data": { 332 | "text/plain": [ 333 | "array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0.])" 334 | ] 335 | }, 336 | "execution_count": 10, 337 | "metadata": {}, 338 | "output_type": "execute_result" 339 | } 340 | ], 341 | "source": [ 342 | "# show MNIST label data\n", 343 | "sample_label = mnist.train.labels[6]\n", 344 | "sample_label" 345 | ] 346 | }, 347 | { 348 | "cell_type": "markdown", 349 | "metadata": {}, 350 | "source": [ 351 | "# Defining a Neural Network" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "## in a graph:\n", 359 | "![](https://www.tensorflow.org/versions/master/images/softmax-regression-scalargraph.png)\n", 360 | "\n", 361 | "## in a vector equation:\n", 362 | "![](https://www.tensorflow.org/versions/master/images/softmax-regression-vectorequation.png)\n", 363 | "\n", 364 | "## so that we'll have the weights like this:\n", 365 | "blue: positive weights, red: negative weights\n", 366 | "![](https://www.tensorflow.org/versions/master/images/softmax-weights.png)" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": 11, 372 | "metadata": {}, 373 | "outputs": [ 374 | { 375 | "data": { 376 | "text/plain": [ 377 | "" 378 | ] 379 | }, 380 | "execution_count": 11, 381 | "metadata": {}, 382 | "output_type": "execute_result" 383 | } 384 | ], 385 | "source": [ 386 | "# define a neural network (softmax logistic regression)\n", 387 | "import tensorflow as tf\n", 388 | "\n", 389 | "x = tf.placeholder(tf.float32, [None, 784])\n", 390 | "\n", 391 | "W = tf.Variable(tf.zeros([784, 10]))\n", 392 | "b = tf.Variable(tf.zeros([10]))\n", 393 | "\n", 394 | "y = tf.nn.softmax(tf.matmul(x, W) + b) # the equation\n", 395 | "y" 396 | ] 397 | }, 398 | { 399 | "cell_type": "markdown", 400 | "metadata": {}, 401 | "source": [ 402 | "# Defining the Train Step" 403 | ] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "execution_count": 12, 408 | "metadata": {}, 409 | "outputs": [ 410 | { 411 | "data": { 412 | "text/plain": [ 413 | "" 414 | ] 415 | }, 416 | "execution_count": 12, 417 | "metadata": {}, 418 | "output_type": "execute_result" 419 | } 420 | ], 421 | "source": [ 422 | "# define the train step to minimize the cross entropy with SGD\n", 423 | "y_ = tf.placeholder(tf.float32, [None, 10])\n", 424 | "\n", 425 | "cross_entropy = -tf.reduce_sum(y_*tf.log(y))\n", 426 | "\n", 427 | "train_step = tf.train.GradientDescentOptimizer(0.0001).minimize(cross_entropy)\n", 428 | "train_step" 429 | ] 430 | }, 431 | { 432 | "cell_type": "markdown", 433 | "metadata": {}, 434 | "source": [ 435 | "## Use Gradient 
Descent to find the optimal weights\n", 436 | "![](http://blog.datumbox.com/wp-content/uploads/2013/10/gradient-descent.png)\n", 437 | "From: [Machine Learning Blog & Software Development News](http://blog.datumbox.com/tuning-the-learning-rate-in-gradient-descent/)" 438 | ] 439 | }, 440 | { 441 | "cell_type": "markdown", 442 | "metadata": {}, 443 | "source": [ 444 | "# Run 1000 steps of mini-batch training" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": 14, 450 | "metadata": {}, 451 | "outputs": [ 452 | { 453 | "name": "stdout", 454 | "output_type": "stream", 455 | "text": [ 456 | "done\n" 457 | ] 458 | } 459 | ], 460 | "source": [ 461 | "# initialize variables and session\n", 462 | "init = tf.global_variables_initializer()\n", 463 | "sess = tf.Session()\n", 464 | "sess.run(init)\n", 465 | "\n", 466 | "# train the model with mini-batches of 100 examples, 1000 times\n", 467 | "for i in range(1000):\n", 468 | " batch_xs, batch_ys = mnist.train.next_batch(100)\n", 469 | " sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})\n", 470 | " \n", 471 | "print(\"done\")" 472 | ] 473 | }, 474 | { 475 | "cell_type": "markdown", 476 | "metadata": {}, 477 | "source": [ 478 | "# Test" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": 16, 484 | "metadata": {}, 485 | "outputs": [ 486 | { 487 | "name": "stdout", 488 | "output_type": "stream", 489 | "text": [ 490 | "0.8695\n" 491 | ] 492 | } 493 | ], 494 | "source": [ 495 | "# evaluate the accuracy of the model\n", 496 | "correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))\n", 497 | "accuracy = tf.reduce_mean(tf.cast(correct_prediction, \"float\"))\n", 498 | "print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))" 499 | ] 500 | }, 501 | { 502 | "cell_type": "markdown", 503 | "metadata": {}, 504 | "source": [ 505 | "# TODO\n", 506 | "\n", 507 | "* Add a cell where you run an initialization and print the value of the weights before and after one optimization step. Do the weights actually change between before and after the optimization?\n", 508 | "\n", 509 | "\n", 510 | "* Modify this notebook to display two curves showing the progression of the loss/accuracy on the training/test sets once training is finished. You can use matplotlib.pyplot for this (a sketch is given after this notebook).\n", 511 | "\n", 512 | "\n", 513 | "* Which curve seems to dominate and obtain the best results?\n", 514 | "\n", 515 | "\n", 516 | "* Print logs of the training and test progression during training.\n", 517 | "\n", 518 | "\n", 519 | "* Can you think of a different error formula? Implement it and compare the results.\n", 520 | "\n", 521 | "\n", 522 | "* Change the learning rate from 0.0001 to 0.1. What are the consequences? Why?\n", 523 | "\n", 524 | "\n", 525 | "* Add a seed to Python and a seed to TensorFlow. What can you deduce about the importance of the seed in a training run?\n", 526 | "\n", 527 | "\n", 528 | "* Now, find the most favorable learning rate.\n", 529 | "\n", 530 | "\n", 531 | "* Try adding a hidden layer made of 16 neurons. The structure will thus go from 784 -> 10 to 784 -> 16 -> 10. The intermediate layer must be made of weights and a bias. Is initializing the weights to zero a good idea with two layers? Why? Can you reach the same performance with the additional layer? What can you deduce from this? (A sketch of the two-layer model is given after this notebook.)\n", 532 | "\n", 533 | "\n", 534 | "* There are other gradient-descent-based algorithms for optimizing your variables; try replacing the current one with a more sophisticated version. Read up on the exact differences." 535 | ] 536 | } 537 | ], 538 | "metadata": { 539 | "kernelspec": { 540 | "display_name": "Python 3", 541 | "language": "python", 542 | "name": "python3" 543 | }, 544 | "language_info": { 545 | "codemirror_mode": { 546 | "name": "ipython", 547 | "version": 3 548 | }, 549 | "file_extension": ".py", 550 | "mimetype": "text/x-python", 551 | "name": "python", 552 | "nbconvert_exporter": "python", 553 | "pygments_lexer": "ipython3", 554 | "version": "3.5.2" 555 | } 556 | }, 557 | "nbformat": 4, 558 | "nbformat_minor": 1 559 | } 560 | --------------------------------------------------------------------------------