├── .editorconfig ├── .gitignore ├── .jscsrc ├── .jshintrc ├── LICENSE ├── README.md ├── bower.json ├── package.json └── src ├── agent.js ├── config.js ├── main.js ├── rat.js ├── ratsim.js ├── robot.js ├── ros.js ├── utils.js └── vendor ├── convnet.js └── uncertain ├── convnet.js └── deepqlearn.js /.editorconfig: -------------------------------------------------------------------------------- 1 | [*.js] 2 | indent_style = space 3 | indent_size = 2 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | 5 | # Runtime data 6 | pids 7 | *.pid 8 | *.seed 9 | 10 | # Directory for instrumented libs generated by jscoverage/JSCover 11 | lib-cov 12 | 13 | # Coverage directory used by tools like istanbul 14 | coverage 15 | 16 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 17 | .grunt 18 | 19 | # node-waf configuration 20 | .lock-wscript 21 | 22 | # Compiled binary addons (http://nodejs.org/api/addons.html) 23 | build/Release 24 | 25 | # Dependency directory 26 | # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git 27 | node_modules 28 | bower_components 29 | 30 | brains 31 | -------------------------------------------------------------------------------- /.jscsrc: -------------------------------------------------------------------------------- 1 | { 2 | "disallowEmptyBlocks": true, 3 | "disallowKeywordsOnNewLine": ["else"], 4 | "disallowMultipleLineStrings": true, 5 | "disallowNewlineBeforeBlockStatements": true, 6 | "disallowOperatorBeforeLineBreak": ["."], 7 | "disallowSpaceAfterObjectKeys": true, 8 | "disallowSpaceAfterPrefixUnaryOperators": ["++", "--", "~", "!"], 9 | "disallowSpaceBeforePostfixUnaryOperators": ["++", "--"], 10 | "disallowSpacesInCallExpression": true, 11 | "disallowSpacesInsideArrayBrackets": true, 12 | 
"disallowTrailingComma": true, 13 | "disallowYodaConditions": true, 14 | "requireCommaBeforeLineBreak": true, 15 | "requireSpaceAfterKeywords": [ 16 | "do", 17 | "for", 18 | "if", 19 | "else", 20 | "case", 21 | "try", 22 | "catch", 23 | "while", 24 | "with", 25 | "return" 26 | ], 27 | "requireSpaceBeforeBlockStatements": true, 28 | "requireSpaceBeforeKeywords": [ 29 | "else", 30 | "while", 31 | "catch" 32 | ], 33 | "requireSpaceBeforeObjectValues": true, 34 | "requireSpacesInForStatement": true, 35 | "safeContextKeyword": "_self" 36 | } 37 | -------------------------------------------------------------------------------- /.jshintrc: -------------------------------------------------------------------------------- 1 | { 2 | "esnext": false 3 | } 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ROS + DQN + RatSLAM 2 | 3 | Training Deep Q-Learning neural network based on ConvNetJS demo to use sonar range sensors and RatSLAM goals. 4 | 5 | * [ConvNetJS - demo](http://cs.stanford.edu/people/karpathy/convnetjs/demo/rldemo.html) 6 | * [ROSLibJS](https://github.com/RobotWebTools/roslibjs/) 7 | * [RatSLAM fork](https://github.com/mryellow/ratslam) (extended ROS integration) 8 | 9 | # Status 10 | 11 | Got busy and distracted, it works well enough for direct goal seeking and that may be enough to train up an agent which makes pretty maps in RatSLAM (if not straying too far before turning back). Have some decent [experiments](https://github.com/mryellow/reinforcejs/tree/demo-multiagent) with [ReinforceJS](http://cs.stanford.edu/people/karpathy/reinforcejs/). Finding goals on the other side of walls and traps will require a different implementation, namely Actor-critic and/or Actor-mimic style architectures to get around these obstacles (when a goal can be seen on the other side of a trap). 12 | 13 | # Setup 14 | 15 | ``` 16 | npm install 17 | bower install 18 | ``` 19 | 20 | # TODO 21 | 22 | * [ ] Teleop. 23 | * [ ] Integrate IMU/tilt/odom feedback. 24 | * [ ] Catkin-ise. 25 | * [ ] Define custom ROS messages. 26 | * [ ] LTM/STM with long-term sets of "important" experiences. 27 | * [ ] Save/load DQN experience sets. 
28 | 29 | # Usage 30 | 31 | ``` 32 | roslaunch kulbu_base sim.launch world:=rat1 33 | roslaunch kulbu_slam rat.launch use_rat_odom:=false topic_odom:=/kulbu/odometry/filtered 34 | rosrun turtlebot_teleop turtlebot_teleop_key /turtlebot_teleop/cmd_vel:=/kulbu/diff_drive_controller/cmd_vel 35 | 36 | roslaunch rosbridge_server rosbridge_websocket.launch # ROSLibJS 37 | node src/main.js 38 | node src/main.js --noise # Generate noise on extra sensors. 39 | node src/ratsim.js # Simulate RatSLAM goals for training. 40 | rqt_plot /dqn/reward:epsilon 41 | rqt_plot /dqn/avg_reward:avg_loss 42 | 43 | rostopic pub -1 /dqn/status std_msgs/String -- '"{\"learning\": true, \"moving\": true, \"sensors\": false}"' # TODO: Custom message format. 44 | rostopic pub -1 /dqn/save std_msgs/String -- 'file' # Save DQN as JSON. 45 | rostopic pub -1 /dqn/load std_msgs/String -- 'file' # Load DQN from JSON. 46 | rostopic pub -1 /dqn/set_age std_msgs/String -- '"100000"' # FIXME: Datatype. 47 | ``` 48 | 49 | # Future work 50 | 51 | ## RatSLAM 52 | 53 | * [x] Reverse goal order and tweak for use on exploration tasks. 54 | * [ ] Discard experiences with *many* links. 55 | * [ ] Quality metric for LV. Don't link low quality experiences. 56 | * [ ] Reject closures with vastly different magnetic reading? 57 | * [ ] Implement multi Experience Maps [RatSLAM on Humanoids](https://www2.informatik.uni-hamburg.de/wtm/ps/M%C3%BCller_ICANN2014_CR.pdf) 58 | 59 | ## DQN 60 | 61 | * Further test [Dropout uncertainty](https://github.com/yaringal/DropoutUncertaintyDemos/). 
/**
 * Range sensor with a maximum range that senses walls.
 *
 * Every setting is optional. A missing or falsy value (including 0 or an
 * empty string) falls back to the default listed below.
 *
 * @class
 * @constructor
 * @param {object} [input] Sensor settings.
 * @param {string} [input.name=''] Sensor name; also written to the log.
 * @param {number} [input.angle=0] Sensor angle.
 * @param {number} [input.fov] Field of view in radians; defaults to 15deg.
 * @param {number} [input.max_range=4] Maximum sensing range.
 * @param {number} [input.max_type=1] Maximum sensed type value.
 */
var Sensor = function(input) {
  // All fields are optional, so tolerate a missing config object rather than
  // throwing a TypeError on `input.name` in the log call below.
  var opts = input || {};

  console.log('Creating sensor', opts.name);
  this.name = opts.name ? opts.name : '';
  this.angle = opts.angle ? opts.angle : 0;
  this.fov = opts.fov ? opts.fov : (15 * Math.PI / 180); // Default 15deg.
  this.max_range = opts.max_range ? opts.max_range : 4;
  this.max_type = opts.max_type ? opts.max_type : 1;

  // Initial reading starts at the sensor's maximum range.
  this.sensed_proximity = this.max_range;
  this.sensed_type = -1; // what does the eye see?

  // Watch for updates, syncing framerate to sensors.
  this.updated = false;
};