├── ContinuousHTMGPU ├── resources │ ├── bot.png │ ├── car.png │ ├── end.png │ ├── arial.ttf │ ├── cart.png │ ├── floor.png │ ├── pole.png │ ├── start.png │ ├── wall.png │ ├── background.png │ ├── inputCart.png │ ├── inputPole.png │ ├── pixelated.ttf │ ├── lineGradient.png │ ├── license.txt │ ├── read me.txt │ ├── htm.cl │ ├── cae.cl │ └── htmrl.cl ├── source │ ├── system │ │ ├── Uncopyable.h │ │ ├── ComputeProgram.h │ │ ├── ComputeSystem.h │ │ ├── ComputeProgram.cpp │ │ └── ComputeSystem.cpp │ ├── vis │ │ ├── HTMRLVisualizer.h │ │ ├── Plot.h │ │ ├── HTMRLVisualizer.cpp │ │ └── Plot.cpp │ ├── htm │ │ ├── AnythingEncoder.h │ │ ├── AnythingEncoder.cpp │ │ ├── HTMRL.h │ │ └── HTMRL.cpp │ └── Main.cpp ├── CMakeLists.txt ├── FindOpenCL.cmake └── FindSFML.cmake ├── LICENSE.md └── README.md /ContinuousHTMGPU/resources/bot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/bot.png -------------------------------------------------------------------------------- /ContinuousHTMGPU/resources/car.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/car.png -------------------------------------------------------------------------------- /ContinuousHTMGPU/resources/end.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/end.png -------------------------------------------------------------------------------- /ContinuousHTMGPU/resources/arial.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/arial.ttf -------------------------------------------------------------------------------- 
/ContinuousHTMGPU/resources/cart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/cart.png -------------------------------------------------------------------------------- /ContinuousHTMGPU/resources/floor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/floor.png -------------------------------------------------------------------------------- /ContinuousHTMGPU/resources/pole.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/pole.png -------------------------------------------------------------------------------- /ContinuousHTMGPU/resources/start.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/start.png -------------------------------------------------------------------------------- /ContinuousHTMGPU/resources/wall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/wall.png -------------------------------------------------------------------------------- /ContinuousHTMGPU/resources/background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/background.png -------------------------------------------------------------------------------- /ContinuousHTMGPU/resources/inputCart.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/inputCart.png -------------------------------------------------------------------------------- /ContinuousHTMGPU/resources/inputPole.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/inputPole.png -------------------------------------------------------------------------------- /ContinuousHTMGPU/resources/pixelated.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/pixelated.ttf -------------------------------------------------------------------------------- /ContinuousHTMGPU/resources/lineGradient.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/lineGradient.png -------------------------------------------------------------------------------- /ContinuousHTMGPU/resources/license.txt: -------------------------------------------------------------------------------- 1 | The FontStruction “Pixelated” 2 | (http://fontstruct.com/fontstructions/show/426637) by “Greenma201” is 3 | licensed under a Creative Commons Attribution Share Alike license 4 | (http://creativecommons.org/licenses/by-sa/3.0/). 
5 | -------------------------------------------------------------------------------- /ContinuousHTMGPU/source/system/Uncopyable.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Inherit from this class to make the class uncopyable 4 | namespace sys { 5 | class Uncopyable { 6 | protected: 7 | Uncopyable() {} 8 | virtual ~Uncopyable() {} 9 | private: 10 | Uncopyable(const Uncopyable &); 11 | Uncopyable &operator=(const Uncopyable &); 12 | }; 13 | } -------------------------------------------------------------------------------- /ContinuousHTMGPU/source/system/ComputeProgram.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | namespace sys { 8 | class ComputeProgram { 9 | private: 10 | cl::Program _program; 11 | 12 | public: 13 | bool loadFromFile(const std::string &name, ComputeSystem &cs); 14 | 15 | cl::Program &getProgram() { 16 | return _program; 17 | } 18 | }; 19 | } -------------------------------------------------------------------------------- /ContinuousHTMGPU/source/vis/HTMRLVisualizer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace vis { 7 | class HTMRLVisualizer { 8 | private: 9 | sf::RenderTexture _rt; 10 | public: 11 | void create(unsigned int width); 12 | 13 | void update(sf::RenderTexture &target, const sf::Vector2f &position, const sf::Vector2f &scale, sys::ComputeSystem &cs, const htm::HTMRL &htmrl, std::mt19937 &generator); 14 | }; 15 | } -------------------------------------------------------------------------------- /ContinuousHTMGPU/source/system/ComputeSystem.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #define SYS_ALLOW_CL_GL_CONTEXT 0 7 | 8 | namespace sys { 9 | class ComputeSystem : private Uncopyable 
{ 10 | public: 11 | enum DeviceType { 12 | _cpu, _gpu, _all, _none 13 | }; 14 | 15 | private: 16 | cl::Platform _platform; 17 | cl::Device _device; 18 | cl::Context _context; 19 | cl::CommandQueue _queue; 20 | 21 | public: 22 | bool create(DeviceType type, bool createFromGLContext = false); 23 | 24 | cl::Platform &getPlatform() { 25 | return _platform; 26 | } 27 | 28 | cl::Device &getDevice() { 29 | return _device; 30 | } 31 | 32 | cl::Context &getContext() { 33 | return _context; 34 | } 35 | 36 | cl::CommandQueue &getQueue() { 37 | return _queue; 38 | } 39 | }; 40 | } -------------------------------------------------------------------------------- /ContinuousHTMGPU/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | 3 | project(CHTMGPU) 4 | 5 | include_directories("${PROJECT_SOURCE_DIR}/source") 6 | 7 | # This is only required for the script to work in the version control 8 | set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}") 9 | 10 | find_package(OpenCL REQUIRED) 11 | 12 | include_directories(${OPENCL_INCLUDE_DIRS}) 13 | 14 | if(OPENCL_HAS_CPP_BINDINGS) 15 | message("OpenCL has CPP bindings. 
Full include is: " ${OPENCL_INCLUDE_DIRS}) 16 | else(OPENCL_HAS_CPP_BINDINGS) 17 | message("No OpenCL CPP bindings found") 18 | endif(OPENCL_HAS_CPP_BINDINGS) 19 | 20 | find_package(SFML 2 REQUIRED system window graphics) 21 | 22 | include_directories(${SFML_INCLUDE_DIR}) 23 | 24 | # Build every translation unit under source/ (system, vis, htm), not just Main.cpp; 25 | # otherwise the definitions in those files are never compiled into the executable. 26 | file(GLOB_RECURSE CHTMGPU_SOURCES "${PROJECT_SOURCE_DIR}/source/*.cpp") 27 | 28 | add_executable(CHTMGPU ${CHTMGPU_SOURCES}) 29 | 30 | target_link_libraries(CHTMGPU ${OPENCL_LIBRARIES}) 31 | target_link_libraries(CHTMGPU ${SFML_LIBRARIES}) -------------------------------------------------------------------------------- /ContinuousHTMGPU/source/system/ComputeProgram.cpp: -------------------------------------------------------------------------------- 1 | #include "ComputeProgram.h" 2 | 3 | #include 4 | #include 5 | 6 | using namespace sys; 7 | 8 | bool ComputeProgram::loadFromFile(const std::string &name, ComputeSystem &cs) { 9 | std::ifstream fromFile(name); 10 | 11 | if (!fromFile.is_open()) { 12 | #ifdef SYS_DEBUG 13 | std::cerr << "Could not open file " << name << "!" << std::endl; 14 | #endif 15 | return false; 16 | } 17 | 18 | std::string source = ""; 19 | 20 | while (!fromFile.eof() && fromFile.good()) { 21 | std::string line; 22 | 23 | std::getline(fromFile, line); 24 | 25 | source += line + "\n"; 26 | } 27 | 28 | _program = cl::Program(cs.getContext(), source); 29 | 30 | if (_program.build(std::vector(1, cs.getDevice())) != CL_SUCCESS) { 31 | #ifdef SYS_DEBUG 32 | std::cerr << "Error building: " << _program.getBuildInfo(cs.getDevice()) << std::endl; 33 | #endif 34 | return false; 35 | } 36 | 37 | return true; 38 | } -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | ContinuousHTMGPU 2 | Copyright (C) 2014-2015 Eric Laukien 3 | 4 | This software is provided 'as-is', without any express or implied 5 | warranty. In no event will the authors be held liable for any damages 6 | arising from the use of this software. 
7 | 8 | Permission is granted to anyone to use this software for any purpose, 9 | including commercial applications, and to alter it and redistribute it 10 | freely, subject to the following restrictions: 11 | 12 | 1. The origin of this software must not be misrepresented; you must not 13 | claim that you wrote the original software. If you use this software 14 | in a product, an acknowledgment in the product documentation would be 15 | appreciated but is not required. 16 | 2. Altered source versions must be plainly marked as such, and must not be 17 | misrepresented as being the original software. 18 | 3. This notice may not be removed or altered from any source distribution. 19 | 20 | ------------------------------------------------------------------------------ 21 | 22 | ContinuousHTMGPU uses the following external libraries: 23 | 24 | SFML - source code is licensed under the zlib/png license. 25 | OpenCL -------------------------------------------------------------------------------- /ContinuousHTMGPU/resources/read me.txt: -------------------------------------------------------------------------------- 1 | The font file in this archive was created using Fontstruct the free, online 2 | font-building tool. 3 | This font was created by “Greenma201”. 4 | This font has a homepage where this archive and other versions may be found: 5 | http://fontstruct.com/fontstructions/show/426637 6 | 7 | Try Fontstruct at http://fontstruct.com 8 | It’s easy and it’s fun. 9 | 10 | NOTE FOR FLASH USERS: Fontstruct fonts (fontstructions) are optimized for 11 | Flash. If the font in this archive is a pixel font, it is best displayed at a 12 | font-size of 8. 13 | 14 | Fontstruct is sponsored by FontShop. 15 | Visit them at http://www.fontshop.com 16 | FontShop is the original independent font retailer. We’ve been around since 17 | the dawn of digital type. Whether you need the right font or need to create the 18 | right font from scratch, let our 20 years of experience work for you. 
19 | 20 | Fontstruct is copyright ©2011 Rob Meek 21 | 22 | LEGAL NOTICE: 23 | In using this font you must comply with the licensing terms described in the 24 | file “license.txt” included with this archive. 25 | If you redistribute the font file in this archive, it must be accompanied by 26 | all the other files from this archive, including this one. 27 | -------------------------------------------------------------------------------- /ContinuousHTMGPU/source/vis/Plot.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace vis { 6 | struct Point { 7 | sf::Vector2f _position; 8 | 9 | sf::Color _color; 10 | 11 | Point() 12 | : _color(sf::Color::Black) 13 | {} 14 | }; 15 | 16 | struct Curve { 17 | std::string _name; 18 | 19 | float _shadow; 20 | sf::Vector2f _shadowOffset; 21 | 22 | std::vector _points; 23 | 24 | Curve() 25 | : _shadow(0.5f), _shadowOffset(-4.0f, 4.0f) 26 | {} 27 | }; 28 | 29 | struct Plot { 30 | sf::Color _axesColor; 31 | sf::Color _backgroundColor; 32 | 33 | std::vector _curves; 34 | 35 | Plot() 36 | : _axesColor(sf::Color::Black), _backgroundColor(sf::Color::White) 37 | {} 38 | 39 | void draw(sf::RenderTarget &target, const sf::Texture &lineGradientTexture, const sf::Font &tickFont, float tickTextScale, 40 | const sf::Vector2f &domain, const sf::Vector2f &range, const sf::Vector2f &margins, const sf::Vector2f &tickIncrements, float axesSize, float lineSize, float tickSize, float tickLength, float textTickOffset, int precision); 41 | }; 42 | 43 | float vectorMagnitude(const sf::Vector2f &vector); 44 | sf::Vector2f vectorNormalize(const sf::Vector2f &vector); 45 | float vectorDot(const sf::Vector2f &left, const sf::Vector2f &right); 46 | } -------------------------------------------------------------------------------- /ContinuousHTMGPU/source/htm/AnythingEncoder.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 
#include 4 | 5 | #include 6 | 7 | #include 8 | 9 | #include 10 | 11 | namespace htm { 12 | class AnythingEncoder { 13 | public: 14 | float sigmoid(float x) { 15 | return 1.0f / (1.0f + std::exp(-x)); 16 | } 17 | 18 | private: 19 | struct Node { 20 | std::vector _center; 21 | 22 | float _activation; 23 | float _output; 24 | float _outputPrev; 25 | float _dutyCycle; 26 | 27 | Node() 28 | : _activation(0.0f), _output(0.0f), _outputPrev(0.0f), _dutyCycle(0.0f) 29 | {} 30 | }; 31 | 32 | int _sdrSize; 33 | int _inputSize; 34 | 35 | int _boostCandidate; 36 | float _bestRepresentation; 37 | 38 | std::vector _nodes; 39 | 40 | public: 41 | AnythingEncoder() 42 | : _boostCandidate(0), _bestRepresentation(1.0f) 43 | {} 44 | 45 | void create(int sdrSize, int inputSize, float minInitCenter, float maxInitCenter, std::mt19937 &generator); 46 | 47 | void encode(const std::vector &input, std::vector &sdr, float localActivity, float outputIntensity, float dutyCycleDecay); 48 | void learn(const std::vector &input, float centerAlpha, float maxDutyCycleForLearn, float noMatchIntensity); 49 | void decode(const std::vector &sdr, std::vector &recon); 50 | }; 51 | } -------------------------------------------------------------------------------- /ContinuousHTMGPU/source/vis/HTMRLVisualizer.cpp: -------------------------------------------------------------------------------- 1 | #include "HTMRLVisualizer.h" 2 | 3 | using namespace vis; 4 | 5 | void HTMRLVisualizer::create(unsigned int width) { 6 | _rt.create(width, width, false); 7 | _rt.setSmooth(true); 8 | } 9 | 10 | void HTMRLVisualizer::update(sf::RenderTexture &target, const sf::Vector2f &position, const sf::Vector2f &scale, sys::ComputeSystem &cs, const htm::HTMRL &htmrl, std::mt19937 &generator) { 11 | std::vector> images; 12 | 13 | htmrl.exportCellData(cs, images, 5634); 14 | 15 | const float heightStep = 1.0f; 16 | const float transparency = 0.3f; 17 | const int cellLayerSteps = 5; 18 | 19 | int h = 0; 20 | 21 | sf::Texture 
imageTexture; 22 | 23 | for (int i = 0; i < images.size(); i++) { 24 | // Render to RT 25 | _rt.setActive(); 26 | 27 | imageTexture.loadFromImage(*images[i]); 28 | 29 | imageTexture.setSmooth(false); 30 | 31 | sf::Sprite imageSprite; 32 | imageSprite.setTexture(imageTexture); 33 | 34 | imageSprite.setOrigin(imageTexture.getSize().x * 0.5f, imageTexture.getSize().y * 0.5f); 35 | 36 | imageSprite.setRotation(45.0f); 37 | imageSprite.setPosition(_rt.getSize().x * 0.5f, _rt.getSize().y * 0.5f); 38 | imageSprite.setScale(static_cast(_rt.getSize().x) / imageTexture.getSize().x * 0.75f, static_cast(_rt.getSize().y) / imageTexture.getSize().y * 0.75f); 39 | 40 | sf::RenderStates clearStates; 41 | clearStates.blendMode = sf::BlendNone; 42 | 43 | sf::RectangleShape clearShape; 44 | clearShape.setSize(sf::Vector2f(_rt.getSize().x, _rt.getSize().y)); 45 | clearShape.setFillColor(sf::Color::Transparent); 46 | 47 | _rt.draw(clearShape, clearStates); 48 | 49 | _rt.draw(imageSprite); 50 | 51 | _rt.display(); 52 | 53 | // Render rt to main image 54 | target.setActive(); 55 | 56 | sf::Sprite transformedSprite; 57 | transformedSprite.setTexture(_rt.getTexture()); 58 | transformedSprite.setOrigin(transformedSprite.getTexture()->getSize().x * 0.5f, transformedSprite.getTexture()->getSize().y * 0.5f); 59 | 60 | transformedSprite.setScale(scale.x * 0.5f, scale.y * 0.25f); 61 | transformedSprite.setColor(sf::Color(255, 255, 255, 255.0f * transparency)); 62 | 63 | target.setSmooth(true); 64 | 65 | for (int s = 0; s < cellLayerSteps; s++) { 66 | transformedSprite.setPosition(position.x, position.y - h * heightStep); 67 | target.draw(transformedSprite); 68 | 69 | h++; 70 | } 71 | } 72 | 73 | target.display(); 74 | } -------------------------------------------------------------------------------- /ContinuousHTMGPU/source/system/ComputeSystem.cpp: -------------------------------------------------------------------------------- 1 | #include "ComputeSystem.h" 2 | 3 | #include 4 | 5 | using 
namespace sys; 6 | 7 | bool ComputeSystem::create(DeviceType type, bool createFromGLContext) { 8 | if (type == _none) { 9 | #ifdef SYS_DEBUG 10 | std::cout << "No OpenCL context created." << std::endl; 11 | #endif 12 | return true; 13 | } 14 | 15 | std::vector allPlatforms; 16 | cl::Platform::get(&allPlatforms); 17 | 18 | if (allPlatforms.empty()) { 19 | #ifdef SYS_DEBUG 20 | std::cout << "No platforms found. Check your OpenCL installation." << std::endl; 21 | #endif 22 | return false; 23 | } 24 | 25 | _platform = allPlatforms.front(); 26 | 27 | #ifdef SYS_DEBUG 28 | std::cout << "Using platform: " << _platform.getInfo() << std::endl; 29 | #endif 30 | 31 | std::vector allDevices; 32 | 33 | switch (type) { 34 | case _cpu: 35 | _platform.getDevices(CL_DEVICE_TYPE_CPU, &allDevices); 36 | break; 37 | case _gpu: 38 | _platform.getDevices(CL_DEVICE_TYPE_GPU, &allDevices); 39 | break; 40 | case _all: 41 | _platform.getDevices(CL_DEVICE_TYPE_ALL, &allDevices); 42 | break; 43 | } 44 | 45 | if (allDevices.empty()) { 46 | #ifdef SYS_DEBUG 47 | std::cout << "No devices found. Check your OpenCL installation." 
<< std::endl; 48 | #endif 49 | return false; 50 | } 51 | 52 | _device = allDevices.front(); 53 | 54 | #ifdef SYS_DEBUG 55 | std::cout << "Using device: " << _device.getInfo() << std::endl; 56 | #endif 57 | 58 | #if(SYS_ALLOW_CL_GL_CONTEXT) 59 | if (createFromGLContext) { 60 | #if defined (__APPLE__) || defined(MACOSX) 61 | CGLContextObj kCGLContext = CGLGetCurrentContext(); 62 | CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext); 63 | cl_context_properties props[] = { 64 | CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)kCGLShareGroup, 65 | 0 66 | }; 67 | #else 68 | #if defined WIN32 69 | cl_context_properties props[] = { 70 | CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(), 71 | CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(), 72 | CL_CONTEXT_PLATFORM, (cl_context_properties)static_cast(_platform()), 73 | 0 74 | }; 75 | #else 76 | cl_context_properties props[] = { 77 | CL_GL_CONTEXT_KHR, (cl_context_properties)glXGetCurrentContext(), 78 | CL_GLX_DISPLAY_KHR, (cl_context_properties)glXGetCurrentDisplay(), 79 | CL_CONTEXT_PLATFORM, (cl_context_properties)static_cast(_platform()), 80 | 0 81 | }; 82 | #endif 83 | #endif 84 | 85 | _context = cl::Context(_device, props); 86 | } 87 | else 88 | #endif 89 | _context = _device; 90 | 91 | _queue = cl::CommandQueue(_context, _device); 92 | 93 | return true; 94 | } -------------------------------------------------------------------------------- /ContinuousHTMGPU/source/htm/AnythingEncoder.cpp: -------------------------------------------------------------------------------- 1 | #include "AnythingEncoder.h" 2 | 3 | using namespace htm; 4 | 5 | void AnythingEncoder::create(int sdrSize, int inputSize, float minInitCenter, float maxInitCenter, std::mt19937 &generator) { 6 | _sdrSize = sdrSize; 7 | _inputSize = inputSize; 8 | 9 | _nodes.resize(sdrSize); 10 | 11 | std::uniform_real_distribution centerDist(minInitCenter, maxInitCenter); 12 | 13 | for (int i = 0; i < _sdrSize; 
i++) { 14 | _nodes[i]._center.resize(inputSize); 15 | 16 | for (int j = 0; j < _inputSize; j++) 17 | _nodes[i]._center[j] = centerDist(generator); 18 | } 19 | } 20 | 21 | void AnythingEncoder::encode(const std::vector &input, std::vector &sdr, float localActivity, float outputIntensity, float dutyCycleDecay) { 22 | if (sdr.size() != _sdrSize) 23 | sdr.resize(_sdrSize); 24 | 25 | float maxActivation = -999999.0f; 26 | 27 | for (int i = 0; i < _sdrSize; i++) { 28 | float sum = 0.0f; 29 | 30 | for (int j = 0; j < _inputSize; j++) { 31 | float difference = _nodes[i]._center[j] - input[j]; 32 | 33 | sum += difference * difference; 34 | } 35 | 36 | _nodes[i]._activation = -sum; 37 | 38 | maxActivation = std::max(maxActivation, _nodes[i]._activation); 39 | } 40 | 41 | _bestRepresentation = maxActivation; 42 | 43 | // Inhibit 44 | for (int i = 0; i < _sdrSize; i++) { 45 | float numHigher = 0.0f; 46 | 47 | for (int j = 0; j < _sdrSize; j++) { 48 | if (_nodes[j]._activation > _nodes[i]._activation) 49 | numHigher++; 50 | } 51 | 52 | _nodes[i]._outputPrev = _nodes[i]._output; 53 | 54 | sdr[i] = _nodes[i]._output = sigmoid((localActivity - numHigher) * outputIntensity); 55 | 56 | _nodes[i]._dutyCycle = std::max((1.0f - dutyCycleDecay) * _nodes[i]._dutyCycle, _nodes[i]._output); 57 | 58 | if (_nodes[i]._dutyCycle < _nodes[_boostCandidate]._dutyCycle) 59 | _boostCandidate = i; 60 | } 61 | } 62 | 63 | void AnythingEncoder::learn(const std::vector &input, float centerAlpha, float maxDutyCycleForLearn, float noMatchIntensity) { 64 | float noMatch = 1.0f - exp(_bestRepresentation * noMatchIntensity); 65 | 66 | float boost = _nodes[_boostCandidate]._dutyCycle < maxDutyCycleForLearn ? 
noMatch : 0.0f; 67 | 68 | float learnScalar = (1.0f - boost) * std::max(0.0f, _nodes[_boostCandidate]._output - _nodes[_boostCandidate]._outputPrev) + boost; 69 | 70 | for (int j = 0; j < _inputSize; j++) { 71 | float difference = input[j] - _nodes[_boostCandidate]._center[j]; 72 | 73 | _nodes[_boostCandidate]._center[j] += centerAlpha * learnScalar * difference; 74 | } 75 | } 76 | 77 | void AnythingEncoder::decode(const std::vector &sdr, std::vector &recon) { 78 | if (recon.size() != _inputSize) 79 | recon.resize(_inputSize); 80 | 81 | for (int i = 0; i < _inputSize; i++) { 82 | float sum = 0.0f; 83 | float divisor = 0.0f; 84 | 85 | for (int j = 0; j < _sdrSize; j++) { 86 | sum += _nodes[j]._center[i] * _nodes[j]._output; 87 | 88 | divisor += _nodes[j]._output; 89 | } 90 | 91 | if (divisor == 0.0f) 92 | recon[i] = 0.0f; 93 | else 94 | recon[i] = sum / divisor; 95 | } 96 | } -------------------------------------------------------------------------------- /ContinuousHTMGPU/FindOpenCL.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find OpenCL 2 | # This module tries to find an OpenCL implementation on your system. It supports 3 | # AMD / ATI, Apple and NVIDIA implementations, but should work, too. 4 | # 5 | # To set manually the paths, define these environment variables: 6 | # OpenCL_INCPATH - Include path (e.g. OpenCL_INCPATH=/opt/cuda/4.0/cuda/include) 7 | # OpenCL_LIBPATH - Library path (e.h. 
OpenCL_LIBPATH=/usr/lib64/nvidia) 8 | # 9 | # Once done this will define 10 | # OPENCL_FOUND - system has OpenCL 11 | # OPENCL_INCLUDE_DIRS - the OpenCL include directory 12 | # OPENCL_LIBRARIES - link these to use OpenCL 13 | # 14 | # WIN32 should work, but is untested 15 | 16 | FIND_PACKAGE(PackageHandleStandardArgs) 17 | 18 | SET (OPENCL_VERSION_STRING "0.1.0") 19 | SET (OPENCL_VERSION_MAJOR 0) 20 | SET (OPENCL_VERSION_MINOR 1) 21 | SET (OPENCL_VERSION_PATCH 0) 22 | 23 | IF (APPLE) 24 | 25 | FIND_LIBRARY(OPENCL_LIBRARIES OpenCL DOC "OpenCL lib for OSX") 26 | FIND_PATH(OPENCL_INCLUDE_DIRS OpenCL/cl.h DOC "Include for OpenCL on OSX") 27 | FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS OpenCL/cl.hpp DOC "Include for OpenCL CPP bindings on OSX") 28 | 29 | ELSE (APPLE) 30 | 31 | IF (WIN32) 32 | 33 | FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h) 34 | FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp) 35 | 36 | # The AMD SDK currently installs both x86 and x86_64 libraries 37 | # This is only a hack to find out architecture 38 | IF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" ) 39 | SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86_64") 40 | ELSE (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64") 41 | SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86") 42 | ENDIF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" ) 43 | FIND_LIBRARY(OPENCL_LIBRARIES OpenCL.lib PATHS ${OPENCL_LIB_DIR} ENV OpenCL_LIBPATH) 44 | 45 | GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE) 46 | 47 | # On Win32 search relative to the library 48 | FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS "${_OPENCL_INC_CAND}" ENV OpenCL_INCPATH) 49 | FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS "${_OPENCL_INC_CAND}" ENV OpenCL_INCPATH) 50 | 51 | ELSE (WIN32) 52 | 53 | # Unix style platforms 54 | FIND_LIBRARY(OPENCL_LIBRARIES OpenCL 55 | PATHS ENV LD_LIBRARY_PATH ENV OpenCL_LIBPATH 56 | ) 57 | 58 | GET_FILENAME_COMPONENT(OPENCL_LIB_DIR ${OPENCL_LIBRARIES} PATH) 59 | GET_FILENAME_COMPONENT(_OPENCL_INC_CAND 
${OPENCL_LIB_DIR}/../../include ABSOLUTE) 60 | 61 | # The AMD SDK currently does not place its headers 62 | # in /usr/include, therefore also search relative 63 | # to the library 64 | FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include" "/opt/AMDAPP/include" ENV OpenCL_INCPATH) 65 | FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include" "/opt/AMDAPP/include" ENV OpenCL_INCPATH) 66 | 67 | ENDIF (WIN32) 68 | 69 | ENDIF (APPLE) 70 | 71 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(OpenCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS) 72 | 73 | IF(_OPENCL_CPP_INCLUDE_DIRS) 74 | SET( OPENCL_HAS_CPP_BINDINGS TRUE ) 75 | LIST( APPEND OPENCL_INCLUDE_DIRS ${_OPENCL_CPP_INCLUDE_DIRS} ) 76 | # This is often the same, so clean up 77 | LIST( REMOVE_DUPLICATES OPENCL_INCLUDE_DIRS ) 78 | ENDIF(_OPENCL_CPP_INCLUDE_DIRS) 79 | 80 | MARK_AS_ADVANCED( 81 | OPENCL_INCLUDE_DIRS 82 | ) 83 | 84 | -------------------------------------------------------------------------------- /ContinuousHTMGPU/resources/htm.cl: -------------------------------------------------------------------------------- 1 | constant sampler_t normalizedClampedNearestSampler = CLK_NORMALIZED_COORDS_TRUE | 2 | CLK_ADDRESS_CLAMP_TO_EDGE | 3 | CLK_FILTER_NEAREST; 4 | 5 | constant float sparsity = 0.9f; 6 | constant float intensity = 4.0f; 7 | 8 | float randFloat(uint2* state) 9 | { 10 | const float invMaxInt = 1.0f / 4294967296.0f; 11 | uint x = (*state).x * 17 + (*state).y * 13123; 12 | (*state).x = (x << 13) ^ x; 13 | (*state).y ^= (x << 7); 14 | 15 | uint tmp = x * (x * x * 15731 + 74323) + 871483; 16 | 17 | return convert_float(tmp) * invMaxInt; 18 | } 19 | 20 | float sigmoid(float x) { 21 | return 1.0f / (1.0f + exp(-x)); 22 | } 23 | 24 | float logit(float x) { 25 | return -log(1.0f / x - 1.0f); 26 | } 27 | 28 | void kernel weightInit(write_only image2d_t states, write_only image2d_array_t weights, int receptiveFieldSize, uint2 seed, float 
minWeight, float maxWeight) { 29 | uint2 seedValue = seed + (uint2)(get_global_id(0), get_global_id(1)); 30 | 31 | int2 columnPosition = (int2)(get_global_id(0), get_global_id(1)); 32 | 33 | write_imagef(states, columnPosition, (float4)(0.0f, 0.0f, 0.0f, 0.0f)); 34 | 35 | for (int wi = 0; wi < receptiveFieldSize; wi++) { 36 | int4 weightPosition = (int4)(columnPosition.x, columnPosition.y, wi, 0); 37 | 38 | float weight = randFloat(&seedValue) * (maxWeight - minWeight) + minWeight; 39 | 40 | write_imagef(weights, weightPosition, (float4)(weight, weight, weight, weight)); 41 | } 42 | } 43 | 44 | void kernel layerActivate(read_only image2d_t prevStates, write_only image2d_t activations, read_only image2d_array_t weights, float2 layerSizeInv, float2 inputReceptiveFieldRadius, float2 inputReceptiveFieldStep) { 45 | int2 columnPosition = (int2)(get_global_id(0), get_global_id(1)); 46 | float2 inputCenterPositionNormalized = (float2)(columnPosition.x * layerSizeInv.x, columnPosition.y * layerSizeInv.y); 47 | 48 | float sum = 0.0f; 49 | 50 | int weightIndex = 0; 51 | 52 | for (float dx = -inputReceptiveFieldRadius.x; dx <= inputReceptiveFieldRadius.x; dx += inputReceptiveFieldStep.x) 53 | for (float dy = -inputReceptiveFieldRadius.y; dy <= inputReceptiveFieldRadius.y; dy += inputReceptiveFieldStep.y) { 54 | float2 inputPositionNormalized = inputCenterPositionNormalized + (float2)(dx, dy); 55 | 56 | float weight = read_imagef(weights, (int4)(columnPosition.x, columnPosition.y, weightIndex, 0)).x; 57 | float prevState = read_imagef(prevStates, normalizedClampedNearestSampler, inputPositionNormalized).x; 58 | 59 | sum += weight * prevState; 60 | 61 | weightIndex++; 62 | } 63 | 64 | float activation = sigmoid(sum) * 2.0f - 1.0f; 65 | 66 | write_imagef(activations, columnPosition, (float4)(activation, activation, activation, activation)); 67 | } 68 | 69 | void kernel layerInhibit(read_only image2d_t activations, write_only image2d_t states, float2 layerSizeInv, float2 
layerReceptiveFieldRadius, float2 layerReceptiveFieldStep) { 70 | int2 columnPosition = (int2)(get_global_id(0), get_global_id(1)); 71 | float2 layerCenterPositionNormalized = (float2)(columnPosition.x * layerSizeInv.x, columnPosition.y * layerSizeInv.y); 72 | 73 | float average = 0.0f; 74 | float maximum = 0.0f; 75 | float minimum = 1.0f; 76 | 77 | int weightIndex = 0; 78 | 79 | for (float dx = -layerReceptiveFieldRadius.x; dx <= layerReceptiveFieldRadius.x; dx += layerReceptiveFieldStep.x) 80 | for (float dy = -layerReceptiveFieldRadius.y; dy <= layerReceptiveFieldRadius.y; dy += layerReceptiveFieldStep.y) { 81 | float2 layerPositionNormalized = layerCenterPositionNormalized + (float2)(dx, dy); 82 | 83 | float activation = read_imagef(activations, normalizedClampedNearestSampler, layerPositionNormalized).x; 84 | 85 | average += activation; 86 | maximum = max(maximum, activation); 87 | minimum = min(minimum, activation); 88 | 89 | weightIndex++; 90 | } 91 | 92 | average /= weightIndex; 93 | 94 | float thisActivation = read_imagef(activations, normalizedClampedNearestSampler, layerCenterPositionNormalized).x; 95 | 96 | // If this activation is above average 97 | float error = thisActivation - (sparsity * maximum + (1.0f - sparsity) * average); 98 | float inhibitedResult = sigmoid(error * intensity) * 2.0f - 1.0f; 99 | 100 | write_imagef(states, columnPosition, (float4)(inhibitedResult, inhibitedResult, inhibitedResult, inhibitedResult)); 101 | } 102 | 103 | void kernel layerWeightUpdate(read_only image2d_t prevStates, read_only image2d_t states, read_only image2d_array_t prevWeights, write_only image2d_array_t weights, float2 layerSizeInv, float2 inputReceptiveFieldRadius, float2 inputReceptiveFieldStep, float alpha) { 104 | int2 columnPosition = (int2)(get_global_id(0), get_global_id(1)); 105 | float2 inputCenterPositionNormalized = (float2)(columnPosition.x * layerSizeInv.x, columnPosition.y * layerSizeInv.y); 106 | 107 | float state = read_imagef(states, 
columnPosition).x; 108 | 109 | // Adjust weights by their source activations and error 110 | int weightIndex = 0; 111 | 112 | for (float dx = -inputReceptiveFieldRadius.x; dx <= inputReceptiveFieldRadius.x; dx += inputReceptiveFieldStep.x) 113 | for (float dy = -inputReceptiveFieldRadius.y; dy <= inputReceptiveFieldRadius.y; dy += inputReceptiveFieldStep.y) { 114 | float2 inputPositionNormalized = inputCenterPositionNormalized + (float2)(dx, dy); 115 | 116 | float prevState = read_imagef(prevStates, normalizedClampedNearestSampler, inputPositionNormalized).x; 117 | 118 | float prevWeight = read_imagef(prevWeights, (int4)(columnPosition.x, columnPosition.y, weightIndex, 0)).x; 119 | 120 | float change = alpha * (state * prevState); 121 | 122 | float newWeight = prevWeight + change; 123 | 124 | write_imagef(weights, (int4)(columnPosition.x, columnPosition.y, weightIndex, 0), (float4)(newWeight, newWeight, newWeight, newWeight)); 125 | 126 | weightIndex++; 127 | } 128 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ![CHTMGPU Logo](http://i1218.photobucket.com/albums/dd401/222464/CHTMLOGOSMALL.png) 2 | 3 | Continuous HTM GPU 4 | ======= 5 | 6 | Runs a continuous (not discrete) version of HTM (Hierarchical Temporal Memory, from Numenta: ) on the GPU, and uses it for reinforcement learning. 7 | Follow updates on my blog! [https://cireneikual.wordpress.com/](https://cireneikual.wordpress.com/) 8 | 9 | Install 10 | ----------- 11 | 12 | ContinuousHTMGPU relies on 2 external libraries: OpenCL and SFML. SFML is used only for visualization, and can be removed if desired. 13 | 14 | To get OpenCL, refer to your graphics hardware vendor website (for AMD and Nvidia), or CPU vendor (e.g. the Intel OpenCL SDK). 15 | Works best with AMD cards (best OpenCL support). 
16 | 17 | To get SFML, choose a package from here: [http://www.sfml-dev.org/download/sfml/2.2/](http://www.sfml-dev.org/download/sfml/2.2/) 18 | 19 | ContinuousHTMGPU uses CMake as the build system. You can get CMake here: [http://www.cmake.org/download/](http://www.cmake.org/download/) 20 | 21 | Set CMake's source code directory to the ContinuousHTMGPU root directory (the one that contains the /source folder as well as a CMakeLists.txt). 22 | 23 | Set CMake's build directory to the same directory as in the previous step. Optionally, you can also set it to a folder of your choice, but this may make browsing the source more difficult if you are using Visual Studio. 24 | 25 | Then press configure, and choose your compiler. 26 | 27 | It will likely error. If this happens, no fear, there is a fix! 28 | 29 | You can manually specify the paths where CMake looks. They will appear in red if they need to be set in the CMake GUI. 30 | 31 | SFML is a bit tricky: you have to add a custom variable entry for a variable called SFML_ROOT and set it to the SFML root directory. 32 | 33 | Once the configuration no longer results in errors, you can hit generate. This will generate the files necessary for your compiler. 34 | 35 | You should then be able to compile and execute the program. If you are using Visual Studio, you may have to set your startup project to the ERL project, and you may have to add the source files to the project. 36 | 37 | Quick Start 38 | ----------- 39 | 40 | If you want to use ContinuousHTMGPU in your own project without visualization, you can strip out the SFML visualizer by simply removing the "vis" directory. 41 | 42 | First, include HTMRL.h: 43 | 44 | ```cpp 45 | #include <htm/HTMRL.h> 46 | ``` 47 | 48 | Next, you have to create the compute system. 
You can specify either GPU or CPU (GPU is recommended if you have it): 49 | 50 | ```cpp 51 | sys::ComputeSystem cs; 52 | 53 | cs.create(sys::ComputeSystem::_gpu); 54 | ``` 55 | 56 | With that created, you need to load the OpenCL program: 57 | 58 | ```cpp 59 | sys::ComputeProgram program; 60 | 61 | program.loadFromFile("resources/htmrl.cl", cs); 62 | ``` 63 | 64 | Then create the agent. Fill out a vector of LayerDesc objects to describe the structure of your agent, and specify the types of the inputs (state/action/unused). In the following example, action nodes are selected randomly: 65 | 66 | ```cpp 67 | htm::HTMRL agent; 68 | 69 | std::vector<htm::HTMRL::LayerDesc> layerDescs(5); 70 | 71 | layerDescs[0]._width = 64; 72 | layerDescs[0]._height = 64; 73 | 74 | layerDescs[1]._width = 44; 75 | layerDescs[1]._height = 44; 76 | 77 | layerDescs[2]._width = 32; 78 | layerDescs[2]._height = 32; 79 | 80 | layerDescs[3]._width = 20; 81 | layerDescs[3]._height = 20; 82 | 83 | layerDescs[4]._width = 16; 84 | layerDescs[4]._height = 16; 85 | 86 | std::vector<htm::HTMRL::InputType> inputTypes(64 * 64, htm::HTMRL::_state); 87 | 88 | for (int x = 0; x < 64; x++) { 89 | for (int y = 32; y < 64; y++) { 90 | inputTypes[x + y * 64] = htm::HTMRL::_unused; 91 | } 92 | } 93 | 94 | std::uniform_int_distribution<int> actionXDist(0, 63); 95 | std::uniform_int_distribution<int> actionYDist(33, 63); 96 | 97 | std::vector<int> actionIndices; 98 | 99 | for (int i = 0; i < 8; i++) { 100 | int x = actionXDist(generator); 101 | int y = actionYDist(generator); 102 | 103 | if (inputTypes[x + y * 64] == htm::HTMRL::_action) 104 | continue; 105 | 106 | inputTypes[x + y * 64] = htm::HTMRL::_action; 107 | 108 | actionIndices.push_back(x + y * 64); 109 | } 110 | 111 | agent.createRandom(cs, program, 64, 64, 4, layerDescs, inputTypes, -0.05f, 0.05f, -0.05f, 0.05f, generator); 112 | ``` 113 | 114 | Then to use the agent, call: 115 | 116 | ```cpp 117 | agent.setInput(x, y, value); 118 | ``` 119 | 120 | to set the value of an input, and: 121 | 122 | ```cpp 123 | 
agent.getOutput(actionIndices[i]); // actionIndices[i] is the index of the output, from the above example 124 | ``` 125 | 126 | to get an output. 127 | 128 | Step the simulation like this: 129 | 130 | ```cpp 131 | agent.step(cs, reward, 0.01f, 0.01f, 0.01f, 0.05f, 0.01f, 0.05f, 0.2f, 0.5f, 0.5f, 0.5f, 0.01f, 0.2f, 0.992f, 0.15f, 0.15f, 120, 10, 2, generator); 132 | ``` 133 | 134 | The parameters above are suggested values. 135 | 136 | Visualization 137 | ----------- 138 | 139 | Instructions coming soon! For now, just take a look at the example code, Main.cpp. 140 | 141 | License 142 | ----------- 143 | 144 | ContinuousHTMGPU 145 | Copyright (C) 2014-2015 Eric Laukien 146 | 147 | This software is provided 'as-is', without any express or implied 148 | warranty. In no event will the authors be held liable for any damages 149 | arising from the use of this software. 150 | 151 | Permission is granted to anyone to use this software for any purpose, 152 | including commercial applications, and to alter it and redistribute it 153 | freely, subject to the following restrictions: 154 | 155 | 1. The origin of this software must not be misrepresented; you must not 156 | claim that you wrote the original software. If you use this software 157 | in a product, an acknowledgment in the product documentation would be 158 | appreciated but is not required. 159 | 2. Altered source versions must be plainly marked as such, and must not be 160 | misrepresented as being the original software. 161 | 3. This notice may not be removed or altered from any source distribution. 162 | 163 | ------------------------------------------------------------------------------ 164 | 165 | ContinuousHTMGPU uses the following external libraries: 166 | 167 | SFML - source code is licensed under the zlib/png license. 
168 | OpenCL 169 | 170 | -------------------------------------------------------------------------------- /ContinuousHTMGPU/resources/cae.cl: -------------------------------------------------------------------------------- 
// Sampler: normalized coordinates, clamp-to-edge addressing, nearest filtering.
constant sampler_t normalizedClampedNearestSampler = CLK_NORMALIZED_COORDS_TRUE |
    CLK_ADDRESS_CLAMP_TO_EDGE |
    CLK_FILTER_NEAREST;

// Sampler: unnormalized (texel) coordinates, clamp addressing, nearest filtering.
constant sampler_t unnormalizedClampedNearestSampler = CLK_NORMALIZED_COORDS_FALSE |
    CLK_ADDRESS_CLAMP |
    CLK_FILTER_NEAREST;

// Logistic function 1 / (1 + e^-x).
float sigmoid(float x) {
    return 1.0f / (1.0f + exp(-x));
}

/*
 * Forward activation of one layer. Each work-item handles one (x, y, map)
 * element, taken from global ids 0, 1 and 2.
 *
 * prevLayerOutputs  image array sampled as the layer input (array index = previous map m)
 * layerWeights      2D image; row position.z holds this map's weights, column 0 is the bias
 * layerOutputs      image array receiving sigmoid(bias + sum(weight * input))
 * kernelSize        number of samples taken along x and y
 * prevNumMaps       number of maps read from prevLayerOutputs per sample position
 * layerSizeInv      scale applied to the work-item position to obtain normalized coordinates
 * prevLayerSizeInv  per-axis scale applied to the kernel offsets before sampling
 *
 * Weights are consumed in x-major, then y, then map order, starting at column 1.
 */
void kernel layerActivateForward(read_only image2d_array_t prevLayerOutputs, read_only image2d_t layerWeights,
    write_only image2d_array_t layerOutputs, int2 kernelSize, int prevNumMaps, float2 layerSizeInv, float2 prevLayerSizeInv)
{
    int3 position = (int3)(get_global_id(0), get_global_id(1), get_global_id(2));

    float2 positionNormalized2D = (float2)(position.x * layerSizeInv.x, position.y * layerSizeInv.y);

    // First weight is bias
    float sum = read_imagef(layerWeights, (int2)(0, position.z)).x;

    int weightIndex = 1;

    for (int x = 0; x < kernelSize.x; x++)
        for (int y = 0; y < kernelSize.y; y++)
            for (int m = 0; m < prevNumMaps; m++) {
                float weight = read_imagef(layerWeights, (int2)(weightIndex, position.z)).x;

                // Kernel offsets are centered on the work-item position. Each axis is
                // scaled by its own component of prevLayerSizeInv; the y offset previously
                // used the x component, which is only correct when both components are equal.
                float sampleX = positionNormalized2D.x + (x - kernelSize.x * 0.5f) * prevLayerSizeInv.x;
                float sampleY = positionNormalized2D.y + (y - kernelSize.y * 0.5f) * prevLayerSizeInv.y;

                float prevLayerOutput = read_imagef(prevLayerOutputs, normalizedClampedNearestSampler, (float4)(sampleX, sampleY, m, 0)).x;

                sum += weight * prevLayerOutput;

                weightIndex++;
            }

    float output = sigmoid(sum);

    // The same value is written to all four channels.
    write_imagef(layerOutputs, (int4)(position.x, position.y, position.z, 0), (float4)(output, output, output, output));
}

void kernel layerActivateBackward(read_only image2d_array_t layerOutputs, read_only image2d_array_t 
prevLayerOutputs, 42 | read_only image2d_t layerWeights, read_only image2d_array_t prevLayerBiases, write_only image2d_array_t newPrevLayerBiases, write_only image2d_array_t prevLayerErrors, 43 | int2 kernelSize, int2 reverseKernelSize, int numMaps, int prevNumMaps, float2 layerSizeInv, float2 prevLayerSizeInv, float alpha) 44 | { 45 | int3 prevPosition = (int3)(get_global_id(0), get_global_id(1), get_global_id(2)); 46 | 47 | float2 prevPositionNormalized2D = (float2)(prevPosition.x * prevLayerSizeInv.x, prevPosition.y * prevLayerSizeInv.y); 48 | 49 | float prevLayerBias = read_imagef(prevLayerBiases, (int4)(prevPosition.x, prevPosition.y, prevPosition.z, 0)).x; 50 | 51 | float sum = prevLayerBias; 52 | 53 | int2 start = (int2)(prevPosition.x, prevPosition.y); 54 | 55 | for (int x = 0; x < reverseKernelSize.x; x++) 56 | for (int y = 0; y < reverseKernelSize.y; y++) 57 | for (int m = 0; m < numMaps; m++) { 58 | float weight = read_imagef(layerWeights, (int2)(prevPosition.z + y * prevNumMaps + x * prevNumMaps * reverseKernelSize.y + 1, m)).x; 59 | float layerOutput = read_imagef(layerOutputs, normalizedClampedNearestSampler, (float4)(prevPositionNormalized2D.x + (x - reverseKernelSize.x * 0.5f) * layerSizeInv.x, prevPositionNormalized2D.y + (y - reverseKernelSize.y * 0.5f) * layerSizeInv.y, m, 0)).x; 60 | 61 | sum += weight * layerOutput; 62 | } 63 | 64 | float output = sigmoid(sum); 65 | 66 | float target = read_imagef(prevLayerOutputs, (int4)(prevPosition.x, prevPosition.y, prevPosition.z, 0)).x; 67 | 68 | float error = (target - output);// * output * (1.0f - output); 69 | 70 | // Update prev layer bias 71 | float newPrevLayerBias = prevLayerBias + alpha * error; 72 | 73 | write_imagef(newPrevLayerBiases, (int4)(prevPosition.x, prevPosition.y, prevPosition.z, 0), (float4)(newPrevLayerBias, newPrevLayerBias, newPrevLayerBias, newPrevLayerBias)); 74 | 75 | // Store error 76 | write_imagef(prevLayerErrors, (int4)(prevPosition.x, prevPosition.y, prevPosition.z, 0), 
(float4)(error, error, error, error)); 77 | } 78 | 79 | void kernel layerWeightUpdate(read_only image2d_array_t layerOutputs, read_only image2d_array_t prevLayerOutputs, read_only image2d_array_t prevLayerErrors, 80 | read_only image2d_t layerWeights, write_only image2d_array_t weightDeltaSummationMap, 81 | int2 kernelSize, int numMaps, int prevNumMaps, float2 layerSizeInv, float2 prevLayerSizeInv, int numWeightsPerMap, float alpha) 82 | { 83 | int3 position = (int3)(get_global_id(0), get_global_id(1), get_global_id(2)); 84 | 85 | float2 positionNormalized2D = (float2)(position.x * layerSizeInv.x, position.y * layerSizeInv.y); 86 | 87 | float layerOutput = read_imagef(layerOutputs, (int4)(position.x, position.y, position.z, 0)).x; 88 | 89 | int weightIndex = 1; // 1 since we are skipping the bias for the error backpropagation 90 | 91 | // Backpropagate error 92 | float error = 0.0f; 93 | 94 | for (int x = 0; x < kernelSize.x; x++) 95 | for (int y = 0; y < kernelSize.y; y++) 96 | for (int m = 0; m < prevNumMaps; m++) { 97 | float weight = read_imagef(layerWeights, (int2)(weightIndex, position.z)).x; 98 | float prevLayerError = read_imagef(prevLayerErrors, normalizedClampedNearestSampler, (float4)(positionNormalized2D.x + (x - kernelSize.x * 0.5f) * prevLayerSizeInv.x, positionNormalized2D.y + (y - kernelSize.y * 0.5f) * prevLayerSizeInv.y, m, 0)).x; 99 | 100 | error += weight * prevLayerError; 101 | 102 | weightIndex++; 103 | } 104 | 105 | error *= layerOutput * (1.0f - layerOutput); 106 | 107 | // Update bias 108 | float bias = read_imagef(layerWeights, (int2)(0, position.z)).x; 109 | 110 | float biasDelta = alpha * error; 111 | 112 | int thisWeightsStart = position.z * numWeightsPerMap; 113 | 114 | write_imagef(weightDeltaSummationMap, (int4)(position.x, position.y, thisWeightsStart, 0), biasDelta); 115 | 116 | weightIndex = 1; 117 | 118 | // Update all non-bias weights 119 | for (int x = 0; x < kernelSize.x; x++) 120 | for (int y = 0; y < kernelSize.y; y++) 121 | 
for (int m = 0; m < prevNumMaps; m++) { 122 | float weight = read_imagef(layerWeights, (int2)(weightIndex, position.z)).x; 123 | float prevLayerOutput = read_imagef(prevLayerOutputs, normalizedClampedNearestSampler, (float4)(positionNormalized2D.x + (x - kernelSize.x * 0.5f) * prevLayerSizeInv.x, positionNormalized2D.y + (y - kernelSize.y * 0.5f) * prevLayerSizeInv.y, m, 0)).x; 124 | float prevLayerError = read_imagef(prevLayerErrors, normalizedClampedNearestSampler, (float4)(positionNormalized2D.x + (x - kernelSize.x * 0.5f) * prevLayerSizeInv.x, positionNormalized2D.y + (y - kernelSize.y * 0.5f) * prevLayerSizeInv.y, m, 0)).x; 125 | 126 | float weightDelta = alpha * (error * prevLayerOutput + prevLayerError * layerOutput); 127 | 128 | write_imagef(weightDeltaSummationMap, (int4)(position.x, position.y, thisWeightsStart + weightIndex, 0), weightDelta); 129 | 130 | weightIndex++; 131 | } 132 | } 133 | 134 | void kernel weightDeltaReduce(read_only image2d_array_t expandedWeightDeltas, write_only image2d_array_t reducedWeightDeltas, int totalNumWeightsPerLayer, int2 reduceStep) { 135 | int2 positionReduced = (int2)(get_global_id(0), get_global_id(1)); 136 | int2 positionExpanded = (int2)(positionReduced.x * reduceStep.x, positionReduced.y * reduceStep.y); 137 | 138 | for (int i = 0; i < totalNumWeightsPerLayer; i++) { 139 | float sum = 0.0f; 140 | 141 | for (int dx = 0; dx < reduceStep.x; dx++) 142 | for (int dy = 0; dy < reduceStep.y; dy++) 143 | sum += read_imagef(expandedWeightDeltas, unnormalizedClampedNearestSampler, (int4)(positionExpanded.x + dx, positionExpanded.y + dy, i, 0)).x; 144 | 145 | write_imagef(reducedWeightDeltas, (int4)(positionReduced.x, positionReduced.y, i, 0), (float4)(sum, sum, sum, sum)); 146 | } 147 | } 148 | 149 | void kernel mapsDeltaUpdate(read_only image2d_array_t reducedWeightDeltas, read_only image2d_t layerWeights, write_only image2d_t newLayerWeights, int numWeightsPerMap) { 150 | int3 position = (int3)(get_global_id(0), 
get_global_id(1), get_global_id(2)); 151 | 152 | for (int i = 0; i < numWeightsPerMap; i++) { 153 | float delta = read_imagef(reducedWeightDeltas, (int4)(position.x, position.y, position.z * numWeightsPerMap + i, 0)).x; 154 | 155 | float original = read_imagef(layerWeights, (int2)(i, position.z)).x; 156 | 157 | float next = original + delta; 158 | 159 | write_imagef(newLayerWeights, (int2)(i, position.z), (float4)(next, next, next, next)); 160 | } 161 | } 162 | 163 | void kernel layerDownsample(read_only image2d_array_t layerOutputs, write_only image2d_array_t downsampledOutputs, int2 downsampleSize) { 164 | int3 position = (int3)(get_global_id(0), get_global_id(1), get_global_id(2)); 165 | 166 | float sample = 0.0f; 167 | 168 | for (int dx = 0; dx < downsampleSize.x; dx++) 169 | for (int dy = 0; dy < downsampleSize.y; dy++) { 170 | float layerOutput = read_imagef(layerOutputs, (int4)(position.x * downsampleSize.x + dx, position.y * downsampleSize.y + dy, position.z, 0)).x; 171 | 172 | sample = max(sample, layerOutput); 173 | } 174 | 175 | write_imagef(downsampledOutputs, (int4)(position.x, position.y, position.z, 0), (float4)(sample, sample, sample, sample)); 176 | } -------------------------------------------------------------------------------- /ContinuousHTMGPU/source/htm/HTMRL.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | #include 16 | 17 | namespace htm { 18 | class HTMRL { 19 | public: 20 | enum InputType { 21 | _state, _action, _unused 22 | }; 23 | 24 | struct LayerDesc { 25 | int _width, _height; 26 | 27 | int _receptiveFieldRadius; 28 | int _lateralConnectionRadius; 29 | int _inhibitionRadius; 30 | 31 | int _cellsInColumn; 32 | 33 | int _numSegmentsPerCell; 34 | 35 | float _qInfluenceMultiplier; 36 | 37 | int _numColumnStateBlurPasses; 38 | float 
_columnStateBlurKernelWidthMultiplier; 39 | 40 | int _columnInfluenceRadius; 41 | 42 | float _qImportance; 43 | 44 | LayerDesc() 45 | : _width(16), _height(16), _receptiveFieldRadius(3), _lateralConnectionRadius(4), _inhibitionRadius(2), _cellsInColumn(3), _numSegmentsPerCell(3), 46 | _qInfluenceMultiplier(1.0f), _numColumnStateBlurPasses(1), _columnStateBlurKernelWidthMultiplier(1.0f), _columnInfluenceRadius(5), _qImportance(1.0f) 47 | {} 48 | }; 49 | 50 | static float sigmoid(float x) { 51 | return 1.0f / (1.0f + std::exp(-x)); 52 | } 53 | 54 | private: 55 | struct Layer { 56 | cl::Image2D _columnActivations; 57 | 58 | cl::Image2D _columnStatesPrev; 59 | cl::Image2D _columnStates; 60 | 61 | cl::Image2D _inputBiasesPrev; 62 | cl::Image2D _inputBiases; 63 | 64 | cl::Image2D _reconstruction; 65 | 66 | cl::Image2D _columnPredictionsPrev; 67 | cl::Image2D _columnPredictions; 68 | 69 | cl::Image3D _columnFeedForwardWeightsPrev; 70 | cl::Image3D _columnFeedForwardWeights; 71 | 72 | cl::Image3D _cellWeightsPrev; 73 | cl::Image3D _cellWeights; 74 | 75 | cl::Image3D _cellStatesPrev; 76 | cl::Image3D _cellStates; 77 | 78 | cl::Image3D _segmentStatesPrev; 79 | cl::Image3D _segmentStates; 80 | 81 | //cl::Image3D _segmentWeightsPrev; 82 | //cl::Image3D _segmentWeights; 83 | 84 | cl::Image3D _cellQValuesPrev; 85 | cl::Image3D _cellQValues; 86 | 87 | cl::Image2D _columnQValues; 88 | 89 | cl::Image2D _columnPrevValues; 90 | cl::Image2D _columnPrevValuesPrev; 91 | 92 | // Contains just tdError 93 | cl::Image2D _columnTdErrors; 94 | 95 | cl::Image3D _cellPredictionsPrev; 96 | cl::Image3D _cellPredictions; 97 | }; 98 | 99 | int _inputWidth, _inputHeight; 100 | 101 | std::vector _layerDescs; 102 | std::vector _layers; 103 | 104 | cl::Kernel _layerColumnActivateKernel; 105 | cl::Kernel _layerColumnInhibitKernel; 106 | cl::Kernel _layerCellActivateKernel; 107 | cl::Kernel _layerCellWeightUpdateKernel; 108 | cl::Kernel _layerCellWeightUpdateLastKernel; 109 | cl::Kernel 
_layerCellPredictKernel; 110 | cl::Kernel _layerCellPredictLastKernel; 111 | cl::Kernel _layerColumnWeightUpdateKernel; 112 | cl::Kernel _layerColumnPredictionKernel; 113 | cl::Kernel _layerColumnQKernel; 114 | cl::Kernel _layerColumnQLastKernel; 115 | cl::Kernel _layerAssignQKernel; 116 | 117 | cl::Kernel _reconstructInputKernel; 118 | cl::Kernel _inputBiasUpdateKernel; 119 | 120 | // For blur 121 | cl::Kernel _gaussianBlurXKernel; 122 | cl::Kernel _gaussianBlurYKernel; 123 | 124 | std::vector _input; 125 | 126 | std::vector _inputTypes; 127 | 128 | std::vector _output; 129 | std::vector _prediction; 130 | std::vector _exploratoryOutput; 131 | std::vector _prevOutput; 132 | std::vector _prevOutputExploratory; 133 | std::vector _prevInput; 134 | 135 | float _prevMaxQ; 136 | float _prevValue; 137 | float _prevPrevValue; 138 | float _prevQ; 139 | float _prevTDError; 140 | 141 | cl::Image2D _inputImage; 142 | cl::Image2D _reconstructedPrediction; 143 | 144 | int _addReplaySampleStepCounter; 145 | 146 | std::list _inputReplayChain; 147 | 148 | void stepBegin(sys::ComputeSystem &cs, int addReplaySampleSteps, int maxReplayChainSize); 149 | 150 | void activate(std::vector &input, sys::ComputeSystem &cs, float reward, float alpha, float gamma, float columnDecay, float cellStateDecay, float columnConnectionAlpha, float columnConnectionBeta, float columnConnectionGamma, float cellConnectionAlpha, float cellConnectionBeta, float cellConnectionGamma, float cellConnectionTemperature, float cellWeightEligibilityDecay, int maxReplayChainSize, int numReplaySamples, int addSampleSteps, unsigned long seed); 151 | 152 | void learnSpatialReplay(sys::ComputeSystem &cs, float cellStateDecay, float alpha, float beta, float gamma, int maxReplayChainSize, int numReplaySamples, unsigned long seed); 153 | 154 | void learnTemporal(sys::ComputeSystem &cs, float tdError, float cellConnectionAlpha, float cellConnectionBeta, float cellConnectionGamma, float cellConnectionTemperature, float 
cellWeightEligibilityDecay, unsigned long seed); 155 | 156 | void initLayer(sys::ComputeSystem &cs, cl::Kernel &initPartOneKernel, cl::Kernel &initPartTwoKernel, cl::Kernel &initPartThreeKernel, int inputWidth, int inputHeight, int inputCellsPerColumn, Layer &layer, const LayerDesc &layerDesc, bool isTopmost, float minInitWeight, float maxInitWeight, float minInitCenter, float maxInitCenter, float minInitWidth, float maxInitWidth, std::mt19937 &generator); 157 | void spatialPoolLayer(sys::ComputeSystem &cs, cl::Image2D &prevLayerOutput, int prevLayerWidth, int prevLayerHeight, Layer &layer, const LayerDesc &layerDesc, float columnDecay, std::mt19937 &generator); 158 | void cellActivateLayer(sys::ComputeSystem &cs, Layer &layer, const LayerDesc &layerDesc, float cellStateDecay, std::mt19937 &generator); 159 | void predictLayer(sys::ComputeSystem &cs, cl::Image2D &nextLayerPrediction, cl::Image2D &nextLayerPredictionPrev, int nextLayerWidth, int nextLayerHeight, Layer &layer, const LayerDesc &layerDesc, std::mt19937 &generator); 160 | void predictLayerLast(sys::ComputeSystem &cs, Layer &layer, const LayerDesc &layerDesc, std::mt19937 &generator); 161 | void determineLayerColumnQ(sys::ComputeSystem &cs, Layer &layer, LayerDesc &layerDesc, Layer &nextLayer, LayerDesc &nextLayerDesc); 162 | void determineLayerColumnQLast(sys::ComputeSystem &cs, Layer &layer, LayerDesc &layerDesc); 163 | void assignLayerQ(sys::ComputeSystem &cs, Layer &layer, LayerDesc &layerDesc, float alpha); 164 | void learnLayerSpatial(sys::ComputeSystem &cs, Layer &layer, cl::Image2D &prevLayerOutput, int prevLayerWidth, int prevLayerHeight, const LayerDesc &layerDesc, float alpha, float beta, float gamma, std::mt19937 &generator); 165 | void learnLayerTemporal(sys::ComputeSystem &cs, Layer &layer, cl::Image2D &prevLayerOutput, int prevLayerWidth, int prevLayerHeight, cl::Image2D &nextLayerPrediction, int nextLayerWidth, int nextLayerHeight, const LayerDesc &layerDesc, float tdError, float 
cellConnectionAlpha, float cellConnectionBeta, float cellConnectionGamma, float cellConnectionTemperature, float cellWeightEligibilityDecay, std::mt19937 &generator); 166 | void learnLayerTemporalLast(sys::ComputeSystem &cs, Layer &layer, cl::Image2D &prevLayerOutput, int prevLayerWidth, int prevLayerHeight, const LayerDesc &layerDesc, float tdError, float cellConnectionAlpha, float cellConnectionBeta, float cellConnectionGamma, float cellConnectionTemperature, float cellWeightEligibilityDecay, std::mt19937 &generator); 167 | void dutyCycleLayerUpdate(sys::ComputeSystem &cs, Layer &layer, const LayerDesc &layerDesc, float activationDutyCycleDecay, float stateDutyCycleDecay); 168 | 169 | // Reconstruction 170 | void getReconstructedPrediction(std::vector &prediction, sys::ComputeSystem &cs); 171 | 172 | // Blur 173 | void gaussianBlur(sys::ComputeSystem &cs, cl::Image2D &source, cl::Image2D &ping, cl::Image2D &pong, int imageSizeX, int imageSizeY, int passes, float kernelWidth); 174 | 175 | // Q 176 | float retreiveQ(sys::ComputeSystem &cs); 177 | 178 | public: 179 | void createRandom(sys::ComputeSystem &cs, sys::ComputeProgram &program, int inputWidth, int inputHeight, int reconstructionReceptiveRadius, const std::vector &layerDescs, const std::vector &inputTypes, float minInitWeight, float maxInitWeight, float minInitCenter, float maxInitCenter, std::mt19937 &generator); 180 | 181 | void step(sys::ComputeSystem &cs, float reward, float reconstructionAlpha, float columnDecay, float cellStateDecay, float columnConnectionAlpha, float columnConnectionBeta, float columnConnectionGamma, float cellConnectionAlpha, float cellConnectionBeta, float cellConnectionGamma, float cellConnectionTemperature, float cellWeightEligibilityDecay, float alpha, float gamma, float breakChance, float perturbationStdDev, int maxReplayChainSize, int numReplaySamples, int addReplaySampleSteps, std::mt19937 &generator); 182 | 183 | int getInputWidth() const { 184 | return _inputWidth; 185 | } 
186 | 187 | int getInputHeight() const { 188 | return _inputHeight; 189 | } 190 | 191 | const std::vector &getLayerDescs() const { 192 | return _layerDescs; 193 | } 194 | 195 | void setInput(int i, float value) { 196 | _input[i] = value; 197 | } 198 | 199 | void setInput(int x, int y, float value) { 200 | setInput(x + y * _inputWidth, value); 201 | } 202 | 203 | float getOutput(int i) const { 204 | return _input[i]; 205 | } 206 | 207 | float getOutput(int x, int y) const { 208 | return getOutput(x + y * _inputWidth); 209 | } 210 | 211 | void exportCellData(sys::ComputeSystem &cs, std::vector> &images, unsigned long seed) const; 212 | }; 213 | } -------------------------------------------------------------------------------- /ContinuousHTMGPU/FindSFML.cmake: -------------------------------------------------------------------------------- 1 | # This script locates the SFML library 2 | # ------------------------------------ 3 | # 4 | # Usage 5 | # ----- 6 | # 7 | # When you try to locate the SFML libraries, you must specify which modules you want to use (system, window, graphics, network, audio, main). 8 | # If none is given, the SFML_LIBRARIES variable will be empty and you'll end up linking to nothing. 9 | # example: 10 | # find_package(SFML COMPONENTS graphics window system) // find the graphics, window and system modules 11 | # 12 | # You can enforce a specific version, either MAJOR.MINOR or only MAJOR. 13 | # If nothing is specified, the version won't be checked (ie. any version will be accepted). 14 | # example: 15 | # find_package(SFML COMPONENTS ...) // no specific version required 16 | # find_package(SFML 2 COMPONENTS ...) // any 2.x version 17 | # find_package(SFML 2.4 COMPONENTS ...) // version 2.4 or greater 18 | # 19 | # By default, the dynamic libraries of SFML will be found. To find the static ones instead, 20 | # you must set the SFML_STATIC_LIBRARIES variable to TRUE before calling find_package(SFML ...). 
21 | # In case of static linking, the SFML_STATIC macro will also be defined by this script. 22 | # example: 23 | # set(SFML_STATIC_LIBRARIES TRUE) 24 | # find_package(SFML 2 COMPONENTS network system) 25 | # 26 | # On Mac OS X if SFML_STATIC_LIBRARIES is not set to TRUE then by default CMake will search for frameworks unless 27 | # CMAKE_FIND_FRAMEWORK is set to "NEVER" for example. Please refer to CMake documentation for more details. 28 | # Moreover, keep in mind that SFML frameworks are only available as release libraries unlike dylibs which 29 | # are available for both release and debug modes. 30 | # 31 | # If SFML is not installed in a standard path, you can use the SFML_ROOT CMake (or environment) variable 32 | # to tell CMake where SFML is. 33 | # 34 | # Output 35 | # ------ 36 | # 37 | # This script defines the following variables: 38 | # - For each specified module XXX (system, window, graphics, network, audio, main): 39 | # - SFML_XXX_LIBRARY_DEBUG: the name of the debug library of the xxx module (set to SFML_XXX_LIBRARY_RELEASE is no debug version is found) 40 | # - SFML_XXX_LIBRARY_RELEASE: the name of the release library of the xxx module (set to SFML_XXX_LIBRARY_DEBUG is no release version is found) 41 | # - SFML_XXX_LIBRARY: the name of the library to link to for the xxx module (includes both debug and optimized names if necessary) 42 | # - SFML_XXX_FOUND: true if either the debug or release library of the xxx module is found 43 | # - SFML_LIBRARIES: the list of all libraries corresponding to the required modules 44 | # - SFML_FOUND: true if all the required modules are found 45 | # - SFML_INCLUDE_DIR: the path where SFML headers are located (the directory containing the SFML/Config.hpp file) 46 | # 47 | # example: 48 | # find_package(SFML 2 COMPONENTS system window graphics audio REQUIRED) 49 | # include_directories(${SFML_INCLUDE_DIR}) 50 | # add_executable(myapp ...) 
51 | # target_link_libraries(myapp ${SFML_LIBRARIES}) 52 | 53 | # define the SFML_STATIC macro if static build was chosen 54 | if(SFML_STATIC_LIBRARIES) 55 | add_definitions(-DSFML_STATIC) 56 | endif() 57 | 58 | # deduce the libraries suffix from the options 59 | set(FIND_SFML_LIB_SUFFIX "") 60 | if(SFML_STATIC_LIBRARIES) 61 | set(FIND_SFML_LIB_SUFFIX "${FIND_SFML_LIB_SUFFIX}-s") 62 | endif() 63 | 64 | # find the SFML include directory 65 | find_path(SFML_INCLUDE_DIR SFML/Config.hpp 66 | PATH_SUFFIXES include 67 | PATHS 68 | ${SFML_ROOT} 69 | $ENV{SFML_ROOT} 70 | ~/Library/Frameworks 71 | /Library/Frameworks 72 | /usr/local/ 73 | /usr/ 74 | /sw # Fink 75 | /opt/local/ # DarwinPorts 76 | /opt/csw/ # Blastwave 77 | /opt/) 78 | 79 | # check the version number 80 | set(SFML_VERSION_OK TRUE) 81 | if(SFML_FIND_VERSION AND SFML_INCLUDE_DIR) 82 | # extract the major and minor version numbers from SFML/Config.hpp 83 | # we have to handle framework a little bit differently : 84 | if("${SFML_INCLUDE_DIR}" MATCHES "SFML.framework") 85 | set(SFML_CONFIG_HPP_INPUT "${SFML_INCLUDE_DIR}/Headers/Config.hpp") 86 | else() 87 | set(SFML_CONFIG_HPP_INPUT "${SFML_INCLUDE_DIR}/SFML/Config.hpp") 88 | endif() 89 | FILE(READ "${SFML_CONFIG_HPP_INPUT}" SFML_CONFIG_HPP_CONTENTS) 90 | STRING(REGEX MATCH ".*#define SFML_VERSION_MAJOR ([0-9]+).*#define SFML_VERSION_MINOR ([0-9]+).*" SFML_CONFIG_HPP_CONTENTS "${SFML_CONFIG_HPP_CONTENTS}") 91 | STRING(REGEX REPLACE ".*#define SFML_VERSION_MAJOR ([0-9]+).*" "\\1" SFML_VERSION_MAJOR "${SFML_CONFIG_HPP_CONTENTS}") 92 | STRING(REGEX REPLACE ".*#define SFML_VERSION_MINOR ([0-9]+).*" "\\1" SFML_VERSION_MINOR "${SFML_CONFIG_HPP_CONTENTS}") 93 | math(EXPR SFML_REQUESTED_VERSION "${SFML_FIND_VERSION_MAJOR} * 10 + ${SFML_FIND_VERSION_MINOR}") 94 | 95 | # if we could extract them, compare with the requested version number 96 | if (SFML_VERSION_MAJOR) 97 | # transform version numbers to an integer 98 | math(EXPR SFML_VERSION "${SFML_VERSION_MAJOR} * 10 + 
${SFML_VERSION_MINOR}") 99 | 100 | # compare them 101 | if(SFML_VERSION LESS SFML_REQUESTED_VERSION) 102 | set(SFML_VERSION_OK FALSE) 103 | endif() 104 | else() 105 | # SFML version is < 2.0 106 | if (SFML_REQUESTED_VERSION GREATER 19) 107 | set(SFML_VERSION_OK FALSE) 108 | set(SFML_VERSION_MAJOR 1) 109 | set(SFML_VERSION_MINOR x) 110 | endif() 111 | endif() 112 | endif() 113 | 114 | # find the requested modules 115 | set(SFML_FOUND TRUE) # will be set to false if one of the required modules is not found 116 | set(FIND_SFML_LIB_PATHS 117 | ${SFML_ROOT} 118 | $ENV{SFML_ROOT} 119 | ~/Library/Frameworks 120 | /Library/Frameworks 121 | /usr/local 122 | /usr 123 | /sw 124 | /opt/local 125 | /opt/csw 126 | /opt) 127 | foreach(FIND_SFML_COMPONENT ${SFML_FIND_COMPONENTS}) 128 | string(TOLOWER ${FIND_SFML_COMPONENT} FIND_SFML_COMPONENT_LOWER) 129 | string(TOUPPER ${FIND_SFML_COMPONENT} FIND_SFML_COMPONENT_UPPER) 130 | set(FIND_SFML_COMPONENT_NAME sfml-${FIND_SFML_COMPONENT_LOWER}${FIND_SFML_LIB_SUFFIX}) 131 | 132 | # no suffix for sfml-main, it is always a static library 133 | if(FIND_SFML_COMPONENT_LOWER STREQUAL "main") 134 | set(FIND_SFML_COMPONENT_NAME sfml-${FIND_SFML_COMPONENT_LOWER}) 135 | endif() 136 | 137 | # debug library 138 | find_library(SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_DEBUG 139 | NAMES ${FIND_SFML_COMPONENT_NAME}-d 140 | PATH_SUFFIXES lib64 lib 141 | PATHS ${FIND_SFML_LIB_PATHS}) 142 | 143 | # release library 144 | find_library(SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_RELEASE 145 | NAMES ${FIND_SFML_COMPONENT_NAME} 146 | PATH_SUFFIXES lib64 lib 147 | PATHS ${FIND_SFML_LIB_PATHS}) 148 | 149 | if (SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_DEBUG OR SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_RELEASE) 150 | # library found 151 | set(SFML_${FIND_SFML_COMPONENT_UPPER}_FOUND TRUE) 152 | 153 | # if both are found, set SFML_XXX_LIBRARY to contain both 154 | if (SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_DEBUG AND 
SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_RELEASE) 155 | set(SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY debug ${SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_DEBUG} 156 | optimized ${SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_RELEASE}) 157 | endif() 158 | 159 | # if only one debug/release variant is found, set the other to be equal to the found one 160 | if (SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_DEBUG AND NOT SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_RELEASE) 161 | # debug and not release 162 | set(SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_RELEASE ${SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_DEBUG}) 163 | set(SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY ${SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_DEBUG}) 164 | endif() 165 | if (SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_RELEASE AND NOT SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_DEBUG) 166 | # release and not debug 167 | set(SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_DEBUG ${SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_RELEASE}) 168 | set(SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY ${SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_RELEASE}) 169 | endif() 170 | else() 171 | # library not found 172 | set(SFML_FOUND FALSE) 173 | set(SFML_${FIND_SFML_COMPONENT_UPPER}_FOUND FALSE) 174 | set(SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY "") 175 | set(FIND_SFML_MISSING "${FIND_SFML_MISSING} SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY") 176 | endif() 177 | 178 | # mark as advanced 179 | MARK_AS_ADVANCED(SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY 180 | SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_RELEASE 181 | SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_DEBUG) 182 | 183 | # add to the global list of libraries 184 | set(SFML_LIBRARIES ${SFML_LIBRARIES} "${SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY}") 185 | endforeach() 186 | 187 | # handle errors 188 | if(NOT SFML_VERSION_OK) 189 | # SFML version not ok 190 | set(FIND_SFML_ERROR "SFML found but version too low (requested: ${SFML_FIND_VERSION}, found: ${SFML_VERSION_MAJOR}.${SFML_VERSION_MINOR})") 191 | 
set(SFML_FOUND FALSE) 192 | elseif(NOT SFML_FOUND) 193 | # include directory or library not found 194 | set(FIND_SFML_ERROR "Could NOT find SFML (missing: ${FIND_SFML_MISSING})") 195 | endif() 196 | if (NOT SFML_FOUND) 197 | if(SFML_FIND_REQUIRED) 198 | # fatal error 199 | message(FATAL_ERROR ${FIND_SFML_ERROR}) 200 | elseif(NOT SFML_FIND_QUIETLY) 201 | # error but continue 202 | message("${FIND_SFML_ERROR}") 203 | endif() 204 | endif() 205 | 206 | # handle success 207 | if(SFML_FOUND) 208 | message(STATUS "Found SFML ${SFML_VERSION_MAJOR}.${SFML_VERSION_MINOR} in ${SFML_INCLUDE_DIR}") 209 | endif() 210 | -------------------------------------------------------------------------------- /ContinuousHTMGPU/source/vis/Plot.cpp: -------------------------------------------------------------------------------- 1 | #include "Plot.h" 2 | 3 | #include 4 | 5 | using namespace vis; 6 | 7 | void Plot::draw(sf::RenderTarget &target, const sf::Texture &lineGradientTexture, const sf::Font &tickFont, float tickTextScale, 8 | const sf::Vector2f &domain, const sf::Vector2f &range, const sf::Vector2f &margins, const sf::Vector2f &tickIncrements, float axesSize, float lineSize, float tickSize, float tickLength, float textTickOffset, int precision) 9 | { 10 | target.clear(_backgroundColor); 11 | 12 | sf::Vector2f plotSize = sf::Vector2f(target.getSize().x - margins.x, target.getSize().y - margins.y); 13 | 14 | sf::Vector2f origin = sf::Vector2f(margins.x, target.getSize().y - margins.y); 15 | 16 | // Draw curves 17 | for (int c = 0; c < _curves.size(); c++) { 18 | if (_curves[c]._points.empty()) 19 | continue; 20 | 21 | sf::VertexArray vertexArray; 22 | 23 | vertexArray.resize((_curves[c]._points.size() - 1) * 6); 24 | 25 | int index = 0; 26 | 27 | // Go through points 28 | for (int p = 0; p < _curves[c]._points.size() - 1; p++) { 29 | Point &point = _curves[c]._points[p]; 30 | Point &pointNext = _curves[c]._points[p + 1]; 31 | 32 | sf::Vector2f difference = pointNext._position - 
point._position; 33 | sf::Vector2f direction = vectorNormalize(difference); 34 | 35 | sf::Vector2f renderPointFirst, renderPointSecond; 36 | 37 | bool pointVisible = point._position.x >= domain.x && point._position.x <= domain.y && 38 | point._position.y >= range.x && point._position.y <= range.y; 39 | 40 | bool pointNextVisible = pointNext._position.x >= domain.x && pointNext._position.x <= domain.y && 41 | pointNext._position.y >= range.x && pointNext._position.y <= range.y; 42 | 43 | if (pointVisible || pointNextVisible) { 44 | sf::Vector2f renderPoint = sf::Vector2f(origin.x + (point._position.x - domain.x) / (domain.y - domain.x) * plotSize.x, 45 | origin.y - (point._position.y - range.x) / (range.y - range.x) * plotSize.y); 46 | 47 | sf::Vector2f renderPointNext = sf::Vector2f(origin.x + (pointNext._position.x - domain.x) / (domain.y - domain.x) * plotSize.x, 48 | origin.y - (pointNext._position.y - range.x) / (range.y - range.x) * plotSize.y); 49 | 50 | sf::Vector2f renderDirection = vectorNormalize(renderPointNext - renderPoint); 51 | 52 | sf::Vector2f sizeOffset; 53 | sf::Vector2f sizeOffsetNext; 54 | 55 | if (p > 0) { 56 | sf::Vector2f renderPointPrev = sf::Vector2f(origin.x + (_curves[c]._points[p - 1]._position.x - domain.x) / (domain.y - domain.x) * plotSize.x, 57 | origin.y - (_curves[c]._points[p - 1]._position.y - range.x) / (range.y - range.x) * plotSize.y); 58 | 59 | sf::Vector2f averageDirection = (renderDirection + vectorNormalize(renderPoint - renderPointPrev)) * 0.5f; 60 | 61 | sizeOffset = vectorNormalize(sf::Vector2f(-averageDirection.y, averageDirection.x)); 62 | } 63 | else 64 | sizeOffset = vectorNormalize(sf::Vector2f(-renderDirection.y, renderDirection.x)); 65 | 66 | if (p < _curves[c]._points.size() - 2) { 67 | sf::Vector2f renderPointNextNext = sf::Vector2f(origin.x + (_curves[c]._points[p + 2]._position.x - domain.x) / (domain.y - domain.x) * plotSize.x, 68 | origin.y - (_curves[c]._points[p + 2]._position.y - range.x) / (range.y - 
range.x) * plotSize.y); 69 | 70 | sf::Vector2f averageDirection = (renderDirection + vectorNormalize(renderPointNextNext - renderPointNext)) * 0.5f; 71 | 72 | sizeOffsetNext = vectorNormalize(sf::Vector2f(-averageDirection.y, averageDirection.x)); 73 | } 74 | else 75 | sizeOffsetNext = vectorNormalize(sf::Vector2f(-renderDirection.y, renderDirection.x)); 76 | 77 | sf::Vector2f perpendicular = vectorNormalize(sf::Vector2f(-renderDirection.y, renderDirection.x)); 78 | 79 | sizeOffset *= 1.0f / vectorDot(perpendicular, sizeOffset) * lineSize * 0.5f; 80 | sizeOffsetNext *= 1.0f / vectorDot(perpendicular, sizeOffsetNext) * lineSize * 0.5f; 81 | 82 | vertexArray[index].position = renderPoint - sizeOffset; 83 | vertexArray[index].texCoords = sf::Vector2f(0.0f, 0.0f); 84 | vertexArray[index].color = point._color; 85 | 86 | index++; 87 | 88 | vertexArray[index].position = renderPointNext - sizeOffsetNext; 89 | vertexArray[index].texCoords = sf::Vector2f(0.0f, 0.0f); 90 | vertexArray[index].color = pointNext._color; 91 | 92 | index++; 93 | 94 | vertexArray[index].position = renderPointNext + sizeOffsetNext; 95 | vertexArray[index].texCoords = sf::Vector2f(0.0f, lineGradientTexture.getSize().y); 96 | vertexArray[index].color = pointNext._color; 97 | 98 | index++; 99 | 100 | vertexArray[index].position = renderPoint - sizeOffset; 101 | vertexArray[index].texCoords = sf::Vector2f(0.0f, 0.0f); 102 | vertexArray[index].color = point._color; 103 | 104 | index++; 105 | 106 | vertexArray[index].position = renderPointNext + sizeOffsetNext; 107 | vertexArray[index].texCoords = sf::Vector2f(0.0f, lineGradientTexture.getSize().y); 108 | vertexArray[index].color = pointNext._color; 109 | 110 | index++; 111 | 112 | vertexArray[index].position = renderPoint + sizeOffset; 113 | vertexArray[index].texCoords = sf::Vector2f(0.0f, lineGradientTexture.getSize().y); 114 | vertexArray[index].color = point._color; 115 | 116 | index++; 117 | } 118 | } 119 | 120 | vertexArray.resize(index); 121 | 122 
| vertexArray.setPrimitiveType(sf::PrimitiveType::Triangles); 123 | 124 | if (_curves[c]._shadow != 0.0f) { 125 | sf::VertexArray shadowArray = vertexArray; 126 | 127 | for (int v = 0; v < shadowArray.getVertexCount(); v++) { 128 | shadowArray[v].position += _curves[c]._shadowOffset; 129 | shadowArray[v].color = sf::Color(0, 0, 0, _curves[c]._shadow * 255.0f); 130 | } 131 | 132 | target.draw(shadowArray, &lineGradientTexture); 133 | } 134 | 135 | target.draw(vertexArray, &lineGradientTexture); 136 | } 137 | 138 | // Mask off parts of the curve that go beyond bounds 139 | sf::RectangleShape leftMask; 140 | leftMask.setSize(sf::Vector2f(margins.x, target.getSize().y)); 141 | leftMask.setFillColor(_backgroundColor); 142 | 143 | target.draw(leftMask); 144 | 145 | sf::RectangleShape rightMask; 146 | rightMask.setSize(sf::Vector2f(target.getSize().x, margins.y)); 147 | rightMask.setPosition(sf::Vector2f(0.0f, target.getSize().y - margins.y)); 148 | rightMask.setFillColor(_backgroundColor); 149 | 150 | target.draw(rightMask); 151 | 152 | // Draw axes 153 | sf::RectangleShape xAxis; 154 | xAxis.setSize(sf::Vector2f(plotSize.x + axesSize * 0.5f, axesSize)); 155 | xAxis.setPosition(sf::Vector2f(origin.x - axesSize * 0.5f, origin.y - axesSize * 0.5f)); 156 | xAxis.setFillColor(_axesColor); 157 | 158 | target.draw(xAxis); 159 | 160 | sf::RectangleShape yAxis; 161 | yAxis.setSize(sf::Vector2f(axesSize, plotSize.y + axesSize * 0.5f)); 162 | yAxis.setPosition(sf::Vector2f(origin.x - axesSize * 0.5f, origin.y - axesSize * 0.5f - plotSize.y)); 163 | yAxis.setFillColor(_axesColor); 164 | 165 | target.draw(yAxis); 166 | 167 | // Draw ticks 168 | { 169 | float xDistance = domain.y - domain.x; 170 | int xTicks = std::floor(xDistance / tickIncrements.x); 171 | float xTickOffset = std::fmod(domain.x, tickIncrements.x); 172 | 173 | if (xTickOffset < 0.0f) 174 | xTickOffset += tickIncrements.x; 175 | // Convert the offset from data units to pixels (same scaling as xTickRenderDistance) so each tick is drawn at the position of its label value 176 | float xTickRenderOffset = xTickOffset / xDistance * plotSize.x; 177 | 178 | float
xTickRenderDistance = tickIncrements.x / xDistance * plotSize.x; 179 | 180 | std::ostringstream os; 181 | 182 | os.precision(precision); 183 | 184 | for (int t = 0; t < xTicks; t++) { 185 | sf::RectangleShape xTick; 186 | xTick.setSize(sf::Vector2f(tickSize, tickLength)); // thickness is tickSize, matching the tickSize * 0.5f centering below 187 | xTick.setPosition(sf::Vector2f(origin.x + xTickRenderOffset + xTickRenderDistance * t - tickSize * 0.5f, origin.y)); 188 | xTick.setFillColor(_axesColor); 189 | 190 | target.draw(xTick); 191 | 192 | float value = domain.x + xTickOffset + t * tickIncrements.x; 193 | 194 | os.str(""); 195 | os << value; 196 | 197 | sf::Text xTickText; 198 | xTickText.setString(os.str()); 199 | xTickText.setFont(tickFont); 200 | xTickText.setPosition(sf::Vector2f(xTick.getPosition().x, xTick.getPosition().y + tickLength + textTickOffset)); 201 | xTickText.setRotation(45.0f); 202 | xTickText.setColor(_axesColor); 203 | xTickText.setScale(sf::Vector2f(tickTextScale, tickTextScale)); 204 | 205 | target.draw(xTickText); 206 | } 207 | } 208 | 209 | { 210 | float yDistance = range.y - range.x; 211 | int yTicks = std::floor(yDistance / tickIncrements.y); 212 | float yTickOffset = std::fmod(range.x, tickIncrements.y); 213 | 214 | if (yTickOffset < 0.0f) 215 | yTickOffset += tickIncrements.y; 216 | // Convert the offset from data units to pixels (same scaling as yTickRenderDistance) so each tick is drawn at the position of its label value 217 | float yTickRenderOffset = yTickOffset / yDistance * plotSize.y; 218 | 219 | float yTickRenderDistance = tickIncrements.y / yDistance * plotSize.y; 220 | 221 | std::ostringstream os; 222 | 223 | os.precision(precision); 224 | 225 | for (int t = 0; t < yTicks; t++) { 226 | sf::RectangleShape yTick; 227 | yTick.setSize(sf::Vector2f(tickLength, tickSize)); // thickness is tickSize, matching the tickSize * 0.5f centering below 228 | yTick.setPosition(sf::Vector2f(origin.x - tickLength, origin.y - yTickRenderOffset - yTickRenderDistance * t - tickSize * 0.5f)); 229 | yTick.setFillColor(_axesColor); 230 | 231 | target.draw(yTick); 232 | 233 | float value = range.x + yTickOffset + t * tickIncrements.y; 234 | 235 | os.str(""); 236 | os << value; 237 | 238 | sf::Text yTickText; 239 |
yTickText.setString(os.str()); 240 | yTickText.setFont(tickFont); 241 | sf::FloatRect bounds = yTickText.getLocalBounds(); 242 | yTickText.setPosition(sf::Vector2f(yTick.getPosition().x - bounds.width * 0.5f - tickLength * 0.5f - textTickOffset, yTick.getPosition().y - bounds.height * 0.5f)); 243 | yTickText.setRotation(0.0f); 244 | yTickText.setColor(_axesColor); 245 | yTickText.setScale(sf::Vector2f(tickTextScale, tickTextScale)); 246 | 247 | target.draw(yTickText); 248 | } 249 | } 250 | } 251 | 252 | float vis::vectorMagnitude(const sf::Vector2f &vector) { 253 | return std::sqrt(vector.x * vector.x + vector.y * vector.y); 254 | } 255 | 256 | sf::Vector2f vis::vectorNormalize(const sf::Vector2f &vector) { 257 | float magnitude = vectorMagnitude(vector); 258 | 259 | return vector / magnitude; 260 | } 261 | 262 | float vis::vectorDot(const sf::Vector2f &left, const sf::Vector2f &right) { 263 | return left.x * right.x + left.y * right.y; 264 | } -------------------------------------------------------------------------------- /ContinuousHTMGPU/source/Main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | int main() { 15 | std::mt19937 generator(time(nullptr)); 16 | 17 | sys::ComputeSystem cs; 18 | 19 | cs.create(sys::ComputeSystem::_gpu); 20 | 21 | sys::ComputeProgram program; 22 | 23 | program.loadFromFile("resources/htmrl.cl", cs); 24 | 25 | float reward = 0.0f; 26 | float prevReward = 0.0f; 27 | 28 | float initReward = 0.0f; 29 | 30 | float totalReward = 0.0f; 31 | 32 | sf::RenderWindow window; 33 | 34 | window.create(sf::VideoMode(800, 600), "Pole Balancing"); 35 | 36 | //window.setVerticalSyncEnabled(true); 37 | 38 | window.setFramerateLimit(60); 39 | 40 | // -------------------------- Load Resources -------------------------- 41 | 42 | sf::Texture backgroundTexture; 43 | sf::Texture 
cartTexture; 44 | sf::Texture poleTexture; 45 | 46 | backgroundTexture.loadFromFile("resources/background.png"); 47 | cartTexture.loadFromFile("resources/cart.png"); 48 | poleTexture.loadFromFile("resources/pole.png"); 49 | 50 | sf::Texture inputCartTexture; 51 | sf::Texture inputPoleTexture; 52 | 53 | inputCartTexture.loadFromFile("resources/inputCart.png"); 54 | inputPoleTexture.loadFromFile("resources/inputPole.png"); 55 | 56 | // -------------------------------------------------------------------- 57 | 58 | sf::Sprite backgroundSprite; 59 | sf::Sprite cartSprite; 60 | sf::Sprite poleSprite; 61 | 62 | backgroundSprite.setTexture(backgroundTexture); 63 | cartSprite.setTexture(cartTexture); 64 | poleSprite.setTexture(poleTexture); 65 | 66 | backgroundSprite.setPosition(sf::Vector2f(0.0f, 0.0f)); 67 | 68 | cartSprite.setOrigin(sf::Vector2f(static_cast(cartSprite.getTexture()->getSize().x) * 0.5f, static_cast(cartSprite.getTexture()->getSize().y))); 69 | poleSprite.setOrigin(sf::Vector2f(static_cast(poleSprite.getTexture()->getSize().x) * 0.5f, static_cast(poleSprite.getTexture()->getSize().y))); 70 | 71 | sf::Sprite inputCartSprite; 72 | sf::Sprite inputPoleSprite; 73 | 74 | inputCartSprite.setTexture(inputCartTexture); 75 | inputPoleSprite.setTexture(inputPoleTexture); 76 | 77 | inputCartSprite.setOrigin(sf::Vector2f(static_cast(inputCartSprite.getTexture()->getSize().x) * 0.5f, static_cast(inputCartSprite.getTexture()->getSize().y))); 78 | inputPoleSprite.setOrigin(sf::Vector2f(static_cast(inputPoleSprite.getTexture()->getSize().x) * 0.5f, static_cast(inputPoleSprite.getTexture()->getSize().y))); 79 | 80 | // ----------------------------- Physics ------------------------------ 81 | 82 | float pixelsPerMeter = 128.0f; 83 | float inputPixelsPerMeter = 8.0f; 84 | float poleLength = 1.0f; 85 | float g = -2.8f; 86 | float massMass = 40.0f; 87 | float cartMass = 2.0f; 88 | sf::Vector2f massPos(0.0f, poleLength); 89 | sf::Vector2f massVel(0.0f, 0.0f); 90 | float 
poleAngle = static_cast(3.14159f) * 0.0f; 91 | float poleAngleVel = 0.0f; 92 | float poleAngleAccel = 0.0f; 93 | float cartX = 0.0f; 94 | float cartVelX = 0.0f; 95 | float cartAccelX = 0.0f; 96 | float poleRotationalFriction = 0.008f; 97 | float cartMoveRadius = 1.8f; 98 | float cartFriction = 0.02f; 99 | float maxSpeed = 3.0f; 100 | 101 | // ---------------------------- Game Loop ----------------------------- 102 | 103 | bool quit = false; 104 | 105 | sf::Clock clock; 106 | 107 | float dt = 0.017f; 108 | 109 | float fitness = 0.0f; 110 | float prevFitness = 0.0f; 111 | 112 | float lowPassFitness = 0.0f; 113 | 114 | bool reverseDirection = false; 115 | 116 | bool trainMode = true; 117 | 118 | bool tDownLastFrame = false; 119 | 120 | std::uniform_real_distribution dist01(0.0f, 1.0f); 121 | 122 | sf::Font font; 123 | 124 | font.loadFromFile("resources/pixelated.ttf"); 125 | 126 | sf::RenderTexture inputRT; 127 | 128 | inputRT.create(64, 32); 129 | 130 | float avgReward = 0.0f; 131 | float avgRewardDecay = 0.003f; 132 | 133 | float minReward = 0.0f; 134 | float maxReward = 1.0f; 135 | 136 | float totalTime = 0.0f; 137 | 138 | float plotUpdateTimer = 0.0f; 139 | 140 | htm::HTMRL agent; 141 | 142 | std::vector layerDescs(5); 143 | 144 | layerDescs[0]._width = 64; 145 | layerDescs[0]._height = 64; 146 | 147 | layerDescs[1]._width = 44; 148 | layerDescs[1]._height = 44; 149 | 150 | layerDescs[2]._width = 32; 151 | layerDescs[2]._height = 32; 152 | 153 | layerDescs[3]._width = 20; 154 | layerDescs[3]._height = 20; 155 | 156 | layerDescs[4]._width = 16; 157 | layerDescs[4]._height = 16; 158 | 159 | std::vector inputTypes(64 * 64, htm::HTMRL::_state); 160 | 161 | for (int x = 0; x < 64; x++) { 162 | for (int y = 32; y < 64; y++) { 163 | inputTypes[x + y * 64] = htm::HTMRL::_unused; 164 | } 165 | } 166 | 167 | std::uniform_int_distribution actionXDist(0, 63); 168 | std::uniform_int_distribution actionYDist(33, 63); 169 | 170 | std::vector actionIndices; 171 | 172 | /*for (int 
x = 28; x < 34; x++) 173 | for (int y = 48; y < 54; y++) { 174 | if (inputTypes[x + y * 64] == htm::HTMRL::_action) 175 | continue; 176 | 177 | inputTypes[x + y * 64] = htm::HTMRL::_action; 178 | 179 | actionIndices.push_back(x + y * 64); 180 | }*/ 181 | 182 | for (int i = 0; i < 8; i++) { 183 | int x = actionXDist(generator); 184 | int y = actionYDist(generator); 185 | 186 | if (inputTypes[x + y * 64] == htm::HTMRL::_action) 187 | continue; 188 | 189 | inputTypes[x + y * 64] = htm::HTMRL::_action; 190 | 191 | actionIndices.push_back(x + y * 64); 192 | } 193 | 194 | agent.createRandom(cs, program, 64, 64, 4, layerDescs, inputTypes, -0.05f, 0.05f, -0.05f, 0.05f, generator); 195 | 196 | sf::RenderTexture htmRT; 197 | htmRT.create(1024, 1024, false); 198 | 199 | vis::HTMRLVisualizer visualizer; 200 | visualizer.create(1024); 201 | 202 | vis::Plot plot; 203 | 204 | plot._curves.resize(1); 205 | 206 | sf::RenderTexture plotRT; 207 | plotRT.create(800, 600, false); 208 | 209 | sf::Texture lineGradient; 210 | lineGradient.loadFromFile("resources/lineGradient.png"); 211 | 212 | sf::Font tickFont; 213 | tickFont.loadFromFile("resources/arial.ttf"); 214 | 215 | const int plotSampleTicks = 60; 216 | int plotSampleCounter = 0; 217 | 218 | do { 219 | clock.restart(); 220 | 221 | // ----------------------------- Input ----------------------------- 222 | 223 | sf::Event windowEvent; 224 | 225 | while (window.pollEvent(windowEvent)) 226 | { 227 | switch (windowEvent.type) 228 | { 229 | case sf::Event::Closed: 230 | quit = true; 231 | break; 232 | } 233 | } 234 | 235 | if (sf::Keyboard::isKeyPressed(sf::Keyboard::Escape)) 236 | quit = true; 237 | 238 | // Update fitness 239 | if (poleAngle < static_cast(3.14159f)) 240 | fitness = -(static_cast(3.14159f)* 0.5f - poleAngle); 241 | else 242 | fitness = -(static_cast(3.14159f)* 0.5f - (static_cast(3.14159f)* 2.0f - poleAngle)); 243 | 244 | //fitness += static_cast(3.14159f)* 0.5f; 245 | 246 | //fitness = fitness - std::abs(poleAngleVel 
* 1.0f); 247 | 248 | //fitness = -std::abs(cartX); 249 | 250 | if (sf::Keyboard::isKeyPressed(sf::Keyboard::A)) 251 | fitness = -cartX; 252 | else if (sf::Keyboard::isKeyPressed(sf::Keyboard::D)) 253 | fitness = cartX; 254 | 255 | // ------------------------------ AI ------------------------------- 256 | 257 | float dFitness = fitness - prevFitness; 258 | 259 | //reward = dFitness * 5.0f; 260 | 261 | reward = fitness; 262 | 263 | if (totalTime == 0.0f) 264 | avgReward = reward; 265 | else 266 | avgReward = (1.0f - avgRewardDecay) * avgReward + avgRewardDecay * reward; 267 | 268 | minReward = std::min(minReward, avgReward); 269 | maxReward = std::max(maxReward, avgReward); 270 | 271 | if (plotSampleCounter == plotSampleTicks) { 272 | plotSampleCounter = 0; 273 | // x is the 0-based sample index; size() - 1 is unsigned and wraps around to a huge value for the first sample, so use size() directly 274 | vis::Point p; 275 | p._position.x = plot._curves[0]._points.size(); 276 | p._position.y = avgReward; 277 | p._color = sf::Color::Red; 278 | 279 | plot._curves[0]._points.push_back(p); 280 | } 281 | 282 | plotSampleCounter++; 283 | 284 | sf::Image img = inputRT.getTexture().copyToImage(); 285 | 286 | for (int x = 0; x < 64; x++) 287 | for (int y = 0; y < 32; y++) { 288 | agent.setInput(x, y, img.getPixel(x, y).r / 255.0f); 289 | } 290 | 291 | agent.step(cs, reward, 0.01f, 0.01f, 0.01f, 0.05f, 0.1f, 0.05f, 0.05f, 0.5f, 0.5f, 0.5f, 0.01f, 0.2f, 0.992f, 0.15f, 0.15f, 120, 10, 2, generator); 292 | 293 | float output = 0.0f; 294 | int c = 0; 295 | 296 | for (int i = 0; i < actionIndices.size(); i++) { 297 | output += agent.getOutput(actionIndices[i]); 298 | c++; 299 | } 300 | 301 | output /= c; 302 | 303 | float dir = std::min(1.0f, std::max(-1.0f, 1.6f * (output * 2.0f - 1.0f))); 304 | 305 | //std::cout << dir << std::endl; 306 | 307 | float agentForce = 4000.0f * dir; 308 | 309 | prevFitness = fitness; 310 | 311 | // ---------------------------- Physics ---------------------------- 312 | 313 | float pendulumCartAccelX = cartAccelX; 314 | 315 | if (cartX < -cartMoveRadius) 316 | pendulumCartAccelX =
0.0f; 317 | else if (cartX > cartMoveRadius) 318 | pendulumCartAccelX = 0.0f; 319 | 320 | poleAngleAccel = pendulumCartAccelX * std::cos(poleAngle) + g * std::sin(poleAngle); 321 | poleAngleVel += -poleRotationalFriction * poleAngleVel + poleAngleAccel * dt; 322 | poleAngle += poleAngleVel * dt; 323 | 324 | massPos = sf::Vector2f(cartX + std::cos(poleAngle + static_cast(3.14159f)* 0.5f) * poleLength, std::sin(poleAngle + static_cast(3.14159f)* 0.5f) * poleLength); 325 | 326 | float force = 0.0f; 327 | 328 | if (std::abs(cartVelX) < maxSpeed) { 329 | force = std::max(-4000.0f, std::min(4000.0f, agentForce)); 330 | 331 | if (sf::Keyboard::isKeyPressed(sf::Keyboard::Left)) 332 | force = -4000.0f; 333 | 334 | if (sf::Keyboard::isKeyPressed(sf::Keyboard::Right)) 335 | force = 4000.0f; 336 | } 337 | 338 | if (cartX < -cartMoveRadius) { 339 | cartX = -cartMoveRadius; 340 | 341 | cartAccelX = -cartVelX / dt; 342 | cartVelX = -0.5f * cartVelX; 343 | } 344 | else if (cartX > cartMoveRadius) { 345 | cartX = cartMoveRadius; 346 | 347 | cartAccelX = -cartVelX / dt; 348 | cartVelX = -0.5f * cartVelX; 349 | } 350 | 351 | cartAccelX = 0.25f * (force + massMass * poleLength * poleAngleAccel * std::cos(poleAngle) - massMass * poleLength * poleAngleVel * poleAngleVel * std::sin(poleAngle)) / (massMass + cartMass); 352 | cartVelX += -cartFriction * cartVelX + cartAccelX * dt; 353 | cartX += cartVelX * dt; 354 | 355 | poleAngle = std::fmod(poleAngle, (2.0f * static_cast(3.14159f))); 356 | 357 | if (poleAngle < 0.0f) 358 | poleAngle += static_cast(3.14159f)* 2.0f; 359 | 360 | if (sf::Keyboard::isKeyPressed(sf::Keyboard::T)) { 361 | if (!tDownLastFrame) { 362 | trainMode = !trainMode; 363 | } 364 | 365 | tDownLastFrame = true; 366 | } 367 | else 368 | tDownLastFrame = false; 369 | 370 | // ---------------------------- Rendering ---------------------------- 371 | 372 | // Render to input buffer 373 | inputRT.clear(); 374 | 375 | inputCartSprite.setPosition(sf::Vector2f(inputRT.getSize().x 
* 0.5f + inputPixelsPerMeter * cartX, inputRT.getSize().y * 0.5f + 4.0f)); 376 | 377 | inputRT.draw(inputCartSprite); 378 | 379 | inputPoleSprite.setPosition(inputCartSprite.getPosition() + sf::Vector2f(0.0f, -4.0f)); 380 | inputPoleSprite.setRotation(poleAngle * 180.0f / static_cast(3.14159f) + 180.0f); 381 | 382 | inputRT.draw(inputPoleSprite); 383 | 384 | inputRT.display(); 385 | 386 | window.clear(); 387 | 388 | window.draw(backgroundSprite); 389 | 390 | cartSprite.setPosition(sf::Vector2f(800.0f * 0.5f + pixelsPerMeter * cartX, 600.0f * 0.5f + 3.0f)); 391 | 392 | window.draw(cartSprite); 393 | 394 | poleSprite.setPosition(cartSprite.getPosition() + sf::Vector2f(0.0f, -45.0f)); 395 | poleSprite.setRotation(poleAngle * 180.0f / static_cast(3.14159f) + 180.0f); 396 | 397 | window.draw(poleSprite); 398 | 399 | sf::Sprite inputSprite; 400 | 401 | inputSprite.setTexture(inputRT.getTexture()); 402 | 403 | inputSprite.setPosition(0, 0); 404 | inputSprite.setScale(4.0f, 4.0f); 405 | 406 | window.draw(inputSprite); 407 | 408 | if (sf::Keyboard::isKeyPressed(sf::Keyboard::B)) { 409 | plotRT.setActive(); 410 | plotRT.clear(sf::Color::White); 411 | 412 | plot.draw(plotRT, lineGradient, tickFont, 0.5f, sf::Vector2f(0.0f, plot._curves[0]._points.size()), sf::Vector2f(minReward, maxReward), sf::Vector2f(64.0f, 64.0f), sf::Vector2f(plot._curves[0]._points.size() / 10.0f, (maxReward - minReward) / 10.0f), 2.0f, 4.0f, 2.0f, 6.0f, 2.0f, 4); 413 | 414 | plotRT.display(); 415 | 416 | sf::Sprite plotSprite; 417 | plotSprite.setTexture(plotRT.getTexture()); 418 | 419 | window.draw(plotSprite); 420 | } 421 | 422 | if (sf::Keyboard::isKeyPressed(sf::Keyboard::V)) { 423 | htmRT.setActive(); 424 | htmRT.clear(sf::Color::White); 425 | 426 | visualizer.update(htmRT, sf::Vector2f(512.0f, 512.0f), sf::Vector2f(1.95f, 1.95f), cs, agent, generator); 427 | 428 | htmRT.display(); 429 | 430 | sf::Sprite htmSprite; 431 | htmSprite.setTexture(htmRT.getTexture()); 432 | 433 | 
htmSprite.setScale(0.8f, 0.8f); 434 | htmSprite.setOrigin(512, 512); 435 | htmSprite.setPosition(400.0f, 300.0f); 436 | 437 | window.draw(htmSprite); 438 | } 439 | 440 | // ------------------------------------------------------------------- 441 | 442 | window.display(); 443 | 444 | //dt = clock.getElapsedTime().asSeconds(); 445 | 446 | totalTime += dt; 447 | plotUpdateTimer += dt; 448 | } while (!quit); 449 | 450 | return 0; 451 | } -------------------------------------------------------------------------------- /ContinuousHTMGPU/resources/htmrl.cl: -------------------------------------------------------------------------------- 1 | constant sampler_t normalizedClampedNearestSampler = CLK_NORMALIZED_COORDS_TRUE | 2 | CLK_ADDRESS_CLAMP | 3 | CLK_FILTER_NEAREST; 4 | 5 | constant sampler_t normalizedClampedToEdgeNearestSampler = CLK_NORMALIZED_COORDS_TRUE | 6 | CLK_ADDRESS_CLAMP_TO_EDGE | 7 | CLK_FILTER_NEAREST; 8 | 9 | constant sampler_t unnormalizedClampedNearestSampler = CLK_NORMALIZED_COORDS_FALSE | 10 | CLK_ADDRESS_CLAMP | 11 | CLK_FILTER_NEAREST; 12 | 13 | constant sampler_t defaultNormalizedSampler = CLK_NORMALIZED_COORDS_TRUE | 14 | CLK_ADDRESS_CLAMP_TO_EDGE | 15 | CLK_FILTER_NEAREST; 16 | 17 | constant sampler_t defaultUnnormalizedSampler = CLK_NORMALIZED_COORDS_FALSE | 18 | CLK_ADDRESS_CLAMP_TO_EDGE | 19 | CLK_FILTER_NEAREST; 20 | 21 | #define MAX_RECEPTIVE_SIZE 81 22 | #define MAX_SEGMENTS_PER_CELL 4 23 | 24 | constant float columnIntensity = 1.0f; 25 | constant float learnTolerance = 0.01f; 26 | constant float sparsityMultiplier = 10.0f; 27 | constant float sparsityThreshold = 0.04f; 28 | constant float sparsity = 0.06f; 29 | constant float segmentSparsity = 0.3f; 30 | constant float columnTraceDecay = 0.002f; 31 | constant float columnMomentum = 0.1f; 32 | constant float columnRandomness = 0.1f; 33 | constant float minDerivative = 0.1f; 34 | constant float minSimilarity = 0.0001f; 35 | constant float minLearn = 0.0f; 36 | constant float learnFalloff = 
0.1f; 37 | constant float noMatchTolerance = 0.0001f; 38 | constant float falloffIntensity = 0.5f; 39 | constant float activationModulationPower = 4.0f; 40 | constant float qModulationPower = 1.0f; 41 | constant float crowdingIntensity = 8.0f; 42 | constant float cellStateIntensity = 32.0f; 43 | constant float cellPredictionIntensity = 4.0f; 44 | constant float minLearningThreshold = 0.0f; 45 | constant float predictionRangeExtension = 0.1f; 46 | constant float localActivity = 1.0f; 47 | constant float reconstructionErrorActivity = 2.0f; 48 | constant float boostThreshold = 0.01f; 49 | constant float rectifierLeak = 0.03f; 50 | constant float minDivisor = 0.0001f; 51 | constant float higherLayerQPower = 16.0f; 52 | constant float dutyCycleDecay = 0.005f; 53 | constant float minReconstructionError = 0.1f; 54 | 55 | // LCA 56 | constant float lcaTauInv = 0.01f; 57 | constant float lcaAlpha = 0.01f; 58 | constant float lcaLambda = 0.01f; 59 | constant float lcaGamma = 100.0f; 60 | 61 | float randFloat(uint2* state) { 62 | const float invMaxInt = 1.0f / 4294967296.0f; 63 | uint x = (*state).x * 17 + (*state).y * 13123; 64 | (*state).x = (x << 13) ^ x; 65 | (*state).y ^= (x << 7); 66 | 67 | uint tmp = x * (x * x * 15731 + 74323) + 871483; 68 | 69 | return convert_float(tmp) * invMaxInt; 70 | } 71 | 72 | float sigmoid(float x) { 73 | return 1.0f / (1.0f + exp(-x)); 74 | } 75 | 76 | float relu(float x) { 77 | return log(1.0f + exp(x)); 78 | } 79 | 80 | float rectifier(float x) { 81 | return fmax(0.0f, x); 82 | } 83 | 84 | float rectifierDerivative(float x) { 85 | return x > rectifierLeak ? 
1.0f : rectifierLeak; 86 | } 87 | 88 | float scaledSigmoid(float x) { 89 | return 2.0f / (1.0f + exp(-x)) - 1.0f; 90 | } 91 | 92 | float lcaThreshold(float potential) { 93 | return (potential - lcaAlpha * lcaLambda) / (1.0f + exp(-lcaGamma * (potential - lcaLambda))); 94 | } 95 | 96 | float boostFunction(float dutyCycle, float threshold) { 97 | return fmin(1.0f, fmax(0.0f, threshold - dutyCycle) / threshold); 98 | } 99 | 100 | void kernel initializePartOne(write_only image2d_t columnActivations, write_only image2d_t columnStates, write_only image3d_t columnFeedForwardWeights, write_only image2d_t columnPrevValues, 101 | int cellsInColumn, int receptiveFieldSize, int lateralConnectionsSize, uint2 seed, float minWeight, float maxWeight) 102 | { 103 | uint2 seedValue = seed + (uint2)(get_global_id(0) * 29 - 12, get_global_id(1) * 16 + 23) * 36; 104 | 105 | int2 columnPosition = (int2)(get_global_id(0), get_global_id(1)); 106 | 107 | write_imagef(columnActivations, columnPosition, (float4)(0.0f, 0.0f, 0.0f, 0.0f)); 108 | write_imagef(columnStates, columnPosition, (float4)(0.0f, localActivity / receptiveFieldSize, 0.0f, 0.0f)); 109 | write_imagef(columnPrevValues, columnPosition, (float4)(0.0f, 0.0f, 0.0f, 0.0f)); 110 | 111 | for (int wi = 0; wi < receptiveFieldSize; wi++) { 112 | int4 weightPosition = (int4)(columnPosition.x, columnPosition.y, wi, 0); 113 | 114 | float columnConnectionWeight = randFloat(&seedValue) * (maxWeight - minWeight) + minWeight; 115 | 116 | write_imagef(columnFeedForwardWeights, weightPosition, (float4)(columnConnectionWeight, 0.0f, 0.0f, 0.0f)); 117 | } 118 | } 119 | 120 | void kernel initializePartTwo(write_only image3d_t cellStates, write_only image3d_t segmentStates, write_only image3d_t cellWeights, write_only image3d_t cellPredictions, write_only image3d_t cellQValues, 121 | int cellsInColumn, int receptiveFieldSize, int lateralConnectionsSize, int segmentsPerCell, uint2 seed, float minWeight, float maxWeight) 122 | { 123 | uint2 
seedValue = seed + (uint2)(get_global_id(0) * 32 + 24, get_global_id(1) * 11 - 66) * 23;

	// Body of initializePartTwo: per column, clear every cell's state, prediction,
	// Q value and segment states, then randomise the cell's lateral weight slots.
	const int2 column = (int2)(get_global_id(0), get_global_id(1));
	const float4 cleared = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
	const float weightRange = maxWeight - minWeight;

	for (int cell = 0; cell < cellsInColumn; cell++) {
		const int4 cellPosition = (int4)(column.x, column.y, cell, 0);

		write_imagef(cellStates, cellPosition, cleared);
		write_imagef(cellPredictions, cellPosition, cleared);
		write_imagef(cellQValues, cellPosition, cleared);

		for (int segment = 0; segment < segmentsPerCell; segment++)
			write_imagef(segmentStates, (int4)(column.x, column.y, cell * segmentsPerCell + segment, 0), cleared);

		// Weights of cell `cell` in column row y are stored in image row cell + y * cellsInColumn
		const int weightRow = cell + column.y * cellsInColumn;

		for (int slot = 0; slot < lateralConnectionsSize; slot++) {
			float initialWeight = randFloat(&seedValue) * weightRange + minWeight;

			write_imagef(cellWeights, (int4)(column.x, weightRow, slot, 0), (float4)(initialWeight, 0.0f, 0.0f, 0.0f));
		}
	}
}

// Feed-forward activation of one column (global ids 0/1 = column x/y).
// Writes sigmoid(sum of weight * input over the receptive field + bias) to columnActivations.x.
// Weight slots are numbered in loop order (dx outer, dy inner) and advance for
// out-of-range taps too; the bias is the slot after the last tap.
// columnStatesPrev and seed are accepted but not used.
void kernel layerColumnActivate(read_only image2d_t columnStatesInput, read_only image3d_t columnFeedForwardWeightsPrev, read_only image2d_t columnStatesPrev, write_only image2d_t columnActivations,
	float2 layerSizeMinusOneInv, int2 inputReceptiveFieldRadius, int2 inputSize, int2 inputSizeMinusOne, uint2 seed)
{
	const int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));

	// Receptive-field centre in input texels, truncated to an integer BEFORE the tap
	// offset is added. Adding the offset to the fractional centre and converting
	// afterwards rounds toward zero, so positions in (-1, 0) would land on texel 0,
	// pass the bounds check and sample the edge texel under two different weight
	// slots - which also disagrees with the integer field centre used by reconstructInput.
	const int2 inputCenter = (int2)((int)(columnPosition.x * layerSizeMinusOneInv.x * inputSizeMinusOne.x),
		(int)(columnPosition.y * layerSizeMinusOneInv.y * inputSizeMinusOne.y));

	float sum = 0.0f;
	int weightIndex = 0;

	for (int dx = -inputReceptiveFieldRadius.x; dx <= inputReceptiveFieldRadius.x; dx++)
		for (int dy = -inputReceptiveFieldRadius.y; dy <= inputReceptiveFieldRadius.y; dy++) {
			const int2 inputPosition = inputCenter + (int2)(dx, dy);

			if (inputPosition.x >= 0 && inputPosition.x < inputSize.x && inputPosition.y >= 0 && inputPosition.y < inputSize.y) {
				float input = read_imagef(columnStatesInput, inputPosition).x;
				float weight = read_imagef(columnFeedForwardWeightsPrev, (int4)(columnPosition.x, columnPosition.y, weightIndex, 0)).x;

				sum += weight * input;
			}

			weightIndex++;
		}

	// Bias slot
	sum += read_imagef(columnFeedForwardWeightsPrev, (int4)(columnPosition.x, columnPosition.y, weightIndex, 0)).x;

	write_imagef(columnActivations, columnPosition, (float4)(sigmoid(sum), 0.0f, 0.0f, 0.0f));
}

// Local inhibition: a column's state is 1 when fewer than localActivity columns inside
// its inhibition window have a strictly higher activation, otherwise 0.
// Output texel: .x = new state, .y = trace blended from the previous trace with
// weight columnTraceDecay on the new state.
// columnFeedForwardWeightsPrev, layerSizeInv and receptiveFieldSize are not used.
void kernel layerColumnInhibit(read_only image2d_t columnActivations, read_only image2d_t columnStatesPrev, read_only image3d_t columnFeedForwardWeightsPrev, write_only image2d_t columnStates,
	int2 layerSize, float2 layerSizeInv, int2 inhibitionRadii, int receptiveFieldSize)
{
	const int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));
	const float ownActivation = read_imagef(columnActivations, columnPosition).x;

	float strongerNeighbours = 0.0f;

	for (int dx = -inhibitionRadii.x; dx <= inhibitionRadii.x; dx++)
		for (int dy = -inhibitionRadii.y; dy <= inhibitionRadii.y; dy++) {
			const int2 neighbour = columnPosition + (int2)(dx, dy);

			if (neighbour.x < 0 || neighbour.x >= layerSize.x || neighbour.y < 0 || neighbour.y >= layerSize.y)
				continue;

			if (read_imagef(columnActivations, neighbour).x > ownActivation)
				strongerNeighbours += 1.0f;
		}

	const float newState = strongerNeighbours < localActivity ? 1.0f : 0.0f;
	const float prevTrace = read_imagef(columnStatesPrev, columnPosition).y;
	const float newTrace = (1.0f - columnTraceDecay) * prevTrace + columnTraceDecay * newState;

	write_imagef(columnStates, columnPosition, (float4)(newState, newTrace, 0.0f, 0.0f));
}

// Updates one column's feed-forward weights from the input reconstruction error.
// Reads weights from columnFeedForwardWeightsPrev (.x = weight, .y = previous delta) and
// writes (new weight, new delta) to columnFeedForwardWeights. Slots whose tap falls
// outside the input are not written.
//   hiddenError = sum((input - recon) * weight) / tapCount * a * (1 - a), a = column activation
//   tap delta   = prevDelta * columnMomentum + alpha * 0.5 * ((input - recon) * state + hiddenError * input)
//   bias delta  = prevDelta * columnMomentum + alpha * hiddenError
// columnPredictions, layerSize, inhibitionRadii, receptiveFieldSize, beta, gamma and seed
// are not used (a neighbourhood-average sparsity penalty using beta was disabled in
// the original source).
void kernel layerColumnWeightUpdate(read_only image2d_t reconstruction, read_only image2d_t inputs, read_only image2d_t columnActivations, read_only image2d_t columnStates, read_only image2d_t columnPredictions, read_only image3d_t columnFeedForwardWeightsPrev, write_only image3d_t columnFeedForwardWeights,
	int2 layerSize, float2 layerSizeMinusOneInv, int2 inputReceptiveFieldRadius, int2 inhibitionRadii, int2 inputSize, int2 inputSizeMinusOne, int receptiveFieldSize, float alpha, float beta, float gamma, uint2 seed)
{
	const int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));

	// Integer field centre; must use the same mapping as layerColumnActivate so that a
	// weight slot always refers to the same input texel (see the note there).
	const int2 inputCenter = (int2)((int)(columnPosition.x * layerSizeMinusOneInv.x * inputSizeMinusOne.x),
		(int)(columnPosition.y * layerSizeMinusOneInv.y * inputSizeMinusOne.y));

	const float columnState = read_imagef(columnStates, columnPosition).x;
	const float activation = read_imagef(columnActivations, columnPosition).x;

	// Pass 1: back-project the reconstruction error through the current weights
	float errorSum = 0.0f;
	int tapCount = 0;

	for (int dx = -inputReceptiveFieldRadius.x; dx <= inputReceptiveFieldRadius.x; dx++)
		for (int dy = -inputReceptiveFieldRadius.y; dy <= inputReceptiveFieldRadius.y; dy++) {
			const int2 inputPosition = inputCenter + (int2)(dx, dy);

			if (inputPosition.x >= 0 && inputPosition.x < inputSize.x && inputPosition.y >= 0 && inputPosition.y < inputSize.y) {
				float input = read_imagef(inputs, inputPosition).x;
				float recon = read_imagef(reconstruction, inputPosition).x;
				float weight = read_imagef(columnFeedForwardWeightsPrev, (int4)(columnPosition.x, columnPosition.y, tapCount, 0)).x;

				errorSum += (input - recon) * weight;
			}

			tapCount++;
		}

	// tapCount counts every tap of the window, including those outside the input
	const float hiddenError = errorSum / tapCount * activation * (1.0f - activation);

	// Pass 2: momentum update of every in-range tap weight
	int weightIndex = 0;

	for (int dx = -inputReceptiveFieldRadius.x; dx <= inputReceptiveFieldRadius.x; dx++)
		for (int dy = -inputReceptiveFieldRadius.y; dy <= inputReceptiveFieldRadius.y; dy++) {
			const int2 inputPosition = inputCenter + (int2)(dx, dy);

			if (inputPosition.x >= 0 && inputPosition.x < inputSize.x && inputPosition.y >= 0 && inputPosition.y < inputSize.y) {
				float input = read_imagef(inputs, inputPosition).x;
				float recon = read_imagef(reconstruction, inputPosition).x;

				const int4 weightPosition = (int4)(columnPosition.x, columnPosition.y, weightIndex, 0);
				const float2 prevWeight = read_imagef(columnFeedForwardWeightsPrev, weightPosition).xy;

				float delta = prevWeight.y * columnMomentum + alpha * 0.5f * ((input - recon) * columnState + hiddenError * input);

				write_imagef(columnFeedForwardWeights, weightPosition, (float4)(prevWeight.x + delta, delta, 0.0f, 0.0f));
			}

			weightIndex++;
		}

	// Bias slot follows the last tap
	const int4 biasPosition = (int4)(columnPosition.x, columnPosition.y, weightIndex, 0);
	const float2 prevBias = read_imagef(columnFeedForwardWeightsPrev, biasPosition).xy;
	const float biasDelta = prevBias.y * columnMomentum + alpha * hiddenError;

	write_imagef(columnFeedForwardWeights, biasPosition, (float4)(prevBias.x + biasDelta, biasDelta, 0.0f, 0.0f));
}

// Distributes a column's state over its cells using the previous step's cell predictions.
// With p = the largest previous prediction (.x) in the column (first such cell wins,
// cell 0 when none exceeds 0): the winning cell gets columnState, every other cell
// gets (1 - p) * columnState. Output texel: .x = cell state,
// .y = max((1 - cellTraceDecay) * previous trace, cell state).
// cellWeightsPrev, columnPredictionsPrev, lateralConnectionsRadii and seed are not used.
// (An earlier variant based on the per-cell prediction error was disabled in the original source.)
void kernel layerCellActivate(read_only image2d_t columnStates, read_only image3d_t cellStatesPrev, read_only image3d_t cellPredictionsPrev, read_only image3d_t cellWeightsPrev, read_only image2d_t columnPredictionsPrev,
	write_only image3d_t cellStates, int cellsInColumn, int2 lateralConnectionsRadii, float cellTraceDecay, uint2 seed)
{
	const int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));
	const float columnState = read_imagef(columnStates, columnPosition).x;

	float bestPrediction = 0.0f;
	int bestCell = 0;

	for (int ci = 0; ci < cellsInColumn; ci++) {
		float prediction = read_imagef(cellPredictionsPrev, (int4)(columnPosition.x, columnPosition.y, ci, 0)).x;

		if (prediction > bestPrediction) {
			bestPrediction = prediction;
			bestCell = ci;
		}
	}

	// Share of the column state handed to every cell regardless of its prediction
	const float allCellsIncrease = 1.0f - bestPrediction;

	for (int ci = 0; ci < cellsInColumn; ci++) {
		const int4 cellPosition = (int4)(columnPosition.x, columnPosition.y, ci, 0);
		const float winner = ci == bestCell ? 1.0f : 0.0f;

		float unmodulatedCellState = (1.0f - allCellsIncrease) * winner + allCellsIncrease;
		float newCellState = unmodulatedCellState * columnState;

		float prevTrace = read_imagef(cellStatesPrev, cellPosition).y;
		float newTrace = fmax((1.0f - cellTraceDecay) * prevTrace, newCellState);

		write_imagef(cellStates, cellPosition, (float4)(newCellState, newTrace, 0.0f, 0.0f));
	}
}

// Updates the lateral (and next-layer context) weights of every cell in one column.
// Per segment the error is target - previous segment state, where target is 1 only when
// the cell is active (state > 0.5) and the segment's previous state equals the cell's
// previous maximum (cellPredictionsPrev.y), else 0.
// Each weight texel holds (weight, trace):
//   trace  = (1 - eligibilityDecay) * prevTrace + beta * exp(-|prevTrace| * temperature) * error * source
//   weight = prevWeight + alpha * trace
// Slot layout per window position (dx outer, dy inner): cellsInColumn * numSegmentsPerCell
// cell slots followed by numSegmentsPerCell context slots; skipped positions still
// advance the slot index and are not written.
// Requires numSegmentsPerCell <= MAX_SEGMENTS_PER_CELL (not checked here).
// columnStates, columnPredictionsPrev, tdError and gamma are not used.
void kernel layerCellWeightUpdate(read_only image2d_t columnStates, read_only image2d_t columnPredictionsPrev, read_only image3d_t cellPredictionsPrev, read_only image3d_t cellStates, read_only image3d_t cellStatesPrev, read_only image2d_t nextLayerContextPrev, read_only image3d_t segmentStatesPrev, read_only image3d_t cellWeightsPrev,
	write_only image3d_t cellWeights, int cellsInColumn, int2 layerSize, int2 lateralConnectionsRadii, int numSegmentsPerCell, float2 layerSizeMinusOneInv, int2 nextLayerSize, int2 nextLayerSizeMinusOne, float tdError, float alpha, float beta, float gamma, float temperature, float eligibilityDecay)
{
	const int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));

	// Centre of this column's window in the next layer, truncated to whole columns
	const int2 nextCenter = (int2)((int)(columnPosition.x * layerSizeMinusOneInv.x * nextLayerSizeMinusOne.x),
		(int)(columnPosition.y * layerSizeMinusOneInv.y * nextLayerSizeMinusOne.y));

	for (int ci = 0; ci < cellsInColumn; ci++) {
		const int weightRow = ci + columnPosition.y * cellsInColumn;

		const float cellState = read_imagef(cellStates, (int4)(columnPosition.x, columnPosition.y, ci, 0)).x;
		const float bestSegmentPrev = read_imagef(cellPredictionsPrev, (int4)(columnPosition.x, columnPosition.y, ci, 0)).y;

		float errors[MAX_SEGMENTS_PER_CELL];

		for (int seg = 0; seg < numSegmentsPerCell; seg++) {
			float value = read_imagef(segmentStatesPrev, (int4)(columnPosition.x, columnPosition.y, ci * numSegmentsPerCell + seg, 0)).x;

			// Exact comparison is intentional: bestSegmentPrev was stored as the
			// maximum of these same segment values by the prediction kernel.
			float target = (cellState > 0.5f && value == bestSegmentPrev) ? 1.0f : 0.0f;

			errors[seg] = target - value;
		}

		int wi = 0;

		for (int dx = -lateralConnectionsRadii.x; dx <= lateralConnectionsRadii.x; dx++)
			for (int dy = -lateralConnectionsRadii.y; dy <= lateralConnectionsRadii.y; dy++) {
				const int2 source = columnPosition + (int2)(dx, dy);

				if (source.x < 0 || source.x >= layerSize.x || source.y < 0 || source.y >= layerSize.y) {
					// Skip this position's cell slots and its context slots
					wi += numSegmentsPerCell * (cellsInColumn + 1);

					continue;
				}

				// Connections to the cells of the source column (previous step's states)
				for (int cio = 0; cio < cellsInColumn; cio++) {
					float connection = read_imagef(cellStatesPrev, (int4)(source.x, source.y, cio, 0)).x;

					for (int seg = 0; seg < numSegmentsPerCell; seg++) {
						const int4 weightPosition = (int4)(columnPosition.x, weightRow, wi, 0);
						const float2 prev = read_imagef(cellWeightsPrev, weightPosition).xy;

						float eligibility = errors[seg] * connection;
						float newTrace = (1.0f - eligibilityDecay) * prev.y + beta * exp(-fabs(prev.y) * temperature) * eligibility;

						write_imagef(cellWeights, weightPosition, (float4)(prev.x + alpha * newTrace, newTrace, 0.0f, 0.0f));

						wi++;
					}
				}

				// Additional context from the next layer
				const int2 sourceNext = nextCenter + (int2)(dx, dy);

				if (sourceNext.x >= 0 && sourceNext.x < nextLayerSize.x && sourceNext.y >= 0 && sourceNext.y < nextLayerSize.y) {
					float nextContextPrev = read_imagef(nextLayerContextPrev, sourceNext).x;

					for (int seg = 0; seg < numSegmentsPerCell; seg++) {
						const int4 weightPosition = (int4)(columnPosition.x, weightRow, wi, 0);
						const float2 prev = read_imagef(cellWeightsPrev, weightPosition).xy;

						float eligibility = errors[seg] * nextContextPrev;
						float newTrace = (1.0f - eligibilityDecay) * prev.y + beta * exp(-fabs(prev.y) * temperature) * eligibility;

						write_imagef(cellWeights, weightPosition, (float4)(prev.x + alpha * newTrace, newTrace, 0.0f, 0.0f));

						wi++;
					}
				}
				else
					wi += numSegmentsPerCell;
			}
	}
}

void kernel layerCellWeightUpdateLast(read_only image2d_t columnStates, read_only image2d_t columnPredictionsPrev, read_only image3d_t cellPredictionsPrev, read_only image3d_t cellStates, read_only image3d_t cellStatesPrev, read_only image3d_t segmentStatesPrev, read_only
image3d_t cellWeightsPrev,
	write_only image3d_t cellWeights, int cellsInColumn, int2 layerSize, int2 lateralConnectionsRadii, int numSegmentsPerCell, float tdError, float alpha, float beta, float gamma, float temperature, float eligibilityDecay)
{
	// layerCellWeightUpdateLast: weight update for a layer without next-layer context.
	// Per segment the error is target - previous segment state, where target is 1 only
	// when the cell is active (state > 0.5) and the segment's previous state equals the
	// cell's previous maximum (cellPredictionsPrev.y), else 0.
	// Each weight texel holds (weight, trace):
	//   trace  = (1 - eligibilityDecay) * prevTrace + beta * exp(-|prevTrace| * temperature) * error * source
	//   weight = prevWeight + alpha * trace
	// Each window position (dx outer, dy inner) owns cellsInColumn * numSegmentsPerCell
	// slots; positions outside the layer advance the slot index without being written.
	// Requires numSegmentsPerCell <= MAX_SEGMENTS_PER_CELL (not checked here).
	// columnStates, columnPredictionsPrev, tdError and gamma are not used.
	const int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));

	for (int ci = 0; ci < cellsInColumn; ci++) {
		const int weightRow = ci + columnPosition.y * cellsInColumn;

		const float cellState = read_imagef(cellStates, (int4)(columnPosition.x, columnPosition.y, ci, 0)).x;
		const float bestSegmentPrev = read_imagef(cellPredictionsPrev, (int4)(columnPosition.x, columnPosition.y, ci, 0)).y;

		float errors[MAX_SEGMENTS_PER_CELL];

		for (int seg = 0; seg < numSegmentsPerCell; seg++) {
			float value = read_imagef(segmentStatesPrev, (int4)(columnPosition.x, columnPosition.y, ci * numSegmentsPerCell + seg, 0)).x;

			// Exact comparison is intentional: bestSegmentPrev was stored as the
			// maximum of these same segment values by the prediction kernel.
			float target = (cellState > 0.5f && value == bestSegmentPrev) ? 1.0f : 0.0f;

			errors[seg] = target - value;
		}

		int wi = 0;

		for (int dx = -lateralConnectionsRadii.x; dx <= lateralConnectionsRadii.x; dx++)
			for (int dy = -lateralConnectionsRadii.y; dy <= lateralConnectionsRadii.y; dy++) {
				const int2 source = columnPosition + (int2)(dx, dy);

				if (source.x < 0 || source.x >= layerSize.x || source.y < 0 || source.y >= layerSize.y) {
					wi += numSegmentsPerCell * cellsInColumn;

					continue;
				}

				for (int cio = 0; cio < cellsInColumn; cio++) {
					float connection = read_imagef(cellStatesPrev, (int4)(source.x, source.y, cio, 0)).x;

					for (int seg = 0; seg < numSegmentsPerCell; seg++) {
						const int4 weightPosition = (int4)(columnPosition.x, weightRow, wi, 0);
						const float2 prev = read_imagef(cellWeightsPrev, weightPosition).xy;

						float eligibility = errors[seg] * connection;
						float newTrace = (1.0f - eligibilityDecay) * prev.y + beta * exp(-fabs(prev.y) * temperature) * eligibility;

						write_imagef(cellWeights, weightPosition, (float4)(prev.x + alpha * newTrace, newTrace, 0.0f, 0.0f));

						wi++;
					}
				}
			}
	}
}

// Computes segment states and cell predictions for one column.
// Each segment sums weight * source over the lateral window (current cell states) plus
// one next-layer context value per window position, then passes the sum through sigmoid.
// segmentStates.x receives each segment's value; cellPredictions receives
// (1 if the largest segment value > 0.5 else 0, largest segment value).
// Slot layout per window position (dx outer, dy inner): cellsInColumn * numSegmentsPerCell
// cell slots followed by numSegmentsPerCell context slots.
// Requires numSegmentsPerCell <= MAX_SEGMENTS_PER_CELL (not checked here).
// cellStatesPrev and nextLayerContextPrev are not used.
void kernel layerCellPredict(read_only image3d_t cellStates, read_only image3d_t cellStatesPrev, read_only image3d_t cellWeights, read_only image2d_t nextLayerContext, read_only image2d_t nextLayerContextPrev,
	write_only image3d_t cellPredictions, write_only image3d_t segmentStates, int cellsInColumn, int2 layerSize, int2 lateralConnectionsRadii, int numSegmentsPerCell, float2 layerSizeMinusOneInv, int2 nextLayerSize, int2 nextLayerSizeMinusOne)
{
	const int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));

	// Centre of this column's window in the next layer, truncated to whole columns
	const int2 nextCenter = (int2)((int)(columnPosition.x * layerSizeMinusOneInv.x * nextLayerSizeMinusOne.x),
		(int)(columnPosition.y * layerSizeMinusOneInv.y * nextLayerSizeMinusOne.y));

	for (int ci = 0; ci < cellsInColumn; ci++) {
		const int weightRow = ci + columnPosition.y * cellsInColumn;

		float sums[MAX_SEGMENTS_PER_CELL];

		for (int seg = 0; seg < numSegmentsPerCell; seg++)
			sums[seg] = 0.0f;

		int wi = 0;

		for (int dx = -lateralConnectionsRadii.x; dx <= lateralConnectionsRadii.x; dx++)
			for (int dy = -lateralConnectionsRadii.y; dy <= lateralConnectionsRadii.y; dy++) {
				const int2 source = columnPosition + (int2)(dx, dy);

				if (source.x < 0 || source.x >= layerSize.x || source.y < 0 || source.y >= layerSize.y) {
					// Cell slots plus the context slots of this position
					wi += numSegmentsPerCell * (cellsInColumn + 1);

					continue;
				}

				for (int cio = 0; cio < cellsInColumn; cio++) {
					float connectionState = read_imagef(cellStates, (int4)(source.x, source.y, cio, 0)).x;

					for (int seg = 0; seg < numSegmentsPerCell; seg++) {
						sums[seg] += read_imagef(cellWeights, (int4)(columnPosition.x, weightRow, wi, 0)).x * connectionState;

						wi++;
					}
				}

				const int2 sourceNext = nextCenter + (int2)(dx, dy);

				if (sourceNext.x >= 0 && sourceNext.x < nextLayerSize.x && sourceNext.y >= 0 && sourceNext.y < nextLayerSize.y) {
					float nextContext = read_imagef(nextLayerContext, sourceNext).x;

					for (int seg = 0; seg < numSegmentsPerCell; seg++) {
						sums[seg] += read_imagef(cellWeights, (int4)(columnPosition.x, weightRow, wi, 0)).x * nextContext;

						wi++;
					}
				}
				else
					wi += numSegmentsPerCell;
			}

		float maximum = 0.0f;

		for (int seg = 0; seg < numSegmentsPerCell; seg++) {
			float s = sigmoid(sums[seg]);

			maximum = fmax(maximum, s);

			write_imagef(segmentStates, (int4)(columnPosition.x, columnPosition.y, ci * numSegmentsPerCell + seg, 0), (float4)(s, 0.0f, 0.0f, 0.0f));
		}

		write_imagef(cellPredictions, (int4)(columnPosition.x, columnPosition.y, ci, 0), (float4)(maximum > 0.5f ? 1.0f : 0.0f, maximum, 0.0f, 0.0f));
	}
}

// Same as layerCellPredict for a layer without next-layer context: every window
// position owns only cellsInColumn * numSegmentsPerCell weight slots.
// Requires numSegmentsPerCell <= MAX_SEGMENTS_PER_CELL (not checked here).
// cellStatesPrev is not used.
void kernel layerCellPredictLast(read_only image3d_t cellStates, read_only image3d_t cellStatesPrev, read_only image3d_t cellWeights,
	write_only image3d_t cellPredictions, write_only image3d_t segmentStates, int cellsInColumn, int2 layerSize, int2 lateralConnectionsRadii, int numSegmentsPerCell)
{
	const int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));

	for (int ci = 0; ci < cellsInColumn; ci++) {
		const int weightRow = ci + columnPosition.y * cellsInColumn;

		float sums[MAX_SEGMENTS_PER_CELL];

		for (int seg = 0; seg < numSegmentsPerCell; seg++)
			sums[seg] = 0.0f;

		int wi = 0;

		for (int dx = -lateralConnectionsRadii.x; dx <= lateralConnectionsRadii.x; dx++)
			for (int dy = -lateralConnectionsRadii.y; dy <= lateralConnectionsRadii.y; dy++) {
				const int2 source = columnPosition + (int2)(dx, dy);

				if (source.x < 0 || source.x >= layerSize.x || source.y < 0 || source.y >= layerSize.y) {
					wi += cellsInColumn * numSegmentsPerCell;

					continue;
				}

				for (int cio = 0; cio < cellsInColumn; cio++) {
					float connectionState = read_imagef(cellStates, (int4)(source.x, source.y, cio, 0)).x;

					for (int seg = 0; seg < numSegmentsPerCell; seg++) {
						sums[seg] += read_imagef(cellWeights, (int4)(columnPosition.x, weightRow, wi, 0)).x * connectionState;

						wi++;
					}
				}
			}

		float maximum = 0.0f;

		for (int seg = 0; seg < numSegmentsPerCell; seg++) {
			float s = sigmoid(sums[seg]);

			maximum = fmax(maximum, s);

			write_imagef(segmentStates, (int4)(columnPosition.x, columnPosition.y, ci * numSegmentsPerCell + seg, 0), (float4)(s, 0.0f, 0.0f, 0.0f));
		}

		write_imagef(cellPredictions, (int4)(columnPosition.x, columnPosition.y, ci, 0), (float4)(maximum > 0.5f ? 1.0f : 0.0f, maximum, 0.0f, 0.0f));
	}
}

// Column prediction = the largest cell prediction (.x) in the column, never below 0.
// cellStates is not used.
void kernel layerColumnPrediction(read_only image3d_t cellPredictions, read_only image3d_t cellStates, write_only image2d_t columnPredictions, int cellsInColumn) {
	const int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));

	float strongest = 0.0f;

	for (int ci = 0; ci < cellsInColumn; ci++)
		strongest = fmax(strongest, read_imagef(cellPredictions, (int4)(columnPosition.x, columnPosition.y, ci, 0)).x);

	write_imagef(columnPredictions, columnPosition, (float4)(strongest, 0.0f, 0.0f, 0.0f));
}

// Adds alpha * (the .y component of the cell's previous state texel) to every cell's
// previous Q value and stores the result in cellQValues.x.
void kernel layerAssignQ(read_only image3d_t cellQValuesPrev, read_only image3d_t cellStatesPrev, write_only image3d_t cellQValues,
	int cellsInColumn, float alpha)
{
	const int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));

	for (int ci = 0; ci < cellsInColumn; ci++) {
		const int4 cellPosition = (int4)(columnPosition.x, columnPosition.y, ci, 0);

		float qPrev = read_imagef(cellQValuesPrev, cellPosition).x;
		float cellEligibility = read_imagef(cellStatesPrev, cellPosition).y;

		write_imagef(cellQValues, cellPosition, (float4)(qPrev + cellEligibility * alpha, 0.0f, 0.0f, 0.0f));
	}
}

void kernel layerColumnQ(read_only image3d_t cellQValuesPrev, read_only image3d_t cellStatesPrev, read_only image3d_t cellStates, read_only image2d_t columnStates, read_only image2d_t columnStatesNext, read_only image2d_t columnQValuesNext, write_only image2d_t columnQValues,
	int cellsInColumn, float2 layerSizeMinusOneInv, int2 nextLayerSize, int2 nextLayerSizeMinusOne)
{
	int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));

	float2 columnPositionNormalized =
(float2)(columnPosition.x * layerSizeMinusOneInv.x, columnPosition.y * layerSizeMinusOneInv.y);

	// Remainder of layerColumnQ: the column's Q is the state-weighted mean of its cells'
	// previous Q values, with the divisor kept at or above minDivisor.
	// columnPositionNormalized and the next-layer arguments are not used by this computation.
	float weightedQ = 0.0f;
	float totalState = 0.0f;

	for (int ci = 0; ci < cellsInColumn; ci++) {
		const int4 cellPosition = (int4)(columnPosition.x, columnPosition.y, ci, 0);

		float state = read_imagef(cellStates, cellPosition).x;
		float cellQ = read_imagef(cellQValuesPrev, cellPosition).x;

		weightedQ += state * cellQ;
		totalState += state;
	}

	write_imagef(columnQValues, columnPosition, (float4)(weightedQ / fmax(minDivisor, totalState), 0.0f, 0.0f, 0.0f));
}

// Column Q for the last layer: state-weighted mean of the cells' previous Q values,
// with the divisor kept at or above minDivisor. cellStatesPrev is not used.
void kernel layerColumnQLast(read_only image3d_t cellQValuesPrev, read_only image3d_t cellStatesPrev, read_only image3d_t cellStates, write_only image2d_t columnQValues,
	int cellsInColumn)
{
	const int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));

	float weightedQ = 0.0f;
	float totalState = 0.0f;

	for (int ci = 0; ci < cellsInColumn; ci++) {
		const int4 cellPosition = (int4)(columnPosition.x, columnPosition.y, ci, 0);

		float state = read_imagef(cellStates, cellPosition).x;
		float cellQ = read_imagef(cellQValuesPrev, cellPosition).x;

		weightedQ += state * cellQ;
		totalState += state;
	}

	write_imagef(columnQValues, columnPosition, (float4)(weightedQ / fmax(minDivisor, totalState), 0.0f, 0.0f, 0.0f));
}

// Writes randFloat(...) * (maxBias - minBias) + minBias into inputBiases.x for one input texel
// (presumably randFloat yields values in [0, 1] - confirm against its definition).
void kernel initializePartThree(write_only image2d_t inputBiases, uint2 seed, float minBias, float maxBias) {
	const int2 inputPosition = (int2)(get_global_id(0), get_global_id(1));

	uint2 rngState = seed + (uint2)(get_global_id(0), get_global_id(1)) * 130;

	float bias = randFloat(&rngState) * (maxBias - minBias) + minBias;

	write_imagef(inputBiases, inputPosition, (float4)(bias, 0.0f, 0.0f, 0.0f));
}

// Reconstructs one input texel from the column states: for every column within
// reverseReceptiveFieldRadius of the texel's projected position whose receptive field
// (radius sdrReceptiveFieldRadius around its integer field centre) contains the texel,
// adds columnState * weight, then adds the texel's bias. The weight slot is
// (radius.y + rdy) + (radius.x + rdx) * (2 * radius.y + 1), the same dx-outer / dy-inner
// numbering the activation kernel uses.
void kernel reconstructInput(read_only image3d_t columnFeedForwardWeights, read_only image2d_t inputBiases, read_only image2d_t columnStates, write_only image2d_t reconstruction,
	int2 reverseReceptiveFieldRadius, int2 sdrReceptiveFieldRadius, int2 inputSizeMinusOne, float2 inputSizeMinusOneInv, int2 sdrSize, int2 sdrSizeMinusOne, float2 sdrSizeMinusOneInv)
{
	const int2 inputPosition = (int2)(get_global_id(0), get_global_id(1));

	// Projected column position, truncated to an integer BEFORE the window offset is
	// added. Adding the offset to the fractional centre and converting afterwards rounds
	// toward zero, so positions in (-1, 0) would map to column 0 and that column would
	// be visited (and summed) twice.
	const int2 sdrCenter = (int2)((int)(inputPosition.x * inputSizeMinusOneInv.x * sdrSizeMinusOne.x),
		(int)(inputPosition.y * inputSizeMinusOneInv.y * sdrSizeMinusOne.y));

	const int fieldHeight = sdrReceptiveFieldRadius.y * 2 + 1;

	float sum = 0.0f;

	for (int dx = -reverseReceptiveFieldRadius.x; dx <= reverseReceptiveFieldRadius.x; dx++)
		for (int dy = -reverseReceptiveFieldRadius.y; dy <= reverseReceptiveFieldRadius.y; dy++) {
			const int2 sdrPosition = sdrCenter + (int2)(dx, dy);

			if (sdrPosition.x < 0 || sdrPosition.x >= sdrSize.x || sdrPosition.y < 0 || sdrPosition.y >= sdrSize.y)
				continue;

			// Centre of that column's receptive field in input texels
			const int2 fieldCenter = (int2)((int)(sdrPosition.x * sdrSizeMinusOneInv.x * inputSizeMinusOne.x),
				(int)(sdrPosition.y * sdrSizeMinusOneInv.y * inputSizeMinusOne.y));

			// Offset of this texel inside the field; outside the radius means not connected
			const int rdx = inputPosition.x - fieldCenter.x;
			const int rdy = inputPosition.y - fieldCenter.y;

			if (rdx < -sdrReceptiveFieldRadius.x || rdx > sdrReceptiveFieldRadius.x || rdy < -sdrReceptiveFieldRadius.y || rdy > sdrReceptiveFieldRadius.y)
				continue;

			const int weightIndex = (sdrReceptiveFieldRadius.y + rdy) + (sdrReceptiveFieldRadius.x + rdx) * fieldHeight;

			float source = read_imagef(columnStates, sdrPosition).x;
			float weight = read_imagef(columnFeedForwardWeights, (int4)(sdrPosition.x, sdrPosition.y, weightIndex, 0)).x;

			sum += source * weight;
		}

	sum += read_imagef(inputBiases, inputPosition).x;

	write_imagef(reconstruction, inputPosition, (float4)(sum, 0.0f, 0.0f, 0.0f));
}

// Momentum update of one input bias texel (.x = bias, .y = previous delta):
//   delta = prevDelta * columnMomentum + gamma * (input - reconstruction)
void kernel inputBiasUpdate(read_only image2d_t inputs, read_only image2d_t reconstruction, read_only image2d_t inputBiasesPrev, write_only image2d_t inputBiases,
	float gamma)
{
	const int2 inputPosition = (int2)(get_global_id(0), get_global_id(1));

	const float2 prevBias = read_imagef(inputBiasesPrev, inputPosition).xy;

	float residual = read_imagef(inputs, inputPosition).x - read_imagef(reconstruction, inputPosition).x;
	float delta = prevBias.y * columnMomentum + gamma * residual;

	write_imagef(inputBiases, inputPosition, (float4)(prevBias.x + delta, delta, 0.0f, 0.0f));
}

// Horizontal 9-tap blur. Taps are kernelWidth apart in normalised coordinates and are
// read through defaultNormalizedSampler (declared outside this part of the file).
// The tap weights sum to 0.98, not 1.
void kernel gaussianBlurX(read_only image2d_t source, write_only image2d_t destination, float2 sizeInv, float kernelWidth) {
	const float tapWeights[9] = { 0.05f, 0.09f, 0.12f, 0.15f, 0.16f, 0.15f, 0.12f, 0.09f, 0.05f };

	const int2 destinationPosition = (int2)(get_global_id(0), get_global_id(1));
	const float2 center = (float2)(destinationPosition.x * sizeInv.x, destinationPosition.y * sizeInv.y);

	float4 sum = (float4)(0.0f, 0.0f, 0.0f, 0.0f);

	// Accumulate from the leftmost tap (-4) to the rightmost (+4)
	for (int tap = 0; tap < 9; tap++) {
		float offset = (float)(tap - 4) * kernelWidth;

		sum += read_imagef(source, defaultNormalizedSampler, (float2)(center.x + offset, center.y)) * tapWeights[tap];
	}

	write_imagef(destination, destinationPosition, sum);
}

// Vertical counterpart of gaussianBlurX: same nine weights, taps offset along y.
void kernel gaussianBlurY(read_only image2d_t source, write_only image2d_t destination, float2 sizeInv, float kernelWidth) {
	const float tapWeights[9] = { 0.05f, 0.09f, 0.12f, 0.15f, 0.16f, 0.15f, 0.12f, 0.09f, 0.05f };

	const int2 destinationPosition = (int2)(get_global_id(0), get_global_id(1));
	const float2 center = (float2)(destinationPosition.x * sizeInv.x, destinationPosition.y * sizeInv.y);

	float4 sum = (float4)(0.0f, 0.0f, 0.0f, 0.0f);

	// Accumulate from the topmost tap (-4) to the bottommost (+4)
	for (int tap = 0; tap < 9; tap++) {
		float offset = (float)(tap - 4) * kernelWidth;

		sum += read_imagef(source, defaultNormalizedSampler, (float2)(center.x, center.y + offset)) * tapWeights[tap];
	}

	write_imagef(destination, destinationPosition, sum);
}
-------------------------------------------------------------------------------- /ContinuousHTMGPU/source/htm/HTMRL.cpp: -------------------------------------------------------------------------------- 1 | #include "HTMRL.h" 2 | 3 | #include 4 | 5 | using namespace htm; 6 | 7 | void HTMRL::createRandom(sys::ComputeSystem &cs, sys::ComputeProgram &program, int inputWidth, int inputHeight, int reconstructionReceptiveRadius, const std::vector &layerDescs, const std::vector &inputTypes, float minInitWeight, float maxInitWeight, float minInitCenter, float maxInitCenter, std::mt19937 &generator) { 8 | struct Uint2 { 9 | unsigned int _x, _y; 10 | }; 11 | 12 | _addReplaySampleStepCounter = 0; 13 | 14 | _inputWidth = inputWidth; 15 | _inputHeight = inputHeight; 16 | 17 | _layerDescs = layerDescs; 18 | 19 | _layers.resize(_layerDescs.size()); 20 | 21 | _inputTypes = inputTypes; 22 | 23 | 
std::uniform_real_distribution weightDist(minInitWeight, maxInitWeight); 24 | std::uniform_real_distribution actionDist(0.0f, 1.0f); 25 | 26 | _prevMaxQ = 0.0f; 27 | _prevValue = 0.0f; 28 | _prevPrevValue = 0.0f; 29 | _prevQ = 0.0f; 30 | _prevTDError = 0.0f; 31 | 32 | cl::Kernel initPartOneKernel = cl::Kernel(program.getProgram(), "initializePartOne"); 33 | cl::Kernel initPartTwoKernel = cl::Kernel(program.getProgram(), "initializePartTwo"); 34 | cl::Kernel initPartThreeKernel = cl::Kernel(program.getProgram(), "initializePartThree"); 35 | 36 | _input.clear(); 37 | _input.resize(_inputWidth * _inputHeight); 38 | 39 | _output.clear(); 40 | _output.assign(_inputWidth * _inputHeight, 0.0f); 41 | 42 | _prediction.clear(); 43 | _prediction.assign(_inputWidth * _inputHeight, 0.0f); 44 | 45 | _exploratoryOutput.clear(); 46 | _exploratoryOutput.assign(_inputWidth * _inputHeight, 0.0f); 47 | 48 | _prevOutput.clear(); 49 | _prevOutput.assign(_inputWidth * _inputHeight, 0.0f); 50 | 51 | _prevOutputExploratory.clear(); 52 | _prevOutputExploratory.assign(_inputWidth * _inputHeight, 0.0f); 53 | 54 | _prevInput.clear(); 55 | _prevInput.assign(_inputWidth * _inputHeight, 0.0f); 56 | 57 | // Initialize action portions randomly 58 | for (int i = 0; i < _input.size(); i++) 59 | if (_inputTypes[i] == _action) { 60 | float value = actionDist(generator); 61 | 62 | _input[i] = value; 63 | 64 | _exploratoryOutput[i] = value; 65 | 66 | _prevOutput[i] = value; 67 | 68 | _prevOutputExploratory[i] = value; 69 | 70 | _prevInput[i] = value; 71 | } 72 | 73 | _inputImage = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _inputWidth, _inputHeight); 74 | 75 | _reconstructedPrediction = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _inputWidth, _inputHeight); 76 | 77 | int prevWidth = _inputWidth; 78 | int prevHeight = _inputHeight; 79 | int prevCellsPerColumn = 1; 80 | 81 | for (int l = 0; l < _layers.size(); l++) { 82 | 
initLayer(cs, initPartOneKernel, initPartTwoKernel, initPartThreeKernel, prevWidth, prevHeight, prevCellsPerColumn, _layers[l], _layerDescs[l], l == _layers.size() - 1, minInitWeight, maxInitWeight, minInitCenter, maxInitCenter, minInitWeight, maxInitWeight, generator); 83 | 84 | prevWidth = _layerDescs[l]._width; 85 | prevHeight = _layerDescs[l]._height; 86 | prevCellsPerColumn = _layerDescs[l]._cellsInColumn; 87 | } 88 | 89 | _layerColumnActivateKernel = cl::Kernel(program.getProgram(), "layerColumnActivate"); 90 | _layerColumnInhibitKernel = cl::Kernel(program.getProgram(), "layerColumnInhibit"); 91 | _layerCellActivateKernel = cl::Kernel(program.getProgram(), "layerCellActivate"); 92 | _layerCellWeightUpdateKernel = cl::Kernel(program.getProgram(), "layerCellWeightUpdate"); 93 | _layerCellWeightUpdateLastKernel = cl::Kernel(program.getProgram(), "layerCellWeightUpdateLast"); 94 | _layerCellPredictKernel = cl::Kernel(program.getProgram(), "layerCellPredict"); 95 | _layerCellPredictLastKernel = cl::Kernel(program.getProgram(), "layerCellPredictLast"); 96 | _layerColumnWeightUpdateKernel = cl::Kernel(program.getProgram(), "layerColumnWeightUpdate"); 97 | _layerColumnPredictionKernel = cl::Kernel(program.getProgram(), "layerColumnPrediction"); 98 | _layerColumnQKernel = cl::Kernel(program.getProgram(), "layerColumnQ"); 99 | _layerColumnQLastKernel = cl::Kernel(program.getProgram(), "layerColumnQLast"); 100 | _layerAssignQKernel = cl::Kernel(program.getProgram(), "layerAssignQ"); 101 | 102 | _gaussianBlurXKernel = cl::Kernel(program.getProgram(), "gaussianBlurX"); 103 | _gaussianBlurYKernel = cl::Kernel(program.getProgram(), "gaussianBlurY"); 104 | 105 | _reconstructInputKernel = cl::Kernel(program.getProgram(), "reconstructInput"); 106 | _inputBiasUpdateKernel = cl::Kernel(program.getProgram(), "inputBiasUpdate"); 107 | } 108 | 109 | void HTMRL::initLayer(sys::ComputeSystem &cs, cl::Kernel &initPartOneKernel, cl::Kernel &initPartTwoKernel, cl::Kernel 
&initPartThreeKernel, int inputWidth, int inputHeight, int inputCellsPerColumn, Layer &layer, const LayerDesc &layerDesc, bool isTopmost, float minInitWeight, float maxInitWeight, float minInitCenter, float maxInitCenter, float minInitWidth, float maxInitWidth, std::mt19937 &generator) { 110 | struct Uint2 { 111 | unsigned int _x, _y; 112 | }; 113 | 114 | struct Float2 { 115 | float _x, _y; 116 | }; 117 | 118 | std::uniform_int_distribution uniformDist(0, 10000); 119 | 120 | int receptiveFieldSize = std::pow(layerDesc._receptiveFieldRadius * 2 + 1, 2) + 1; // + 1 for bias 121 | int lateralConnectionsSize; 122 | 123 | // If not the last layer, add weights for additional context from next layer 124 | if (isTopmost) 125 | lateralConnectionsSize = layerDesc._numSegmentsPerCell * (std::pow(layerDesc._lateralConnectionRadius * 2 + 1, 2) * (layerDesc._cellsInColumn) + 1); // + 1 for bias 126 | else 127 | lateralConnectionsSize = layerDesc._numSegmentsPerCell * (std::pow(layerDesc._lateralConnectionRadius * 2 + 1, 2) * (layerDesc._cellsInColumn + 1) + 1); // + 1 for bias 128 | 129 | layer._columnActivations = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height); 130 | 131 | layer._columnStates = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height); 132 | layer._columnStatesPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height); 133 | 134 | layer._columnFeedForwardWeights = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height, receptiveFieldSize); 135 | layer._columnFeedForwardWeightsPrev = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height, receptiveFieldSize); 136 | 137 | layer._cellStates = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, 
cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height, layerDesc._cellsInColumn); 138 | layer._cellStatesPrev = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height, layerDesc._cellsInColumn); 139 | 140 | layer._segmentStatesPrev = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height, layerDesc._cellsInColumn * layerDesc._numSegmentsPerCell); 141 | layer._segmentStates = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height, layerDesc._cellsInColumn * layerDesc._numSegmentsPerCell); 142 | 143 | //layer._segmentWeightsPrev = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height, layerDesc._cellsInColumn * layerDesc._numSegmentsPerCell); 144 | //layer._segmentWeights = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height, layerDesc._cellsInColumn * layerDesc._numSegmentsPerCell); 145 | 146 | layer._cellQValues = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height, layerDesc._cellsInColumn); 147 | layer._cellQValuesPrev = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height, layerDesc._cellsInColumn); 148 | 149 | layer._columnQValues = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height); 150 | 151 | layer._columnPrevValues = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height); 152 | layer._columnPrevValuesPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height); 153 | 154 | layer._columnTdErrors = 
cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height); 155 | 156 | layer._cellPredictions = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height, layerDesc._cellsInColumn); 157 | layer._cellPredictionsPrev = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height, layerDesc._cellsInColumn); 158 | 159 | layer._cellWeights = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height * layerDesc._cellsInColumn, lateralConnectionsSize); 160 | layer._cellWeightsPrev = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height * layerDesc._cellsInColumn, lateralConnectionsSize); 161 | 162 | layer._columnPredictions = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height); 163 | layer._columnPredictionsPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height); 164 | 165 | //layer._blurPing = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height); 166 | //layer._blurPong = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height); 167 | 168 | layer._reconstruction = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), inputWidth, inputHeight); 169 | 170 | layer._inputBiases = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), inputWidth, inputHeight); 171 | layer._inputBiasesPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), inputWidth, inputHeight); 172 | 173 | { 174 | cl::size_t<3> origin; 175 | cl::size_t<3> region; 176 | 
177 | origin[0] = 0; 178 | origin[1] = 0; 179 | origin[2] = 0; 180 | 181 | region[0] = inputWidth; 182 | region[1] = inputHeight; 183 | region[2] = 1; 184 | 185 | cl_uint4 fillColor; 186 | 187 | fillColor.x = 0; 188 | 189 | cs.getQueue().enqueueFillImage(layer._reconstruction, fillColor, origin, region); 190 | } 191 | 192 | 193 | Uint2 seed1; 194 | seed1._x = uniformDist(generator); 195 | seed1._y = uniformDist(generator); 196 | 197 | initPartOneKernel.setArg(0, layer._columnActivations); 198 | initPartOneKernel.setArg(1, layer._columnStates); 199 | initPartOneKernel.setArg(2, layer._columnFeedForwardWeights); 200 | initPartOneKernel.setArg(3, layer._columnPrevValues); 201 | initPartOneKernel.setArg(4, layerDesc._cellsInColumn); 202 | initPartOneKernel.setArg(5, receptiveFieldSize); 203 | initPartOneKernel.setArg(6, lateralConnectionsSize); 204 | initPartOneKernel.setArg(7, seed1); 205 | initPartOneKernel.setArg(8, minInitCenter); 206 | initPartOneKernel.setArg(9, maxInitCenter); 207 | 208 | cs.getQueue().enqueueNDRangeKernel(initPartOneKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height)); 209 | 210 | Uint2 seed2; 211 | seed2._x = uniformDist(generator); 212 | seed2._y = uniformDist(generator); 213 | 214 | initPartTwoKernel.setArg(0, layer._cellStates); 215 | initPartTwoKernel.setArg(1, layer._segmentStates); 216 | initPartTwoKernel.setArg(2, layer._cellWeights); 217 | initPartTwoKernel.setArg(3, layer._cellPredictions); 218 | initPartTwoKernel.setArg(4, layer._cellQValues); 219 | initPartTwoKernel.setArg(5, layerDesc._cellsInColumn); 220 | initPartTwoKernel.setArg(6, receptiveFieldSize); 221 | initPartTwoKernel.setArg(7, lateralConnectionsSize); 222 | initPartTwoKernel.setArg(8, layerDesc._numSegmentsPerCell); 223 | initPartTwoKernel.setArg(9, seed2); 224 | initPartTwoKernel.setArg(10, minInitWeight); 225 | initPartTwoKernel.setArg(11, maxInitWeight); 226 | 227 | cs.getQueue().enqueueNDRangeKernel(initPartTwoKernel, cl::NullRange, 
cl::NDRange(layerDesc._width, layerDesc._height)); 228 | 229 | Uint2 seed3; 230 | seed3._x = uniformDist(generator); 231 | seed3._y = uniformDist(generator); 232 | 233 | initPartThreeKernel.setArg(0, layer._inputBiases); 234 | initPartThreeKernel.setArg(1, seed2); 235 | initPartThreeKernel.setArg(2, minInitWeight); 236 | initPartThreeKernel.setArg(3, maxInitWeight); 237 | 238 | cs.getQueue().enqueueNDRangeKernel(initPartThreeKernel, cl::NullRange, cl::NDRange(inputWidth, inputHeight)); 239 | 240 | { 241 | cl::size_t<3> origin; 242 | cl::size_t<3> region; 243 | 244 | origin[0] = 0; 245 | origin[1] = 0; 246 | origin[2] = 0; 247 | 248 | region[0] = layerDesc._width; 249 | region[1] = layerDesc._height; 250 | region[2] = 1; 251 | 252 | cs.getQueue().enqueueCopyImage(layer._columnStates, layer._columnStatesPrev, origin, origin, region); 253 | } 254 | 255 | { 256 | cl::size_t<3> origin; 257 | cl::size_t<3> region; 258 | 259 | origin[0] = 0; 260 | origin[1] = 0; 261 | origin[2] = 0; 262 | 263 | region[0] = layerDesc._width; 264 | region[1] = layerDesc._height; 265 | region[2] = 1; 266 | 267 | cs.getQueue().enqueueCopyImage(layer._columnPredictions, layer._columnPredictionsPrev, origin, origin, region); 268 | } 269 | 270 | { 271 | cl::size_t<3> origin; 272 | cl::size_t<3> region; 273 | 274 | origin[0] = 0; 275 | origin[1] = 0; 276 | origin[2] = 0; 277 | 278 | region[0] = layerDesc._width; 279 | region[1] = layerDesc._height; 280 | region[2] = 1; 281 | 282 | cs.getQueue().enqueueCopyImage(layer._columnPrevValues, layer._columnPrevValuesPrev, origin, origin, region); 283 | } 284 | 285 | { 286 | cl::size_t<3> origin; 287 | cl::size_t<3> region; 288 | 289 | origin[0] = 0; 290 | origin[1] = 0; 291 | origin[2] = 0; 292 | 293 | region[0] = layerDesc._width; 294 | region[1] = layerDesc._height; 295 | region[2] = receptiveFieldSize; 296 | 297 | cs.getQueue().enqueueCopyImage(layer._columnFeedForwardWeights, layer._columnFeedForwardWeightsPrev, origin, origin, region); 298 | } 299 | 
300 | { 301 | cl::size_t<3> origin; 302 | cl::size_t<3> region; 303 | 304 | origin[0] = 0; 305 | origin[1] = 0; 306 | origin[2] = 0; 307 | 308 | region[0] = layerDesc._width; 309 | region[1] = layerDesc._height; 310 | region[2] = layerDesc._cellsInColumn; 311 | 312 | cs.getQueue().enqueueCopyImage(layer._cellStates, layer._cellStatesPrev, origin, origin, region); 313 | } 314 | 315 | { 316 | cl::size_t<3> origin; 317 | cl::size_t<3> region; 318 | 319 | origin[0] = 0; 320 | origin[1] = 0; 321 | origin[2] = 0; 322 | 323 | region[0] = layerDesc._width; 324 | region[1] = layerDesc._height; 325 | region[2] = layerDesc._cellsInColumn * layerDesc._numSegmentsPerCell; 326 | 327 | cs.getQueue().enqueueCopyImage(layer._segmentStates, layer._segmentStatesPrev, origin, origin, region); 328 | } 329 | 330 | /*{ 331 | cl::size_t<3> origin; 332 | cl::size_t<3> region; 333 | 334 | origin[0] = 0; 335 | origin[1] = 0; 336 | origin[2] = 0; 337 | 338 | region[0] = layerDesc._width; 339 | region[1] = layerDesc._height; 340 | region[2] = layerDesc._cellsInColumn * layerDesc._numSegmentsPerCell; 341 | 342 | cs.getQueue().enqueueCopyImage(layer._segmentWeights, layer._segmentWeightsPrev, origin, origin, region); 343 | }*/ 344 | 345 | { 346 | cl::size_t<3> origin; 347 | cl::size_t<3> region; 348 | 349 | origin[0] = 0; 350 | origin[1] = 0; 351 | origin[2] = 0; 352 | 353 | region[0] = layerDesc._width; 354 | region[1] = layerDesc._height; 355 | region[2] = layerDesc._cellsInColumn; 356 | 357 | cs.getQueue().enqueueCopyImage(layer._cellQValues, layer._cellQValuesPrev, origin, origin, region); 358 | } 359 | 360 | { 361 | cl::size_t<3> origin; 362 | cl::size_t<3> region; 363 | 364 | origin[0] = 0; 365 | origin[1] = 0; 366 | origin[2] = 0; 367 | 368 | region[0] = layerDesc._width; 369 | region[1] = layerDesc._height * layerDesc._cellsInColumn; 370 | region[2] = lateralConnectionsSize; 371 | 372 | cs.getQueue().enqueueCopyImage(layer._cellWeights, layer._cellWeightsPrev, origin, origin, region); 
373 | } 374 | 375 | { 376 | cl::size_t<3> origin; 377 | cl::size_t<3> region; 378 | 379 | origin[0] = 0; 380 | origin[1] = 0; 381 | origin[2] = 0; 382 | 383 | region[0] = layerDesc._width; 384 | region[1] = layerDesc._height; 385 | region[2] = layerDesc._cellsInColumn; 386 | 387 | cs.getQueue().enqueueCopyImage(layer._cellPredictions, layer._cellPredictionsPrev, origin, origin, region); 388 | } 389 | 390 | { 391 | cl::size_t<3> origin; 392 | cl::size_t<3> region; 393 | 394 | origin[0] = 0; 395 | origin[1] = 0; 396 | origin[2] = 0; 397 | 398 | region[0] = inputWidth; 399 | region[1] = inputHeight; 400 | region[2] = 1; 401 | 402 | cs.getQueue().enqueueCopyImage(layer._inputBiases, layer._inputBiasesPrev, origin, origin, region); 403 | } 404 | } 405 | 406 | void HTMRL::stepBegin(sys::ComputeSystem &cs, int addReplaySampleSteps, int maxReplayChainSize) { 407 | for (int l = 0; l < _layers.size(); l++) { 408 | std::swap(_layers[l]._columnStates, _layers[l]._columnStatesPrev); 409 | std::swap(_layers[l]._columnPredictions, _layers[l]._columnPredictionsPrev); 410 | std::swap(_layers[l]._columnFeedForwardWeights, _layers[l]._columnFeedForwardWeightsPrev); 411 | std::swap(_layers[l]._columnPrevValues, _layers[l]._columnPrevValuesPrev); 412 | std::swap(_layers[l]._cellStates, _layers[l]._cellStatesPrev); 413 | std::swap(_layers[l]._segmentStates, _layers[l]._segmentStatesPrev); 414 | //std::swap(_layers[l]._segmentWeights, _layers[l]._segmentWeightsPrev); 415 | std::swap(_layers[l]._cellQValues, _layers[l]._cellQValuesPrev); 416 | std::swap(_layers[l]._cellPredictions, _layers[l]._cellPredictionsPrev); 417 | std::swap(_layers[l]._cellWeights, _layers[l]._cellWeightsPrev); 418 | std::swap(_layers[l]._inputBiases, _layers[l]._inputBiasesPrev); 419 | } 420 | 421 | if (_addReplaySampleStepCounter >= addReplaySampleSteps) { 422 | _addReplaySampleStepCounter = 0; 423 | 424 | if (_inputReplayChain.size() < maxReplayChainSize) { 425 | cl::size_t<3> origin; 426 | cl::size_t<3> region; 
427 | 428 | origin[0] = 0; 429 | origin[1] = 0; 430 | origin[2] = 0; 431 | 432 | region[0] = _inputWidth; 433 | region[1] = _inputHeight; 434 | region[2] = 1; 435 | 436 | cl::Image2D newSample = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _inputWidth, _inputHeight); 437 | 438 | cs.getQueue().enqueueCopyImage(_inputImage, newSample, origin, origin, region); 439 | 440 | _inputReplayChain.push_back(newSample); 441 | } 442 | else { 443 | cl::size_t<3> origin; 444 | cl::size_t<3> region; 445 | 446 | origin[0] = 0; 447 | origin[1] = 0; 448 | origin[2] = 0; 449 | 450 | region[0] = _inputWidth; 451 | region[1] = _inputHeight; 452 | region[2] = 1; 453 | 454 | cl::Image2D temp = _inputReplayChain.back(); 455 | 456 | _inputReplayChain.pop_back(); 457 | 458 | cs.getQueue().enqueueCopyImage(_inputImage, temp, origin, origin, region); 459 | 460 | _inputReplayChain.push_back(temp); 461 | } 462 | } 463 | 464 | _addReplaySampleStepCounter++; 465 | } 466 | 467 | void HTMRL::spatialPoolLayer(sys::ComputeSystem &cs, cl::Image2D &prevLayerOutput, int prevLayerWidth, int prevLayerHeight, Layer &layer, const LayerDesc &layerDesc, float columnDecay, std::mt19937 &generator) { 468 | struct Uint2 { 469 | unsigned int _x, _y; 470 | }; 471 | 472 | struct Int2 { 473 | int _x, _y; 474 | }; 475 | 476 | struct Float2 { 477 | float _x, _y; 478 | }; 479 | 480 | std::uniform_int_distribution uniformDist(0, 10000); 481 | 482 | Uint2 seed1; 483 | seed1._x = uniformDist(generator); 484 | seed1._y = uniformDist(generator); 485 | 486 | Uint2 seed2; 487 | seed2._x = uniformDist(generator); 488 | seed2._y = uniformDist(generator); 489 | 490 | Uint2 seed3; 491 | seed3._x = uniformDist(generator); 492 | seed3._y = uniformDist(generator); 493 | 494 | Uint2 seed4; 495 | seed4._x = uniformDist(generator); 496 | seed4._y = uniformDist(generator); 497 | 498 | Int2 inputSize; 499 | inputSize._x = prevLayerWidth; 500 | inputSize._y = prevLayerHeight; 501 | 502 | Int2 layerSize; 
503 | layerSize._x = layerDesc._width; 504 | layerSize._y = layerDesc._height; 505 | 506 | Float2 layerSizeMinusOneInv; 507 | layerSizeMinusOneInv._x = 1.0f / (layerDesc._width - 1); 508 | layerSizeMinusOneInv._y = 1.0f / (layerDesc._height - 1); 509 | 510 | Float2 layerSizeInv; 511 | layerSizeInv._x = 1.0f / layerDesc._width; 512 | layerSizeInv._y = 1.0f / layerDesc._height; 513 | 514 | Int2 inputReceptiveFieldRadius; 515 | inputReceptiveFieldRadius._x = layerDesc._receptiveFieldRadius; 516 | inputReceptiveFieldRadius._y = layerDesc._receptiveFieldRadius; 517 | 518 | Int2 inputSizeMinusOne; 519 | inputSizeMinusOne._x = prevLayerWidth - 1; 520 | inputSizeMinusOne._y = prevLayerHeight - 1; 521 | 522 | // Activation 523 | _layerColumnActivateKernel.setArg(0, prevLayerOutput); 524 | _layerColumnActivateKernel.setArg(1, layer._columnFeedForwardWeightsPrev); 525 | _layerColumnActivateKernel.setArg(2, layer._columnStatesPrev); 526 | _layerColumnActivateKernel.setArg(3, layer._columnActivations); 527 | _layerColumnActivateKernel.setArg(4, layerSizeMinusOneInv); 528 | _layerColumnActivateKernel.setArg(5, inputReceptiveFieldRadius); 529 | _layerColumnActivateKernel.setArg(6, inputSize); 530 | _layerColumnActivateKernel.setArg(7, inputSizeMinusOne); 531 | _layerColumnActivateKernel.setArg(8, seed1); 532 | 533 | cs.getQueue().enqueueNDRangeKernel(_layerColumnActivateKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height)); 534 | 535 | Int2 layerInhibitionRadius; 536 | layerInhibitionRadius._x = layerDesc._inhibitionRadius; 537 | layerInhibitionRadius._y = layerDesc._inhibitionRadius; 538 | 539 | int receptiveFieldSize = std::pow(layerDesc._receptiveFieldRadius * 2 + 1, 2) + 1; 540 | 541 | // Inhibition 542 | _layerColumnInhibitKernel.setArg(0, layer._columnActivations); 543 | _layerColumnInhibitKernel.setArg(1, layer._columnStatesPrev); 544 | _layerColumnInhibitKernel.setArg(2, layer._columnFeedForwardWeightsPrev); 545 | _layerColumnInhibitKernel.setArg(3, 
layer._columnStates); 546 | _layerColumnInhibitKernel.setArg(4, layerSize); 547 | _layerColumnInhibitKernel.setArg(5, layerSizeInv); 548 | _layerColumnInhibitKernel.setArg(6, layerInhibitionRadius); 549 | _layerColumnInhibitKernel.setArg(7, receptiveFieldSize); 550 | 551 | cs.getQueue().enqueueNDRangeKernel(_layerColumnInhibitKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height)); 552 | 553 | Float2 inputSizeMinusOneInv; 554 | inputSizeMinusOneInv._x = 1.0f / (prevLayerWidth - 1); 555 | inputSizeMinusOneInv._y = 1.0f / (prevLayerHeight - 1); 556 | 557 | Int2 reconstructionReceptiveFieldRadii; 558 | reconstructionReceptiveFieldRadii._x = std::ceil(static_cast(layerDesc._width) / prevLayerWidth * layerDesc._receptiveFieldRadius); 559 | reconstructionReceptiveFieldRadii._y = std::ceil(static_cast(layerDesc._height) / prevLayerHeight * layerDesc._receptiveFieldRadius); 560 | 561 | Int2 layerSizeMinusOne; 562 | layerSizeMinusOne._x = layerDesc._width - 1; 563 | layerSizeMinusOne._y = layerDesc._height - 1; 564 | 565 | // Reconstruct 566 | _reconstructInputKernel.setArg(0, layer._columnFeedForwardWeightsPrev); 567 | _reconstructInputKernel.setArg(1, layer._inputBiasesPrev); 568 | _reconstructInputKernel.setArg(2, layer._columnStates); 569 | _reconstructInputKernel.setArg(3, layer._reconstruction); 570 | _reconstructInputKernel.setArg(4, reconstructionReceptiveFieldRadii); 571 | _reconstructInputKernel.setArg(5, inputReceptiveFieldRadius); 572 | _reconstructInputKernel.setArg(6, inputSizeMinusOne); 573 | _reconstructInputKernel.setArg(7, inputSizeMinusOneInv); 574 | _reconstructInputKernel.setArg(8, layerSize); 575 | _reconstructInputKernel.setArg(9, layerSizeMinusOne); 576 | _reconstructInputKernel.setArg(10, layerSizeMinusOneInv); 577 | 578 | cs.getQueue().enqueueNDRangeKernel(_reconstructInputKernel, cl::NullRange, cl::NDRange(prevLayerWidth, prevLayerHeight)); 579 | } 580 | 581 | void HTMRL::cellActivateLayer(sys::ComputeSystem &cs, Layer &layer, 
const LayerDesc &layerDesc, float cellStateDecay, std::mt19937 &generator) { 582 | struct Uint2 { 583 | unsigned int _x, _y; 584 | }; 585 | 586 | struct Int2 { 587 | int _x, _y; 588 | }; 589 | 590 | struct Float2 { 591 | float _x, _y; 592 | }; 593 | 594 | std::uniform_int_distribution uniformDist(0, 10000); 595 | 596 | Uint2 seed; 597 | seed._x = uniformDist(generator); 598 | seed._y = uniformDist(generator); 599 | 600 | Int2 layerSize; 601 | layerSize._x = layerDesc._width; 602 | layerSize._y = layerDesc._height; 603 | 604 | Float2 layerSizeMinusOneInv; 605 | layerSizeMinusOneInv._x = 1.0f / (layerDesc._width - 1); 606 | layerSizeMinusOneInv._y = 1.0f / (layerDesc._height - 1); 607 | 608 | Float2 layerSizeInv; 609 | layerSizeInv._x = 1.0f / layerDesc._width; 610 | layerSizeInv._y = 1.0f / layerDesc._height; 611 | 612 | Int2 lateralConnectionRadii; 613 | lateralConnectionRadii._x = layerDesc._lateralConnectionRadius; 614 | lateralConnectionRadii._y = layerDesc._lateralConnectionRadius; 615 | 616 | // Cell activation 617 | _layerCellActivateKernel.setArg(0, layer._columnStates); 618 | _layerCellActivateKernel.setArg(1, layer._cellStatesPrev); 619 | _layerCellActivateKernel.setArg(2, layer._cellPredictionsPrev); 620 | _layerCellActivateKernel.setArg(3, layer._cellWeightsPrev); 621 | _layerCellActivateKernel.setArg(4, layer._columnPredictionsPrev); 622 | _layerCellActivateKernel.setArg(5, layer._cellStates); 623 | _layerCellActivateKernel.setArg(6, layerDesc._cellsInColumn); 624 | _layerCellActivateKernel.setArg(7, lateralConnectionRadii); 625 | _layerCellActivateKernel.setArg(8, cellStateDecay); 626 | _layerCellActivateKernel.setArg(9, seed); 627 | 628 | cs.getQueue().enqueueNDRangeKernel(_layerCellActivateKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height)); 629 | } 630 | 631 | void HTMRL::predictLayer(sys::ComputeSystem &cs, cl::Image2D &nextLayerPrediction, cl::Image2D &nextLayerPredictionPrev, int nextLayerWidth, int nextLayerHeight, Layer 
&layer, const LayerDesc &layerDesc, std::mt19937 &generator) { 632 | struct Int2 { 633 | int _x, _y; 634 | }; 635 | 636 | struct Float2 { 637 | float _x, _y; 638 | }; 639 | 640 | Int2 layerSize; 641 | layerSize._x = layerDesc._width; 642 | layerSize._y = layerDesc._height; 643 | 644 | Float2 layerSizeMinusOneInv; 645 | layerSizeMinusOneInv._x = 1.0f / (layerDesc._width - 1); 646 | layerSizeMinusOneInv._y = 1.0f / (layerDesc._height - 1); 647 | 648 | Int2 lateralConnectionRadii; 649 | lateralConnectionRadii._x = layerDesc._lateralConnectionRadius; 650 | lateralConnectionRadii._y = layerDesc._lateralConnectionRadius; 651 | 652 | // Cell prediction 653 | Int2 nextLayerSize; 654 | nextLayerSize._x = nextLayerWidth; 655 | nextLayerSize._y = nextLayerHeight; 656 | 657 | Int2 nextLayerSizeMinusOne; 658 | nextLayerSizeMinusOne._x = nextLayerWidth - 1; 659 | nextLayerSizeMinusOne._y = nextLayerHeight - 1; 660 | 661 | _layerCellPredictKernel.setArg(0, layer._cellStates); 662 | _layerCellPredictKernel.setArg(1, layer._cellStatesPrev); 663 | _layerCellPredictKernel.setArg(2, layer._cellWeights); 664 | _layerCellPredictKernel.setArg(3, nextLayerPrediction); 665 | _layerCellPredictKernel.setArg(4, nextLayerPredictionPrev); 666 | _layerCellPredictKernel.setArg(5, layer._cellPredictions); 667 | _layerCellPredictKernel.setArg(6, layer._segmentStates); 668 | _layerCellPredictKernel.setArg(7, layerDesc._cellsInColumn); 669 | _layerCellPredictKernel.setArg(8, layerSize); 670 | _layerCellPredictKernel.setArg(9, lateralConnectionRadii); 671 | _layerCellPredictKernel.setArg(10, layerDesc._numSegmentsPerCell); 672 | _layerCellPredictKernel.setArg(11, layerSizeMinusOneInv); 673 | _layerCellPredictKernel.setArg(12, nextLayerSize); 674 | _layerCellPredictKernel.setArg(13, nextLayerSizeMinusOne); 675 | 676 | cs.getQueue().enqueueNDRangeKernel(_layerCellPredictKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height)); 677 | 678 | // Column prediction 679 | 
_layerColumnPredictionKernel.setArg(0, layer._cellPredictions); 680 | _layerColumnPredictionKernel.setArg(1, layer._cellStates); 681 | _layerColumnPredictionKernel.setArg(2, layer._columnPredictions); 682 | _layerColumnPredictionKernel.setArg(3, layerDesc._cellsInColumn); 683 | 684 | cs.getQueue().enqueueNDRangeKernel(_layerColumnPredictionKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height)); 685 | } 686 | 687 | void HTMRL::predictLayerLast(sys::ComputeSystem &cs, Layer &layer, const LayerDesc &layerDesc, std::mt19937 &generator) { 688 | struct Int2 { 689 | int _x, _y; 690 | }; 691 | 692 | struct Float2 { 693 | float _x, _y; 694 | }; 695 | 696 | Int2 layerSize; 697 | layerSize._x = layerDesc._width; 698 | layerSize._y = layerDesc._height; 699 | 700 | Float2 layerSizeInv; 701 | layerSizeInv._x = 1.0f / layerDesc._width; 702 | layerSizeInv._y = 1.0f / layerDesc._height; 703 | 704 | Int2 lateralConnectionRadii; 705 | lateralConnectionRadii._x = layerDesc._lateralConnectionRadius; 706 | lateralConnectionRadii._y = layerDesc._lateralConnectionRadius; 707 | 708 | // Cell prediction 709 | _layerCellPredictLastKernel.setArg(0, layer._cellStates); 710 | _layerCellPredictLastKernel.setArg(1, layer._cellStatesPrev); 711 | _layerCellPredictLastKernel.setArg(2, layer._cellWeights); 712 | _layerCellPredictLastKernel.setArg(3, layer._cellPredictions); 713 | _layerCellPredictLastKernel.setArg(4, layer._segmentStates); 714 | _layerCellPredictLastKernel.setArg(5, layerDesc._cellsInColumn); 715 | _layerCellPredictLastKernel.setArg(6, layerSize); 716 | _layerCellPredictLastKernel.setArg(7, lateralConnectionRadii); 717 | _layerCellPredictLastKernel.setArg(8, layerDesc._numSegmentsPerCell); 718 | 719 | cs.getQueue().enqueueNDRangeKernel(_layerCellPredictLastKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height)); 720 | 721 | // Column prediction 722 | _layerColumnPredictionKernel.setArg(0, layer._cellPredictions); 723 | 
_layerColumnPredictionKernel.setArg(1, layer._cellStates); 724 | _layerColumnPredictionKernel.setArg(2, layer._columnPredictions); 725 | _layerColumnPredictionKernel.setArg(3, layerDesc._cellsInColumn); 726 | 727 | cs.getQueue().enqueueNDRangeKernel(_layerColumnPredictionKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height)); 728 | } 729 | 730 | void HTMRL::activate(std::vector &input, sys::ComputeSystem &cs, float reward, float alpha, float gamma, float columnDecay, float cellStateDecay, float columnConnectionAlpha, float columnConnectionBeta, float columnConnectionGamma, float cellConnectionAlpha, float cellConnectionBeta, float cellConnectionGamma, float cellConnectionTemperature, float cellWeightEligibilityDecay, int maxReplayChainSize, int numReplaySamples, int addSampleSteps, unsigned long seed) { 731 | // Create buffer from input 732 | { 733 | cl::size_t<3> origin; 734 | origin[0] = 0; 735 | origin[1] = 0; 736 | origin[2] = 0; 737 | 738 | cl::size_t<3> region; 739 | region[0] = _inputWidth; 740 | region[1] = _inputHeight; 741 | region[2] = 1; 742 | 743 | cs.getQueue().enqueueWriteImage(_inputImage, CL_TRUE, origin, region, 0, 0, &input[0]); 744 | } 745 | 746 | std::mt19937 generator(seed); 747 | 748 | learnSpatialReplay(cs, cellStateDecay, columnConnectionAlpha, columnConnectionBeta, columnConnectionGamma, maxReplayChainSize, numReplaySamples, seed); 749 | 750 | for (int l = 0; l < _layers.size(); l++) 751 | cellActivateLayer(cs, _layers[l], _layerDescs[l], cellStateDecay, generator); 752 | 753 | for (int l = _layers.size() - 1; l >= 0; l--) { 754 | if (l == _layers.size() - 1) 755 | determineLayerColumnQLast(cs, _layers[l], _layerDescs[l]); 756 | else 757 | determineLayerColumnQ(cs, _layers[l], _layerDescs[l], _layers[l + 1], _layerDescs[l + 1]); 758 | } 759 | 760 | float value = retreiveQ(cs); 761 | 762 | float tdError = reward + gamma * value - _prevValue; 763 | 764 | std::cout << "R: " << reward << "Q: " << reward + gamma * value << " 
T: " << tdError << std::endl; 765 | 766 | _prevValue = value; 767 | 768 | for (int l = _layers.size() - 1; l >= 0; l--) 769 | assignLayerQ(cs, _layers[l], _layerDescs[l], alpha * tdError); 770 | 771 | learnTemporal(cs, tdError, cellConnectionAlpha * (tdError > 0.0f ? 1.0f : 0.0f), cellConnectionBeta, cellConnectionGamma, cellConnectionTemperature, cellWeightEligibilityDecay, seed + 1); 772 | 773 | for (int l = _layers.size() - 1; l >= 0; l--) { 774 | if (l == _layers.size() - 1) 775 | predictLayerLast(cs, _layers[l], _layerDescs[l], generator); 776 | else 777 | predictLayer(cs, _layers[l + 1]._columnPredictions, _layers[l + 1]._columnPredictionsPrev, _layerDescs[l + 1]._width, _layerDescs[l + 1]._height, _layers[l], _layerDescs[l], generator); 778 | } 779 | 780 | /*pPrevLayerOutput = &_inputImage; 781 | prevLayerWidth = _inputWidth; 782 | prevLayerHeight = _inputHeight; 783 | 784 | for (int l = 0; l < _layers.size(); l++) { 785 | learnLayerSpatial(cs, _layers[l], *pPrevLayerOutput, prevLayerWidth, prevLayerHeight, _layerDescs[l], columnConnectionAlpha, columnConnectionBeta, columnConnectionGamma, generator); 786 | 787 | pPrevLayerOutput = &_layers[l]._columnStates; 788 | prevLayerWidth = _layerDescs[l]._width; 789 | prevLayerHeight = _layerDescs[l]._height; 790 | }*/ 791 | } 792 | 793 | void HTMRL::determineLayerColumnQ(sys::ComputeSystem &cs, Layer &layer, LayerDesc &layerDesc, Layer &nextLayer, LayerDesc &nextLayerDesc) { 794 | struct Int2 { 795 | int _x, _y; 796 | }; 797 | 798 | struct Float2 { 799 | float _x, _y; 800 | }; 801 | 802 | Int2 layerSize; 803 | layerSize._x = layerDesc._width; 804 | layerSize._y = layerDesc._height; 805 | 806 | Int2 nextLayerSize; 807 | nextLayerSize._x = nextLayerDesc._width; 808 | nextLayerSize._y = nextLayerDesc._height; 809 | 810 | Int2 nextLayerSizeMinusOne; 811 | nextLayerSizeMinusOne._x = nextLayerDesc._width - 1; 812 | nextLayerSizeMinusOne._y = nextLayerDesc._height - 1; 813 | 814 | Float2 layerSizeMinusOneInv; 815 | 
layerSizeMinusOneInv._x = 1.0f / (layerDesc._width - 1); 816 | layerSizeMinusOneInv._y = 1.0f / (layerDesc._height - 1); 817 | 818 | _layerColumnQKernel.setArg(0, layer._cellQValuesPrev); 819 | _layerColumnQKernel.setArg(1, layer._cellStatesPrev); 820 | _layerColumnQKernel.setArg(2, layer._cellStates); 821 | _layerColumnQKernel.setArg(3, layer._columnStates); 822 | _layerColumnQKernel.setArg(4, nextLayer._columnStates); 823 | _layerColumnQKernel.setArg(5, nextLayer._columnQValues); 824 | _layerColumnQKernel.setArg(6, layer._columnQValues); 825 | _layerColumnQKernel.setArg(7, layerDesc._cellsInColumn); 826 | _layerColumnQKernel.setArg(8, layerSizeMinusOneInv); 827 | _layerColumnQKernel.setArg(9, nextLayerSize); 828 | _layerColumnQKernel.setArg(10, nextLayerSizeMinusOne); 829 | 830 | cs.getQueue().enqueueNDRangeKernel(_layerColumnQKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height)); 831 | } 832 | 833 | void HTMRL::determineLayerColumnQLast(sys::ComputeSystem &cs, Layer &layer, LayerDesc &layerDesc) { 834 | struct Int2 { 835 | int _x, _y; 836 | }; 837 | 838 | struct Float2 { 839 | float _x, _y; 840 | }; 841 | 842 | _layerColumnQLastKernel.setArg(0, layer._cellQValuesPrev); 843 | _layerColumnQLastKernel.setArg(1, layer._cellStatesPrev); 844 | _layerColumnQLastKernel.setArg(2, layer._cellStates); 845 | _layerColumnQLastKernel.setArg(3, layer._columnQValues); 846 | _layerColumnQLastKernel.setArg(4, layerDesc._cellsInColumn); 847 | 848 | cs.getQueue().enqueueNDRangeKernel(_layerColumnQLastKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height)); 849 | } 850 | 851 | float HTMRL::retreiveQ(sys::ComputeSystem &cs) { 852 | float total = 0.0f; 853 | 854 | float sum = 0.0f; 855 | float divisor = 0.0f; 856 | 857 | for (int l = 0; l < _layers.size(); l++) { 858 | cl::size_t<3> origin; 859 | origin[0] = 0; 860 | origin[1] = 0; 861 | origin[2] = 0; 862 | 863 | cl::size_t<3> region; 864 | region[0] = _layerDescs[l]._width; 865 | region[1] = 
_layerDescs[l]._height; 866 | region[2] = 1; 867 | 868 | std::vector layerQ(_layerDescs[l]._width * _layerDescs[l]._height); 869 | 870 | cs.getQueue().enqueueReadImage(_layers[l]._columnQValues, CL_TRUE, origin, region, 0, 0, &layerQ[0]); 871 | 872 | /* Host buffer for this layer's column states: two values per column (indexed as i * 2 below). It must be sized from this layer's own height, matching `region`; sizing it from layer 0's height let the blocking read below overrun the buffer for taller layers. */ std::vector layerColumns(_layerDescs[l]._width * _layerDescs[l]._height * 2); 873 | 874 | cs.getQueue().enqueueReadImage(_layers[l]._columnStates, CL_TRUE, origin, region, 0, 0, &layerColumns[0]); 875 | 876 | /* Accumulate a weighted average of column Q values; the weight is the layer's _qImportance times the first column-state value. */ for (int i = 0; i < layerQ.size(); i++) { 877 | sum += layerQ[i] * _layerDescs[l]._qImportance * layerColumns[i * 2]; 878 | divisor += _layerDescs[l]._qImportance * layerColumns[i * 2]; 879 | } 880 | } 881 | 882 | /* All weights were zero: return 0 rather than dividing by zero. */ if (divisor == 0.0f) 883 | return 0.0f; 884 | 885 | return sum / divisor; 886 | } 887 | 888 | void HTMRL::assignLayerQ(sys::ComputeSystem &cs, Layer &layer, LayerDesc &layerDesc, float alpha) { 889 | struct Int2 { 890 | int _x, _y; 891 | }; 892 | 893 | struct Float2 { 894 | float _x, _y; 895 | }; 896 | 897 | Int2 layerSize; 898 | layerSize._x = layerDesc._width; 899 | layerSize._y = layerDesc._height; 900 | 901 | Float2 layerSizeMinusOneInv; 902 | layerSizeMinusOneInv._x = 1.0f / (layerDesc._width - 1); 903 | layerSizeMinusOneInv._y = 1.0f / (layerDesc._height - 1); 904 | 905 | _layerAssignQKernel.setArg(0, layer._cellQValuesPrev); 906 | _layerAssignQKernel.setArg(1, layer._cellStatesPrev); 907 | _layerAssignQKernel.setArg(2, layer._cellQValues); 908 | _layerAssignQKernel.setArg(3, layerDesc._cellsInColumn); 909 | _layerAssignQKernel.setArg(4, alpha); 910 | 911 | cs.getQueue().enqueueNDRangeKernel(_layerAssignQKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height)); 912 | } 913 | 914 | void HTMRL::learnLayerSpatial(sys::ComputeSystem &cs, Layer &layer, cl::Image2D &prevLayerOutput, int prevLayerWidth, int prevLayerHeight, const LayerDesc &layerDesc, float alpha, float beta, float gamma, std::mt19937 &generator) { 915 | struct Uint2 { 916 | unsigned int _x, _y; 917 | }; 918 | 919 |
struct Int2 { 920 | int _x, _y; 921 | }; 922 | 923 | struct Float2 { 924 | float _x, _y; 925 | }; 926 | 927 | std::uniform_int_distribution uniformDist(0, 10000); 928 | 929 | /* Per-call random seed pair handed to the weight-update kernel. */ Uint2 seed; 930 | seed._x = uniformDist(generator); 931 | seed._y = uniformDist(generator); 932 | 933 | /* Dimensions of the layer feeding this one (the "input"). */ Int2 inputSize; 934 | inputSize._x = prevLayerWidth; 935 | inputSize._y = prevLayerHeight; 936 | 937 | Int2 layerSize; 938 | layerSize._x = layerDesc._width; 939 | layerSize._y = layerDesc._height; 940 | 941 | Float2 layerSizeMinusOneInv; 942 | layerSizeMinusOneInv._x = 1.0f / (layerDesc._width - 1); 943 | layerSizeMinusOneInv._y = 1.0f / (layerDesc._height - 1); 944 | 945 | Int2 inputReceptiveFieldRadius; 946 | inputReceptiveFieldRadius._x = layerDesc._receptiveFieldRadius; 947 | inputReceptiveFieldRadius._y = layerDesc._receptiveFieldRadius; 948 | 949 | int receptiveFieldSize = std::pow(layerDesc._receptiveFieldRadius * 2 + 1, 2) + 1; 950 | 951 | Int2 influenceRadius; 952 | influenceRadius._x = layerDesc._columnInfluenceRadius; 953 | influenceRadius._y = layerDesc._columnInfluenceRadius; 954 | 955 | /* Must describe the input (previous layer), consistent with inputSize above and with how spatialPoolLayer builds the same value; it was previously derived from this layer's own size. NOTE(review): confirm against the kernel's use of this argument. */ Int2 inputSizeMinusOne; 956 | inputSizeMinusOne._x = prevLayerWidth - 1; 957 | inputSizeMinusOne._y = prevLayerHeight - 1; 958 | 959 | Int2 inhibitionRadii; 960 | inhibitionRadii._x = layerDesc._inhibitionRadius; 961 | inhibitionRadii._y = layerDesc._inhibitionRadius; 962 | 963 | // Column weight update 964 | _layerColumnWeightUpdateKernel.setArg(0, layer._reconstruction); 965 | _layerColumnWeightUpdateKernel.setArg(1, prevLayerOutput); 966 | _layerColumnWeightUpdateKernel.setArg(2, layer._columnActivations); 967 | _layerColumnWeightUpdateKernel.setArg(3, layer._columnStates); 968 | _layerColumnWeightUpdateKernel.setArg(4, layer._columnPredictions); 969 | _layerColumnWeightUpdateKernel.setArg(5, layer._columnFeedForwardWeightsPrev); 970 | _layerColumnWeightUpdateKernel.setArg(6, layer._columnFeedForwardWeights); 971 | _layerColumnWeightUpdateKernel.setArg(7, layerSize); 972 |
_layerColumnWeightUpdateKernel.setArg(8, layerSizeMinusOneInv); 973 | _layerColumnWeightUpdateKernel.setArg(9, inputReceptiveFieldRadius); 974 | _layerColumnWeightUpdateKernel.setArg(10, inhibitionRadii); 975 | _layerColumnWeightUpdateKernel.setArg(11, inputSize); 976 | _layerColumnWeightUpdateKernel.setArg(12, inputSizeMinusOne); 977 | _layerColumnWeightUpdateKernel.setArg(13, receptiveFieldSize); 978 | _layerColumnWeightUpdateKernel.setArg(14, alpha); 979 | _layerColumnWeightUpdateKernel.setArg(15, beta); 980 | _layerColumnWeightUpdateKernel.setArg(16, gamma); 981 | _layerColumnWeightUpdateKernel.setArg(17, seed); 982 | 983 | cs.getQueue().enqueueNDRangeKernel(_layerColumnWeightUpdateKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height)); 984 | 985 | Float2 inputSizeMinusOneInv; 986 | inputSizeMinusOneInv._x = 1.0f / (prevLayerWidth - 1); 987 | inputSizeMinusOneInv._y = 1.0f / (prevLayerHeight - 1); 988 | 989 | Int2 reconstructionReceptiveFieldRadii; 990 | reconstructionReceptiveFieldRadii._x = std::ceil(static_cast(layerDesc._width) / prevLayerWidth * layerDesc._receptiveFieldRadius); 991 | reconstructionReceptiveFieldRadii._y = std::ceil(static_cast(layerDesc._height) / prevLayerHeight * layerDesc._receptiveFieldRadius); 992 | 993 | Int2 layerSizeMinusOne; 994 | layerSizeMinusOne._x = layerDesc._width - 1; 995 | layerSizeMinusOne._y = layerDesc._height - 1; 996 | 997 | // Reconstruct 998 | _inputBiasUpdateKernel.setArg(0, prevLayerOutput); 999 | _inputBiasUpdateKernel.setArg(1, layer._reconstruction); 1000 | _inputBiasUpdateKernel.setArg(2, layer._inputBiasesPrev); 1001 | _inputBiasUpdateKernel.setArg(3, layer._inputBiases); 1002 | _inputBiasUpdateKernel.setArg(4, gamma); 1003 | 1004 | cs.getQueue().enqueueNDRangeKernel(_inputBiasUpdateKernel, cl::NullRange, cl::NDRange(prevLayerWidth, prevLayerHeight)); 1005 | } 1006 | 1007 | void HTMRL::learnLayerTemporal(sys::ComputeSystem &cs, Layer &layer, cl::Image2D &prevLayerOutput, int prevLayerWidth, 
int prevLayerHeight, cl::Image2D &nextLayerPrediction, int nextLayerWidth, int nextLayerHeight, const LayerDesc &layerDesc, float tdError, float cellConnectionAlpha, float cellConnectionBeta, float cellConnectionGamma, float cellConnectionTemperature, float cellWeightEligibilityDecay, std::mt19937 &generator) { 1008 | struct Uint2 { 1009 | unsigned int _x, _y; 1010 | }; 1011 | 1012 | struct Int2 { 1013 | int _x, _y; 1014 | }; 1015 | 1016 | struct Float2 { 1017 | float _x, _y; 1018 | }; 1019 | 1020 | std::uniform_int_distribution uniformDist(0, 10000); 1021 | 1022 | Uint2 seed; 1023 | seed._x = uniformDist(generator); 1024 | seed._y = uniformDist(generator); 1025 | 1026 | Int2 inputSize; 1027 | inputSize._x = prevLayerWidth; 1028 | inputSize._y = prevLayerHeight; 1029 | 1030 | Int2 layerSize; 1031 | layerSize._x = layerDesc._width; 1032 | layerSize._y = layerDesc._height; 1033 | 1034 | Float2 inputSizeInv; 1035 | inputSizeInv._x = 1.0f / prevLayerWidth; 1036 | inputSizeInv._y = 1.0f / prevLayerHeight; 1037 | 1038 | Float2 layerSizeMinusOneInv; 1039 | layerSizeMinusOneInv._x = 1.0f / (layerDesc._width - 1); 1040 | layerSizeMinusOneInv._y = 1.0f / (layerDesc._height - 1); 1041 | 1042 | Int2 inputReceptiveFieldRadius; 1043 | inputReceptiveFieldRadius._x = layerDesc._receptiveFieldRadius; 1044 | inputReceptiveFieldRadius._y = layerDesc._receptiveFieldRadius; 1045 | 1046 | Int2 layerReceptiveFieldRadius; 1047 | layerReceptiveFieldRadius._x = layerDesc._receptiveFieldRadius; 1048 | layerReceptiveFieldRadius._y = layerDesc._receptiveFieldRadius; 1049 | 1050 | Int2 lateralConnectionRadii; 1051 | lateralConnectionRadii._x = layerDesc._lateralConnectionRadius; 1052 | lateralConnectionRadii._y = layerDesc._lateralConnectionRadius; 1053 | 1054 | // Lateral weight update 1055 | Int2 nextLayerSize; 1056 | nextLayerSize._x = nextLayerWidth; 1057 | nextLayerSize._y = nextLayerHeight; 1058 | 1059 | Int2 nextLayerSizeMinusOne; 1060 | nextLayerSizeMinusOne._x = nextLayerWidth - 1; 
1061 | nextLayerSizeMinusOne._y = nextLayerHeight - 1; 1062 | 1063 | _layerCellWeightUpdateKernel.setArg(0, layer._columnStates); 1064 | _layerCellWeightUpdateKernel.setArg(1, layer._columnPredictionsPrev); 1065 | _layerCellWeightUpdateKernel.setArg(2, layer._cellPredictionsPrev); 1066 | _layerCellWeightUpdateKernel.setArg(3, layer._cellStates); 1067 | _layerCellWeightUpdateKernel.setArg(4, layer._cellStatesPrev); 1068 | _layerCellWeightUpdateKernel.setArg(5, nextLayerPrediction); 1069 | _layerCellWeightUpdateKernel.setArg(6, layer._segmentStatesPrev); 1070 | _layerCellWeightUpdateKernel.setArg(7, layer._cellWeightsPrev); 1071 | _layerCellWeightUpdateKernel.setArg(8, layer._cellWeights); 1072 | _layerCellWeightUpdateKernel.setArg(9, layerDesc._cellsInColumn); 1073 | _layerCellWeightUpdateKernel.setArg(10, layerSize); 1074 | _layerCellWeightUpdateKernel.setArg(11, lateralConnectionRadii); 1075 | _layerCellWeightUpdateKernel.setArg(12, layerDesc._numSegmentsPerCell); 1076 | _layerCellWeightUpdateKernel.setArg(13, layerSizeMinusOneInv); 1077 | _layerCellWeightUpdateKernel.setArg(14, nextLayerSize); 1078 | _layerCellWeightUpdateKernel.setArg(15, nextLayerSizeMinusOne); 1079 | _layerCellWeightUpdateKernel.setArg(16, tdError); 1080 | _layerCellWeightUpdateKernel.setArg(17, cellConnectionAlpha); 1081 | _layerCellWeightUpdateKernel.setArg(18, cellConnectionBeta); 1082 | _layerCellWeightUpdateKernel.setArg(19, cellConnectionGamma); 1083 | _layerCellWeightUpdateKernel.setArg(20, cellConnectionTemperature); 1084 | _layerCellWeightUpdateKernel.setArg(21, cellWeightEligibilityDecay); 1085 | 1086 | cs.getQueue().enqueueNDRangeKernel(_layerCellWeightUpdateKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height)); 1087 | } 1088 | 1089 | void HTMRL::learnLayerTemporalLast(sys::ComputeSystem &cs, Layer &layer, cl::Image2D &prevLayerOutput, int prevLayerWidth, int prevLayerHeight, const LayerDesc &layerDesc, float tdError, float cellConnectionAlpha, float 
cellConnectionBeta, float cellConnectionGamma, float cellConnectionTemperature, float cellWeightEligibilityDecay, std::mt19937 &generator) { 1090 | struct Uint2 { 1091 | unsigned int _x, _y; 1092 | }; 1093 | 1094 | struct Int2 { 1095 | int _x, _y; 1096 | }; 1097 | 1098 | struct Float2 { 1099 | float _x, _y; 1100 | }; 1101 | 1102 | std::uniform_int_distribution uniformDist(0, 10000); 1103 | 1104 | Uint2 seed; 1105 | seed._x = uniformDist(generator); 1106 | seed._y = uniformDist(generator); 1107 | 1108 | Int2 inputSize; 1109 | inputSize._x = prevLayerWidth; 1110 | inputSize._y = prevLayerHeight; 1111 | 1112 | Int2 layerSize; 1113 | layerSize._x = layerDesc._width; 1114 | layerSize._y = layerDesc._height; 1115 | 1116 | Float2 inputSizeInv; 1117 | inputSizeInv._x = 1.0f / prevLayerWidth; 1118 | inputSizeInv._y = 1.0f / prevLayerHeight; 1119 | 1120 | Float2 layerSizeInv; 1121 | layerSizeInv._x = 1.0f / layerDesc._width; 1122 | layerSizeInv._y = 1.0f / layerDesc._height; 1123 | 1124 | Int2 lateralConnectionRadii; 1125 | lateralConnectionRadii._x = layerDesc._lateralConnectionRadius; 1126 | lateralConnectionRadii._y = layerDesc._lateralConnectionRadius; 1127 | 1128 | // Lateral weight update 1129 | _layerCellWeightUpdateLastKernel.setArg(0, layer._columnStates); 1130 | _layerCellWeightUpdateLastKernel.setArg(1, layer._columnPredictionsPrev); 1131 | _layerCellWeightUpdateLastKernel.setArg(2, layer._cellPredictionsPrev); 1132 | _layerCellWeightUpdateLastKernel.setArg(3, layer._cellStates); 1133 | _layerCellWeightUpdateLastKernel.setArg(4, layer._cellStatesPrev); 1134 | _layerCellWeightUpdateLastKernel.setArg(5, layer._segmentStatesPrev); 1135 | _layerCellWeightUpdateLastKernel.setArg(6, layer._cellWeightsPrev); 1136 | _layerCellWeightUpdateLastKernel.setArg(7, layer._cellWeights); 1137 | _layerCellWeightUpdateLastKernel.setArg(8, layerDesc._cellsInColumn); 1138 | _layerCellWeightUpdateLastKernel.setArg(9, layerSize); 1139 | _layerCellWeightUpdateLastKernel.setArg(10, 
lateralConnectionRadii); 1140 | _layerCellWeightUpdateLastKernel.setArg(11, layerDesc._numSegmentsPerCell); 1141 | _layerCellWeightUpdateLastKernel.setArg(12, tdError); 1142 | _layerCellWeightUpdateLastKernel.setArg(13, cellConnectionAlpha); 1143 | _layerCellWeightUpdateLastKernel.setArg(14, cellConnectionBeta); 1144 | _layerCellWeightUpdateLastKernel.setArg(15, cellConnectionGamma); 1145 | _layerCellWeightUpdateLastKernel.setArg(16, cellConnectionTemperature); 1146 | _layerCellWeightUpdateLastKernel.setArg(17, cellWeightEligibilityDecay); 1147 | 1148 | cs.getQueue().enqueueNDRangeKernel(_layerCellWeightUpdateLastKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height)); 1149 | } 1150 | 1151 | void HTMRL::learnSpatialReplay(sys::ComputeSystem &cs, float cellStateDecay, float alpha, float beta, float gamma, int maxReplayChainSize, int numReplaySamples, unsigned long seed) { 1152 | std::mt19937 generator(seed); 1153 | 1154 | std::uniform_int_distribution sampleDist(0, _inputReplayChain.size()); 1155 | 1156 | for (int i = 0; i < numReplaySamples; i++) { 1157 | int sampleIndex = sampleDist(generator); 1158 | 1159 | if (sampleIndex == 0) { 1160 | // Replay input 1161 | cl::Image2D* pPrevLayerOutput = &_inputImage; 1162 | int prevLayerWidth = _inputWidth; 1163 | int prevLayerHeight = _inputHeight; 1164 | 1165 | for (int l = 0; l < _layers.size(); l++) { 1166 | spatialPoolLayer(cs, *pPrevLayerOutput, prevLayerWidth, prevLayerHeight, _layers[l], _layerDescs[l], 0.0f, generator); 1167 | learnLayerSpatial(cs, _layers[l], *pPrevLayerOutput, prevLayerWidth, prevLayerHeight, _layerDescs[l], alpha, beta, gamma, generator); 1168 | 1169 | pPrevLayerOutput = &_layers[l]._columnStates; 1170 | prevLayerWidth = _layerDescs[l]._width; 1171 | prevLayerHeight = _layerDescs[l]._height; 1172 | } 1173 | } 1174 | else { 1175 | int index = 0; 1176 | 1177 | cl::Image2D* pPrevLayerOutput; 1178 | 1179 | for (std::list::iterator it = _inputReplayChain.begin(); it != 
_inputReplayChain.end(); it++, index++) { 1180 | if (index >= sampleIndex - 1) { 1181 | pPrevLayerOutput = &(*it); 1182 | break; 1183 | } 1184 | } 1185 | 1186 | // Replay input 1187 | int prevLayerWidth = _inputWidth; 1188 | int prevLayerHeight = _inputHeight; 1189 | 1190 | for (int l = 0; l < _layers.size(); l++) { 1191 | spatialPoolLayer(cs, *pPrevLayerOutput, prevLayerWidth, prevLayerHeight, _layers[l], _layerDescs[l], 0.0f, generator); 1192 | learnLayerSpatial(cs, _layers[l], *pPrevLayerOutput, prevLayerWidth, prevLayerHeight, _layerDescs[l], alpha, beta, gamma, generator); 1193 | 1194 | pPrevLayerOutput = &_layers[l]._columnStates; 1195 | prevLayerWidth = _layerDescs[l]._width; 1196 | prevLayerHeight = _layerDescs[l]._height; 1197 | } 1198 | } 1199 | 1200 | for (int l = 0; l < _layers.size(); l++) { 1201 | std::swap(_layers[l]._columnFeedForwardWeights, _layers[l]._columnFeedForwardWeightsPrev); 1202 | std::swap(_layers[l]._inputBiases, _layers[l]._inputBiasesPrev); 1203 | } 1204 | } 1205 | 1206 | // Replay input to set state properly 1207 | cl::Image2D* pPrevLayerOutput = &_inputImage; 1208 | int prevLayerWidth = _inputWidth; 1209 | int prevLayerHeight = _inputHeight; 1210 | 1211 | for (int l = 0; l < _layers.size(); l++) { 1212 | spatialPoolLayer(cs, *pPrevLayerOutput, prevLayerWidth, prevLayerHeight, _layers[l], _layerDescs[l], 0.0f, generator); 1213 | learnLayerSpatial(cs, _layers[l], *pPrevLayerOutput, prevLayerWidth, prevLayerHeight, _layerDescs[l], alpha, beta, gamma, generator); 1214 | 1215 | pPrevLayerOutput = &_layers[l]._columnStates; 1216 | prevLayerWidth = _layerDescs[l]._width; 1217 | prevLayerHeight = _layerDescs[l]._height; 1218 | } 1219 | } 1220 | 1221 | void HTMRL::learnTemporal(sys::ComputeSystem &cs, float tdError, float cellConnectionAlpha, float cellConnectionBeta, float cellConnectionGamma, float cellConnectionTemperature, float cellWeightEligibilityDecay, unsigned long seed) { 1222 | std::mt19937 generator(seed); 1223 | 1224 | 
cl::Image2D* pPrevLayerOutput = &_inputImage; 1225 | int prevLayerWidth = _inputWidth; 1226 | int prevLayerHeight = _inputHeight; 1227 | 1228 | /* Walk the layers bottom-up; each layer's "previous output" is the column states of the layer below it (the raw input image for layer 0). */ for (int l = 0; l < _layers.size(); l++) { 1229 | if (l == _layers.size() - 1) 1230 | learnLayerTemporalLast(cs, _layers[l], *pPrevLayerOutput, prevLayerWidth, prevLayerHeight, _layerDescs[l], tdError, cellConnectionAlpha, cellConnectionBeta, cellConnectionGamma, cellConnectionTemperature, cellWeightEligibilityDecay, generator); 1231 | else 1232 | /* Pass the next layer's width AND height; the height argument previously repeated the width, which is wrong for non-square layers. */ learnLayerTemporal(cs, _layers[l], *pPrevLayerOutput, prevLayerWidth, prevLayerHeight, _layers[l + 1]._columnPredictionsPrev, _layerDescs[l + 1]._width, _layerDescs[l + 1]._height, _layerDescs[l], tdError, cellConnectionAlpha, cellConnectionBeta, cellConnectionGamma, cellConnectionTemperature, cellWeightEligibilityDecay, generator); 1233 | 1234 | pPrevLayerOutput = &_layers[l]._columnStates; 1235 | prevLayerWidth = _layerDescs[l]._width; 1236 | prevLayerHeight = _layerDescs[l]._height; 1237 | } 1238 | } 1239 | 1240 | void HTMRL::gaussianBlur(sys::ComputeSystem &cs, cl::Image2D &source, cl::Image2D &ping, cl::Image2D &pong, int imageSizeX, int imageSizeY, int passes, float kernelWidth) { 1241 | struct Int2 { 1242 | int _x, _y; 1243 | }; 1244 | 1245 | struct Float2 { 1246 | float _x, _y; 1247 | }; 1248 | 1249 | Float2 imageSizeInv; 1250 | imageSizeInv._x = 1.0f / imageSizeX; 1251 | imageSizeInv._y = 1.0f / imageSizeY; 1252 | 1253 | // Blur source to ping 1254 | _gaussianBlurXKernel.setArg(0, source); 1255 | _gaussianBlurXKernel.setArg(1, ping); 1256 | _gaussianBlurXKernel.setArg(2, imageSizeInv); 1257 | _gaussianBlurXKernel.setArg(3, kernelWidth * imageSizeInv._x); 1258 | 1259 | cs.getQueue().enqueueNDRangeKernel(_gaussianBlurXKernel, cl::NullRange, cl::NDRange(imageSizeX, imageSizeY)); 1260 | 1261 | for (int p = 0; p < passes - 1; p++) { 1262 | _gaussianBlurYKernel.setArg(0, ping); 1263 | _gaussianBlurYKernel.setArg(1, pong); 1264 | _gaussianBlurYKernel.setArg(2, imageSizeInv); 1265
| _gaussianBlurYKernel.setArg(3, kernelWidth * imageSizeInv._y); 1266 | 1267 | cs.getQueue().enqueueNDRangeKernel(_gaussianBlurYKernel, cl::NullRange, cl::NDRange(imageSizeX, imageSizeY)); 1268 | 1269 | _gaussianBlurXKernel.setArg(0, pong); 1270 | _gaussianBlurXKernel.setArg(1, ping); 1271 | _gaussianBlurXKernel.setArg(2, imageSizeInv); 1272 | _gaussianBlurXKernel.setArg(3, kernelWidth * imageSizeInv._x); 1273 | 1274 | cs.getQueue().enqueueNDRangeKernel(_gaussianBlurXKernel, cl::NullRange, cl::NDRange(imageSizeX, imageSizeY)); 1275 | } 1276 | 1277 | _gaussianBlurYKernel.setArg(0, ping); 1278 | _gaussianBlurYKernel.setArg(1, pong); 1279 | _gaussianBlurYKernel.setArg(2, imageSizeInv); 1280 | _gaussianBlurYKernel.setArg(3, kernelWidth * imageSizeInv._y); 1281 | 1282 | cs.getQueue().enqueueNDRangeKernel(_gaussianBlurYKernel, cl::NullRange, cl::NDRange(imageSizeX, imageSizeY)); 1283 | }

// Reconstructs an input-sized prediction from the first layer's column
// predictions and copies it to the host.
//
// Runs _reconstructInputKernel once per input element (an
// _inputWidth x _inputHeight range) and then reads _reconstructedPrediction
// back with the blocking flag (CL_TRUE) set.
//
// prediction - output buffer; resized to _input.size() and overwritten.
// cs         - compute system whose queue executes the kernel and the read.
//
// NOTE(review): the element type of `prediction` is assumed to be float -
// confirm against the declaration in HTMRL.h.
void HTMRL::getReconstructedPrediction(std::vector<float> &prediction, sys::ComputeSystem &cs) {
	// Plain structs passed by value as kernel arguments.
	struct Int2 {
		int _x, _y;
	};

	struct Float2 {
		float _x, _y;
	};

	const auto &firstDesc = _layerDescs.front();
	auto &firstLayer = _layers.front();

	Int2 layerSize;
	layerSize._x = firstDesc._width;
	layerSize._y = firstDesc._height;

	Int2 inputSizeMinusOne;
	inputSizeMinusOne._x = _inputWidth - 1;
	inputSizeMinusOne._y = _inputHeight - 1;

	Float2 inputSizeMinusOneInv;
	inputSizeMinusOneInv._x = 1.0f / (_inputWidth - 1);
	inputSizeMinusOneInv._y = 1.0f / (_inputHeight - 1);

	// Receptive field radius scaled by the layer-to-input size ratio, rounded up.
	// NOTE(review): cast target assumed to be float - confirm.
	Int2 reconstructionReceptiveFieldRadii;
	reconstructionReceptiveFieldRadii._x = std::ceil(static_cast<float>(firstDesc._width) / _inputWidth * firstDesc._receptiveFieldRadius);
	reconstructionReceptiveFieldRadii._y = std::ceil(static_cast<float>(firstDesc._height) / _inputHeight * firstDesc._receptiveFieldRadius);

	Int2 sdrReceptiveFieldRadii;
	sdrReceptiveFieldRadii._x = firstDesc._receptiveFieldRadius;
	sdrReceptiveFieldRadii._y = firstDesc._receptiveFieldRadius;

	Int2 sdrSizeMinusOne;
	sdrSizeMinusOne._x = firstDesc._width - 1;
	sdrSizeMinusOne._y = firstDesc._height - 1;

	Float2 sdrSizeMinusOneInv;
	sdrSizeMinusOneInv._x = 1.0f / (firstDesc._width - 1);
	sdrSizeMinusOneInv._y = 1.0f / (firstDesc._height - 1);

	_reconstructInputKernel.setArg(0, firstLayer._columnFeedForwardWeights);
	_reconstructInputKernel.setArg(1, firstLayer._inputBiases);
	_reconstructInputKernel.setArg(2, firstLayer._columnPredictions);
	_reconstructInputKernel.setArg(3, _reconstructedPrediction);
	_reconstructInputKernel.setArg(4, reconstructionReceptiveFieldRadii);
	_reconstructInputKernel.setArg(5, sdrReceptiveFieldRadii);
	_reconstructInputKernel.setArg(6, inputSizeMinusOne);
	_reconstructInputKernel.setArg(7, inputSizeMinusOneInv);
	_reconstructInputKernel.setArg(8, layerSize);
	_reconstructInputKernel.setArg(9, sdrSizeMinusOne);
	_reconstructInputKernel.setArg(10, sdrSizeMinusOneInv);

	cs.getQueue().enqueueNDRangeKernel(_reconstructInputKernel, cl::NullRange, cl::NDRange(_inputWidth, _inputHeight));

	// resize() is a no-op when the size already matches.
	prediction.resize(_input.size());

	// Read prediction
	{
		cl::size_t<3> origin;
		origin[0] = 0;
		origin[1] = 0;
		origin[2] = 0;

		cl::size_t<3> region;
		region[0] = _inputWidth;
		region[1] = _inputHeight;
		region[2] = 1;

		// data() instead of &prediction[0]: operator[] on an empty vector is undefined.
		cs.getQueue().enqueueReadImage(_reconstructedPrediction, CL_TRUE, origin, region, 0, 0, prediction.data());
	}
}

// Advances the agent by one step: begins the step, activates the network on
// the current _input, reconstructs the prediction, and writes the chosen
// action values back into _input.
//
// For every input element typed _action:
//   - with probability breakChance a random binary value (0 or 1) is written;
//   - otherwise the reconstructed prediction thresholded at 0.5 is written.
// Every element typed _unused is reset to 0. Other elements are left as-is.
//
// reconstructionAlpha and perturbationStdDev are accepted but not used in
// this function; a continuous, noise-perturbed action output that would have
// used perturbationStdDev is disabled in favour of the binary threshold.
//
// generator supplies the seed forwarded to activate() and the exploration
// draws.
void HTMRL::step(sys::ComputeSystem &cs, float reward, float reconstructionAlpha, float columnDecay, float cellStateDecay, float columnConnectionAlpha, float columnConnectionBeta, float columnConnectionGamma, float cellConnectionAlpha, float cellConnectionBeta, float cellConnectionGamma, float cellConnectionTemperature, float cellWeightEligibilityDecay, float alpha, float gamma, float breakChance, float perturbationStdDev, int maxReplayChainSize, int numReplaySamples, int addReplaySampleSteps, std::mt19937 &generator) {
	// NOTE(review): distribution value types (int / float) are assumed - confirm.
	std::uniform_int_distribution<int> seedDist(0, 10000);

	unsigned long seed = seedDist(generator);

	stepBegin(cs, addReplaySampleSteps, maxReplayChainSize);

	activate(_input, cs, reward, alpha, gamma, columnDecay, cellStateDecay, columnConnectionAlpha, columnConnectionBeta, columnConnectionGamma, cellConnectionAlpha, cellConnectionBeta, cellConnectionGamma, cellConnectionTemperature, cellWeightEligibilityDecay, maxReplayChainSize, numReplaySamples, addReplaySampleSteps, seed);

	// Sized to _input.size() by getReconstructedPrediction.
	std::vector<float> output;

	getReconstructedPrediction(output, cs);

	// Exploratory action
	std::uniform_real_distribution<float> dist01(0.0f, 1.0f);

	for (std::size_t i = 0; i < _input.size(); i++) {
		if (_inputTypes[i] == _action) {
			if (dist01(generator) < breakChance)
				_input[i] = dist01(generator) > 0.5f ? 1.0f : 0.0f;
			else
				_input[i] = output[i] > 0.5f ? 1.0f : 0.0f;
		}
		else if (_inputTypes[i] == _unused)
			_input[i] = 0.0f;
	}
}

// Appends visualisation images of the network state to `images`.
//
// Output order:
//   1. one image of the first layer's _reconstruction (input-sized);
//   2. for each layer, one image per cell index within a column, read from
//      _cellPredictions while the P key is held and from _cellStates
//      otherwise.
//
// Every image is maxWidth x maxHeight (the largest of the input and all layer
// sizes), starts fully transparent, and has its content centred. Each image
// group gets one random RGB colour drawn from a generator seeded with `seed`,
// so colours repeat for the same seed. A value v is encoded in the alpha
// channel as clamp(v, 0, 1) * 252 + 3.
//
// NOTE(review): host buffers are assumed to hold floats, and the cell images
// are assumed to carry two floats per texel, of which only the first is
// shown - confirm against the image formats used at creation.
void HTMRL::exportCellData(sys::ComputeSystem &cs, std::vector<std::shared_ptr<sf::Image>> &images, unsigned long seed) const {
	std::mt19937 generator(seed);

	int maxWidth = _inputWidth;
	int maxHeight = _inputHeight;

	for (std::size_t l = 0; l < _layers.size(); l++) {
		maxWidth = std::max(maxWidth, _layerDescs[l]._width);
		maxHeight = std::max(maxHeight, _layerDescs[l]._height);
	}

	std::uniform_real_distribution<float> uniformDist(0.0f, 1.0f);

	// Draws r, g, b in that order; separate statements keep the draw order fixed.
	auto randomColor = [&]() {
		sf::Color c;
		c.r = uniformDist(generator) * 255.0f;
		c.g = uniformDist(generator) * 255.0f;
		c.b = uniformDist(generator) * 255.0f;
		return c;
	};

	// Clamps to [0, 1] and maps onto the alpha range [3, 255].
	auto toAlpha = [](float value) {
		return std::min(1.0f, std::max(0.0f, value)) * (255.0f - 3.0f) + 3;
	};

	// Input-space reconstruction of the first layer
	{
		std::vector<float> state(_inputWidth * _inputHeight);

		cl::size_t<3> origin;
		origin[0] = 0;
		origin[1] = 0;
		origin[2] = 0;

		cl::size_t<3> region;
		region[0] = _inputWidth;
		region[1] = _inputHeight;
		region[2] = 1;

		cs.getQueue().enqueueReadImage(_layers.front()._reconstruction, CL_TRUE, origin, region, 0, 0, state.data());

		const sf::Color baseColor = randomColor();

		// Convert to colors
		std::shared_ptr<sf::Image> image = std::make_shared<sf::Image>();

		image->create(maxWidth, maxHeight, sf::Color::Transparent);

		for (int x = 0; x < _inputWidth; x++)
			for (int y = 0; y < _inputHeight; y++) {
				sf::Color color = baseColor;

				color.a = toAlpha(state[x + y * _inputWidth]);

				image->setPixel(x - _inputWidth / 2 + maxWidth / 2, y - _inputHeight / 2 + maxHeight / 2, color);
			}

		images.push_back(image);
	}

	// Holding P switches the per-cell view from cell states to cell predictions.
	const bool showPredictions = sf::Keyboard::isKeyPressed(sf::Keyboard::P);

	for (std::size_t l = 0; l < _layers.size(); l++) {
		const auto &desc = _layerDescs[l];

		const int width = desc._width;
		const int height = desc._height;

		std::vector<float> state(width * height * desc._cellsInColumn * 2);

		cl::size_t<3> origin;
		origin[0] = 0;
		origin[1] = 0;
		origin[2] = 0;

		cl::size_t<3> region;
		region[0] = width;
		region[1] = height;
		region[2] = desc._cellsInColumn;

		if (showPredictions)
			cs.getQueue().enqueueReadImage(_layers[l]._cellPredictions, CL_TRUE, origin, region, 0, 0, state.data());
		else
			cs.getQueue().enqueueReadImage(_layers[l]._cellStates, CL_TRUE, origin, region, 0, 0, state.data());

		const sf::Color layerColor = randomColor();

		// Convert to colors: one image per cell index within the column
		for (int ci = 0; ci < desc._cellsInColumn; ci++) {
			std::shared_ptr<sf::Image> image = std::make_shared<sf::Image>();

			image->create(maxWidth, maxHeight, sf::Color::Transparent);

			for (int x = 0; x < width; x++)
				for (int y = 0; y < height; y++) {
					sf::Color color = layerColor;

					// Factor 2: the host buffer holds two values per cell; the first is shown.
					color.a = toAlpha(state[2 * (x + y * width + ci * width * height)]);

					int wx = x - width / 2 + maxWidth / 2;
					int wy = y - height / 2 + maxHeight / 2;

					assert(wx >= 0 && wy >= 0 && wx < maxWidth && wy < maxHeight);

					image->setPixel(wx, wy, color);
				}

			images.push_back(image);
		}
	}
}
--------------------------------------------------------------------------------