├── ContinuousHTMGPU
    ├── resources
    │   ├── bot.png
    │   ├── car.png
    │   ├── end.png
    │   ├── arial.ttf
    │   ├── cart.png
    │   ├── floor.png
    │   ├── pole.png
    │   ├── start.png
    │   ├── wall.png
    │   ├── background.png
    │   ├── inputCart.png
    │   ├── inputPole.png
    │   ├── pixelated.ttf
    │   ├── lineGradient.png
    │   ├── license.txt
    │   ├── read me.txt
    │   ├── htm.cl
    │   ├── cae.cl
    │   └── htmrl.cl
    ├── source
    │   ├── system
    │   │   ├── Uncopyable.h
    │   │   ├── ComputeProgram.h
    │   │   ├── ComputeSystem.h
    │   │   ├── ComputeProgram.cpp
    │   │   └── ComputeSystem.cpp
    │   ├── vis
    │   │   ├── HTMRLVisualizer.h
    │   │   ├── Plot.h
    │   │   ├── HTMRLVisualizer.cpp
    │   │   └── Plot.cpp
    │   ├── htm
    │   │   ├── AnythingEncoder.h
    │   │   ├── AnythingEncoder.cpp
    │   │   ├── HTMRL.h
    │   │   └── HTMRL.cpp
    │   └── Main.cpp
    ├── CMakeLists.txt
    ├── FindOpenCL.cmake
    └── FindSFML.cmake
├── LICENSE.md
└── README.md


/ContinuousHTMGPU/resources/bot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/bot.png


--------------------------------------------------------------------------------
/ContinuousHTMGPU/resources/car.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/car.png


--------------------------------------------------------------------------------
/ContinuousHTMGPU/resources/end.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/end.png


--------------------------------------------------------------------------------
/ContinuousHTMGPU/resources/arial.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/arial.ttf


--------------------------------------------------------------------------------
/ContinuousHTMGPU/resources/cart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/cart.png


--------------------------------------------------------------------------------
/ContinuousHTMGPU/resources/floor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/floor.png


--------------------------------------------------------------------------------
/ContinuousHTMGPU/resources/pole.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/pole.png


--------------------------------------------------------------------------------
/ContinuousHTMGPU/resources/start.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/start.png


--------------------------------------------------------------------------------
/ContinuousHTMGPU/resources/wall.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/wall.png


--------------------------------------------------------------------------------
/ContinuousHTMGPU/resources/background.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/background.png


--------------------------------------------------------------------------------
/ContinuousHTMGPU/resources/inputCart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/inputCart.png


--------------------------------------------------------------------------------
/ContinuousHTMGPU/resources/inputPole.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/inputPole.png


--------------------------------------------------------------------------------
/ContinuousHTMGPU/resources/pixelated.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/pixelated.ttf


--------------------------------------------------------------------------------
/ContinuousHTMGPU/resources/lineGradient.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/222464/ContinuousHTMGPU/HEAD/ContinuousHTMGPU/resources/lineGradient.png


--------------------------------------------------------------------------------
/ContinuousHTMGPU/resources/license.txt:
--------------------------------------------------------------------------------
1 | The FontStruction “Pixelated”
2 | (http://fontstruct.com/fontstructions/show/426637) by “Greenma201” is
3 | licensed under a Creative Commons Attribution Share Alike license
4 | (http://creativecommons.org/licenses/by-sa/3.0/).
5 | 


--------------------------------------------------------------------------------
/ContinuousHTMGPU/source/system/Uncopyable.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // Inherit from this class to make the derived class uncopyable.
 4 | namespace sys {
 5 | 	class Uncopyable {
 6 | 	protected:
 7 | 		// Only derived classes may construct or destroy the base.
 8 | 		Uncopyable() {}
 9 | 		virtual ~Uncopyable() {}
10 | 
11 | 	private:
12 | 		// Deleted (C++11) instead of declared-but-undefined: this states the
13 | 		// intent directly and yields a clear compile-time diagnostic at the
14 | 		// point of any attempted copy.
15 | 		Uncopyable(const Uncopyable &) = delete;
16 | 		Uncopyable &operator=(const Uncopyable &) = delete;
17 | 	};
18 | }


--------------------------------------------------------------------------------
/ContinuousHTMGPU/source/system/ComputeProgram.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <system/ComputeSystem.h>
 4 | 
 5 | #include <assert.h>
 6 | 
 7 | namespace sys {
 8 | 	// Wraps one cl::Program that is created from a source file on disk.
 9 | 	class ComputeProgram {
10 | 	public:
11 | 		// Reads the file at `name` and builds it for the context and device
12 | 		// held by `cs`. Returns true on success, false otherwise.
13 | 		bool loadFromFile(const std::string &name, ComputeSystem &cs);
14 | 
15 | 		// Mutable access to the wrapped program object.
16 | 		cl::Program &getProgram() { return _program; }
17 | 
18 | 	private:
19 | 		cl::Program _program;
20 | 	};
21 | }


--------------------------------------------------------------------------------
/ContinuousHTMGPU/source/vis/HTMRLVisualizer.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <SFML/Graphics.hpp>
 4 | #include <htm/HTMRL.h>
 5 | 
 6 | namespace vis {
 7 | 	// Draws the cell-data images exported by an htm::HTMRL as a stack of
 8 | 	// rotated, semi-transparent layers.
 9 | 	class HTMRLVisualizer {
10 | 	public:
11 | 		// Allocates the internal width x width scratch render texture.
12 | 		void create(unsigned int width);
13 | 
14 | 		// Renders the current cell data of `htmrl` into `target` around
15 | 		// `position`, sized by `scale`, and finishes with target.display().
16 | 		void update(
17 | 			sf::RenderTexture &target,
18 | 			const sf::Vector2f &position,
19 | 			const sf::Vector2f &scale,
20 | 			sys::ComputeSystem &cs,
21 | 			const htm::HTMRL &htmrl,
22 | 			std::mt19937 &generator);
23 | 
24 | 	private:
25 | 		sf::RenderTexture _rt;
26 | 	};
27 | }


--------------------------------------------------------------------------------
/ContinuousHTMGPU/source/system/ComputeSystem.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <system/Uncopyable.h>
 4 | #include <CL/cl.hpp>
 5 | 
 6 | #define SYS_ALLOW_CL_GL_CONTEXT 0
 7 | 
 8 | namespace sys {
 9 | 	// Bundles the OpenCL platform, device, context and command queue used by
10 | 	// the rest of the program. Instances cannot be copied.
11 | 	class ComputeSystem : private Uncopyable {
12 | 	public:
13 | 		// Kind of device create() should look for; _none skips OpenCL setup.
14 | 		enum DeviceType {
15 | 			_cpu,
16 | 			_gpu,
17 | 			_all,
18 | 			_none
19 | 		};
20 | 
21 | 		// Selects a platform and a device of the requested type, then creates
22 | 		// the context and command queue. Returns false when no platform or no
23 | 		// matching device is available. createFromGLContext only has an effect
24 | 		// when SYS_ALLOW_CL_GL_CONTEXT is non-zero.
25 | 		bool create(DeviceType type, bool createFromGLContext = false);
26 | 
27 | 		// Accessors hand out mutable references to the wrapped OpenCL objects.
28 | 		cl::Platform &getPlatform() { return _platform; }
29 | 		cl::Device &getDevice() { return _device; }
30 | 		cl::Context &getContext() { return _context; }
31 | 		cl::CommandQueue &getQueue() { return _queue; }
32 | 
33 | 	private:
34 | 		cl::Platform _platform;
35 | 		cl::Device _device;
36 | 		cl::Context _context;
37 | 		cl::CommandQueue _queue;
38 | 	};
39 | }


--------------------------------------------------------------------------------
/ContinuousHTMGPU/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.8)
 2 | 
 3 | project(CHTMGPU)
 4 | 
 5 | include_directories("${PROJECT_SOURCE_DIR}/source")
 6 | 
 7 | # FindOpenCL.cmake and FindSFML.cmake are shipped next to this file, so the
 8 | # project directory has to be on the module search path.
 9 | set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}")
10 | 
11 | find_package(OpenCL REQUIRED)
12 | 
13 | include_directories(${OPENCL_INCLUDE_DIRS})
14 | 
15 | if(OPENCL_HAS_CPP_BINDINGS)
16 |     message("OpenCL has CPP bindings. Full include is: " ${OPENCL_INCLUDE_DIRS})
17 | else(OPENCL_HAS_CPP_BINDINGS)
18 |     message("No OpenCL CPP bindings found")
19 | endif(OPENCL_HAS_CPP_BINDINGS)
20 | 
21 | find_package(SFML 2 REQUIRED system window graphics)
22 | 
23 | include_directories(${SFML_INCLUDE_DIR})
24 | 
25 | # Every translation unit has to be compiled into the executable: building
26 | # Main.cpp alone leaves the sys::, vis:: and htm:: member functions defined in
27 | # the other .cpp files unresolved at link time.
28 | # NOTE(review): assumes Main.cpp does not itself #include these .cpp files.
29 | set(CHTMGPU_SOURCES
30 |     "${PROJECT_SOURCE_DIR}/source/Main.cpp"
31 |     "${PROJECT_SOURCE_DIR}/source/system/ComputeProgram.cpp"
32 |     "${PROJECT_SOURCE_DIR}/source/system/ComputeSystem.cpp"
33 |     "${PROJECT_SOURCE_DIR}/source/vis/HTMRLVisualizer.cpp"
34 |     "${PROJECT_SOURCE_DIR}/source/vis/Plot.cpp"
35 |     "${PROJECT_SOURCE_DIR}/source/htm/AnythingEncoder.cpp"
36 |     "${PROJECT_SOURCE_DIR}/source/htm/HTMRL.cpp")
37 | 
38 | add_executable(CHTMGPU ${CHTMGPU_SOURCES})
39 | 
40 | target_link_libraries(CHTMGPU ${OPENCL_LIBRARIES})
41 | target_link_libraries(CHTMGPU ${SFML_LIBRARIES})


--------------------------------------------------------------------------------
/ContinuousHTMGPU/source/system/ComputeProgram.cpp:
--------------------------------------------------------------------------------
 1 | #include "ComputeProgram.h"
 2 | 
 3 | #include <fstream>
 4 | #include <iostream>
 5 | 
 6 | using namespace sys;
 7 | 
 8 | // Reads OpenCL source text from the file `name` and builds it for the device
 9 | // held by `cs`. On success the result replaces the program stored in this
10 | // object.
11 | //
12 | // name: path handed directly to std::ifstream.
13 | // cs:   supplies the context the program is created in and the device it is
14 | //       built for.
15 | //
16 | // Returns true when the file was read completely and the build reported
17 | // CL_SUCCESS, false otherwise. Diagnostics go to std::cerr and are only
18 | // compiled in when SYS_DEBUG is defined.
19 | bool ComputeProgram::loadFromFile(const std::string &name, ComputeSystem &cs) {
20 | 	std::ifstream fromFile(name);
21 | 
22 | 	if (!fromFile.is_open()) {
23 | #ifdef SYS_DEBUG
24 | 		std::cerr << "Could not open file " << name << "!" << std::endl;
25 | #endif
26 | 		return false;
27 | 	}
28 | 
29 | 	std::string source;
30 | 	std::string line;
31 | 
32 | 	// Let the extraction itself drive the loop: testing eof()/good() before
33 | 	// reading runs one extra iteration after the final newline and appends a
34 | 	// spurious empty line to the program text.
35 | 	while (std::getline(fromFile, line)) {
36 | 		source += line;
37 | 		source += '\n';
38 | 	}
39 | 
40 | 	// badbit means the stream failed mid-read; do not build truncated source.
41 | 	if (fromFile.bad()) {
42 | #ifdef SYS_DEBUG
43 | 		std::cerr << "Error while reading file " << name << "!" << std::endl;
44 | #endif
45 | 		return false;
46 | 	}
47 | 
48 | 	_program = cl::Program(cs.getContext(), source);
49 | 
50 | 	if (_program.build(std::vector<cl::Device>(1, cs.getDevice())) != CL_SUCCESS) {
51 | #ifdef SYS_DEBUG
52 | 		std::cerr << "Error building: " << _program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(cs.getDevice()) << std::endl;
53 | #endif
54 | 		return false;
55 | 	}
56 | 
57 | 	return true;
58 | }


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | ContinuousHTMGPU
 2 | Copyright (C) 2014-2015 Eric Laukien
 3 | 
 4 | This software is provided 'as-is', without any express or implied
 5 | warranty.  In no event will the authors be held liable for any damages
 6 | arising from the use of this software.
 7 | 
 8 | Permission is granted to anyone to use this software for any purpose,
 9 | including commercial applications, and to alter it and redistribute it
10 | freely, subject to the following restrictions:
11 | 
12 | 1. The origin of this software must not be misrepresented; you must not
13 | 	claim that you wrote the original software. If you use this software
14 | 	in a product, an acknowledgment in the product documentation would be
15 | 	appreciated but is not required.
16 | 2. Altered source versions must be plainly marked as such, and must not be
17 | 	misrepresented as being the original software.
18 | 3. This notice may not be removed or altered from any source distribution.
19 | 
20 | ------------------------------------------------------------------------------
21 | 
22 | ContinuousHTMGPU uses the following external libraries:
23 | 
24 | SFML - source code is licensed under the zlib/png license.
25 | OpenCL


--------------------------------------------------------------------------------
/ContinuousHTMGPU/resources/read me.txt:
--------------------------------------------------------------------------------
 1 | The font file in this archive was created using Fontstruct the free, online
 2 | font-building tool.
 3 | This font was created by “Greenma201”.
 4 | This font has a homepage where this archive and other versions may be found:
 5 | http://fontstruct.com/fontstructions/show/426637
 6 | 
 7 | Try Fontstruct at http://fontstruct.com
 8 | It’s easy and it’s fun.
 9 | 
10 | NOTE FOR FLASH USERS: Fontstruct fonts (fontstructions) are optimized for
11 | Flash. If the font in this archive is a pixel font, it is best displayed at a
12 | font-size of 8.
13 | 
14 | Fontstruct is sponsored by FontShop.
15 | Visit them at http://www.fontshop.com
16 | FontShop is the original independent font retailer. We’ve been around since
17 | the dawn of digital type. Whether you need the right font or need to create the
18 | right font from scratch, let our 20 years of experience work for you.
19 | 
20 | Fontstruct is copyright ©2011 Rob Meek
21 | 
22 | LEGAL NOTICE:
23 | In using this font you must comply with the licensing terms described in the
24 | file “license.txt” included with this archive.
25 | If you redistribute the font file in this archive, it must be accompanied by
26 | all the other files from this archive, including this one.
27 | 


--------------------------------------------------------------------------------
/ContinuousHTMGPU/source/vis/Plot.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <SFML/Graphics.hpp>
 4 | 
 5 | namespace vis {
 6 | 	// A position paired with a colour; the colour defaults to black.
 7 | 	struct Point {
 8 | 		sf::Vector2f _position;
 9 | 		sf::Color _color;
10 | 
11 | 		Point() : _color(sf::Color::Black) {}
12 | 	};
13 | 
14 | 	// A named sequence of points plus shadow settings
15 | 	// (defaults: _shadow 0.5, _shadowOffset (-4, 4)).
16 | 	struct Curve {
17 | 		std::string _name;
18 | 
19 | 		float _shadow;
20 | 		sf::Vector2f _shadowOffset;
21 | 
22 | 		std::vector<Point> _points;
23 | 
24 | 		Curve() : _shadow(0.5f), _shadowOffset(-4.0f, 4.0f) {}
25 | 	};
26 | 
27 | 	// A set of curves together with axes and background colours
28 | 	// (defaults: black axes on a white background).
29 | 	struct Plot {
30 | 		sf::Color _axesColor;
31 | 		sf::Color _backgroundColor;
32 | 
33 | 		std::vector<Curve> _curves;
34 | 
35 | 		Plot() : _axesColor(sf::Color::Black), _backgroundColor(sf::Color::White) {}
36 | 
37 | 		// Draws the plot into `target`; the definition is not part of this header.
38 | 		void draw(
39 | 			sf::RenderTarget &target,
40 | 			const sf::Texture &lineGradientTexture,
41 | 			const sf::Font &tickFont,
42 | 			float tickTextScale,
43 | 			const sf::Vector2f &domain,
44 | 			const sf::Vector2f &range,
45 | 			const sf::Vector2f &margins,
46 | 			const sf::Vector2f &tickIncrements,
47 | 			float axesSize,
48 | 			float lineSize,
49 | 			float tickSize,
50 | 			float tickLength,
51 | 			float textTickOffset,
52 | 			int precision);
53 | 	};
54 | 
55 | 	// Helpers for sf::Vector2f; only declared here.
56 | 	float vectorMagnitude(const sf::Vector2f &vector);
57 | 	sf::Vector2f vectorNormalize(const sf::Vector2f &vector);
58 | 	float vectorDot(const sf::Vector2f &left, const sf::Vector2f &right);
59 | }


--------------------------------------------------------------------------------
/ContinuousHTMGPU/source/htm/AnythingEncoder.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <algorithm>
 4 | #include <cmath>
 5 | #include <memory>
 6 | #include <random>
 7 | #include <vector>
 8 | 
 9 | 
10 | 
11 | namespace htm {
12 | 	// Maps a real-valued input vector to a vector of _sdrSize outputs in (0, 1).
13 | 	// Every node owns a center; a node is activated by the negative squared
14 | 	// distance between its center and the input, and its output shrinks with
15 | 	// the number of nodes that are activated more strongly.
16 | 	class AnythingEncoder {
17 | 	public:
18 | 		// Logistic function 1 / (1 + e^-x).
19 | 		float sigmoid(float x) {
20 | 			return 1.0f / (1.0f + std::exp(-x));
21 | 		}
22 | 
23 | 	private:
24 | 		struct Node {
25 | 			// One coordinate per input dimension.
26 | 			std::vector<float> _center;
27 | 
28 | 			float _activation;	// negative squared distance from the last encode()
29 | 			float _output;		// value written to the SDR by the last encode()
30 | 			float _outputPrev;	// _output as it was before the last encode()
31 | 			float _dutyCycle;	// decaying maximum of the outputs
32 | 
33 | 			Node()
34 | 				: _activation(0.0f), _output(0.0f), _outputPrev(0.0f), _dutyCycle(0.0f)
35 | 			{}
36 | 		};
37 | 
38 | 		int _sdrSize;
39 | 		int _inputSize;
40 | 
41 | 		int _boostCandidate;		// index of the node that learn() adapts
42 | 		float _bestRepresentation;	// largest activation found by the last encode()
43 | 
44 | 		std::vector<Node> _nodes;
45 | 	
46 | 	public:
47 | 		// Starts out empty. The sizes are zeroed so that they never hold
48 | 		// indeterminate values if a member is called before create().
49 | 		AnythingEncoder()
50 | 			: _sdrSize(0), _inputSize(0), _boostCandidate(0), _bestRepresentation(1.0f)
51 | 		{}
52 | 
53 | 		// Allocates sdrSize nodes with inputSize center coordinates each, drawn
54 | 		// uniformly between minInitCenter and maxInitCenter.
55 | 		void create(int sdrSize, int inputSize, float minInitCenter, float maxInitCenter, std::mt19937 &generator);
56 | 
57 | 		// Computes the outputs for `input`, writes them to `sdr` and updates the
58 | 		// duty cycles and the boost candidate.
59 | 		void encode(const std::vector<float> &input, std::vector<float> &sdr, float localActivity, float outputIntensity, float dutyCycleDecay);
60 | 
61 | 		// Moves the boost candidate's center towards `input`.
62 | 		void learn(const std::vector<float> &input, float centerAlpha, float maxDutyCycleForLearn, float noMatchIntensity);
63 | 
64 | 		// Writes the output-weighted average of the node centers to `recon`.
65 | 		void decode(const std::vector<float> &sdr, std::vector<float> &recon);
66 | 	};
67 | }


--------------------------------------------------------------------------------
/ContinuousHTMGPU/source/vis/HTMRLVisualizer.cpp:
--------------------------------------------------------------------------------
 1 | #include "HTMRLVisualizer.h"
 2 | 
 3 | using namespace vis;
 4 | 
 5 | void HTMRLVisualizer::create(unsigned int width) { // Sets up the square (width x width) scratch texture that update() draws into.
 6 | 	_rt.create(width, width, false); // third argument is SFML's depthBuffer flag: none requested
 7 | 	_rt.setSmooth(true); // smooth filtering when the scratch texture is drawn scaled
 8 | }
 9 | 
10 | // Draws the cell data exported from `htmrl` into `target` as a stack of
11 | // semi-transparent layers.
12 | //
13 | // Every exported image is first drawn, rotated by 45 degrees, into the
14 | // internal scratch texture. The scratch texture is then drawn into `target`
15 | // cellLayerSteps times, each copy one heightStep further up from `position`.
16 | //
17 | // target:    receives the drawing; display() is called on it before returning.
18 | // position:  centre of the lowest layer, in `target` coordinates.
19 | // scale:     multiplied by (0.5, 0.25) to size each layer.
20 | // cs, htmrl: used to export the cell images via htm::HTMRL::exportCellData.
21 | // generator: currently unused.
22 | void HTMRLVisualizer::update(sf::RenderTexture &target, const sf::Vector2f &position, const sf::Vector2f &scale, sys::ComputeSystem &cs, const htm::HTMRL &htmrl, std::mt19937 &generator) {
23 | 	(void)generator;
24 | 
25 | 	std::vector<std::shared_ptr<sf::Image>> images;
26 | 
27 | 	htmrl.exportCellData(cs, images, 5634);
28 | 
29 | 	const float heightStep = 1.0f;
30 | 	const float transparency = 0.3f;
31 | 	const int cellLayerSteps = 5;
32 | 
33 | 	const sf::Vector2f rtSize(static_cast<float>(_rt.getSize().x), static_cast<float>(_rt.getSize().y));
34 | 
35 | 	// The sprite that copies the scratch texture into the target is the same
36 | 	// for every layer (only its position changes), so it is set up once.
37 | 	sf::Sprite transformedSprite;
38 | 	transformedSprite.setTexture(_rt.getTexture());
39 | 	transformedSprite.setOrigin(rtSize.x * 0.5f, rtSize.y * 0.5f);
40 | 	transformedSprite.setScale(scale.x * 0.5f, scale.y * 0.25f);
41 | 	transformedSprite.setColor(sf::Color(255, 255, 255, static_cast<sf::Uint8>(255.0f * transparency)));
42 | 
43 | 	sf::Texture imageTexture;
44 | 
45 | 	// Running layer counter across all images; determines the vertical offset.
46 | 	int h = 0;
47 | 
48 | 	for (const std::shared_ptr<sf::Image> &image : images) {
49 | 		// Render the image into the scratch texture.
50 | 		_rt.setActive();
51 | 
52 | 		imageTexture.loadFromImage(*image);
53 | 		imageTexture.setSmooth(false);
54 | 
55 | 		const sf::Vector2f imageSize(static_cast<float>(imageTexture.getSize().x), static_cast<float>(imageTexture.getSize().y));
56 | 
57 | 		// A fresh sprite per image so its texture rectangle matches the size
58 | 		// of the texture that was just loaded.
59 | 		sf::Sprite imageSprite;
60 | 		imageSprite.setTexture(imageTexture);
61 | 		imageSprite.setOrigin(imageSize.x * 0.5f, imageSize.y * 0.5f);
62 | 		imageSprite.setRotation(45.0f);
63 | 		imageSprite.setPosition(rtSize.x * 0.5f, rtSize.y * 0.5f);
64 | 		imageSprite.setScale(rtSize.x / imageSize.x * 0.75f, rtSize.y / imageSize.y * 0.75f);
65 | 
66 | 		// Wipe the previous image, alpha included, before drawing the next.
67 | 		_rt.clear(sf::Color::Transparent);
68 | 		_rt.draw(imageSprite);
69 | 		_rt.display();
70 | 
71 | 		// Composite the scratch texture into the target.
72 | 		target.setActive();
73 | 		target.setSmooth(true);
74 | 
75 | 		for (int s = 0; s < cellLayerSteps; s++) {
76 | 			transformedSprite.setPosition(position.x, position.y - h * heightStep);
77 | 			target.draw(transformedSprite);
78 | 
79 | 			h++;
80 | 		}
81 | 	}
82 | 
83 | 	target.display();
84 | }


--------------------------------------------------------------------------------
/ContinuousHTMGPU/source/system/ComputeSystem.cpp:
--------------------------------------------------------------------------------
 1 | #include "ComputeSystem.h"
 2 | 
 3 | #include <iostream>
 4 | 
 5 | using namespace sys;
 6 | 
 7 | // Picks an OpenCL platform and a device of the requested type and creates the
 8 | // context and command queue on it.
 9 | //
10 | // type:                _none skips OpenCL setup entirely and reports success.
11 | // createFromGLContext: only honoured when SYS_ALLOW_CL_GL_CONTEXT is non-zero;
12 | //                      the context is then created from the current GL context.
13 | //
14 | // Returns false when no platform, no matching device, or no context/queue
15 | // could be obtained. Diagnostics are only compiled in when SYS_DEBUG is defined.
16 | bool ComputeSystem::create(DeviceType type, bool createFromGLContext) {
17 | 	if (type == _none) {
18 | #ifdef SYS_DEBUG
19 | 		std::cout << "No OpenCL context created." << std::endl;
20 | #endif
21 | 		return true;
22 | 	}
23 | 
24 | 	std::vector<cl::Platform> allPlatforms;
25 | 	cl::Platform::get(&allPlatforms);
26 | 
27 | 	if (allPlatforms.empty()) {
28 | #ifdef SYS_DEBUG
29 | 		std::cout << "No platforms found. Check your OpenCL installation." << std::endl;
30 | #endif
31 | 		return false;
32 | 	}
33 | 
34 | 	cl_device_type requestedType = CL_DEVICE_TYPE_ALL;
35 | 
36 | 	switch (type) {
37 | 	case _cpu:
38 | 		requestedType = CL_DEVICE_TYPE_CPU;
39 | 		break;
40 | 	case _gpu:
41 | 		requestedType = CL_DEVICE_TYPE_GPU;
42 | 		break;
43 | 	default:
44 | 		break;
45 | 	}
46 | 
47 | 	// The first platform is not necessarily the one that owns the requested
48 | 	// kind of device (e.g. a CPU-only runtime installed next to a GPU driver),
49 | 	// so try every platform in order and keep the first one with a match.
50 | 	_platform = allPlatforms.front();
51 | 
52 | 	std::vector<cl::Device> allDevices;
53 | 
54 | 	for (cl::Platform &candidate : allPlatforms) {
55 | 		std::vector<cl::Device> found;
56 | 
57 | 		if (candidate.getDevices(requestedType, &found) == CL_SUCCESS && !found.empty()) {
58 | 			_platform = candidate;
59 | 			allDevices.swap(found);
60 | 			break;
61 | 		}
62 | 	}
63 | 
64 | 	if (allDevices.empty()) {
65 | #ifdef SYS_DEBUG
66 | 		std::cout << "No devices found. Check your OpenCL installation." << std::endl;
67 | #endif
68 | 		return false;
69 | 	}
70 | 
71 | 	_device = allDevices.front();
72 | 
73 | #ifdef SYS_DEBUG
74 | 	std::cout << "Using platform: " << _platform.getInfo<CL_PLATFORM_NAME>() << std::endl;
75 | 	std::cout << "Using device: " << _device.getInfo<CL_DEVICE_NAME>() << std::endl;
76 | #endif
77 | 
78 | 	cl_int contextError = CL_SUCCESS;
79 | 
80 | #if(SYS_ALLOW_CL_GL_CONTEXT)
81 | 	if (createFromGLContext) {
82 | #if defined (__APPLE__) || defined(MACOSX)
83 | 		CGLContextObj kCGLContext = CGLGetCurrentContext();
84 | 		CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext);
85 | 		cl_context_properties props[] = {
86 | 			CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)kCGLShareGroup,
87 | 			0
88 | 		};
89 | #else
90 | #if defined WIN32
91 | 		cl_context_properties props[] = {
92 | 			CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),
93 | 			CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(),
94 | 			CL_CONTEXT_PLATFORM, (cl_context_properties)static_cast<cl_platform_id>(_platform()),
95 | 			0
96 | 		};
97 | #else
98 | 		cl_context_properties props[] = {
99 | 			CL_GL_CONTEXT_KHR, (cl_context_properties)glXGetCurrentContext(),
100 | 			CL_GLX_DISPLAY_KHR, (cl_context_properties)glXGetCurrentDisplay(),
101 | 			CL_CONTEXT_PLATFORM, (cl_context_properties)static_cast<cl_platform_id>(_platform()),
102 | 			0
103 | 		};
104 | #endif
105 | #endif
106 | 
107 | 		_context = cl::Context(_device, props, NULL, NULL, &contextError);
108 | 	}
109 | 	else
110 | #endif
111 | 		_context = cl::Context(_device, NULL, NULL, NULL, &contextError);
112 | 
113 | 	// Report a failed context instead of returning success with a null handle.
114 | 	if (contextError != CL_SUCCESS) {
115 | #ifdef SYS_DEBUG
116 | 		std::cout << "Could not create an OpenCL context (error " << contextError << ")." << std::endl;
117 | #endif
118 | 		return false;
119 | 	}
120 | 
121 | 	cl_int queueError = CL_SUCCESS;
122 | 	_queue = cl::CommandQueue(_context, _device, 0, &queueError);
123 | 
124 | 	if (queueError != CL_SUCCESS) {
125 | #ifdef SYS_DEBUG
126 | 		std::cout << "Could not create an OpenCL command queue (error " << queueError << ")." << std::endl;
127 | #endif
128 | 		return false;
129 | 	}
130 | 
131 | 	return true;
132 | }


--------------------------------------------------------------------------------
/ContinuousHTMGPU/source/htm/AnythingEncoder.cpp:
--------------------------------------------------------------------------------
 1 | #include "AnythingEncoder.h"
 2 | 
 3 | using namespace htm;
 4 | 
 5 | // (Re)initialises the encoder with sdrSize nodes whose inputSize center
 6 | // coordinates are drawn uniformly from [minInitCenter, maxInitCenter).
 7 | void AnythingEncoder::create(int sdrSize, int inputSize, float minInitCenter, float maxInitCenter, std::mt19937 &generator) {
 8 | 	_sdrSize = sdrSize;
 9 | 	_inputSize = inputSize;
10 | 	_boostCandidate = 0; // a stale index could point past a smaller node set
11 | 	_bestRepresentation = 1.0f;
12 | 	_nodes.assign(sdrSize, Node()); // assign(), not resize(): drops node state left by an earlier create()
13 | 	std::uniform_real_distribution<float> centerDist(minInitCenter, maxInitCenter);
14 | 	for (Node &node : _nodes) {
15 | 		node._center.resize(inputSize);
16 | 		for (float &coordinate : node._center)
17 | 			coordinate = centerDist(generator);
18 | 	}
19 | }
20 | 
21 | // Computes the SDR of `input` and writes it to `sdr` (resized to _sdrSize).
22 | // A node's activation is the negative squared distance between its center and
23 | // the input; its output is sigmoid((localActivity - rank) * outputIntensity),
24 | // where rank is the number of nodes with a strictly higher activation. Also
25 | // updates the duty cycles and the boost candidate that learn() adapts.
26 | // NOTE(review): reads input[0 .. _inputSize) unchecked - callers must size it.
27 | void AnythingEncoder::encode(const std::vector<float> &input, std::vector<float> &sdr, float localActivity, float outputIntensity, float dutyCycleDecay) {
28 | 	sdr.resize(_sdrSize);
29 | 	// -999999 is kept only as the value reported when there are no nodes. With
30 | 	// nodes present the maximum is seeded from node 0, so activations below the
31 | 	// old sentinel (inputs far from every center) are no longer clamped to it.
32 | 	float maxActivation = -999999.0f;
33 | 
34 | 	for (int i = 0; i < _sdrSize; i++) {
35 | 		float sum = 0.0f;
36 | 		for (int j = 0; j < _inputSize; j++) {
37 | 			float difference = _nodes[i]._center[j] - input[j];
38 | 			sum += difference * difference;
39 | 		}
40 | 
41 | 		_nodes[i]._activation = -sum;
42 | 		maxActivation = (i == 0) ? _nodes[i]._activation : std::max(maxActivation, _nodes[i]._activation);
43 | 	}
44 | 
45 | 	_bestRepresentation = maxActivation;
46 | 
47 | 	// Inhibit: rank every node against all the others.
48 | 	for (int i = 0; i < _sdrSize; i++) {
49 | 		float numHigher = 0.0f;
50 | 		for (int j = 0; j < _sdrSize; j++) {
51 | 			if (_nodes[j]._activation > _nodes[i]._activation)
52 | 				numHigher++;
53 | 		}
54 | 
55 | 		_nodes[i]._outputPrev = _nodes[i]._output;
56 | 		sdr[i] = _nodes[i]._output = sigmoid((localActivity - numHigher) * outputIntensity);
57 | 		_nodes[i]._dutyCycle = std::max((1.0f - dutyCycleDecay) * _nodes[i]._dutyCycle, _nodes[i]._output);
58 | 		if (_nodes[i]._dutyCycle < _nodes[_boostCandidate]._dutyCycle)
59 | 			_boostCandidate = i;
60 | 	}
61 | }
62 | 
63 | // Moves the boost candidate's center towards `input` by centerAlpha * learnScalar. learnScalar blends the rise in the
64 | // candidate's output with a boost term that is non-zero only while its duty cycle is below maxDutyCycleForLearn.
65 | void AnythingEncoder::learn(const std::vector<float> &input, float centerAlpha, float maxDutyCycleForLearn, float noMatchIntensity) {
66 | 	if (_boostCandidate < 0 || _boostCandidate >= static_cast<int>(_nodes.size())) return; // no node to adapt (e.g. before create())
67 | 	Node &candidate = _nodes[_boostCandidate];
68 | 	// std::exp, as in the header's sigmoid(): always selects the float overload.
69 | 	float noMatch = 1.0f - std::exp(_bestRepresentation * noMatchIntensity);
70 | 	float boost = candidate._dutyCycle < maxDutyCycleForLearn ? noMatch : 0.0f;
71 | 	float learnScalar = (1.0f - boost) * std::max(0.0f, candidate._output - candidate._outputPrev) + boost;
72 | 
73 | 	for (int j = 0; j < _inputSize; j++)
74 | 		candidate._center[j] += centerAlpha * learnScalar * (input[j] - candidate._center[j]);
75 | }
76 | 
77 | // Writes to `recon` (resized to _inputSize) the average of the node centers,
78 | // weighted by the outputs stored by the last encode(). All entries are 0 when
79 | // the outputs sum to zero.
80 | //
81 | // NOTE(review): the `sdr` argument is never read; the weights come from the
82 | // encoder's own stored outputs. Confirm whether decoding an arbitrary SDR was
83 | // intended before changing this, since callers may rely on the current result.
84 | void AnythingEncoder::decode(const std::vector<float> &sdr, std::vector<float> &recon) {
85 | 	(void)sdr;
86 | 
87 | 	recon.resize(_inputSize);
88 | 
89 | 	// The normaliser does not depend on the input dimension, so it is summed
90 | 	// once instead of once per reconstructed coordinate.
91 | 	float divisor = 0.0f;
92 | 
93 | 	for (int j = 0; j < _sdrSize; j++)
94 | 		divisor += _nodes[j]._output;
95 | 
96 | 	for (int i = 0; i < _inputSize; i++) {
97 | 		if (divisor == 0.0f) {
98 | 			recon[i] = 0.0f;
99 | 			continue;
100 | 		}
101 | 
102 | 		float sum = 0.0f;
103 | 
104 | 		for (int j = 0; j < _sdrSize; j++)
105 | 			sum += _nodes[j]._center[i] * _nodes[j]._output;
106 | 
107 | 		recon[i] = sum / divisor;
108 | 	}
109 | }


--------------------------------------------------------------------------------
/ContinuousHTMGPU/FindOpenCL.cmake:
--------------------------------------------------------------------------------
 1 | # - Try to find OpenCL
 2 | # This module tries to find an OpenCL implementation on your system. It supports
 3 | # AMD / ATI, Apple and NVIDIA implementations, but others should work, too.
 4 | #
 5 | # To set manually the paths, define these environment variables:
 6 | # OpenCL_INCPATH    - Include path (e.g. OpenCL_INCPATH=/opt/cuda/4.0/cuda/include)
 7 | # OpenCL_LIBPATH    - Library path (e.g. OpenCL_LIBPATH=/usr/lib64/nvidia)
 8 | #
 9 | # Once done this will define
10 | #  OPENCL_FOUND        - system has OpenCL
11 | #  OPENCL_INCLUDE_DIRS  - the OpenCL include directory
12 | #  OPENCL_LIBRARIES    - link these to use OpenCL
13 | #
14 | # WIN32 should work, but is untested
15 | 
16 | FIND_PACKAGE(PackageHandleStandardArgs)
17 | # NOTE(review): fixed values, presumably this module's own version rather than a detected OpenCL version - confirm.
18 | SET (OPENCL_VERSION_STRING "0.1.0")
19 | SET (OPENCL_VERSION_MAJOR 0)
20 | SET (OPENCL_VERSION_MINOR 1)
21 | SET (OPENCL_VERSION_PATCH 0)
22 | 
23 | IF (APPLE)
24 | 	# OS X: look for the OpenCL library and the OpenCL/ header layout.
25 | 	FIND_LIBRARY(OPENCL_LIBRARIES OpenCL DOC "OpenCL lib for OSX")
26 | 	FIND_PATH(OPENCL_INCLUDE_DIRS OpenCL/cl.h DOC "Include for OpenCL on OSX")
27 | 	FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS OpenCL/cl.hpp DOC "Include for OpenCL CPP bindings on OSX")
28 | 
29 | ELSE (APPLE)
30 | 
31 | 	IF (WIN32)
32 | 		# First pass: CMake's default search locations only.
33 | 		FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h)
34 | 		FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp)
35 | 
36 | 		# The AMD SDK currently installs both x86 and x86_64 libraries
37 | 		# This is only a hack to find out architecture
38 | 		IF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" )
39 | 			SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86_64")
40 | 		ELSE (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64")
41 | 			SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86")
42 | 		ENDIF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" )
43 | 		FIND_LIBRARY(OPENCL_LIBRARIES OpenCL.lib PATHS ${OPENCL_LIB_DIR} ENV OpenCL_LIBPATH)
44 | 		# Include directory guess: two levels above the SDK library directory.
45 | 		GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE)
46 | 
47 | 		# On Win32 search relative to the library
48 | 		FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS "${_OPENCL_INC_CAND}" ENV OpenCL_INCPATH)
49 | 		FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS "${_OPENCL_INC_CAND}" ENV OpenCL_INCPATH)
50 | 
51 | 	ELSE (WIN32)
52 | 
53 | 		# Unix style platforms
54 | 		FIND_LIBRARY(OPENCL_LIBRARIES OpenCL
55 | 			PATHS ENV LD_LIBRARY_PATH ENV OpenCL_LIBPATH
56 | 		)
57 | 		# Guess an include directory two levels above the directory that holds the library.
58 | 		GET_FILENAME_COMPONENT(OPENCL_LIB_DIR ${OPENCL_LIBRARIES} PATH)
59 | 		GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE)
60 | 
61 | 		# The AMD SDK currently does not place its headers
62 | 		# in /usr/include, therefore also search relative
63 | 		# to the library
64 | 		FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include" "/opt/AMDAPP/include" ENV OpenCL_INCPATH)
65 | 		FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include" "/opt/AMDAPP/include" ENV OpenCL_INCPATH)
66 | 
67 | 	ENDIF (WIN32)
68 | 
69 | ENDIF (APPLE)
70 | # Sets OPENCL_FOUND; the package counts as found only when both variables below are set.
71 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(OpenCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS)
72 | # The C++ bindings (cl.hpp) are optional; when found, their directory is added to the include list.
73 | IF(_OPENCL_CPP_INCLUDE_DIRS)
74 | 	SET( OPENCL_HAS_CPP_BINDINGS TRUE )
75 | 	LIST( APPEND OPENCL_INCLUDE_DIRS ${_OPENCL_CPP_INCLUDE_DIRS} )
76 | 	# This is often the same, so clean up
77 | 	LIST( REMOVE_DUPLICATES OPENCL_INCLUDE_DIRS )
78 | ENDIF(_OPENCL_CPP_INCLUDE_DIRS)
79 | 
80 | MARK_AS_ADVANCED(
81 |   OPENCL_INCLUDE_DIRS
82 | )
83 | 
84 | 


--------------------------------------------------------------------------------
/ContinuousHTMGPU/resources/htm.cl:
--------------------------------------------------------------------------------
  1 | constant sampler_t normalizedClampedNearestSampler = CLK_NORMALIZED_COORDS_TRUE | // coordinates are given in normalized units
  2 | 	CLK_ADDRESS_CLAMP_TO_EDGE | // out-of-range coordinates are clamped to the image edge
  3 | 	CLK_FILTER_NEAREST; // nearest-texel lookup, no interpolation
  4 | 	// Program-scope constants. NOTE(review): not referenced in the part of the file reviewed here - confirm where they are used.
  5 | constant float sparsity = 0.9f;
  6 | constant float intensity = 4.0f;
  7 | // Advances the two-word generator state in place and returns a pseudo-random float in [0, 1].
  8 | float randFloat(uint2* state)
  9 | {
 10 |     const float invMaxInt = 1.0f / 4294967296.0f; // 1 / 2^32
 11 |     uint x = (*state).x * 17 + (*state).y * 13123;
 12 |     (*state).x = (x << 13) ^ x;
 13 |     (*state).y ^= (x << 7);
 14 |     // Hash the mixed word once more; unsigned arithmetic wraps modulo 2^32.
 15 |     uint tmp = x * (x * x * 15731 + 74323) + 871483;
 16 |     // The upper bound 1.0 is reachable: values just below 2^32 round up to 2^32 under the default round-to-nearest float conversion.
 17 |     return convert_float(tmp) * invMaxInt;
 18 | }
 19 | // Logistic function 1 / (1 + e^-x); maps any real x into (0, 1).
 20 | float sigmoid(float x) {
 21 | 	return 1.0f / (1.0f + exp(-x));
 22 | }
 23 | // Inverse of sigmoid for x in (0, 1): -log(1/x - 1) = log(x / (1 - x)).
 24 | float logit(float x) {
 25 | 	return -log(1.0f / x - 1.0f);
 26 | }
 27 | // Per-column initialisation: zeroes the column's state texel and fills its
 28 | // receptiveFieldSize weight layers with randFloat values rescaled to the
 29 | // range between minWeight and maxWeight.
 30 | void kernel weightInit(write_only image2d_t states, write_only image2d_array_t weights, int receptiveFieldSize, uint2 seed, float minWeight, float maxWeight) {
 31 | 	int2 column = (int2)(get_global_id(0), get_global_id(1));
 32 | 	uint2 rngState = seed + (uint2)(get_global_id(0), get_global_id(1)); // seed offset by the work-item's global id
 33 | 
 34 | 	write_imagef(states, column, (float4)(0.0f));
 35 | 
 36 | 	for (int layer = 0; layer < receptiveFieldSize; layer++) {
 37 | 		float weight = randFloat(&rngState) * (maxWeight - minWeight) + minWeight;
 38 | 
 39 | 		// The third coordinate selects the layer of the 2D image array.
 40 | 		write_imagef(weights, (int4)(column.x, column.y, layer, 0), (float4)(weight));
 41 | 	}
 42 | }
 43 | // Computes one column's activation: the weighted sum of previous-layer states sampled on a grid around the column, squashed into (-1, 1).
 44 | void kernel layerActivate(read_only image2d_t prevStates, write_only image2d_t activations, read_only image2d_array_t weights, float2 layerSizeInv, float2 inputReceptiveFieldRadius, float2 inputReceptiveFieldStep) {
 45 | 	int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));
 46 | 	float2 inputCenterPositionNormalized = (float2)(columnPosition.x * layerSizeInv.x, columnPosition.y * layerSizeInv.y);
 47 | 	// layerSizeInv turns the integer column index into the normalized coordinates the sampler expects.
 48 | 	float sum = 0.0f;
 49 | 	
 50 | 	int weightIndex = 0;
 51 | 	// NOTE(review): the sample count is decided by accumulating float steps; presumably it must equal the number of weight layers written by weightInit - confirm against the host code.
 52 | 	for (float dx = -inputReceptiveFieldRadius.x; dx <= inputReceptiveFieldRadius.x; dx += inputReceptiveFieldStep.x)
 53 | 	for (float dy = -inputReceptiveFieldRadius.y; dy <= inputReceptiveFieldRadius.y; dy += inputReceptiveFieldStep.y) {
 54 | 		float2 inputPositionNormalized = inputCenterPositionNormalized + (float2)(dx, dy);
 55 | 		
 56 | 		float weight = read_imagef(weights, (int4)(columnPosition.x, columnPosition.y, weightIndex, 0)).x;
 57 | 		float prevState = read_imagef(prevStates, normalizedClampedNearestSampler, inputPositionNormalized).x;
 58 | 		
 59 | 		sum += weight * prevState;
 60 | 		
 61 | 		weightIndex++;
 62 | 	}
 63 | 	// sigmoid(sum) lies in (0, 1); rescale it to (-1, 1).
 64 | 	float activation = sigmoid(sum) * 2.0f - 1.0f;
 65 | 	
 66 | 	write_imagef(activations, columnPosition, (float4)(activation, activation, activation, activation));
 67 | }
 68 | // Local inhibition: compares this column's activation with a blend of its neighborhood's maximum and average, then squashes the difference.
 69 | void kernel layerInhibit(read_only image2d_t activations, write_only image2d_t states, float2 layerSizeInv, float2 layerReceptiveFieldRadius, float2 layerReceptiveFieldStep) {
 70 | 	int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));
 71 | 	float2 layerCenterPositionNormalized = (float2)(columnPosition.x * layerSizeInv.x, columnPosition.y * layerSizeInv.y);
 72 | 
 73 | 	float average = 0.0f;
 74 | 	// NOTE(review): starting at 0 floors the maximum at 0; if the input comes from layerActivate its values span (-1, 1) - confirm this is intended.
 75 | 	float maximum = 0.0f;
 76 | 
 77 | 	// Number of neighborhood samples visited; turns the running sum into an average below.
 78 | 	int sampleCount = 0;
 79 | 	for (float dx = -layerReceptiveFieldRadius.x; dx <= layerReceptiveFieldRadius.x; dx += layerReceptiveFieldStep.x)
 80 | 	for (float dy = -layerReceptiveFieldRadius.y; dy <= layerReceptiveFieldRadius.y; dy += layerReceptiveFieldStep.y) {
 81 | 		float2 layerPositionNormalized = layerCenterPositionNormalized + (float2)(dx, dy);
 82 | 		float activation = read_imagef(activations, normalizedClampedNearestSampler, layerPositionNormalized).x;
 83 | 
 84 | 		average += activation;
 85 | 		maximum = max(maximum, activation);
 86 | 		sampleCount++;
 87 | 	}
 88 | 
 89 | 	average /= sampleCount;
 90 | 
 91 | 	float thisActivation = read_imagef(activations, normalizedClampedNearestSampler, layerCenterPositionNormalized).x;
 92 | 
 93 | 	// The threshold is a sparsity-weighted blend of the neighborhood maximum and average;
 94 | 	// a positive error means this column's activation exceeds it.
 95 | 	float threshold = sparsity * maximum + (1.0f - sparsity) * average;
 96 | 	float error = thisActivation - threshold;
 97 | 	// intensity sets how sharply the result saturates toward -1 or 1.
 98 | 	float inhibitedResult = sigmoid(error * intensity) * 2.0f - 1.0f;
 99 | 
100 | 	write_imagef(states, columnPosition, (float4)(inhibitedResult, inhibitedResult, inhibitedResult, inhibitedResult));
101 | }
102 | // Hebbian-style update: each weight grows by alpha * (column state * sampled input state); results go to a separate output image.
103 | void kernel layerWeightUpdate(read_only image2d_t prevStates, read_only image2d_t states, read_only image2d_array_t prevWeights, write_only image2d_array_t weights, float2 layerSizeInv, float2 inputReceptiveFieldRadius, float2 inputReceptiveFieldStep, float alpha) {
104 | 	const int2 column = (int2)(get_global_id(0), get_global_id(1));
105 | 	// Center of the receptive field in the normalized coordinates used to sample prevStates.
106 | 	const float2 fieldCenter = (float2)(column.x * layerSizeInv.x, column.y * layerSizeInv.y);
107 | 
108 | 	// This column's state scales every weight change below.
109 | 	const float columnState = read_imagef(states, column).x;
110 | 
111 | 	// One weight layer per receptive-field sample, in scan order (x outer, y inner).
112 | 	int weightLayer = 0;
113 | 
114 | 	for (float offsetX = -inputReceptiveFieldRadius.x; offsetX <= inputReceptiveFieldRadius.x; offsetX += inputReceptiveFieldStep.x) {
115 | 		for (float offsetY = -inputReceptiveFieldRadius.y; offsetY <= inputReceptiveFieldRadius.y; offsetY += inputReceptiveFieldStep.y) {
116 | 			const int4 weightCoord = (int4)(column.x, column.y, weightLayer, 0);
117 | 
118 | 			const float inputState = read_imagef(prevStates, normalizedClampedNearestSampler, fieldCenter + (float2)(offsetX, offsetY)).x;
119 | 			const float oldWeight = read_imagef(prevWeights, weightCoord).x;
120 | 
121 | 			// Read from prevWeights, write to weights: the update never reads what it has just written.
122 | 			const float updated = oldWeight + alpha * (columnState * inputState);
123 | 			write_imagef(weights, weightCoord, (float4)(updated, updated, updated, updated));
124 | 
125 | 			weightLayer++;
126 | 		}
127 | 	}
128 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # ![CHTMGPU Logo](http://i1218.photobucket.com/albums/dd401/222464/CHTMLOGOSMALL.png)
  2 | 
  3 | Continuous HTM GPU
  4 | =======
  5 | 
  6 | Runs a continuous (not discrete) version of HTM (Hierarchical Temporal Memory, from Numenta) on the GPU, and uses it for reinforcement learning.
  7 | Follow updates on my blog! [https://cireneikual.wordpress.com/](https://cireneikual.wordpress.com/)
  8 | 
  9 | Install
 10 | -----------
 11 | 
 12 | ContinuousHTMGPU relies on 2 external libraries: OpenCL and SFML. SFML is used only for visualization, and can be removed if desired.
 13 | 
 14 | To get OpenCL, refer to your graphics hardware vendor website (for AMD and Nvidia), or CPU vendor (e.g. the Intel OpenCL SDK).
 15 | Works best with AMD cards (best OpenCL support).
 16 | 
 17 | To get SFML, choose a package from here: [http://www.sfml-dev.org/download/sfml/2.2/](http://www.sfml-dev.org/download/sfml/2.2/)
 18 | 
 19 | ContinuousHTMGPU uses CMake as the build system. You can get CMake here: [http://www.cmake.org/download/](http://www.cmake.org/download/)
 20 | 
 21 | Set CMake's source code directory to the ContinuousHTMGPU root directory (the one that contains the /source folder as well as a CMakeLists.txt).
 22 | 
 23 | Set CMake's build directory to the same directory as in the previous step. Optionally, you can also set it to a folder of your choice, but this may make browsing the source more difficult if you are using Visual Studio.
 24 | 
 25 | Then press configure, and choose your compiler.
 26 | 
 27 | It will likely error. If this happens, no fear, there is a fix!
 28 | 
 29 | You can specify the paths where CMake looks manually. They will appear in red if they need to be set in the CMake GUI.
 30 | 
 31 | SFML is a bit tricky: you have to add a custom variable entry named SFML_ROOT and set it to the SFML root directory.
 32 | 
 33 | Once the configuration no longer results in errors, you can hit generate. This will generate the files necessary for your compiler.
 34 | 
 35 | You should then be able to compile and execute the program. If you are using Visual Studio, you may have to set your startup project to the ERL project, and you may have to add the source files to the project.
 36 | 
 37 | Quick Start
 38 | -----------
 39 | 
 40 | If you want to use ContinuousHTMGPU in your own project without visualization, you can strip out the SFML visualizer if desired by simply removing the "vis" directory.
 41 | 
 42 | First, include HTMRL.h:
 43 | 
 44 | ```cpp
 45 | #include <htm/HTMRL.h>
 46 | ```
 47 | 
 48 | Next, you have to create the compute system. You can specify either GPU or CPU (GPU is recommended if you have it):
 49 | 
 50 | ```cpp
 51 | sys::ComputeSystem cs;
 52 | 
 53 | cs.create(sys::ComputeSystem::_gpu);
 54 | ```
 55 | 
 56 | With that created, you need to load the OpenCL program:
 57 | 
 58 | ```cpp
 59 | sys::ComputeProgram program;
 60 | 
 61 | program.loadFromFile("resources/htmrl.cl", cs);
 62 | ```
 63 | 
 64 | Then create the agent. Fill out a vector of LayerDesc objects to describe the structure of your agent, and specify the types of the inputs (state/action/unused). In the following example, action nodes are selected randomly:
 65 | 
 66 | ```cpp
 67 | htm::HTMRL agent;
 68 | 
 69 | std::vector<htm::HTMRL::LayerDesc> layerDescs(5);
 70 | 
 71 | layerDescs[0]._width = 64;
 72 | layerDescs[0]._height = 64;
 73 | 
 74 | layerDescs[1]._width = 44;
 75 | layerDescs[1]._height = 44;
 76 | 
 77 | layerDescs[2]._width = 32;
 78 | layerDescs[2]._height = 32;
 79 | 
 80 | layerDescs[3]._width = 20;
 81 | layerDescs[3]._height = 20;
 82 | 
 83 | layerDescs[4]._width = 16;
 84 | layerDescs[4]._height = 16;
 85 | 
 86 | std::vector<htm::HTMRL::InputType> inputTypes(64 * 64, htm::HTMRL::_state);
 87 | 
 88 | for (int x = 0; x < 64; x++) {
 89 | 	for (int y = 32; y < 64; y++) {
 90 | 		inputTypes[x + y * 64] = htm::HTMRL::_unused;
 91 | 	}
 92 | }
 93 | 
 94 | std::uniform_int_distribution<int> actionXDist(0, 63);
 95 | std::uniform_int_distribution<int> actionYDist(33, 63);
 96 | 
 97 | std::vector<int> actionIndices;
 98 | 
 99 | for (int i = 0; i < 8; i++) {
100 | 	int x = actionXDist(generator);
101 | 	int y = actionYDist(generator);
102 | 
103 | 	if (inputTypes[x + y * 64] == htm::HTMRL::_action)
104 | 		continue;
105 | 
106 | 	inputTypes[x + y * 64] = htm::HTMRL::_action;
107 | 
108 | 	actionIndices.push_back(x + y * 64);
109 | }
110 | 
111 | agent.createRandom(cs, program, 64, 64, 4, layerDescs, inputTypes, -0.05f, 0.05f, -0.05f, 0.05f, generator);
112 | ``` 
113 | 
114 | Then to use the agent, call:
115 | 
116 | ```cpp
117 | agent.setInput(x, y, <value>);
118 | ```
119 | 
120 | to set the value of an input, and:
121 | 
122 | ```cpp
123 | agent.getOutput(actionIndices[i]); // actionIndices[i] is the index of the output, from the above example
124 | ```
125 | 
126 | to get an output.
127 | 
128 | Step the simulation like this:
129 | 
130 | ```cpp
131 | agent.step(cs, reward, 0.01f, 0.01f, 0.01f, 0.05f, 0.01f, 0.05f, 0.2f, 0.5f, 0.5f, 0.5f, 0.01f, 0.2f, 0.992f, 0.15f, 0.15f, 120, 10, 2, generator);
132 | ```
133 | 
134 | The parameters above are suggested values.
135 | 
136 | Visualization
137 | -----------
138 | 
139 | Instructions coming soon! For now just take a look at the example code, Main.cpp.
140 | 
141 | License
142 | -----------
143 | 
144 | ContinuousHTMGPU
145 | Copyright (C) 2014-2015 Eric Laukien
146 | 
147 | This software is provided 'as-is', without any express or implied
148 | warranty.  In no event will the authors be held liable for any damages
149 | arising from the use of this software.
150 | 
151 | Permission is granted to anyone to use this software for any purpose,
152 | including commercial applications, and to alter it and redistribute it
153 | freely, subject to the following restrictions:
154 | 
155 | 1. The origin of this software must not be misrepresented; you must not
156 | 	claim that you wrote the original software. If you use this software
157 | 	in a product, an acknowledgment in the product documentation would be
158 | 	appreciated but is not required.
159 | 2. Altered source versions must be plainly marked as such, and must not be
160 | 	misrepresented as being the original software.
161 | 3. This notice may not be removed or altered from any source distribution.
162 | 
163 | ------------------------------------------------------------------------------
164 | 
165 | ContinuousHTMGPU uses the following external libraries:
166 | 
167 | SFML - source code is licensed under the zlib/png license.
168 | OpenCL
169 | 
170 | 


--------------------------------------------------------------------------------
/ContinuousHTMGPU/resources/cae.cl:
--------------------------------------------------------------------------------
  1 | constant sampler_t normalizedClampedNearestSampler = CLK_NORMALIZED_COORDS_TRUE |
  2 | 	CLK_ADDRESS_CLAMP_TO_EDGE |
  3 | 	CLK_FILTER_NEAREST;
  4 | 	// Pixel-coordinate sampler (used by weightDeltaReduce); note it clamps with CLK_ADDRESS_CLAMP rather than CLK_ADDRESS_CLAMP_TO_EDGE.
  5 | constant sampler_t unnormalizedClampedNearestSampler = CLK_NORMALIZED_COORDS_FALSE |
  6 | 	CLK_ADDRESS_CLAMP |
  7 | 	CLK_FILTER_NEAREST;
  8 | // Logistic function 1 / (1 + e^-x).
  9 | float sigmoid(float x) {
 10 | 	return 1.0f / (exp(-x) + 1.0f);
 11 | }
 12 | // Forward pass for one output cell (x, y, map z): bias plus the kernel-weighted sum over the previous layer's maps, passed through sigmoid().
 13 | void kernel layerActivateForward(read_only image2d_array_t prevLayerOutputs, read_only image2d_t layerWeights,
 14 | 	write_only image2d_array_t layerOutputs, int2 kernelSize, int prevNumMaps, float2 layerSizeInv, float2 prevLayerSizeInv)
 15 | {
 16 | 	int3 position = (int3)(get_global_id(0), get_global_id(1), get_global_id(2));
 17 | 
 18 | 	float2 positionNormalized2D = (float2)(position.x * layerSizeInv.x, position.y * layerSizeInv.y);
 19 | 
 20 | 	// First weight is bias
 21 | 	float sum = read_imagef(layerWeights, (int2)(0, position.z)).x;
 22 | 	// The remaining weights of row position.z follow in x-outer, y-middle, map-inner order.
 23 | 	int weightIndex = 1;
 24 | 	// Tap offsets are scaled per axis: x by prevLayerSizeInv.x, y by prevLayerSizeInv.y.
 25 | 	for (int x = 0; x < kernelSize.x; x++)
 26 | 	for (int y = 0; y < kernelSize.y; y++)
 27 | 	for (int m = 0; m < prevNumMaps; m++) {
 28 | 		float weight = read_imagef(layerWeights, (int2)(weightIndex, position.z)).x;
 29 | 		float prevLayerOutput = read_imagef(prevLayerOutputs, normalizedClampedNearestSampler, (float4)(positionNormalized2D.x + (x - kernelSize.x * 0.5f) * prevLayerSizeInv.x, positionNormalized2D.y + (y - kernelSize.y * 0.5f) * prevLayerSizeInv.y, m, 0)).x;
 30 | 		
 31 | 		sum += weight * prevLayerOutput;
 32 | 		
 33 | 		weightIndex++;
 34 | 	}
 35 | 	
 36 | 	float output = sigmoid(sum);
 37 | 	
 38 | 	write_imagef(layerOutputs, (int4)(position.x, position.y, position.z, 0), (float4)(output, output, output, output));
 39 | }
 40 | // Reconstructs one cell of the previous layer from this layer's outputs, stores the reconstruction error, and moves that cell's bias by alpha * error.
 41 | void kernel layerActivateBackward(read_only image2d_array_t layerOutputs, read_only image2d_array_t prevLayerOutputs,
 42 | 	read_only image2d_t layerWeights, read_only image2d_array_t prevLayerBiases, write_only image2d_array_t newPrevLayerBiases, write_only image2d_array_t prevLayerErrors,
 43 | 	int2 kernelSize, int2 reverseKernelSize, int numMaps, int prevNumMaps, float2 layerSizeInv, float2 prevLayerSizeInv, float alpha)
 44 | {
 45 | 	int3 prevPosition = (int3)(get_global_id(0), get_global_id(1), get_global_id(2));
 46 | 	int4 prevCoord = (int4)(prevPosition.x, prevPosition.y, prevPosition.z, 0);
 47 | 
 48 | 	float2 prevPositionNormalized2D = (float2)(prevPosition.x * prevLayerSizeInv.x, prevPosition.y * prevLayerSizeInv.y);
 49 | 
 50 | 	float prevLayerBias = read_imagef(prevLayerBiases, prevCoord).x;
 51 | 
 52 | 	float sum = prevLayerBias;
 53 | 
 54 | 	// NOTE(review): the weight column is strided by reverseKernelSize.y, while layerActivateForward lays weights out with kernelSize.y (unused here) - confirm the two always match.
 55 | 	for (int x = 0; x < reverseKernelSize.x; x++)
 56 | 	for (int y = 0; y < reverseKernelSize.y; y++)
 57 | 	for (int m = 0; m < numMaps; m++) {
 58 | 		float weight = read_imagef(layerWeights, (int2)(prevPosition.z + y * prevNumMaps + x * prevNumMaps * reverseKernelSize.y + 1, m)).x;
 59 | 		float layerOutput = read_imagef(layerOutputs, normalizedClampedNearestSampler, (float4)(prevPositionNormalized2D.x + (x - reverseKernelSize.x * 0.5f) * layerSizeInv.x, prevPositionNormalized2D.y + (y - reverseKernelSize.y * 0.5f) * layerSizeInv.y, m, 0)).x;
 60 | 		
 61 | 		sum += weight * layerOutput;
 62 | 	}
 63 | 
 64 | 	float output = sigmoid(sum);
 65 | 	float target = read_imagef(prevLayerOutputs, prevCoord).x;
 66 | 
 67 | 	// Plain difference; the sigmoid-derivative factor output * (1 - output) is not applied.
 68 | 	float error = target - output;
 69 | 
 70 | 	// Update prev layer bias
 71 | 	float newPrevLayerBias = prevLayerBias + alpha * error;
 72 | 
 73 | 	write_imagef(newPrevLayerBiases, prevCoord, (float4)(newPrevLayerBias, newPrevLayerBias, newPrevLayerBias, newPrevLayerBias));
 74 | 
 75 | 	// Store error
 76 | 	write_imagef(prevLayerErrors, prevCoord, (float4)(error, error, error, error));
 77 | }
 78 | // Computes the bias and weight deltas contributed by one output cell (x, y, map z) and stores them, unsummed, in weightDeltaSummationMap.
 79 | void kernel layerWeightUpdate(read_only image2d_array_t layerOutputs, read_only image2d_array_t prevLayerOutputs, read_only image2d_array_t prevLayerErrors,
 80 | 	read_only image2d_t layerWeights, write_only image2d_array_t weightDeltaSummationMap,
 81 | 	int2 kernelSize, int numMaps, int prevNumMaps, float2 layerSizeInv, float2 prevLayerSizeInv, int numWeightsPerMap, float alpha)
 82 | {
 83 | 	int3 position = (int3)(get_global_id(0), get_global_id(1), get_global_id(2));
 84 | 	
 85 | 	float2 positionNormalized2D = (float2)(position.x * layerSizeInv.x, position.y * layerSizeInv.y);
 86 | 	
 87 | 	float layerOutput = read_imagef(layerOutputs, (int4)(position.x, position.y, position.z, 0)).x;
 88 | 
 89 | 	int weightIndex = 1; // 1 since we are skipping the bias for the error backpropagation
 90 | 	
 91 | 	// Backpropagate error
 92 | 	float error = 0.0f;
 93 | 	
 94 | 	for (int x = 0; x < kernelSize.x; x++)
 95 | 	for (int y = 0; y < kernelSize.y; y++)
 96 | 	for (int m = 0; m < prevNumMaps; m++) {
 97 | 		float weight = read_imagef(layerWeights, (int2)(weightIndex, position.z)).x;
 98 | 		float prevLayerError = read_imagef(prevLayerErrors, normalizedClampedNearestSampler, (float4)(positionNormalized2D.x + (x - kernelSize.x * 0.5f) * prevLayerSizeInv.x, positionNormalized2D.y + (y - kernelSize.y * 0.5f) * prevLayerSizeInv.y, m, 0)).x;
 99 | 		
100 | 		error += weight * prevLayerError;
101 | 		
102 | 		weightIndex++;
103 | 	}
104 | 	// Scale by the sigmoid derivative at this cell's output.
105 | 	error *= layerOutput * (1.0f - layerOutput);
106 | 	
107 | 	// Bias delta; the current bias value is not needed to compute it.
108 | 	float biasDelta = alpha * error;
109 | 	
110 | 	// Deltas for map z occupy numWeightsPerMap consecutive layers of the output, bias first.
111 | 	int thisWeightsStart = position.z * numWeightsPerMap;
112 | 	
113 | 	write_imagef(weightDeltaSummationMap, (int4)(position.x, position.y, thisWeightsStart, 0), (float4)(biasDelta, biasDelta, biasDelta, biasDelta));
114 | 	
115 | 	weightIndex = 1;
116 | 	
117 | 	// Update all non-bias weights
118 | 	for (int x = 0; x < kernelSize.x; x++)
119 | 	for (int y = 0; y < kernelSize.y; y++)
120 | 	for (int m = 0; m < prevNumMaps; m++) {
121 | 		// Both reads below sample the same previous-layer location.
122 | 		float4 samplePosition = (float4)(positionNormalized2D.x + (x - kernelSize.x * 0.5f) * prevLayerSizeInv.x, positionNormalized2D.y + (y - kernelSize.y * 0.5f) * prevLayerSizeInv.y, m, 0);
123 | 		float prevLayerOutput = read_imagef(prevLayerOutputs, normalizedClampedNearestSampler, samplePosition).x;
124 | 		float prevLayerError = read_imagef(prevLayerErrors, normalizedClampedNearestSampler, samplePosition).x;
125 | 		
126 | 		float weightDelta = alpha * (error * prevLayerOutput + prevLayerError * layerOutput);
127 | 		
128 | 		write_imagef(weightDeltaSummationMap, (int4)(position.x, position.y, thisWeightsStart + weightIndex, 0), (float4)(weightDelta, weightDelta, weightDelta, weightDelta));
129 | 		
130 | 		weightIndex++;
131 | 	}
132 | }
133 | // Sums each reduceStep.x-by-reduceStep.y tile of every delta layer into a single texel of reducedWeightDeltas.
134 | void kernel weightDeltaReduce(read_only image2d_array_t expandedWeightDeltas, write_only image2d_array_t reducedWeightDeltas, int totalNumWeightsPerLayer, int2 reduceStep) {
135 | 	const int2 reduced = (int2)(get_global_id(0), get_global_id(1));
136 | 	// First texel of the source tile that maps onto this output texel.
137 | 	const int2 tileOrigin = (int2)(reduced.x * reduceStep.x, reduced.y * reduceStep.y);
138 | 
139 | 	for (int layer = 0; layer < totalNumWeightsPerLayer; layer++) {
140 | 		float total = 0.0f;
141 | 		for (int tx = 0; tx < reduceStep.x; tx++)
142 | 		for (int ty = 0; ty < reduceStep.y; ty++)
143 | 			total += read_imagef(expandedWeightDeltas, unnormalizedClampedNearestSampler, (int4)(tileOrigin.x + tx, tileOrigin.y + ty, layer, 0)).x;
144 | 
145 | 		write_imagef(reducedWeightDeltas, (int4)(reduced.x, reduced.y, layer, 0), (float4)(total, total, total, total));
146 | 	}
147 | }
148 | // Applies the reduced deltas to map z's weights: newLayerWeights(i, z) = layerWeights(i, z) + delta, for every weight i of the map.
149 | void kernel mapsDeltaUpdate(read_only image2d_array_t reducedWeightDeltas, read_only image2d_t layerWeights, write_only image2d_t newLayerWeights, int numWeightsPerMap) {
150 | 	const int3 cell = (int3)(get_global_id(0), get_global_id(1), get_global_id(2));
151 | 	// NOTE(review): the write coordinate ignores cell.x / cell.y, so this presumably runs with a 1x1 extent in those dimensions - confirm against the host code.
152 | 	// Deltas for map z sit in layers [z * numWeightsPerMap, (z + 1) * numWeightsPerMap) of reducedWeightDeltas.
153 | 	const int firstDeltaLayer = cell.z * numWeightsPerMap;
154 | 
155 | 	for (int w = 0; w < numWeightsPerMap; w++) {
156 | 		const int2 weightCoord = (int2)(w, cell.z);
157 | 		const float delta = read_imagef(reducedWeightDeltas, (int4)(cell.x, cell.y, firstDeltaLayer + w, 0)).x;
158 | 		const float updated = read_imagef(layerWeights, weightCoord).x + delta;
159 | 		write_imagef(newLayerWeights, weightCoord, (float4)(updated, updated, updated, updated));
160 | 	}
161 | }
162 | // Max-pools each downsampleSize.x-by-downsampleSize.y window of a map into one output texel.
163 | void kernel layerDownsample(read_only image2d_array_t layerOutputs, write_only image2d_array_t downsampledOutputs, int2 downsampleSize) {
164 | 	const int3 cell = (int3)(get_global_id(0), get_global_id(1), get_global_id(2));
165 | 	const int2 windowOrigin = (int2)(cell.x * downsampleSize.x, cell.y * downsampleSize.y);
166 | 
167 | 	// The running maximum starts at 0, so the stored result is never negative.
168 | 	float best = 0.0f;
169 | 
170 | 	for (int wx = 0; wx < downsampleSize.x; wx++) {
171 | 		for (int wy = 0; wy < downsampleSize.y; wy++)
172 | 			best = max(best, read_imagef(layerOutputs, (int4)(windowOrigin.x + wx, windowOrigin.y + wy, cell.z, 0)).x);
173 | 	}
174 | 
175 | 	write_imagef(downsampledOutputs, (int4)(cell.x, cell.y, cell.z, 0), (float4)(best, best, best, best));
176 | }


--------------------------------------------------------------------------------
/ContinuousHTMGPU/source/htm/HTMRL.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <system/ComputeSystem.h>
  4 | #include <system/ComputeProgram.h>
  5 | 
  6 | #include <SFML/Graphics.hpp>
  7 | 
  8 | #include <htm/AnythingEncoder.h>
  9 | 
 10 | #include <vector>
 11 | #include <list>
 12 | 
 13 | #include <random>
 14 | 
 15 | #include <memory>
 16 | 
 17 | namespace htm {
 18 | 	class HTMRL { // Reinforcement-learning agent built on a stack of HTM layers run through OpenCL (see README)
 19 | 	public:
 20 | 		enum InputType { // Role assigned to each input cell
 21 | 			_state, _action, _unused
 22 | 		};
 23 | 		// Construction parameters for one layer; the constructor below supplies the defaults.
 24 | 		struct LayerDesc {
 25 | 			int _width, _height;
 26 | 
 27 | 			int _receptiveFieldRadius;
 28 | 			int _lateralConnectionRadius;
 29 | 			int _inhibitionRadius;
 30 | 
 31 | 			int _cellsInColumn;
 32 | 
 33 | 			int _numSegmentsPerCell;
 34 | 
 35 | 			float _qInfluenceMultiplier;
 36 | 
 37 | 			int _numColumnStateBlurPasses;
 38 | 			float _columnStateBlurKernelWidthMultiplier;
 39 | 
 40 | 			int _columnInfluenceRadius;
 41 | 
 42 | 			float _qImportance;
 43 | 			// Defaults: 16x16 columns, 3 cells per column, 3 segments per cell.
 44 | 			LayerDesc()
 45 | 				: _width(16), _height(16), _receptiveFieldRadius(3), _lateralConnectionRadius(4), _inhibitionRadius(2), _cellsInColumn(3), _numSegmentsPerCell(3),
 46 | 				_qInfluenceMultiplier(1.0f), _numColumnStateBlurPasses(1), _columnStateBlurKernelWidthMultiplier(1.0f), _columnInfluenceRadius(5), _qImportance(1.0f)
 47 | 			{}
 48 | 		};
 49 | 		// Logistic function. NOTE(review): relies on std::exp, but <cmath> is not among this header's direct includes.
 50 | 		static float sigmoid(float x) {
 51 | 			return 1.0f / (1.0f + std::exp(-x));
 52 | 		}
 53 | 	// Per-layer OpenCL images. Members suffixed Prev are presumably the previous-step copies of their counterparts - confirm in HTMRL.cpp.
 54 | 	private:
 55 | 		struct Layer {
 56 | 			cl::Image2D _columnActivations;
 57 | 	
 58 | 			cl::Image2D _columnStatesPrev;
 59 | 			cl::Image2D _columnStates;
 60 | 
 61 | 			cl::Image2D _inputBiasesPrev;
 62 | 			cl::Image2D _inputBiases;
 63 | 
 64 | 			cl::Image2D _reconstruction;
 65 | 
 66 | 			cl::Image2D _columnPredictionsPrev;
 67 | 			cl::Image2D _columnPredictions;
 68 | 
 69 | 			cl::Image3D _columnFeedForwardWeightsPrev;
 70 | 			cl::Image3D _columnFeedForwardWeights;
 71 | 
 72 | 			cl::Image3D _cellWeightsPrev;
 73 | 			cl::Image3D _cellWeights;
 74 | 
 75 | 			cl::Image3D _cellStatesPrev;
 76 | 			cl::Image3D _cellStates;
 77 | 
 78 | 			cl::Image3D _segmentStatesPrev;
 79 | 			cl::Image3D _segmentStates;
 80 | 
 81 | 			//cl::Image3D _segmentWeightsPrev;
 82 | 			//cl::Image3D _segmentWeights;
 83 | 
 84 | 			cl::Image3D _cellQValuesPrev;
 85 | 			cl::Image3D _cellQValues;
 86 | 
 87 | 			cl::Image2D _columnQValues;
 88 | 
 89 | 			cl::Image2D _columnPrevValues;
 90 | 			cl::Image2D _columnPrevValuesPrev;
 91 | 
 92 | 			// Contains just tdError
 93 | 			cl::Image2D _columnTdErrors;
 94 | 
 95 | 			cl::Image3D _cellPredictionsPrev;
 96 | 			cl::Image3D _cellPredictions;
 97 | 		};
 98 | 		// Input grid dimensions; flat input indices are x + y * _inputWidth (see setInput).
 99 | 		int _inputWidth, _inputHeight;
100 | 
101 | 		std::vector<LayerDesc> _layerDescs;
102 | 		std::vector<Layer> _layers;
103 | 		// OpenCL kernel objects (presumably created from the program given to createRandom() - confirm in HTMRL.cpp).
104 | 		cl::Kernel _layerColumnActivateKernel;
105 | 		cl::Kernel _layerColumnInhibitKernel;
106 | 		cl::Kernel _layerCellActivateKernel;
107 | 		cl::Kernel _layerCellWeightUpdateKernel;
108 | 		cl::Kernel _layerCellWeightUpdateLastKernel;
109 | 		cl::Kernel _layerCellPredictKernel;
110 | 		cl::Kernel _layerCellPredictLastKernel;
111 | 		cl::Kernel _layerColumnWeightUpdateKernel;
112 | 		cl::Kernel _layerColumnPredictionKernel;
113 | 		cl::Kernel _layerColumnQKernel;
114 | 		cl::Kernel _layerColumnQLastKernel;
115 | 		cl::Kernel _layerAssignQKernel;
116 | 
117 | 		cl::Kernel _reconstructInputKernel;
118 | 		cl::Kernel _inputBiasUpdateKernel;
119 | 	
120 | 		// For blur
121 | 		cl::Kernel _gaussianBlurXKernel;
122 | 		cl::Kernel _gaussianBlurYKernel;
123 | 		// Host-side input values, written through setInput() and read back by getOutput().
124 | 		std::vector<float> _input;
125 | 		// Role of each input cell; presumably a copy of the inputTypes passed to createRandom() - confirm in HTMRL.cpp.
126 | 		std::vector<InputType> _inputTypes;
127 | 
128 | 		std::vector<float> _output;
129 | 		std::vector<float> _prediction;
130 | 		std::vector<float> _exploratoryOutput;
131 | 		std::vector<float> _prevOutput;
132 | 		std::vector<float> _prevOutputExploratory;
133 | 		std::vector<float> _prevInput;
134 | 
135 | 		float _prevMaxQ;
136 | 		float _prevValue;
137 | 		float _prevPrevValue;
138 | 		float _prevQ;
139 | 		float _prevTDError;
140 | 
141 | 		cl::Image2D _inputImage;
142 | 		cl::Image2D _reconstructedPrediction;
143 | 
144 | 		int _addReplaySampleStepCounter;
145 | 
146 | 		std::list<cl::Image2D> _inputReplayChain;
147 | 
148 | 		void stepBegin(sys::ComputeSystem &cs, int addReplaySampleSteps, int maxReplayChainSize);
149 | 
150 | 		void activate(std::vector<float> &input, sys::ComputeSystem &cs, float reward, float alpha, float gamma, float columnDecay, float cellStateDecay, float columnConnectionAlpha, float columnConnectionBeta, float columnConnectionGamma, float cellConnectionAlpha, float cellConnectionBeta, float cellConnectionGamma, float cellConnectionTemperature, float cellWeightEligibilityDecay, int maxReplayChainSize, int numReplaySamples, int addSampleSteps, unsigned long seed);
151 | 	
152 | 		void learnSpatialReplay(sys::ComputeSystem &cs, float cellStateDecay, float alpha, float beta, float gamma, int maxReplayChainSize, int numReplaySamples, unsigned long seed);
153 | 		
154 | 		void learnTemporal(sys::ComputeSystem &cs, float tdError, float cellConnectionAlpha, float cellConnectionBeta, float cellConnectionGamma, float cellConnectionTemperature, float cellWeightEligibilityDecay, unsigned long seed);
155 | 
156 | 		void initLayer(sys::ComputeSystem &cs, cl::Kernel &initPartOneKernel, cl::Kernel &initPartTwoKernel, cl::Kernel &initPartThreeKernel, int inputWidth, int inputHeight, int inputCellsPerColumn, Layer &layer, const LayerDesc &layerDesc, bool isTopmost, float minInitWeight, float maxInitWeight, float minInitCenter, float maxInitCenter, float minInitWidth, float maxInitWidth, std::mt19937 &generator);
157 | 		void spatialPoolLayer(sys::ComputeSystem &cs, cl::Image2D &prevLayerOutput, int prevLayerWidth, int prevLayerHeight, Layer &layer, const LayerDesc &layerDesc, float columnDecay, std::mt19937 &generator);
158 | 		void cellActivateLayer(sys::ComputeSystem &cs, Layer &layer, const LayerDesc &layerDesc, float cellStateDecay, std::mt19937 &generator);
159 | 		void predictLayer(sys::ComputeSystem &cs, cl::Image2D &nextLayerPrediction, cl::Image2D &nextLayerPredictionPrev, int nextLayerWidth, int nextLayerHeight, Layer &layer, const LayerDesc &layerDesc, std::mt19937 &generator);
160 | 		void predictLayerLast(sys::ComputeSystem &cs, Layer &layer, const LayerDesc &layerDesc, std::mt19937 &generator);
161 | 		void determineLayerColumnQ(sys::ComputeSystem &cs, Layer &layer, LayerDesc &layerDesc, Layer &nextLayer, LayerDesc &nextLayerDesc);
162 | 		void determineLayerColumnQLast(sys::ComputeSystem &cs, Layer &layer, LayerDesc &layerDesc);
163 | 		void assignLayerQ(sys::ComputeSystem &cs, Layer &layer, LayerDesc &layerDesc, float alpha);
164 | 		void learnLayerSpatial(sys::ComputeSystem &cs, Layer &layer, cl::Image2D &prevLayerOutput, int prevLayerWidth, int prevLayerHeight, const LayerDesc &layerDesc, float alpha, float beta, float gamma, std::mt19937 &generator);
165 | 		void learnLayerTemporal(sys::ComputeSystem &cs, Layer &layer, cl::Image2D &prevLayerOutput, int prevLayerWidth, int prevLayerHeight, cl::Image2D &nextLayerPrediction, int nextLayerWidth, int nextLayerHeight, const LayerDesc &layerDesc, float tdError, float cellConnectionAlpha, float cellConnectionBeta, float cellConnectionGamma, float cellConnectionTemperature, float cellWeightEligibilityDecay, std::mt19937 &generator);
166 | 		void learnLayerTemporalLast(sys::ComputeSystem &cs, Layer &layer, cl::Image2D &prevLayerOutput, int prevLayerWidth, int prevLayerHeight, const LayerDesc &layerDesc, float tdError, float cellConnectionAlpha, float cellConnectionBeta, float cellConnectionGamma, float cellConnectionTemperature, float cellWeightEligibilityDecay, std::mt19937 &generator);
167 | 		void dutyCycleLayerUpdate(sys::ComputeSystem &cs, Layer &layer, const LayerDesc &layerDesc, float activationDutyCycleDecay, float stateDutyCycleDecay);
168 | 
169 | 		// Reconstruction
170 | 		void getReconstructedPrediction(std::vector<float> &prediction, sys::ComputeSystem &cs);
171 | 	
172 | 		// Blur
173 | 		void gaussianBlur(sys::ComputeSystem &cs, cl::Image2D &source, cl::Image2D &ping, cl::Image2D &pong, int imageSizeX, int imageSizeY, int passes, float kernelWidth);
174 | 
175 | 		// Q
176 | 		float retreiveQ(sys::ComputeSystem &cs);
177 | 	// Public interface: createRandom() sets the agent up and step() advances it by one update with the given reward (usage in README).
178 | 	public:
179 | 		void createRandom(sys::ComputeSystem &cs, sys::ComputeProgram &program, int inputWidth, int inputHeight, int reconstructionReceptiveRadius, const std::vector<LayerDesc> &layerDescs, const std::vector<InputType> &inputTypes, float minInitWeight, float maxInitWeight, float minInitCenter, float maxInitCenter, std::mt19937 &generator);
180 | 	
181 | 		void step(sys::ComputeSystem &cs, float reward, float reconstructionAlpha, float columnDecay, float cellStateDecay, float columnConnectionAlpha, float columnConnectionBeta, float columnConnectionGamma, float cellConnectionAlpha, float cellConnectionBeta, float cellConnectionGamma, float cellConnectionTemperature, float cellWeightEligibilityDecay, float alpha, float gamma, float breakChance, float perturbationStdDev, int maxReplayChainSize, int numReplaySamples, int addReplaySampleSteps, std::mt19937 &generator);
182 | 		// Read-only accessors.
183 | 		int getInputWidth() const {
184 | 			return _inputWidth;
185 | 		}
186 | 
187 | 		int getInputHeight() const {
188 | 			return _inputHeight;
189 | 		}
190 | 
191 | 		const std::vector<LayerDesc> &getLayerDescs() const {
192 | 			return _layerDescs;
193 | 		}
194 | 		// Sets one input value by flat index; the index is not range-checked.
195 | 		void setInput(int i, float value) {
196 | 			_input[i] = value;
197 | 		}
198 | 		// Same, addressed by grid coordinate (row-major: x + y * _inputWidth).
199 | 		void setInput(int x, int y, float value) {
200 | 			setInput(x + y * _inputWidth, value);
201 | 		}
202 | 		// NOTE(review): reads _input, not _output - presumably step() writes the chosen actions back into _input; confirm in HTMRL.cpp.
203 | 		float getOutput(int i) const {
204 | 			return _input[i];
205 | 		}
206 | 		// Same, addressed by grid coordinate (row-major: x + y * _inputWidth).
207 | 		float getOutput(int x, int y) const {
208 | 			return getOutput(x + y * _inputWidth);
209 | 		}
210 | 
211 | 		void exportCellData(sys::ComputeSystem &cs, std::vector<std::shared_ptr<sf::Image>> &images, unsigned long seed) const;
212 | 	};
213 | }


--------------------------------------------------------------------------------
/ContinuousHTMGPU/FindSFML.cmake:
--------------------------------------------------------------------------------
  1 | # This script locates the SFML library
  2 | # ------------------------------------
  3 | #
  4 | # Usage
  5 | # -----
  6 | #
  7 | # When you try to locate the SFML libraries, you must specify which modules you want to use (system, window, graphics, network, audio, main).
  8 | # If none is given, the SFML_LIBRARIES variable will be empty and you'll end up linking to nothing.
  9 | # example:
 10 | #   find_package(SFML COMPONENTS graphics window system) // find the graphics, window and system modules
 11 | #
 12 | # You can enforce a specific version, either MAJOR.MINOR or only MAJOR.
 13 | # If nothing is specified, the version won't be checked (ie. any version will be accepted).
 14 | # example:
 15 | #   find_package(SFML COMPONENTS ...)     // no specific version required
 16 | #   find_package(SFML 2 COMPONENTS ...)   // any 2.x version
 17 | #   find_package(SFML 2.4 COMPONENTS ...) // version 2.4 or greater
 18 | #
 19 | # By default, the dynamic libraries of SFML will be found. To find the static ones instead,
 20 | # you must set the SFML_STATIC_LIBRARIES variable to TRUE before calling find_package(SFML ...).
 21 | # In case of static linking, the SFML_STATIC macro will also be defined by this script.
 22 | # example:
 23 | #   set(SFML_STATIC_LIBRARIES TRUE)
 24 | #   find_package(SFML 2 COMPONENTS network system)
 25 | #
 26 | # On Mac OS X if SFML_STATIC_LIBRARIES is not set to TRUE then by default CMake will search for frameworks unless
 27 | # CMAKE_FIND_FRAMEWORK is set to "NEVER" for example. Please refer to CMake documentation for more details.
 28 | # Moreover, keep in mind that SFML frameworks are only available as release libraries unlike dylibs which
 29 | # are available for both release and debug modes.
 30 | #
 31 | # If SFML is not installed in a standard path, you can use the SFML_ROOT CMake (or environment) variable
 32 | # to tell CMake where SFML is.
 33 | #
 34 | # Output
 35 | # ------
 36 | #
 37 | # This script defines the following variables:
 38 | # - For each specified module XXX (system, window, graphics, network, audio, main):
 39 | #   - SFML_XXX_LIBRARY_DEBUG:   the name of the debug library of the xxx module (set to SFML_XXX_LIBRARY_RELEASE if no debug version is found)
 40 | #   - SFML_XXX_LIBRARY_RELEASE: the name of the release library of the xxx module (set to SFML_XXX_LIBRARY_DEBUG if no release version is found)
 41 | #   - SFML_XXX_LIBRARY:         the name of the library to link to for the xxx module (includes both debug and optimized names if necessary)
 42 | #   - SFML_XXX_FOUND:           true if either the debug or release library of the xxx module is found
 43 | # - SFML_LIBRARIES:   the list of all libraries corresponding to the required modules
 44 | # - SFML_FOUND:       true if all the required modules are found
 45 | # - SFML_INCLUDE_DIR: the path where SFML headers are located (the directory containing the SFML/Config.hpp file)
 46 | #
 47 | # example:
 48 | #   find_package(SFML 2 COMPONENTS system window graphics audio REQUIRED)
 49 | #   include_directories(${SFML_INCLUDE_DIR})
 50 | #   add_executable(myapp ...)
 51 | #   target_link_libraries(myapp ${SFML_LIBRARIES})
 52 | 
 53 | # Static builds define the SFML_STATIC macro and look for the "-s" library variants;
 54 | # otherwise the plain library names are used (empty suffix).
 55 | if(SFML_STATIC_LIBRARIES)
 56 |     add_definitions(-DSFML_STATIC)
 57 |     set(FIND_SFML_LIB_SUFFIX "-s")
 58 | else()
 59 |     set(FIND_SFML_LIB_SUFFIX "")
 60 | endif()
 63 | 
 64 | # Locate the directory that contains SFML/Config.hpp. The user-supplied roots
 65 | # (SFML_ROOT as a CMake or environment variable) are listed first, followed by
 66 | # common install prefixes; each entry is also tried with an "include" suffix.
 67 | find_path(SFML_INCLUDE_DIR
 68 |     NAMES SFML/Config.hpp
 69 |     PATHS
 70 |         ${SFML_ROOT} $ENV{SFML_ROOT}
 71 |         ~/Library/Frameworks /Library/Frameworks
 72 |         /usr/local/ /usr/
 73 |         /sw          # Fink
 74 |         /opt/local/  # DarwinPorts
 75 |         /opt/csw/    # Blastwave
 76 |         /opt/
 77 |     PATH_SUFFIXES include)
 78 | 
 79 | # check the version number (only when a version was requested and the headers were found)
 80 | set(SFML_VERSION_OK TRUE)
 81 | if(SFML_FIND_VERSION AND SFML_INCLUDE_DIR)
 82 |     # extract the major and minor version numbers from SFML/Config.hpp
 83 |     # frameworks keep the header under Headers/ instead of SFML/, so pick the path accordingly:
 84 |     if("${SFML_INCLUDE_DIR}" MATCHES "SFML.framework")
 85 |         set(SFML_CONFIG_HPP_INPUT "${SFML_INCLUDE_DIR}/Headers/Config.hpp")
 86 |     else()
 87 |         set(SFML_CONFIG_HPP_INPUT "${SFML_INCLUDE_DIR}/SFML/Config.hpp")
 88 |     endif()
 89 |     FILE(READ "${SFML_CONFIG_HPP_INPUT}" SFML_CONFIG_HPP_CONTENTS)
 90 |     STRING(REGEX MATCH ".*#define SFML_VERSION_MAJOR ([0-9]+).*#define SFML_VERSION_MINOR ([0-9]+).*" SFML_CONFIG_HPP_CONTENTS "${SFML_CONFIG_HPP_CONTENTS}")
 91 |     STRING(REGEX REPLACE ".*#define SFML_VERSION_MAJOR ([0-9]+).*" "\\1" SFML_VERSION_MAJOR "${SFML_CONFIG_HPP_CONTENTS}")
 92 |     STRING(REGEX REPLACE ".*#define SFML_VERSION_MINOR ([0-9]+).*" "\\1" SFML_VERSION_MINOR "${SFML_CONFIG_HPP_CONTENTS}")
 93 |     math(EXPR SFML_REQUESTED_VERSION "${SFML_FIND_VERSION_MAJOR} * 10 + ${SFML_FIND_VERSION_MINOR}") # MAJOR.MINOR encoded as MAJOR * 10 + MINOR, e.g. 2.4 -> 24
 94 | 
 95 |     # if we could extract them, compare with the requested version number
 96 |     if (SFML_VERSION_MAJOR)
 97 |         # transform version numbers to an integer (same MAJOR * 10 + MINOR encoding as the requested version)
 98 |         math(EXPR SFML_VERSION "${SFML_VERSION_MAJOR} * 10 + ${SFML_VERSION_MINOR}")
 99 | 
100 |         # compare them
101 |         if(SFML_VERSION LESS SFML_REQUESTED_VERSION)
102 |             set(SFML_VERSION_OK FALSE)
103 |         endif()
104 |     else()
105 |         # no version defines could be extracted: SFML version is < 2.0
106 |         if (SFML_REQUESTED_VERSION GREATER 19) # 19 encodes 1.9, so this means 2.0 or newer was requested
107 |             set(SFML_VERSION_OK FALSE)
108 |             set(SFML_VERSION_MAJOR 1)
109 |             set(SFML_VERSION_MINOR x)
110 |         endif()
111 |     endif()
112 | endif()
113 | 
114 | # find the requested modules (a debug and a release library is searched for each component)
115 | set(SFML_FOUND TRUE) # will be set to false if one of the required modules is not found
116 | set(FIND_SFML_LIB_PATHS
117 |     ${SFML_ROOT}
118 |     $ENV{SFML_ROOT}
119 |     ~/Library/Frameworks
120 |     /Library/Frameworks
121 |     /usr/local
122 |     /usr
123 |     /sw
124 |     /opt/local
125 |     /opt/csw
126 |     /opt)
127 | foreach(FIND_SFML_COMPONENT ${SFML_FIND_COMPONENTS})
128 |     string(TOLOWER ${FIND_SFML_COMPONENT} FIND_SFML_COMPONENT_LOWER)
129 |     string(TOUPPER ${FIND_SFML_COMPONENT} FIND_SFML_COMPONENT_UPPER)
130 |     set(FIND_SFML_COMPONENT_NAME sfml-${FIND_SFML_COMPONENT_LOWER}${FIND_SFML_LIB_SUFFIX})
131 | 
132 |     # no suffix for sfml-main, it is always a static library
133 |     if(FIND_SFML_COMPONENT_LOWER STREQUAL "main")
134 |         set(FIND_SFML_COMPONENT_NAME sfml-${FIND_SFML_COMPONENT_LOWER})
135 |     endif()
136 | 
137 |     # debug library (same name as the release library plus a "-d" suffix)
138 |     find_library(SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_DEBUG
139 |                  NAMES ${FIND_SFML_COMPONENT_NAME}-d
140 |                  PATH_SUFFIXES lib64 lib
141 |                  PATHS ${FIND_SFML_LIB_PATHS})
142 | 
143 |     # release library
144 |     find_library(SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_RELEASE
145 |                  NAMES ${FIND_SFML_COMPONENT_NAME}
146 |                  PATH_SUFFIXES lib64 lib
147 |                  PATHS ${FIND_SFML_LIB_PATHS})
148 | 
149 |     if (SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_DEBUG OR SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_RELEASE)
150 |         # library found
151 |         set(SFML_${FIND_SFML_COMPONENT_UPPER}_FOUND TRUE)
152 |         
153 |         # if both are found, set SFML_XXX_LIBRARY to contain both
154 |         if (SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_DEBUG AND SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_RELEASE)
155 |             set(SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY debug     ${SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_DEBUG}
156 |                                                           optimized ${SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_RELEASE})
157 |         endif()
158 | 
159 |         # if only one debug/release variant is found, set the other to be equal to the found one
160 |         if (SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_DEBUG AND NOT SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_RELEASE)
161 |             # debug and not release
162 |             set(SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_RELEASE ${SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_DEBUG})
163 |             set(SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY         ${SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_DEBUG})
164 |         endif()
165 |         if (SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_RELEASE AND NOT SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_DEBUG)
166 |             # release and not debug
167 |             set(SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_DEBUG ${SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_RELEASE})
168 |             set(SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY       ${SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_RELEASE})
169 |         endif()
170 |     else()
171 |         # library not found: the whole package counts as not found, and the variable name is recorded for the error message
172 |         set(SFML_FOUND FALSE)
173 |         set(SFML_${FIND_SFML_COMPONENT_UPPER}_FOUND FALSE)
174 |         set(SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY "")
175 |         set(FIND_SFML_MISSING "${FIND_SFML_MISSING} SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY")
176 |     endif()
177 | 
178 |     # mark as advanced
179 |     MARK_AS_ADVANCED(SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY
180 |                      SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_RELEASE
181 |                      SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY_DEBUG)
182 | 
183 |     # add to the global list of libraries (an empty entry is appended when the component was not found)
184 |     set(SFML_LIBRARIES ${SFML_LIBRARIES} "${SFML_${FIND_SFML_COMPONENT_UPPER}_LIBRARY}")
185 | endforeach()
186 | 
187 | # handle errors
188 | if(NOT SFML_INCLUDE_DIR)
189 |     # headers not found: libraries alone are not a usable SFML installation
190 |     set(SFML_FOUND FALSE)
191 |     set(FIND_SFML_MISSING "${FIND_SFML_MISSING} SFML_INCLUDE_DIR")
192 | endif()
193 | if(NOT SFML_VERSION_OK)
194 |     # SFML version not ok
195 |     set(FIND_SFML_ERROR "SFML found but version too low (requested: ${SFML_FIND_VERSION}, found: ${SFML_VERSION_MAJOR}.${SFML_VERSION_MINOR})")
196 |     set(SFML_FOUND FALSE)
197 | elseif(NOT SFML_FOUND)
198 |     # include directory or library not found
199 |     set(FIND_SFML_ERROR "Could NOT find SFML (missing: ${FIND_SFML_MISSING})")
200 | endif()
201 | if(NOT SFML_FOUND)
202 |     if(SFML_FIND_REQUIRED)
203 |         message(FATAL_ERROR "${FIND_SFML_ERROR}") # fatal error: REQUIRED was passed to find_package
204 |     elseif(NOT SFML_FIND_QUIETLY)
205 |         message("${FIND_SFML_ERROR}") # error but continue
206 |     endif()
207 | else()
208 |     # handle success
209 |     message(STATUS "Found SFML ${SFML_VERSION_MAJOR}.${SFML_VERSION_MINOR} in ${SFML_INCLUDE_DIR}")
210 | endif()


--------------------------------------------------------------------------------
/ContinuousHTMGPU/source/vis/Plot.cpp:
--------------------------------------------------------------------------------
  1 | #include "Plot.h"
  2 | 
  3 | #include <sstream>
  4 | 
  5 | using namespace vis;
  6 | 
  7 | // Renders the whole plot into `target`: clears it to the background color, draws each curve as a
  8 | // textured ribbon (plus an optional shadow copy), paints the left and bottom margins over any
  9 | // overshoot, and finally draws the axes, tick marks and tick labels.
 10 | //
 11 | //   lineGradientTexture      texture applied across the ribbon width (v = 0 on one edge, v = texture height on the other)
 12 | //   tickFont, tickTextScale  font and uniform scale of the tick labels
 13 | //   domain, range            (min, max) of the data x and y values mapped onto the plot area
 14 | //   margins                  left (x) and bottom (y) margin in pixels; the plot area is the rest of the target
 15 | //   tickIncrements           data-space spacing between ticks on the x and y axes
 16 | //   axesSize, lineSize       thickness of the axes and width of the curve ribbons, in pixels
 17 | //   tickSize, tickLength     thickness and length of each tick mark, in pixels
 18 | //   textTickOffset           gap between a tick mark and its label, in pixels
 19 | //   precision                stream precision used when formatting tick values
 20 | //
 21 | // Ticks are placed on multiples of the tick increment; an axis whose extent or increment is not
 22 | // positive gets no ticks. Zero-length segments are skipped so they cannot inject NaN vertices.
 23 | void Plot::draw(sf::RenderTarget &target, const sf::Texture &lineGradientTexture, const sf::Font &tickFont, float tickTextScale,
 24 | 	const sf::Vector2f &domain, const sf::Vector2f &range, const sf::Vector2f &margins, const sf::Vector2f &tickIncrements, float axesSize, float lineSize, float tickSize, float tickLength, float textTickOffset, int precision)
 25 | {
 26 | 	target.clear(_backgroundColor);
 27 | 
 28 | 	const sf::Vector2f targetSize(static_cast<float>(target.getSize().x), static_cast<float>(target.getSize().y));
 29 | 	const sf::Vector2f plotSize(targetSize.x - margins.x, targetSize.y - margins.y);
 30 | 	const sf::Vector2f origin(margins.x, targetSize.y - margins.y);
 31 | 	const float textureHeight = static_cast<float>(lineGradientTexture.getSize().y);
 32 | 	const float halfLineSize = lineSize * 0.5f;
 33 | 
 34 | 	// Data space -> pixel space (screen y grows downwards, hence the subtraction)
 35 | 	auto toRender = [&](const sf::Vector2f &position) -> sf::Vector2f {
 36 | 		return sf::Vector2f(origin.x + (position.x - domain.x) / (domain.y - domain.x) * plotSize.x,
 37 | 			origin.y - (position.y - range.x) / (range.y - range.x) * plotSize.y);
 38 | 	};
 39 | 
 40 | 	auto isVisible = [&](const sf::Vector2f &position) -> bool {
 41 | 		return position.x >= domain.x && position.x <= domain.y && position.y >= range.x && position.y <= range.y;
 42 | 	};
 43 | 
 44 | 	// Unit vector along `v`, or (0, 0) when `v` has no usable length (zero or NaN)
 45 | 	auto unitOrZero = [](const sf::Vector2f &v) -> sf::Vector2f {
 46 | 		const float magnitude = vectorMagnitude(v);
 47 | 		return magnitude > 0.0f ? v / magnitude : sf::Vector2f(0.0f, 0.0f);
 48 | 	};
 49 | 
 50 | 	// Half-width offset at one end of a segment. It points along the normal of the averaged direction of the
 51 | 	// segment and its neighbor, lengthened so the ribbon keeps its width across the joint.
 52 | 	auto jointOffset = [&](const sf::Vector2f &segmentDirection, const sf::Vector2f &neighborDirection) -> sf::Vector2f {
 53 | 		const sf::Vector2f perpendicular(-segmentDirection.y, segmentDirection.x);
 54 | 		const sf::Vector2f averageDirection = unitOrZero(segmentDirection + neighborDirection);
 55 | 		const sf::Vector2f miter(-averageDirection.y, averageDirection.x);
 56 | 		const float projection = vectorDot(perpendicular, miter);
 57 | 
 58 | 		// Degenerate joint (direction reversal): use a square end instead of dividing by ~0
 59 | 		if (projection < 1.0e-6f)
 60 | 			return perpendicular * halfLineSize;
 61 | 
 62 | 		return miter * (halfLineSize / projection);
 63 | 	};
 64 | 
 65 | 	// Draw curves
 66 | 	for (std::size_t c = 0; c < _curves.size(); c++) {
 67 | 		const auto &points = _curves[c]._points;
 68 | 
 69 | 		// A ribbon needs at least one segment
 70 | 		if (points.size() < 2)
 71 | 			continue;
 72 | 
 73 | 		sf::VertexArray vertexArray;
 74 | 		vertexArray.resize((points.size() - 1) * 6); // two triangles per segment
 75 | 		std::size_t index = 0;
 76 | 
 77 | 		auto addVertex = [&](const sf::Vector2f &position, float texCoordY, const sf::Color &color) {
 78 | 			vertexArray[index].position = position;
 79 | 			vertexArray[index].texCoords = sf::Vector2f(0.0f, texCoordY);
 80 | 			vertexArray[index].color = color;
 81 | 			index++;
 82 | 		};
 83 | 
 84 | 		for (std::size_t p = 0; p + 1 < points.size(); p++) {
 85 | 			const Point &point = points[p];
 86 | 			const Point &pointNext = points[p + 1];
 87 | 
 88 | 			// Keep a segment as soon as one of its ends is inside the view
 89 | 			if (!isVisible(point._position) && !isVisible(pointNext._position))
 90 | 				continue;
 91 | 
 92 | 			const sf::Vector2f renderPoint = toRender(point._position);
 93 | 			const sf::Vector2f renderPointNext = toRender(pointNext._position);
 94 | 			const sf::Vector2f renderDirection = unitOrZero(renderPointNext - renderPoint);
 95 | 
 96 | 			// Coincident points give no direction to build a ribbon from
 97 | 			if (renderDirection.x == 0.0f && renderDirection.y == 0.0f)
 98 | 				continue;
 99 | 
100 | 			// The ends of the curve have no neighbor; reusing the segment direction yields a square end
101 | 			const sf::Vector2f directionPrev = p > 0 ? unitOrZero(renderPoint - toRender(points[p - 1]._position)) : renderDirection;
102 | 			const sf::Vector2f directionNext = p + 2 < points.size() ? unitOrZero(toRender(points[p + 2]._position) - renderPointNext) : renderDirection;
103 | 
104 | 			const sf::Vector2f sizeOffset = jointOffset(renderDirection, directionPrev);
105 | 			const sf::Vector2f sizeOffsetNext = jointOffset(renderDirection, directionNext);
106 | 
107 | 			// First triangle of the segment quad
108 | 			addVertex(renderPoint - sizeOffset, 0.0f, point._color);
109 | 			addVertex(renderPointNext - sizeOffsetNext, 0.0f, pointNext._color);
110 | 			addVertex(renderPointNext + sizeOffsetNext, textureHeight, pointNext._color);
111 | 
112 | 			// Second triangle of the segment quad
113 | 			addVertex(renderPoint - sizeOffset, 0.0f, point._color);
114 | 			addVertex(renderPointNext + sizeOffsetNext, textureHeight, pointNext._color);
115 | 			addVertex(renderPoint + sizeOffset, textureHeight, point._color);
116 | 		}
117 | 
118 | 		vertexArray.resize(index); // drop the slots reserved for skipped segments
119 | 		vertexArray.setPrimitiveType(sf::PrimitiveType::Triangles);
120 | 
121 | 		if (_curves[c]._shadow != 0.0f) {
122 | 			sf::VertexArray shadowArray = vertexArray;
123 | 			const sf::Color shadowColor(0, 0, 0, static_cast<sf::Uint8>(_curves[c]._shadow * 255.0f));
124 | 
125 | 			for (std::size_t v = 0; v < shadowArray.getVertexCount(); v++) {
126 | 				shadowArray[v].position += _curves[c]._shadowOffset;
127 | 				shadowArray[v].color = shadowColor;
128 | 			}
129 | 
130 | 			target.draw(shadowArray, &lineGradientTexture);
131 | 		}
132 | 
133 | 		target.draw(vertexArray, &lineGradientTexture);
134 | 	}
135 | 
136 | 	// Mask off parts of the curves that spill into the left and bottom margins
137 | 	sf::RectangleShape leftMask;
138 | 	leftMask.setSize(sf::Vector2f(margins.x, targetSize.y));
139 | 	leftMask.setFillColor(_backgroundColor);
140 | 	target.draw(leftMask);
141 | 
142 | 	sf::RectangleShape bottomMask;
143 | 	bottomMask.setSize(sf::Vector2f(targetSize.x, margins.y));
144 | 	bottomMask.setPosition(sf::Vector2f(0.0f, targetSize.y - margins.y));
145 | 	bottomMask.setFillColor(_backgroundColor);
146 | 	target.draw(bottomMask);
147 | 
148 | 	// Draw axes
149 | 	sf::RectangleShape xAxis;
150 | 	xAxis.setSize(sf::Vector2f(plotSize.x + axesSize * 0.5f, axesSize));
151 | 	xAxis.setPosition(sf::Vector2f(origin.x - axesSize * 0.5f, origin.y - axesSize * 0.5f));
152 | 	xAxis.setFillColor(_axesColor);
153 | 	target.draw(xAxis);
154 | 
155 | 	sf::RectangleShape yAxis;
156 | 	yAxis.setSize(sf::Vector2f(axesSize, plotSize.y + axesSize * 0.5f));
157 | 	yAxis.setPosition(sf::Vector2f(origin.x - axesSize * 0.5f, origin.y - axesSize * 0.5f - plotSize.y));
158 | 	yAxis.setFillColor(_axesColor);
159 | 	target.draw(yAxis);
160 | 
161 | 	// Data-space distance from `start` up to the first multiple of `increment` at or above it
162 | 	auto firstTickOffset = [](float start, float increment) -> float {
163 | 		float remainder = std::fmod(start, increment);
164 | 
165 | 		if (remainder < 0.0f)
166 | 			remainder += increment;
167 | 
168 | 		return remainder > 0.0f ? increment - remainder : 0.0f;
169 | 	};
170 | 
171 | 	// Draw x ticks (skipped when the extent or increment is not positive, which would divide by zero)
172 | 	const float xDistance = domain.y - domain.x;
173 | 
174 | 	if (xDistance > 0.0f && tickIncrements.x > 0.0f) {
175 | 		const int xTicks = static_cast<int>(std::floor(xDistance / tickIncrements.x));
176 | 		const float xTickOffset = firstTickOffset(domain.x, tickIncrements.x);
177 | 		const float xTickRenderOffset = xTickOffset / xDistance * plotSize.x; // data units -> pixels
178 | 		const float xTickRenderDistance = tickIncrements.x / xDistance * plotSize.x;
179 | 
180 | 		std::ostringstream os;
181 | 		os.precision(precision);
182 | 
183 | 		for (int t = 0; t < xTicks; t++) {
184 | 			sf::RectangleShape xTick;
185 | 			xTick.setSize(sf::Vector2f(tickSize, tickLength));
186 | 			xTick.setPosition(sf::Vector2f(origin.x + xTickRenderOffset + xTickRenderDistance * t - tickSize * 0.5f, origin.y));
187 | 			xTick.setFillColor(_axesColor);
188 | 			target.draw(xTick);
189 | 
190 | 			const float value = domain.x + xTickOffset + t * tickIncrements.x;
191 | 
192 | 			os.str("");
193 | 			os << value;
194 | 
195 | 			sf::Text xTickText;
196 | 			xTickText.setString(os.str());
197 | 			xTickText.setFont(tickFont);
198 | 			xTickText.setPosition(sf::Vector2f(xTick.getPosition().x, xTick.getPosition().y + tickLength + textTickOffset));
199 | 			xTickText.setRotation(45.0f);
200 | 			xTickText.setColor(_axesColor);
201 | 			xTickText.setScale(sf::Vector2f(tickTextScale, tickTextScale));
202 | 			target.draw(xTickText);
203 | 		}
204 | 	}
205 | 
206 | 	// Draw y ticks (same scheme as the x ticks, growing upwards from the origin)
207 | 	const float yDistance = range.y - range.x;
208 | 
209 | 	if (yDistance > 0.0f && tickIncrements.y > 0.0f) {
210 | 		const int yTicks = static_cast<int>(std::floor(yDistance / tickIncrements.y));
211 | 		const float yTickOffset = firstTickOffset(range.x, tickIncrements.y);
212 | 		const float yTickRenderOffset = yTickOffset / yDistance * plotSize.y; // data units -> pixels
213 | 		const float yTickRenderDistance = tickIncrements.y / yDistance * plotSize.y;
214 | 
215 | 		std::ostringstream os;
216 | 		os.precision(precision);
217 | 
218 | 		for (int t = 0; t < yTicks; t++) {
219 | 			sf::RectangleShape yTick;
220 | 			yTick.setSize(sf::Vector2f(tickLength, tickSize));
221 | 			yTick.setPosition(sf::Vector2f(origin.x - tickLength, origin.y - yTickRenderOffset - yTickRenderDistance * t - tickSize * 0.5f));
222 | 			yTick.setFillColor(_axesColor);
223 | 			target.draw(yTick);
224 | 
225 | 			const float value = range.x + yTickOffset + t * tickIncrements.y;
226 | 
227 | 			os.str("");
228 | 			os << value;
229 | 
230 | 			sf::Text yTickText;
231 | 			yTickText.setString(os.str());
232 | 			yTickText.setFont(tickFont);
233 | 			sf::FloatRect bounds = yTickText.getLocalBounds();
234 | 			yTickText.setPosition(sf::Vector2f(yTick.getPosition().x - bounds.width * 0.5f - tickLength * 0.5f - textTickOffset, yTick.getPosition().y - bounds.height * 0.5f));
235 | 			yTickText.setRotation(0.0f);
236 | 			yTickText.setColor(_axesColor);
237 | 			yTickText.setScale(sf::Vector2f(tickTextScale, tickTextScale));
238 | 			target.draw(yTickText);
239 | 		}
240 | 	}
241 | }
251 | 
252 | float vis::vectorMagnitude(const sf::Vector2f &vector) {
253 | 	return std::sqrt(vectorDot(vector, vector)); // Euclidean length: sqrt(v . v) = sqrt(x * x + y * y)
254 | }
255 | 
256 | // Returns `vector` scaled to unit length; an input without a positive length (zero or NaN) yields (0, 0) instead of NaN components.
257 | sf::Vector2f vis::vectorNormalize(const sf::Vector2f &vector) {
258 | 	const float magnitude = vectorMagnitude(vector);
259 | 	return magnitude > 0.0f ? vector / magnitude : sf::Vector2f(0.0f, 0.0f);
260 | }
261 | 
262 | float vis::vectorDot(const sf::Vector2f &left, const sf::Vector2f &right) {
263 | 	return right.x * left.x + right.y * left.y; // scalar (dot) product of the two vectors
264 | }


--------------------------------------------------------------------------------
/ContinuousHTMGPU/source/Main.cpp:
--------------------------------------------------------------------------------
  1 | #include <htm/HTMRL.h>
  2 | 
  3 | #include <SFML/Window.hpp>
  4 | #include <SFML/Graphics.hpp>
  5 | 
  6 | #include <vis/HTMRLVisualizer.h>
  7 | #include <vis/Plot.h>
  8 | 
  9 | #include <time.h>
 10 | #include <iostream>
 11 | #include <functional>
 12 | #include <unordered_map>
 13 | 
 14 | int main() {
 15 | 	std::mt19937 generator(time(nullptr));
 16 | 
 17 | 	sys::ComputeSystem cs;
 18 | 
 19 | 	cs.create(sys::ComputeSystem::_gpu);
 20 | 
 21 | 	sys::ComputeProgram program;
 22 | 
 23 | 	program.loadFromFile("resources/htmrl.cl", cs);
 24 | 
 25 | 	float reward = 0.0f;
 26 | 	float prevReward = 0.0f;
 27 | 
 28 | 	float initReward = 0.0f;
 29 | 
 30 | 	float totalReward = 0.0f;
 31 | 
 32 | 	sf::RenderWindow window;
 33 | 
 34 | 	window.create(sf::VideoMode(800, 600), "Pole Balancing");
 35 | 
 36 | 	//window.setVerticalSyncEnabled(true);
 37 | 
 38 | 	window.setFramerateLimit(60);
 39 | 
 40 | 	// -------------------------- Load Resources --------------------------
 41 | 
 42 | 	sf::Texture backgroundTexture;
 43 | 	sf::Texture cartTexture;
 44 | 	sf::Texture poleTexture;
 45 | 
 46 | 	backgroundTexture.loadFromFile("resources/background.png");
 47 | 	cartTexture.loadFromFile("resources/cart.png");
 48 | 	poleTexture.loadFromFile("resources/pole.png");
 49 | 
 50 | 	sf::Texture inputCartTexture;
 51 | 	sf::Texture inputPoleTexture;
 52 | 
 53 | 	inputCartTexture.loadFromFile("resources/inputCart.png");
 54 | 	inputPoleTexture.loadFromFile("resources/inputPole.png");
 55 | 
 56 | 	// --------------------------------------------------------------------
 57 | 
 58 | 	sf::Sprite backgroundSprite;
 59 | 	sf::Sprite cartSprite;
 60 | 	sf::Sprite poleSprite;
 61 | 
 62 | 	backgroundSprite.setTexture(backgroundTexture);
 63 | 	cartSprite.setTexture(cartTexture);
 64 | 	poleSprite.setTexture(poleTexture);
 65 | 
 66 | 	backgroundSprite.setPosition(sf::Vector2f(0.0f, 0.0f));
 67 | 
 68 | 	cartSprite.setOrigin(sf::Vector2f(static_cast<float>(cartSprite.getTexture()->getSize().x) * 0.5f, static_cast<float>(cartSprite.getTexture()->getSize().y)));
 69 | 	poleSprite.setOrigin(sf::Vector2f(static_cast<float>(poleSprite.getTexture()->getSize().x) * 0.5f, static_cast<float>(poleSprite.getTexture()->getSize().y)));
 70 | 
 71 | 	sf::Sprite inputCartSprite;
 72 | 	sf::Sprite inputPoleSprite;
 73 | 
 74 | 	inputCartSprite.setTexture(inputCartTexture);
 75 | 	inputPoleSprite.setTexture(inputPoleTexture);
 76 | 
 77 | 	inputCartSprite.setOrigin(sf::Vector2f(static_cast<float>(inputCartSprite.getTexture()->getSize().x) * 0.5f, static_cast<float>(inputCartSprite.getTexture()->getSize().y)));
 78 | 	inputPoleSprite.setOrigin(sf::Vector2f(static_cast<float>(inputPoleSprite.getTexture()->getSize().x) * 0.5f, static_cast<float>(inputPoleSprite.getTexture()->getSize().y)));
 79 | 
 80 | 	// ----------------------------- Physics ------------------------------
 81 | 
 82 | 	float pixelsPerMeter = 128.0f;
 83 | 	float inputPixelsPerMeter = 8.0f;
 84 | 	float poleLength = 1.0f;
 85 | 	float g = -2.8f;
 86 | 	float massMass = 40.0f;
 87 | 	float cartMass = 2.0f;
 88 | 	sf::Vector2f massPos(0.0f, poleLength);
 89 | 	sf::Vector2f massVel(0.0f, 0.0f);
 90 | 	float poleAngle = static_cast<float>(3.14159f) * 0.0f;
 91 | 	float poleAngleVel = 0.0f;
 92 | 	float poleAngleAccel = 0.0f;
 93 | 	float cartX = 0.0f;
 94 | 	float cartVelX = 0.0f;
 95 | 	float cartAccelX = 0.0f;
 96 | 	float poleRotationalFriction = 0.008f;
 97 | 	float cartMoveRadius = 1.8f;
 98 | 	float cartFriction = 0.02f;
 99 | 	float maxSpeed = 3.0f;
100 | 
101 | 	// ---------------------------- Game Loop -----------------------------
102 | 
103 | 	bool quit = false;
104 | 
105 | 	sf::Clock clock;
106 | 
107 | 	float dt = 0.017f;
108 | 
109 | 	float fitness = 0.0f;
110 | 	float prevFitness = 0.0f;
111 | 
112 | 	float lowPassFitness = 0.0f;
113 | 
114 | 	bool reverseDirection = false;
115 | 
116 | 	bool trainMode = true;
117 | 
118 | 	bool tDownLastFrame = false;
119 | 
120 | 	std::uniform_real_distribution<float> dist01(0.0f, 1.0f);
121 | 
122 | 	sf::Font font;
123 | 
124 | 	font.loadFromFile("resources/pixelated.ttf");
125 | 
126 | 	sf::RenderTexture inputRT;
127 | 
128 | 	inputRT.create(64, 32);
129 | 
130 | 	float avgReward = 0.0f;
131 | 	float avgRewardDecay = 0.003f;
132 | 
133 | 	float minReward = 0.0f;
134 | 	float maxReward = 1.0f;
135 | 
136 | 	float totalTime = 0.0f;
137 | 
138 | 	float plotUpdateTimer = 0.0f;
139 | 
140 | 	htm::HTMRL agent;
141 | 
142 | 	std::vector<htm::HTMRL::LayerDesc> layerDescs(5);
143 | 
144 | 	layerDescs[0]._width = 64;
145 | 	layerDescs[0]._height = 64;
146 | 
147 | 	layerDescs[1]._width = 44;
148 | 	layerDescs[1]._height = 44;
149 | 
150 | 	layerDescs[2]._width = 32;
151 | 	layerDescs[2]._height = 32;
152 | 
153 | 	layerDescs[3]._width = 20;
154 | 	layerDescs[3]._height = 20;
155 | 
156 | 	layerDescs[4]._width = 16;
157 | 	layerDescs[4]._height = 16;
158 | 
159 | 	std::vector<htm::HTMRL::InputType> inputTypes(64 * 64, htm::HTMRL::_state);
160 | 
161 | 	for (int x = 0; x < 64; x++) {
162 | 		for (int y = 32; y < 64; y++) {
163 | 			inputTypes[x + y * 64] = htm::HTMRL::_unused;
164 | 		}
165 | 	}
166 | 
167 | 	std::uniform_int_distribution<int> actionXDist(0, 63);
168 | 	std::uniform_int_distribution<int> actionYDist(33, 63);
169 | 
170 | 	std::vector<int> actionIndices;
171 | 
172 | 	/*for (int x = 28; x < 34; x++)
173 | 	for (int y = 48; y < 54; y++) {
174 | 		if (inputTypes[x + y * 64] == htm::HTMRL::_action)
175 | 			continue;
176 | 
177 | 		inputTypes[x + y * 64] = htm::HTMRL::_action;
178 | 
179 | 		actionIndices.push_back(x + y * 64);
180 | 	}*/
181 | 
182 | 	for (int i = 0; i < 8; i++) {
183 | 		int x = actionXDist(generator);
184 | 		int y = actionYDist(generator);
185 | 
186 | 		if (inputTypes[x + y * 64] == htm::HTMRL::_action)
187 | 			continue;
188 | 
189 | 		inputTypes[x + y * 64] = htm::HTMRL::_action;
190 | 
191 | 		actionIndices.push_back(x + y * 64);
192 | 	}
193 | 
194 | 	agent.createRandom(cs, program, 64, 64, 4, layerDescs, inputTypes, -0.05f, 0.05f, -0.05f, 0.05f, generator);
195 | 
196 | 	sf::RenderTexture htmRT;
197 | 	htmRT.create(1024, 1024, false);
198 | 
199 | 	vis::HTMRLVisualizer visualizer;
200 | 	visualizer.create(1024);
201 | 
202 | 	vis::Plot plot;
203 | 
204 | 	plot._curves.resize(1);
205 | 	
206 | 	sf::RenderTexture plotRT;
207 | 	plotRT.create(800, 600, false);
208 | 
209 | 	sf::Texture lineGradient;
210 | 	lineGradient.loadFromFile("resources/lineGradient.png");
211 | 
212 | 	sf::Font tickFont;
213 | 	tickFont.loadFromFile("resources/arial.ttf");
214 | 
215 | 	const int plotSampleTicks = 60;
216 | 	int plotSampleCounter = 0;
217 | 	
218 | 	do {
219 | 		clock.restart();
220 | 
221 | 		// ----------------------------- Input -----------------------------
222 | 
223 | 		sf::Event windowEvent;
224 | 
225 | 		while (window.pollEvent(windowEvent))
226 | 		{
227 | 			switch (windowEvent.type)
228 | 			{
229 | 			case sf::Event::Closed:
230 | 				quit = true;
231 | 				break;
232 | 			}
233 | 		}
234 | 
235 | 		if (sf::Keyboard::isKeyPressed(sf::Keyboard::Escape))
236 | 			quit = true;
237 | 
238 | 		// Update fitness
239 | 		if (poleAngle < static_cast<float>(3.14159f))
240 | 			fitness = -(static_cast<float>(3.14159f)* 0.5f - poleAngle);
241 | 		else
242 | 			fitness = -(static_cast<float>(3.14159f)* 0.5f - (static_cast<float>(3.14159f)* 2.0f - poleAngle));
243 | 
244 | 		//fitness += static_cast<float>(3.14159f)* 0.5f;
245 | 
246 | 		//fitness = fitness - std::abs(poleAngleVel * 1.0f);
247 | 
248 | 		//fitness = -std::abs(cartX);
249 | 
250 | 		if (sf::Keyboard::isKeyPressed(sf::Keyboard::A))
251 | 			fitness = -cartX;
252 | 		else if (sf::Keyboard::isKeyPressed(sf::Keyboard::D))
253 | 			fitness = cartX;
254 | 
255 | 		// ------------------------------ AI -------------------------------
256 | 
257 | 		float dFitness = fitness - prevFitness;
258 | 
259 | 		//reward = dFitness * 5.0f;
260 | 
261 | 		reward = fitness;
262 | 
263 | 		if (totalTime == 0.0f)
264 | 			avgReward = reward;
265 | 		else
266 | 			avgReward = (1.0f - avgRewardDecay) * avgReward + avgRewardDecay * reward;
267 | 
268 | 		minReward = std::min<float>(minReward, avgReward);
269 | 		maxReward = std::max<float>(maxReward, avgReward);
270 | 
271 | 		if (plotSampleCounter == plotSampleTicks) {
272 | 			plotSampleCounter = 0;
273 | 
274 | 			vis::Point p;
275 | 			p._position.x = plot._curves[0]._points.size() - 1;
276 | 			p._position.y = avgReward;
277 | 			p._color = sf::Color::Red;
278 | 
279 | 			plot._curves[0]._points.push_back(p);
280 | 		}
281 | 
282 | 		plotSampleCounter++;
283 | 
284 | 		sf::Image img = inputRT.getTexture().copyToImage();
285 | 
286 | 		for (int x = 0; x < 64; x++)
287 | 		for (int y = 0; y < 32; y++) {
288 | 			agent.setInput(x, y, img.getPixel(x, y).r / 255.0f);
289 | 		}
290 | 
291 | 		agent.step(cs, reward, 0.01f, 0.01f, 0.01f, 0.05f, 0.1f, 0.05f, 0.05f, 0.5f, 0.5f, 0.5f, 0.01f, 0.2f, 0.992f, 0.15f, 0.15f, 120, 10, 2, generator);
292 | 
293 | 		float output = 0.0f;
294 | 		int c = 0;
295 | 
296 | 		for (int i = 0; i < actionIndices.size(); i++) {
297 | 			output += agent.getOutput(actionIndices[i]);
298 | 			c++;
299 | 		}
300 | 
301 | 		output /= c;
302 | 
303 | 		float dir = std::min<float>(1.0f, std::max<float>(-1.0f, 1.6f * (output * 2.0f - 1.0f)));
304 | 
305 | 		//std::cout << dir << std::endl;
306 | 
307 | 		float agentForce = 4000.0f * dir;
308 | 	
309 | 		prevFitness = fitness;
310 | 
311 | 		// ---------------------------- Physics ----------------------------
312 | 
313 | 		float pendulumCartAccelX = cartAccelX;
314 | 
315 | 		if (cartX < -cartMoveRadius)
316 | 			pendulumCartAccelX = 0.0f;
317 | 		else if (cartX > cartMoveRadius)
318 | 			pendulumCartAccelX = 0.0f;
319 | 
320 | 		poleAngleAccel = pendulumCartAccelX * std::cos(poleAngle) + g * std::sin(poleAngle);
321 | 		poleAngleVel += -poleRotationalFriction * poleAngleVel + poleAngleAccel * dt;
322 | 		poleAngle += poleAngleVel * dt;
323 | 
324 | 		massPos = sf::Vector2f(cartX + std::cos(poleAngle + static_cast<float>(3.14159f)* 0.5f) * poleLength, std::sin(poleAngle + static_cast<float>(3.14159f)* 0.5f) * poleLength);
325 | 
326 | 		float force = 0.0f;
327 | 
328 | 		if (std::abs(cartVelX) < maxSpeed) {
329 | 			force = std::max<float>(-4000.0f, std::min<float>(4000.0f, agentForce));
330 | 
331 | 			if (sf::Keyboard::isKeyPressed(sf::Keyboard::Left))
332 | 				force = -4000.0f;
333 | 
334 | 			if (sf::Keyboard::isKeyPressed(sf::Keyboard::Right))
335 | 				force = 4000.0f;
336 | 		}
337 | 
338 | 		if (cartX < -cartMoveRadius) {
339 | 			cartX = -cartMoveRadius;
340 | 
341 | 			cartAccelX = -cartVelX / dt;
342 | 			cartVelX = -0.5f * cartVelX;
343 | 		}
344 | 		else if (cartX > cartMoveRadius) {
345 | 			cartX = cartMoveRadius;
346 | 
347 | 			cartAccelX = -cartVelX / dt;
348 | 			cartVelX = -0.5f * cartVelX;
349 | 		}
350 | 
351 | 		cartAccelX = 0.25f * (force + massMass * poleLength * poleAngleAccel * std::cos(poleAngle) - massMass * poleLength * poleAngleVel * poleAngleVel * std::sin(poleAngle)) / (massMass + cartMass);
352 | 		cartVelX += -cartFriction * cartVelX + cartAccelX * dt;
353 | 		cartX += cartVelX * dt;
354 | 
355 | 		poleAngle = std::fmod(poleAngle, (2.0f * static_cast<float>(3.14159f)));
356 | 
357 | 		if (poleAngle < 0.0f)
358 | 			poleAngle += static_cast<float>(3.14159f)* 2.0f;
359 | 
360 | 		if (sf::Keyboard::isKeyPressed(sf::Keyboard::T)) {
361 | 			if (!tDownLastFrame) {
362 | 				trainMode = !trainMode;
363 | 			}
364 | 
365 | 			tDownLastFrame = true;
366 | 		}
367 | 		else
368 | 			tDownLastFrame = false;
369 | 
370 | 		// ---------------------------- Rendering ----------------------------
371 | 
372 | 		// Render to input buffer
373 | 		inputRT.clear();
374 | 
375 | 		inputCartSprite.setPosition(sf::Vector2f(inputRT.getSize().x * 0.5f + inputPixelsPerMeter * cartX, inputRT.getSize().y * 0.5f + 4.0f));
376 | 
377 | 		inputRT.draw(inputCartSprite);
378 | 
379 | 		inputPoleSprite.setPosition(inputCartSprite.getPosition() + sf::Vector2f(0.0f, -4.0f));
380 | 		inputPoleSprite.setRotation(poleAngle * 180.0f / static_cast<float>(3.14159f) + 180.0f);
381 | 
382 | 		inputRT.draw(inputPoleSprite);
383 | 
384 | 		inputRT.display();
385 | 
386 | 		window.clear();
387 | 
388 | 		window.draw(backgroundSprite);
389 | 
390 | 		cartSprite.setPosition(sf::Vector2f(800.0f * 0.5f + pixelsPerMeter * cartX, 600.0f * 0.5f + 3.0f));
391 | 
392 | 		window.draw(cartSprite);
393 | 
394 | 		poleSprite.setPosition(cartSprite.getPosition() + sf::Vector2f(0.0f, -45.0f));
395 | 		poleSprite.setRotation(poleAngle * 180.0f / static_cast<float>(3.14159f) + 180.0f);
396 | 
397 | 		window.draw(poleSprite);
398 | 
399 | 		sf::Sprite inputSprite;
400 | 
401 | 		inputSprite.setTexture(inputRT.getTexture());
402 | 
403 | 		inputSprite.setPosition(0, 0);
404 | 		inputSprite.setScale(4.0f, 4.0f);
405 | 
406 | 		window.draw(inputSprite);
407 | 
408 | 		if (sf::Keyboard::isKeyPressed(sf::Keyboard::B)) {
409 | 			plotRT.setActive();
410 | 			plotRT.clear(sf::Color::White);
411 | 
412 | 			plot.draw(plotRT, lineGradient, tickFont, 0.5f, sf::Vector2f(0.0f, plot._curves[0]._points.size()), sf::Vector2f(minReward, maxReward), sf::Vector2f(64.0f, 64.0f), sf::Vector2f(plot._curves[0]._points.size() / 10.0f, (maxReward - minReward) / 10.0f), 2.0f, 4.0f, 2.0f, 6.0f, 2.0f, 4);
413 | 
414 | 			plotRT.display();
415 | 
416 | 			sf::Sprite plotSprite;
417 | 			plotSprite.setTexture(plotRT.getTexture());
418 | 
419 | 			window.draw(plotSprite);
420 | 		}
421 | 
422 | 		if (sf::Keyboard::isKeyPressed(sf::Keyboard::V)) {
423 | 			htmRT.setActive();
424 | 			htmRT.clear(sf::Color::White);
425 | 
426 | 			visualizer.update(htmRT, sf::Vector2f(512.0f, 512.0f), sf::Vector2f(1.95f, 1.95f), cs, agent, generator);
427 | 
428 | 			htmRT.display();
429 | 
430 | 			sf::Sprite htmSprite;
431 | 			htmSprite.setTexture(htmRT.getTexture());
432 | 
433 | 			htmSprite.setScale(0.8f, 0.8f);
434 | 			htmSprite.setOrigin(512, 512);
435 | 			htmSprite.setPosition(400.0f, 300.0f);
436 | 
437 | 			window.draw(htmSprite);
438 | 		}
439 | 
440 | 		// -------------------------------------------------------------------
441 | 
442 | 		window.display();
443 | 
444 | 		//dt = clock.getElapsedTime().asSeconds();
445 | 
446 | 		totalTime += dt;
447 | 		plotUpdateTimer += dt;
448 | 	} while (!quit);
449 | 
450 | 	return 0;
451 | }


--------------------------------------------------------------------------------
/ContinuousHTMGPU/resources/htmrl.cl:
--------------------------------------------------------------------------------
// Program-scope image samplers. All of them use nearest filtering; they differ only in
// whether coordinates are normalized and in the out-of-range addressing mode
// (CLK_ADDRESS_CLAMP vs. CLK_ADDRESS_CLAMP_TO_EDGE).
// NOTE(review): the kernels near the top of this file call read_imagef without a sampler
// argument, so these may only be used further down (or not at all) - confirm before removing.

// Normalized coordinates, CLK_ADDRESS_CLAMP.
constant sampler_t normalizedClampedNearestSampler = CLK_NORMALIZED_COORDS_TRUE |
	CLK_ADDRESS_CLAMP |
	CLK_FILTER_NEAREST;
	
// Normalized coordinates, CLK_ADDRESS_CLAMP_TO_EDGE.
constant sampler_t normalizedClampedToEdgeNearestSampler = CLK_NORMALIZED_COORDS_TRUE |
	CLK_ADDRESS_CLAMP_TO_EDGE |
	CLK_FILTER_NEAREST;
	
// Unnormalized (texel) coordinates, CLK_ADDRESS_CLAMP.
constant sampler_t unnormalizedClampedNearestSampler = CLK_NORMALIZED_COORDS_FALSE |
	CLK_ADDRESS_CLAMP |
	CLK_FILTER_NEAREST;
	
// Same flag combination as normalizedClampedToEdgeNearestSampler.
constant sampler_t defaultNormalizedSampler = CLK_NORMALIZED_COORDS_TRUE |
	CLK_ADDRESS_CLAMP_TO_EDGE |
	CLK_FILTER_NEAREST;
	
// Unnormalized (texel) coordinates, CLK_ADDRESS_CLAMP_TO_EDGE.
constant sampler_t defaultUnnormalizedSampler = CLK_NORMALIZED_COORDS_FALSE |
	CLK_ADDRESS_CLAMP_TO_EDGE |
	CLK_FILTER_NEAREST;
 20 | 	
// Compile-time upper bounds for fixed-size private arrays.
// MAX_SEGMENTS_PER_CELL sizes the per-cell `errors` / `sums` arrays in the cell kernels; those
// kernels index the arrays up to the segment count passed by the host without checking it, so
// the host must never pass more segments per cell than this value.
// NOTE(review): MAX_RECEPTIVE_SIZE is not referenced in the part of the file reviewed here.
#define MAX_RECEPTIVE_SIZE 81
#define MAX_SEGMENTS_PER_CELL 4
 23 | 	
// Program-scope tuning constants shared by the helpers and kernels in this file.
// NOTE(review): only a handful of these are referenced by the kernels near the top of the file
// (annotated below); the rest may be used further down or be leftovers from earlier
// experiments - confirm before deleting any of them.
constant float columnIntensity = 1.0f;
constant float learnTolerance = 0.01f;
constant float sparsityMultiplier = 10.0f;
constant float sparsityThreshold = 0.04f;
// Shadowed by a local variable of the same name inside layerColumnWeightUpdate.
constant float sparsity = 0.06f;
constant float segmentSparsity = 0.3f;
// Blend factor of the running average of a column's state (layerColumnInhibit).
constant float columnTraceDecay = 0.002f;
// Fraction of the previous weight delta re-applied in layerColumnWeightUpdate.
constant float columnMomentum = 0.1f;
constant float columnRandomness = 0.1f;
constant float minDerivative = 0.1f;
constant float minSimilarity = 0.0001f;
constant float minLearn = 0.0f;
constant float learnFalloff = 0.1f;
constant float noMatchTolerance = 0.0001f;
constant float falloffIntensity = 0.5f;
constant float activationModulationPower = 4.0f;
constant float qModulationPower = 1.0f;
constant float crowdingIntensity = 8.0f;
constant float cellStateIntensity = 32.0f;
constant float cellPredictionIntensity = 4.0f;
constant float minLearningThreshold = 0.0f;
constant float predictionRangeExtension = 0.1f;
// layerColumnInhibit turns a column on when fewer than this many neighbours have a higher
// activation; initializePartOne also uses it to seed the initial column trace.
constant float localActivity = 1.0f;
constant float reconstructionErrorActivity = 2.0f;
constant float boostThreshold = 0.01f;
// Used by rectifierDerivative both as the comparison threshold and as the value returned
// at or below that threshold.
constant float rectifierLeak = 0.03f;
constant float minDivisor = 0.0001f;
constant float higherLayerQPower = 16.0f;
constant float dutyCycleDecay = 0.005f;
constant float minReconstructionError = 0.1f;

// LCA parameters. lcaAlpha, lcaLambda and lcaGamma are read by lcaThreshold; lcaTauInv is not
// referenced in the part of the file reviewed here.
constant float lcaTauInv = 0.01f;
constant float lcaAlpha = 0.01f;
constant float lcaLambda = 0.01f;
constant float lcaGamma = 100.0f;
 60 | 
 61 | float randFloat(uint2* state) {
 62 |     const float invMaxInt = 1.0f / 4294967296.0f;
 63 |     uint x = (*state).x * 17 + (*state).y * 13123;
 64 |     (*state).x = (x << 13) ^ x;
 65 |     (*state).y ^= (x << 7);
 66 | 
 67 |     uint tmp = x * (x * x * 15731 + 74323) + 871483;
 68 | 
 69 |     return convert_float(tmp) * invMaxInt;
 70 | }
 71 | 
 72 | float sigmoid(float x) {
 73 | 	return 1.0f / (1.0f + exp(-x));
 74 | }
 75 | 
 76 | float relu(float x) {
 77 | 	return log(1.0f + exp(x));
 78 | }
 79 | 
 80 | float rectifier(float x) {
 81 | 	return fmax(0.0f, x);
 82 | }
 83 | 
 84 | float rectifierDerivative(float x) {
 85 | 	return x > rectifierLeak ? 1.0f : rectifierLeak;
 86 | }
 87 | 
 88 | float scaledSigmoid(float x) {
 89 | 	return 2.0f / (1.0f + exp(-x)) - 1.0f;
 90 | }
 91 | 
 92 | float lcaThreshold(float potential) {
 93 | 	return (potential - lcaAlpha * lcaLambda) / (1.0f + exp(-lcaGamma * (potential - lcaLambda)));
 94 | }
 95 | 
 96 | float boostFunction(float dutyCycle, float threshold) {
 97 | 	return fmin(1.0f, fmax(0.0f, threshold - dutyCycle) / threshold);
 98 | }
 99 | 
// Initialization, part one: column-level images. One work-item per column.
//
// Zeroes the column's activation and previous-value texels, sets its state texel to
// (0, localActivity / receptiveFieldSize, 0, 0), and fills the first receptiveFieldSize
// feed-forward weight slots with random values in [minWeight, maxWeight] (second channel
// zeroed). cellsInColumn and lateralConnectionsSize are accepted but not read here.
void kernel initializePartOne(write_only image2d_t columnActivations, write_only image2d_t columnStates, write_only image3d_t columnFeedForwardWeights, write_only image2d_t columnPrevValues,
	int cellsInColumn, int receptiveFieldSize, int lateralConnectionsSize, uint2 seed, float minWeight, float maxWeight)
{
	const size_t gx = get_global_id(0);
	const size_t gy = get_global_id(1);

	// Decorrelate the generator state between work-items.
	uint2 rng = seed + (uint2)(gx * 29 - 12, gy * 16 + 23) * 36;

	const int2 column = (int2)(gx, gy);
	const float4 zero = (float4)(0.0f, 0.0f, 0.0f, 0.0f);

	write_imagef(columnActivations, column, zero);
	write_imagef(columnStates, column, (float4)(0.0f, localActivity / receptiveFieldSize, 0.0f, 0.0f));
	write_imagef(columnPrevValues, column, zero);

	const float weightRange = maxWeight - minWeight;

	int slot = 0;

	while (slot < receptiveFieldSize) {
		const float initialWeight = randFloat(&rng) * weightRange + minWeight;

		write_imagef(columnFeedForwardWeights, (int4)(column.x, column.y, slot, 0), (float4)(initialWeight, 0.0f, 0.0f, 0.0f));

		slot++;
	}
}
119 | 
// Initialization, part two: cell-level images. One work-item per column.
//
// For every cell of the column: zeroes its state, prediction and Q-value texels, zeroes its
// segmentsPerCell segment-state texels (stored at depth cell * segmentsPerCell + segment), and
// fills lateralConnectionsSize lateral weights with random values in [minWeight, maxWeight].
// Lateral weights live at (column.x, cell + column.y * cellsInColumn, weightIndex).
// receptiveFieldSize is accepted but not read here.
void kernel initializePartTwo(write_only image3d_t cellStates, write_only image3d_t segmentStates, write_only image3d_t cellWeights, write_only image3d_t cellPredictions, write_only image3d_t cellQValues,
	int cellsInColumn, int receptiveFieldSize, int lateralConnectionsSize, int segmentsPerCell, uint2 seed, float minWeight, float maxWeight)
{
	const size_t gx = get_global_id(0);
	const size_t gy = get_global_id(1);

	// Decorrelate the generator state between work-items.
	uint2 rng = seed + (uint2)(gx * 32 + 24, gy * 11 - 66) * 23;

	const int2 column = (int2)(gx, gy);
	const float4 zero = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
	const float weightRange = maxWeight - minWeight;

	for (int cell = 0; cell < cellsInColumn; cell++) {
		const int4 cellCoord = (int4)(column.x, column.y, cell, 0);

		write_imagef(cellStates, cellCoord, zero);

		const int firstSegment = cell * segmentsPerCell;

		for (int segment = 0; segment < segmentsPerCell; segment++)
			write_imagef(segmentStates, (int4)(column.x, column.y, firstSegment + segment, 0), zero);

		write_imagef(cellPredictions, cellCoord, zero);
		write_imagef(cellQValues, cellCoord, zero);

		// Row of the weight image that belongs to this cell.
		const int weightRow = cell + column.y * cellsInColumn;

		for (int slot = 0; slot < lateralConnectionsSize; slot++) {
			const float initialWeight = randFloat(&rng) * weightRange + minWeight;

			write_imagef(cellWeights, (int4)(column.x, weightRow, slot, 0), (float4)(initialWeight, 0.0f, 0.0f, 0.0f));
		}
	}
}
148 | 
// Feed-forward activation of one column (one work-item per column).
//
// The column's position is mapped proportionally onto the input image to find the centre of
// its receptive field. Every in-bounds input inside the field is multiplied by the matching
// feed-forward weight and accumulated; the weight stored right after the last field slot is
// added as a bias, and sigmoid(total) is written to columnActivations.x.
// Field slots are numbered in dx-major / dy-minor order and are consumed even for positions
// that fall outside the input, so the slot layout does not depend on the column's position.
// columnStatesPrev and seed are accepted but do not influence the result.
void kernel layerColumnActivate(read_only image2d_t columnStatesInput, read_only image3d_t columnFeedForwardWeightsPrev, read_only image2d_t columnStatesPrev, write_only image2d_t columnActivations,
	float2 layerSizeMinusOneInv, int2 inputReceptiveFieldRadius, int2 inputSize, int2 inputSizeMinusOne, uint2 seed)
{
	const int2 column = (int2)(get_global_id(0), get_global_id(1));

	const float2 centerNormalized = (float2)(column.x * layerSizeMinusOneInv.x, column.y * layerSizeMinusOneInv.y);
	const float2 center = (float2)(centerNormalized.x * inputSizeMinusOne.x, centerNormalized.y * inputSizeMinusOne.y);

	float preActivation = 0.0f;

	int nextSlot = 0;

	for (int dx = -inputReceptiveFieldRadius.x; dx <= inputReceptiveFieldRadius.x; dx++) {
		for (int dy = -inputReceptiveFieldRadius.y; dy <= inputReceptiveFieldRadius.y; dy++) {
			// Claim this position's slot before any bounds check.
			const int slot = nextSlot++;

			const int2 tap = (int2)(center.x + dx, center.y + dy);

			const bool inside = tap.x >= 0 && tap.x < inputSize.x && tap.y >= 0 && tap.y < inputSize.y;

			if (!inside)
				continue;

			const float input = read_imagef(columnStatesInput, tap).x;
			const float weight = read_imagef(columnFeedForwardWeightsPrev, (int4)(column.x, column.y, slot, 0)).x;

			preActivation += weight * input;
		}
	}

	// The bias occupies the slot directly after the receptive field.
	preActivation += read_imagef(columnFeedForwardWeightsPrev, (int4)(column.x, column.y, nextSlot, 0)).x;

	write_imagef(columnActivations, column, (float4)(sigmoid(preActivation), 0.0f, 0.0f, 0.0f));
}
184 | 
// Local inhibition for one column (one work-item per column).
//
// Counts the in-bounds neighbours within inhibitionRadii whose activation is strictly higher
// than this column's. The column's new state is 1 when that count is below localActivity and
// 0 otherwise. The second channel holds an exponential running average of the state, blended
// with columnTraceDecay. Output texel: (state, trace, 0, 0).
// columnFeedForwardWeightsPrev, layerSizeInv and receptiveFieldSize are accepted but not read.
void kernel layerColumnInhibit(read_only image2d_t columnActivations, read_only image2d_t columnStatesPrev, read_only image3d_t columnFeedForwardWeightsPrev, write_only image2d_t columnStates,
	int2 layerSize, float2 layerSizeInv, int2 inhibitionRadii, int receptiveFieldSize)
{
	const int2 column = (int2)(get_global_id(0), get_global_id(1));

	const float ownActivation = read_imagef(columnActivations, column).x;

	float strongerCount = 0.0f;

	for (int dx = -inhibitionRadii.x; dx <= inhibitionRadii.x; dx++) {
		for (int dy = -inhibitionRadii.y; dy <= inhibitionRadii.y; dy++) {
			const int2 neighbour = (int2)(column.x + dx, column.y + dy);

			// Skip neighbours that fall outside the layer.
			if (neighbour.x < 0 || neighbour.x >= layerSize.x || neighbour.y < 0 || neighbour.y >= layerSize.y)
				continue;

			if (read_imagef(columnActivations, neighbour).x > ownActivation)
				strongerCount += 1.0f;
		}
	}

	const float state = strongerCount < localActivity ? 1.0f : 0.0f;

	const float tracePrev = read_imagef(columnStatesPrev, column).y;
	const float trace = (1.0f - columnTraceDecay) * tracePrev + columnTraceDecay * state;

	write_imagef(columnStates, column, (float4)(state, trace, 0.0f, 0.0f));
}
214 | 
// Feed-forward weight update for one column (one work-item per column).
//
// Weight texels hold (weight, previous delta); the previous delta provides momentum.
// Pass 1 accumulates the reconstruction error (input - recon) seen through the column's
// current weights and turns it into a hidden-unit error.
// Pass 2 applies, per in-bounds receptive-field slot,
//     delta = columnMomentum * prevDelta
//           + alpha * 0.5 * ((input - recon) * columnState + hiddenError * input)
// and writes (weight + delta, delta). The bias slot (directly after the receptive field)
// is updated with delta = columnMomentum * prevDelta + alpha * hiddenError.
//
// Slots whose input position is out of bounds are neither read nor written, so their texels
// in columnFeedForwardWeights keep whatever that image already contained.
// In the live code columnPredictions, layerSize, inhibitionRadii, receptiveFieldSize, beta,
// gamma and seed have no effect (some appear only in commented-out code).
void kernel layerColumnWeightUpdate(read_only image2d_t reconstruction, read_only image2d_t inputs, read_only image2d_t columnActivations, read_only image2d_t columnStates, read_only image2d_t columnPredictions, read_only image3d_t columnFeedForwardWeightsPrev, write_only image3d_t columnFeedForwardWeights,
	int2 layerSize, float2 layerSizeMinusOneInv, int2 inputReceptiveFieldRadius, int2 inhibitionRadii, int2 inputSize, int2 inputSizeMinusOne, int receptiveFieldSize, float alpha, float beta, float gamma, uint2 seed)
{
	// NOTE(review): seedValue is computed but never used below.
	uint2 seedValue = seed + (uint2)(get_global_id(0), get_global_id(1)) * 130;
	int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));
	
	// Map the column position proportionally onto the input image: centre of the receptive field.
	float2 inputCenterPositionNormalized = (float2)(columnPosition.x * layerSizeMinusOneInv.x, columnPosition.y * layerSizeMinusOneInv.y);
	float2 inputCenterPosition = (float2)(inputCenterPositionNormalized.x * inputSizeMinusOne.x, inputCenterPositionNormalized.y * inputSizeMinusOne.y);

	// thisState.x = column state, thisState.y = its second channel (only .x is used below).
	float2 thisState = read_imagef(columnStates, columnPosition).xy;
	float thisActivation = read_imagef(columnActivations, columnPosition).x;
	
	// Inhibition (disabled): neighbourhood-average sparsity penalty, kept for reference.
	/*float averageState = 0.0f;
	
	int count = 0;
	
	for (int dx = -inhibitionRadii.x; dx <= inhibitionRadii.x; dx++)
	for (int dy = -inhibitionRadii.y; dy <= inhibitionRadii.y; dy++) {
		int2 layerPosition = (int2)(columnPosition.x + dx, columnPosition.y + dy);
		
		if (layerPosition.x >= 0 && layerPosition.x < layerSize.x && layerPosition.y >= 0 && layerPosition.y < layerSize.y) {
			float state = read_imagef(columnStates, layerPosition).x;
	
			averageState += state;

			count++;
		}
	}
	
	averageState /= count;
	
	float sparsityPenalty = beta * (sparsity - averageState);*/

	// Pass 1: reconstruction error weighted by the current weights.
	float sum = 0.0f;
	
	// Slot index into the weight image; advanced for every field position, in or out of bounds.
	int weightIndex = 0;

	for (int dx = -inputReceptiveFieldRadius.x; dx <= inputReceptiveFieldRadius.x; dx++)
	for (int dy = -inputReceptiveFieldRadius.y; dy <= inputReceptiveFieldRadius.y; dy++) {
		int2 inputPosition = (int2)(inputCenterPosition.x + dx, inputCenterPosition.y + dy);
		
		if (inputPosition.x >= 0 && inputPosition.x < inputSize.x && inputPosition.y >= 0 && inputPosition.y < inputSize.y) {
			float input = read_imagef(inputs, inputPosition).x;
	
			float recon = read_imagef(reconstruction, inputPosition).x;
				
			int4 weightPosition = (int4)(columnPosition.x, columnPosition.y, weightIndex, 0);
	
			float2 prevWeight = read_imagef(columnFeedForwardWeightsPrev, weightPosition).xy;
			
			sum += (input - recon) * prevWeight.x;
		}
		
		weightIndex++;
	}
	
	// Average over all field slots (including out-of-bounds ones), scaled by a * (1 - a).
	// This matches the sigmoid derivative if columnActivations holds the output of
	// layerColumnActivate - presumably it does; confirm against the host code.
	float hiddenError = sum / weightIndex * thisActivation * (1.0f - thisActivation);
	// NOTE(review): this local shadows the program-scope constant `sparsity` and is only
	// referenced by commented-out code.
	float sparsity = localActivity / weightIndex;
	
	// Pass 2: walk the same slots again and write the updated weights.
	weightIndex = 0;

	for (int dx = -inputReceptiveFieldRadius.x; dx <= inputReceptiveFieldRadius.x; dx++)
	for (int dy = -inputReceptiveFieldRadius.y; dy <= inputReceptiveFieldRadius.y; dy++) {
		int2 inputPosition = (int2)(inputCenterPosition.x + dx, inputCenterPosition.y + dy);
		
		if (inputPosition.x >= 0 && inputPosition.x < inputSize.x && inputPosition.y >= 0 && inputPosition.y < inputSize.y) {
			float input = read_imagef(inputs, inputPosition).x;
	
			float recon = read_imagef(reconstruction, inputPosition).x;
				
			int4 weightPosition = (int4)(columnPosition.x, columnPosition.y, weightIndex, 0);
	
			// prevWeight.x = weight, prevWeight.y = delta applied on the previous update.
			float2 prevWeight = read_imagef(columnFeedForwardWeightsPrev, weightPosition).xy;
			
			float delta = prevWeight.y * columnMomentum + alpha * 0.5f * ((input - recon) * thisState.x + hiddenError * input);// + beta * (sparsity - thisState.y) * input;
			
			float newWeight = prevWeight.x + delta;
			
			write_imagef(columnFeedForwardWeights, weightPosition, (float4)(newWeight, delta, 0.0f, 0.0f));
		}
		
		weightIndex++;
	}
	
	// Bias: stored in the slot right after the receptive field (weightIndex now equals the
	// number of field positions).
	int4 weightPosition = (int4)(columnPosition.x, columnPosition.y, weightIndex, 0);

	float2 prevWeight = read_imagef(columnFeedForwardWeightsPrev, weightPosition).xy;
	
	float delta = prevWeight.y * columnMomentum + alpha * hiddenError;
	
	float newWeight = prevWeight.x + delta;// + beta * (sparsity - thisState.y);
	
	write_imagef(columnFeedForwardWeights, weightPosition, (float4)(newWeight, delta, 0.0f, 0.0f));
}
311 | 
// Chooses which cells of a column become active (one work-item per column).
//
// The cell with the highest previous prediction is the winner (ties and all-zero predictions
// resolve to the lowest index). With p = that highest prediction, every cell receives a
// baseline share of (1 - p) and the winner additionally receives p, so a confidently
// predicted column activates only its winner while an unpredicted column activates all of its
// cells. The result is multiplied by the column state.
// Output texel per cell: (state, trace, 0, 0) where
//     trace = max((1 - cellTraceDecay) * previous trace, state).
// cellWeightsPrev, columnPredictionsPrev, lateralConnectionsRadii and seed are accepted but
// not read.
void kernel layerCellActivate(read_only image2d_t columnStates, read_only image3d_t cellStatesPrev, read_only image3d_t cellPredictionsPrev, read_only image3d_t cellWeightsPrev, read_only image2d_t columnPredictionsPrev,
	write_only image3d_t cellStates, int cellsInColumn, int2 lateralConnectionsRadii, float cellTraceDecay, uint2 seed)
{
	const int2 column = (int2)(get_global_id(0), get_global_id(1));

	const float columnState = read_imagef(columnStates, column).x;

	// Find the most strongly predicted cell.
	int winner = 0;
	float winnerPrediction = 0.0f;

	for (int cell = 0; cell < cellsInColumn; cell++) {
		const float predicted = read_imagef(cellPredictionsPrev, (int4)(column.x, column.y, cell, 0)).x;

		if (predicted > winnerPrediction) {
			winnerPrediction = predicted;
			winner = cell;
		}
	}

	// Share of activation handed to every cell regardless of prediction.
	const float baseline = 1.0f - winnerPrediction;

	for (int cell = 0; cell < cellsInColumn; cell++) {
		const int4 cellCoord = (int4)(column.x, column.y, cell, 0);

		const float winnerGate = (cell == winner) ? 1.0f : 0.0f;
		const float unmodulated = (1.0f - baseline) * winnerGate + baseline;

		const float state = unmodulated * columnState;

		const float tracePrev = read_imagef(cellStatesPrev, cellCoord).y;
		const float trace = fmax((1.0f - cellTraceDecay) * tracePrev, state);

		write_imagef(cellStates, cellCoord, (float4)(state, trace, 0.0f, 0.0f));
	}
}
378 | 
// Lateral (cell-to-cell) weight update for a layer that has a next layer above it.
// One work-item per column; every cell of the column is processed in turn.
//
// Weight texels hold (weight, eligibility trace) and live at
//     (columnPosition.x, ci + columnPosition.y * cellsInColumn, wi).
// For each neighbour offset (dx, dy) within lateralConnectionsRadii the wi slots are laid out as
//     cellsInColumn * numSegmentsPerCell slots for the neighbour's cells (segment index fastest),
//     followed by numSegmentsPerCell slots for the next-layer context texel at the same offset.
// Slots belonging to out-of-bounds neighbours are skipped (wi is advanced, nothing is written),
// which keeps the layout independent of the column's position.
//
// Update rule per slot:
//     trace'  = (1 - eligibilityDecay) * trace + beta * exp(-|trace| * temperature) * error[segment] * source
//     weight' = weight + alpha * trace'
// where `source` is the neighbour cell's previous state or the previous next-layer context value.
//
// numSegmentsPerCell must not exceed MAX_SEGMENTS_PER_CELL (the errors array is not bounds-checked).
// tdError and gamma are accepted but not used.
void kernel layerCellWeightUpdate(read_only image2d_t columnStates, read_only image2d_t columnPredictionsPrev, read_only image3d_t cellPredictionsPrev, read_only image3d_t cellStates, read_only image3d_t cellStatesPrev, read_only image2d_t nextLayerContextPrev, read_only image3d_t segmentStatesPrev, read_only image3d_t cellWeightsPrev,
	write_only image3d_t cellWeights, int cellsInColumn, int2 layerSize, int2 lateralConnectionsRadii, int numSegmentsPerCell, float2 layerSizeMinusOneInv, int2 nextLayerSize, int2 nextLayerSizeMinusOne, float tdError, float alpha, float beta, float gamma, float temperature, float eligibilityDecay)
{
	int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));
	
	// Position in the next layer that corresponds proportionally to this column.
	float2 normalizedColumnCoords = (float2)(columnPosition.x * layerSizeMinusOneInv.x, columnPosition.y * layerSizeMinusOneInv.y);
	int2 connectionCoordsNextCenter = (int2)(normalizedColumnCoords.x * nextLayerSizeMinusOne.x, normalizedColumnCoords.y * nextLayerSizeMinusOne.y);
	
	// NOTE(review): columnState and columnPredictionPrev are read but never used below.
	float columnState = read_imagef(columnStates, columnPosition).x;
	float columnPredictionPrev = read_imagef(columnPredictionsPrev, columnPosition).x;
	
	//float tdError = read_imagef(columnTdErrors, columnPosition).x;
	
	//float learn = tdError > 0.0f ? 1.0f : 0.0f;
	
	//float predictionError = columnState - columnPredictionPrev;
	
	for (int ci = 0; ci < cellsInColumn; ci++) {
		// Row of the weight image that belongs to this cell.
		int weightSecondCoordinate = ci + columnPosition.y * cellsInColumn;
		
		float cellState = read_imagef(cellStates, (int4)(columnPosition.x, columnPosition.y, ci, 0)).x;
		float2 cellPredictionPrev = read_imagef(cellPredictionsPrev, (int4)(columnPosition.x, columnPosition.y, ci, 0)).xy;
		
		// NOTE(review): cellError is computed but never used below.
		float cellError = cellState - cellPredictionPrev.y;
		
		// Per-segment error (target minus previous segment state).
		float errors[MAX_SEGMENTS_PER_CELL];
		
		// Running slot index into this cell's weight row.
		int wi = 0;
		
		if (cellState > 0.5f) {
			// Active cell: the segment whose previous state equals cellPredictionPrev.y gets
			// target 1, every other segment gets target 0.
			// Presumably cellPredictionPrev.y stores the winning segment's state copied verbatim
			// by the prediction kernel, which is what makes the exact float comparison
			// meaningful - confirm against layerCellPredict.
			for (int i = 0; i < numSegmentsPerCell; i++) {
				float value = read_imagef(segmentStatesPrev, (int4)(columnPosition.x, columnPosition.y, ci * numSegmentsPerCell + i, 0)).x;
		
				if (value == cellPredictionPrev.y)
					errors[i] = 1.0f - value;
				else
					errors[i] = 0.0f - value;
			}
		}
		else {
			// Inactive cell: every segment is pushed towards 0.
			for (int i = 0; i < numSegmentsPerCell; i++) {
				float value = read_imagef(segmentStatesPrev, (int4)(columnPosition.x, columnPosition.y, ci * numSegmentsPerCell + i, 0)).x;
		
				errors[i] = 0.0f - value;
			}
		}
		
		// Go through all connections and update them
		for (int dx = -lateralConnectionsRadii.x; dx <= lateralConnectionsRadii.x; dx++)
		for (int dy = -lateralConnectionsRadii.y; dy <= lateralConnectionsRadii.y; dy++) {
			int2 connectionCoords = (int2)(columnPosition.x + dx, columnPosition.y + dy);
				
			if (connectionCoords.x >= 0 && connectionCoords.x < layerSize.x && connectionCoords.y >= 0 && connectionCoords.y < layerSize.y) {	
				for (int cio = 0; cio < cellsInColumn; cio++) {
					// Previous state of the neighbouring cell acts as the pre-synaptic signal.
					float connection = read_imagef(cellStatesPrev, (int4)(connectionCoords.x, connectionCoords.y, cio, 0)).x;
	
					for (int i = 0; i < numSegmentsPerCell; i++) {
						int4 weightPosition = (int4)(columnPosition.x, weightSecondCoordinate, wi, 0);
					
						// cellWeightPrev.x = weight, cellWeightPrev.y = eligibility trace.
						float2 cellWeightPrev = read_imagef(cellWeightsPrev, weightPosition).xy;
						
						float eligibility = errors[i] * connection;
						
						// The exp term shrinks the increment as the existing trace grows in magnitude.
						float newTrace = (1.0f - eligibilityDecay) * cellWeightPrev.y + beta * exp(-fabs(cellWeightPrev.y) * temperature) * eligibility;
						
						float2 newCellWeight = (float2)(cellWeightPrev.x + alpha * newTrace, newTrace);
						
						write_imagef(cellWeights, weightPosition, (float4)(newCellWeight.x, newCellWeight.y, 0.0f, 0.0f));
						
						wi++;
					}
				}
				
				// Additional context from next layer
				int2 connectionCoordsNext = (int2)(connectionCoordsNextCenter.x + dx, connectionCoordsNextCenter.y + dy);
			
				if (connectionCoordsNext.x >= 0 && connectionCoordsNext.x < nextLayerSize.x && connectionCoordsNext.y >= 0 && connectionCoordsNext.y < nextLayerSize.y) {
					float nextContextPrev = read_imagef(nextLayerContextPrev, connectionCoordsNext).x;
	
					for (int i = 0; i < numSegmentsPerCell; i++) {
						int4 weightPosition = (int4)(columnPosition.x, weightSecondCoordinate, wi, 0);
					
						float2 cellWeightPrev = read_imagef(cellWeightsPrev, weightPosition).xy;
				
						float eligibility = errors[i] * nextContextPrev;
						
						float newTrace = (1.0f - eligibilityDecay) * cellWeightPrev.y + beta * exp(-fabs(cellWeightPrev.y) * temperature) * eligibility;
						
						float2 newCellWeight = (float2)(cellWeightPrev.x + alpha * newTrace, newTrace);
						
						write_imagef(cellWeights, weightPosition, (float4)(newCellWeight.x, newCellWeight.y, 0.0f, 0.0f));
						
						wi++;
					}
				}
				else
					// Next-layer texel out of bounds: skip its slots, keep the layout aligned.
					wi += numSegmentsPerCell;
			}
			else
				// Neighbour out of bounds: skip its cell slots plus its next-layer context slots.
				wi += numSegmentsPerCell * (cellsInColumn + 1);
		}
	}
}
482 | 
// Lateral (cell-to-cell) weight update for the last layer, which has no next-layer context.
// One work-item per column; every cell of the column is processed in turn.
//
// Weight texels hold (weight, eligibility trace) and live at
//     (columnPosition.x, ci + columnPosition.y * cellsInColumn, wi).
// For each neighbour offset (dx, dy) within lateralConnectionsRadii there are
// cellsInColumn * numSegmentsPerCell slots (segment index fastest). Slots of out-of-bounds
// neighbours are skipped (wi is advanced, nothing is written).
//
// Update rule per slot:
//     trace'  = (1 - eligibilityDecay) * trace + beta * exp(-|trace| * temperature) * error[segment] * neighbourPrevState
//     weight' = weight + alpha * trace'
//
// numSegmentsPerCell must not exceed MAX_SEGMENTS_PER_CELL (the errors array is not bounds-checked).
// tdError and gamma are accepted but not used.
void kernel layerCellWeightUpdateLast(read_only image2d_t columnStates, read_only image2d_t columnPredictionsPrev, read_only image3d_t cellPredictionsPrev, read_only image3d_t cellStates, read_only image3d_t cellStatesPrev, read_only image3d_t segmentStatesPrev, read_only image3d_t cellWeightsPrev,
	write_only image3d_t cellWeights, int cellsInColumn, int2 layerSize, int2 lateralConnectionsRadii, int numSegmentsPerCell, float tdError, float alpha, float beta, float gamma, float temperature, float eligibilityDecay)
{
	int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));
	
	// NOTE(review): columnState and columnPredictionPrev are read but never used below.
	float columnState = read_imagef(columnStates, columnPosition).x;
	float columnPredictionPrev = read_imagef(columnPredictionsPrev, columnPosition).x;
	
	//float tdError = read_imagef(columnTdErrors, columnPosition).x;
	
	//float predictionError = columnState - columnPredictionPrev;
	
	for (int ci = 0; ci < cellsInColumn; ci++) {
		// Row of the weight image that belongs to this cell.
		int weightSecondCoordinate = ci + columnPosition.y * cellsInColumn;
		
		float cellState = read_imagef(cellStates, (int4)(columnPosition.x, columnPosition.y, ci, 0)).x;
		float2 cellPredictionPrev = read_imagef(cellPredictionsPrev, (int4)(columnPosition.x, columnPosition.y, ci, 0)).xy;
		
		// NOTE(review): cellError is computed but never used below.
		float cellError = cellState - cellPredictionPrev.y;//(cellState - cellPredictionPrev);//((1.0f - columnState) * columnPredictionPrev + columnState) * 
		
		// Per-segment error (target minus previous segment state).
		float errors[MAX_SEGMENTS_PER_CELL];
		
		// Running slot index into this cell's weight row.
		int wi = 0;
		
		if (cellState > 0.5f) {
			// Active cell: the segment whose previous state equals cellPredictionPrev.y gets
			// target 1, every other segment gets target 0.
			// Presumably cellPredictionPrev.y stores the winning segment's state copied verbatim
			// by the prediction kernel, which is what makes the exact float comparison
			// meaningful - confirm against layerCellPredict.
			for (int i = 0; i < numSegmentsPerCell; i++) {
				float value = read_imagef(segmentStatesPrev, (int4)(columnPosition.x, columnPosition.y, ci * numSegmentsPerCell + i, 0)).x;
		
				if (value == cellPredictionPrev.y)
					errors[i] = 1.0f - value;
				else
					errors[i] = 0.0f - value;
			}
		}
		else {
			// Inactive cell: every segment is pushed towards 0.
			for (int i = 0; i < numSegmentsPerCell; i++) {
				float value = read_imagef(segmentStatesPrev, (int4)(columnPosition.x, columnPosition.y, ci * numSegmentsPerCell + i, 0)).x;
		
				errors[i] = 0.0f - value;
			}
		}
		
		// Go through all connections and update them
		for (int dx = -lateralConnectionsRadii.x; dx <= lateralConnectionsRadii.x; dx++)
		for (int dy = -lateralConnectionsRadii.y; dy <= lateralConnectionsRadii.y; dy++) {
			int2 connectionCoords = (int2)(columnPosition.x + dx, columnPosition.y + dy);
			
			if (connectionCoords.x >= 0 && connectionCoords.x < layerSize.x && connectionCoords.y >= 0 && connectionCoords.y < layerSize.y) {	
				for (int cio = 0; cio < cellsInColumn; cio++) {
					// Previous state of the neighbouring cell acts as the pre-synaptic signal.
					float connection = read_imagef(cellStatesPrev, (int4)(connectionCoords.x, connectionCoords.y, cio, 0)).x;
	
					for (int i = 0; i < numSegmentsPerCell; i++) {
						int4 weightPosition = (int4)(columnPosition.x, weightSecondCoordinate, wi, 0);
					
						// cellWeightPrev.x = weight, cellWeightPrev.y = eligibility trace.
						float2 cellWeightPrev = read_imagef(cellWeightsPrev, weightPosition).xy;
						
						float eligibility = errors[i] * connection;
						
						// The exp term shrinks the increment as the existing trace grows in magnitude.
						float newTrace = (1.0f - eligibilityDecay) * cellWeightPrev.y + beta * exp(-fabs(cellWeightPrev.y) * temperature) * eligibility;
						
						float2 newCellWeight = (float2)(cellWeightPrev.x + alpha * newTrace, newTrace);
						
						write_imagef(cellWeights, weightPosition, (float4)(newCellWeight.x, newCellWeight.y, 0.0f, 0.0f));
						
						wi++;
					}
				}
			}
			else
				// Neighbour out of bounds: skip its slots so the layout stays aligned.
				wi += numSegmentsPerCell * cellsInColumn;
		}
	}
}
556 | 
557 | void kernel layerCellPredict(read_only image3d_t cellStates, read_only image3d_t cellStatesPrev, read_only image3d_t cellWeights, read_only image2d_t nextLayerContext, read_only image2d_t nextLayerContextPrev,
558 | 	write_only image3d_t cellPredictions, write_only image3d_t segmentStates, int cellsInColumn, int2 layerSize, int2 lateralConnectionsRadii, int numSegmentsPerCell, float2 layerSizeMinusOneInv, int2 nextLayerSize, int2 nextLayerSizeMinusOne)
559 | {
560 | 	int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));
561 | 	
562 | 	float2 normalizedColumnCoords = (float2)(columnPosition.x * layerSizeMinusOneInv.x, columnPosition.y * layerSizeMinusOneInv.y);
563 | 	int2 connectionCoordsNextCenter = (int2)(normalizedColumnCoords.x * nextLayerSizeMinusOne.x, normalizedColumnCoords.y * nextLayerSizeMinusOne.y);
564 | 		
565 | 	for (int ci = 0; ci < cellsInColumn; ci++) {
566 | 		float sums[MAX_SEGMENTS_PER_CELL];
567 | 		
568 | 		for (int i = 0; i < numSegmentsPerCell; i++)
569 | 			sums[i] = 0.0f;
570 | 		
571 | 		int weightSecondCoordinate = ci + columnPosition.y * cellsInColumn;
572 | 		
573 | 		int wi = 0;
574 | 		
575 | 		// Go through all connections 
576 | 		for (int dx = -lateralConnectionsRadii.x; dx <= lateralConnectionsRadii.x; dx++)
577 | 		for (int dy = -lateralConnectionsRadii.y; dy <= lateralConnectionsRadii.y; dy++) {
578 | 			int2 connectionCoords = (int2)(columnPosition.x + dx, columnPosition.y + dy);
579 | 			
580 | 			if (connectionCoords.x >= 0 && connectionCoords.x < layerSize.x && connectionCoords.y >= 0 && connectionCoords.y < layerSize.y) {	
581 | 				for (int cio = 0; cio < cellsInColumn; cio++) {
582 | 					float connectionState = read_imagef(cellStates, (int4)(connectionCoords.x, connectionCoords.y, cio, 0)).x;
583 | 					//float connectionStatePrev = read_imagef(cellStatesPrev, (int4)(connectionCoords.x, connectionCoords.y, cio, 0)).x;
584 | 					
585 | 					for (int i = 0; i < numSegmentsPerCell; i++) {
586 | 						int4 weightPosition = (int4)(columnPosition.x, weightSecondCoordinate, wi, 0);
587 | 					
588 | 						float cellWeight = read_imagef(cellWeights, weightPosition).x;
589 | 			
590 | 						sums[i] += cellWeight * connectionState;
591 | 						
592 | 						wi++;
593 | 					}
594 | 				}
595 | 				
596 | 				int2 connectionCoordsNext = (int2)(connectionCoordsNextCenter.x + dx, connectionCoordsNextCenter.y + dy);
597 | 				
598 | 				if (connectionCoordsNext.x >= 0 && connectionCoordsNext.x < nextLayerSize.x && connectionCoordsNext.y >= 0 && connectionCoordsNext.y < nextLayerSize.y) {
599 | 					float nextContext = read_imagef(nextLayerContext, connectionCoordsNext).x;
600 | 					//float nextContextPrev = read_imagef(nextLayerContextPrev, connectionCoordsNext).x;
601 | 					
602 | 					for (int i = 0; i < numSegmentsPerCell; i++) {
603 | 						int4 weightPosition = (int4)(columnPosition.x, weightSecondCoordinate, wi, 0);
604 | 				
605 | 						float cellWeight = read_imagef(cellWeights, weightPosition).x;
606 | 					
607 | 						sums[i] += cellWeight * nextContext;
608 | 						
609 | 						wi++;
610 | 					}
611 | 				}
612 | 				else
613 | 					wi += numSegmentsPerCell;
614 | 			}
615 | 			else
616 | 				wi += numSegmentsPerCell * (cellsInColumn + 1); // + 1 for context from higher layer
617 | 		}
618 | 		
619 | 		float maximum = 0.0f;
620 | 		
621 | 		for (int i = 0; i < numSegmentsPerCell; i++) {
622 | 			float s = sigmoid(sums[i]);
623 | 			
624 | 			maximum = fmax(maximum, s);
625 | 			
626 | 			write_imagef(segmentStates, (int4)(columnPosition.x, columnPosition.y, ci * numSegmentsPerCell + i, 0), (float4)(s, 0.0f, 0.0f, 0.0f));
627 | 		}
628 | 		
629 | 		write_imagef(cellPredictions, (int4)(columnPosition.x, columnPosition.y, ci, 0), (float4)(maximum > 0.5f ? 1.0f : 0.0f, maximum, 0.0f, 0.0f));
630 | 	}
631 | }
632 | 
633 | void kernel layerCellPredictLast(read_only image3d_t cellStates, read_only image3d_t cellStatesPrev, read_only image3d_t cellWeights,
634 | 	write_only image3d_t cellPredictions, write_only image3d_t segmentStates, int cellsInColumn, int2 layerSize, int2 lateralConnectionsRadii, int numSegmentsPerCell)
635 | {
636 | 	int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));
637 | 	
638 | 	for (int ci = 0; ci < cellsInColumn; ci++) {
639 | 		float sums[MAX_SEGMENTS_PER_CELL];
640 | 		
641 | 		for (int i = 0; i < numSegmentsPerCell; i++)
642 | 			sums[i] = 0.0f;
643 | 			
644 | 		int weightSecondCoordinate = ci + columnPosition.y * cellsInColumn;
645 | 		
646 | 		int wi = 0;
647 | 		
648 | 		// Go through all connections 
649 | 		for (int dx = -lateralConnectionsRadii.x; dx <= lateralConnectionsRadii.x; dx++)
650 | 		for (int dy = -lateralConnectionsRadii.y; dy <= lateralConnectionsRadii.y; dy++) {
651 | 			int2 connectionCoords = (int2)(columnPosition.x + dx, columnPosition.y + dy);
652 | 
653 | 			if (connectionCoords.x >= 0 && connectionCoords.x < layerSize.x && connectionCoords.y >= 0 && connectionCoords.y < layerSize.y) {	
654 | 				for (int cio = 0; cio < cellsInColumn; cio++) {
655 | 					float connectionState = read_imagef(cellStates, (int4)(connectionCoords.x, connectionCoords.y, cio, 0)).x;
656 | 					//float connectionStatePrev = read_imagef(cellStatesPrev, (int4)(connectionCoords.x, connectionCoords.y, cio, 0)).x;
657 | 					
658 | 					for (int i = 0; i < numSegmentsPerCell; i++) {
659 | 						int4 weightPosition = (int4)(columnPosition.x, weightSecondCoordinate, wi, 0);
660 | 					
661 | 						float cellWeight = read_imagef(cellWeights, weightPosition).x;
662 | 			
663 | 						sums[i] += cellWeight * connectionState;
664 | 						
665 | 						wi++;
666 | 					}
667 | 				}
668 | 			}
669 | 			else
670 | 				wi += cellsInColumn * numSegmentsPerCell;
671 | 		}
672 | 		
673 | 		float maximum = 0.0f;
674 | 		
675 | 		for (int i = 0; i < numSegmentsPerCell; i++) {
676 | 			float s = sigmoid(sums[i]);
677 | 			
678 | 			maximum = fmax(maximum, s);
679 | 			
680 | 			write_imagef(segmentStates, (int4)(columnPosition.x, columnPosition.y, ci * numSegmentsPerCell + i, 0), (float4)(s, 0.0f, 0.0f, 0.0f));
681 | 		}
682 | 		
683 | 		write_imagef(cellPredictions, (int4)(columnPosition.x, columnPosition.y, ci, 0), (float4)(maximum > 0.5f ? 1.0f : 0.0f, maximum, 0.0f, 0.0f));
684 | 	}
685 | }
686 | 
687 | void kernel layerColumnPrediction(read_only image3d_t cellPredictions, read_only image3d_t cellStates, write_only image2d_t columnPredictions, int cellsInColumn) {
688 | 	int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));
689 | 	
690 | 	float maxPrediction = 0.0f;
691 | 	
692 | 	for (int ci = 0; ci < cellsInColumn; ci++) {
693 | 		float prediction = read_imagef(cellPredictions, (int4)(columnPosition.x, columnPosition.y, ci, 0)).x;
694 | 	
695 | 		maxPrediction = fmax(maxPrediction, prediction);
696 | 	}
697 | 	
698 | 	float output = maxPrediction;
699 | 	
700 | 	write_imagef(columnPredictions, columnPosition, (float4)(output, 0.0f, 0.0f, 0.0f));
701 | }
702 | 
703 | void kernel layerAssignQ(read_only image3d_t cellQValuesPrev, read_only image3d_t cellStatesPrev, write_only image3d_t cellQValues,
704 | 	int cellsInColumn, float alpha)
705 | {
706 | 	int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));
707 | 	
708 | 	for (int ci = 0; ci < cellsInColumn; ci++) {
709 | 		float qPrev = read_imagef(cellQValuesPrev, (int4)(columnPosition.x, columnPosition.y, ci, 0)).x;
710 | 		
711 | 		float cellEligibility = read_imagef(cellStatesPrev, (int4)(columnPosition.x, columnPosition.y, ci, 0)).y;
712 | 		
713 | 		float storeQ = qPrev + cellEligibility * alpha;
714 | 		
715 | 		write_imagef(cellQValues, (int4)(columnPosition.x, columnPosition.y, ci, 0), (float4)(storeQ, 0.0f, 0.0f, 0.0f));
716 | 	}
717 | }
718 | 
719 | void kernel layerColumnQ(read_only image3d_t cellQValuesPrev, read_only image3d_t cellStatesPrev, read_only image3d_t cellStates, read_only image2d_t columnStates, read_only image2d_t columnStatesNext, read_only image2d_t columnQValuesNext, write_only image2d_t columnQValues,
720 | 	int cellsInColumn, float2 layerSizeMinusOneInv, int2 nextLayerSize, int2 nextLayerSizeMinusOne)
721 | {
722 | 	int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));
723 | 	
724 | 	float2 columnPositionNormalized = (float2)(columnPosition.x * layerSizeMinusOneInv.x, columnPosition.y * layerSizeMinusOneInv.y);
725 | 	int2 nextLayerPositionCenter = (int2)(columnPositionNormalized.x * nextLayerSizeMinusOne.x, columnPositionNormalized.y * nextLayerSizeMinusOne.y);
726 | 	
727 | 	float sum = 0.0f;
728 | 	float divisor = 0.0f;
729 | 
730 | 	for (int ci = 0; ci < cellsInColumn; ci++) {
731 | 		float state = read_imagef(cellStates, (int4)(columnPosition.x, columnPosition.y, ci, 0)).x;
732 | 		float cellQ = read_imagef(cellQValuesPrev, (int4)(columnPosition.x, columnPosition.y, ci, 0)).x;
733 | 		
734 | 		sum += state * cellQ;
735 | 		divisor += state;
736 | 	}
737 | 	
738 | 	float thisQ = sum / fmax(minDivisor, divisor);
739 | 	
740 | 	float output = thisQ;
741 | 		
742 | 	write_imagef(columnQValues, columnPosition, (float4)(output, 0.0f, 0.0f, 0.0f));
743 | }
744 | 
745 | void kernel layerColumnQLast(read_only image3d_t cellQValuesPrev, read_only image3d_t cellStatesPrev, read_only image3d_t cellStates, write_only image2d_t columnQValues,
746 | 	int cellsInColumn)
747 | {
748 | 	int2 columnPosition = (int2)(get_global_id(0), get_global_id(1));
749 | 	
750 | 	float sum = 0.0f;
751 | 	float divisor = 0.0f;
752 | 	
753 | 	for (int ci = 0; ci < cellsInColumn; ci++) {
754 | 		float state = read_imagef(cellStates, (int4)(columnPosition.x, columnPosition.y, ci, 0)).x;
755 | 		float cellQ = read_imagef(cellQValuesPrev, (int4)(columnPosition.x, columnPosition.y, ci, 0)).x;
756 | 		
757 | 		sum += state * cellQ;
758 | 		divisor += state;
759 | 	}
760 | 	
761 | 	float thisQ = sum / fmax(minDivisor, divisor);
762 | 	
763 | 	write_imagef(columnQValues, columnPosition, (float4)(thisQ, 0.0f, 0.0f, 0.0f));
764 | }
765 | 
766 | void kernel initializePartThree(write_only image2d_t inputBiases, uint2 seed, float minBias, float maxBias) {
767 | 	uint2 seedValue = seed + (uint2)(get_global_id(0), get_global_id(1)) * 130;
768 | 	int2 inputPosition = (int2)(get_global_id(0), get_global_id(1));
769 | 	
770 | 	float bias = randFloat(&seedValue) * (maxBias - minBias) + minBias;
771 | 	
772 | 	write_imagef(inputBiases, inputPosition, (float4)(bias, 0.0f, 0.0f, 0.0f));
773 | }
774 | 
775 | void kernel reconstructInput(read_only image3d_t columnFeedForwardWeights, read_only image2d_t inputBiases, read_only image2d_t columnStates, write_only image2d_t reconstruction,
776 | 	int2 reverseReceptiveFieldRadius, int2 sdrReceptiveFieldRadius, int2 inputSizeMinusOne, float2 inputSizeMinusOneInv, int2 sdrSize, int2 sdrSizeMinusOne, float2 sdrSizeMinusOneInv)
777 | {
778 | 	int2 inputPosition = (int2)(get_global_id(0), get_global_id(1));
779 | 	float2 inputPositionNormalized = (float2)(inputPosition.x * inputSizeMinusOneInv.x, inputPosition.y * inputSizeMinusOneInv.y);
780 | 	float2 sdrPositionCenter = (float2)(inputPositionNormalized.x * sdrSizeMinusOne.x, inputPositionNormalized.y * sdrSizeMinusOne.y);
781 | 	
782 | 	float sum = 0.0f;
783 | 
784 | 	for (int dx = -reverseReceptiveFieldRadius.x; dx <= reverseReceptiveFieldRadius.x; dx++)
785 | 	for (int dy = -reverseReceptiveFieldRadius.y; dy <= reverseReceptiveFieldRadius.y; dy++) {
786 | 		int2 sdrPosition = (int2)(sdrPositionCenter.x + dx, sdrPositionCenter.y + dy);
787 | 		
788 | 		if (sdrPosition.x >= 0 && sdrPosition.x < sdrSize.x && sdrPosition.y >= 0 && sdrPosition.y < sdrSize.y) {
789 | 			// Next layer node's receptive field
790 | 			int2 fieldCenter = (int2)(sdrPosition.x * sdrSizeMinusOneInv.x * inputSizeMinusOne.x, sdrPosition.y * sdrSizeMinusOneInv.y * inputSizeMinusOne.y);
791 | 
792 | 			int2 fieldLowerBounds = fieldCenter - sdrReceptiveFieldRadius;
793 | 			int2 fieldUpperBounds = fieldCenter + sdrReceptiveFieldRadius;
794 | 		
795 | 			// Check for containment
796 | 			if (inputPosition.x >= fieldLowerBounds.x && inputPosition.x <= fieldUpperBounds.x && inputPosition.y >= fieldLowerBounds.y && inputPosition.y <= fieldUpperBounds.y) {	
797 | 				int rdx = inputPosition.x - fieldCenter.x;
798 | 				int rdy = inputPosition.y - fieldCenter.y;
799 | 				
800 | 				float source = read_imagef(columnStates, sdrPosition).x;
801 | 
802 | 				int weightIndex = (sdrReceptiveFieldRadius.y + rdy) + (sdrReceptiveFieldRadius.x + rdx) * (sdrReceptiveFieldRadius.y * 2 + 1);
803 | 
804 | 				float weight = read_imagef(columnFeedForwardWeights, (int4)(sdrPosition.x, sdrPosition.y, weightIndex, 0)).x;
805 | 				
806 | 				sum += source * weight;
807 | 			}
808 | 		}
809 | 	}
810 | 
811 | 	float bias = read_imagef(inputBiases, inputPosition).x;
812 | 				
813 | 	sum += bias;
814 | 	
815 | 	write_imagef(reconstruction, inputPosition, (float4)(sum, 0.0f, 0.0f, 0.0f));
816 | }
817 | 
818 | void kernel inputBiasUpdate(read_only image2d_t inputs, read_only image2d_t reconstruction, read_only image2d_t inputBiasesPrev, write_only image2d_t inputBiases, 
819 | 	float gamma)
820 | {
821 | 	int2 inputPosition = (int2)(get_global_id(0), get_global_id(1));
822 | 	
823 | 	float2 prevBias = read_imagef(inputBiasesPrev, inputPosition).xy;
824 | 	
825 | 	float recon = read_imagef(reconstruction, inputPosition).x;
826 | 	float input = read_imagef(inputs, inputPosition).x;
827 | 	
828 | 	float delta = prevBias.y * columnMomentum + gamma * (input - recon);
829 | 	
830 | 	float newBias = prevBias.x + delta;
831 | 	
832 | 	write_imagef(inputBiases, inputPosition, (float4)(newBias, delta, 0.0f, 0.0f));
833 | }
834 | 
835 | void kernel gaussianBlurX(read_only image2d_t source, write_only image2d_t destination, float2 sizeInv, float kernelWidth) {
836 | 	int2 destinationPosition = (int2)(get_global_id(0), get_global_id(1));
837 | 	float2 destinationPositionNormalized = (float2)(destinationPosition.x * sizeInv.x, destinationPosition.y * sizeInv.y);
838 | 	
839 | 	float4 sum = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
840 | 	
841 | 	sum += read_imagef(source, defaultNormalizedSampler, (float2)(destinationPositionNormalized.x - 4.0f * kernelWidth, destinationPositionNormalized.y)) * 0.05f;
842 | 	sum += read_imagef(source, defaultNormalizedSampler, (float2)(destinationPositionNormalized.x - 3.0f * kernelWidth, destinationPositionNormalized.y)) * 0.09f;
843 | 	sum += read_imagef(source, defaultNormalizedSampler, (float2)(destinationPositionNormalized.x - 2.0f * kernelWidth, destinationPositionNormalized.y)) * 0.12f;
844 | 	sum += read_imagef(source, defaultNormalizedSampler, (float2)(destinationPositionNormalized.x - kernelWidth, destinationPositionNormalized.y)) * 0.15f;
845 | 	sum += read_imagef(source, defaultNormalizedSampler, (float2)(destinationPositionNormalized.x, destinationPositionNormalized.y)) * 0.16f;
846 | 	sum += read_imagef(source, defaultNormalizedSampler, (float2)(destinationPositionNormalized.x + kernelWidth, destinationPositionNormalized.y)) * 0.15f;
847 | 	sum += read_imagef(source, defaultNormalizedSampler, (float2)(destinationPositionNormalized.x + 2.0f * kernelWidth, destinationPositionNormalized.y)) * 0.12f;
848 | 	sum += read_imagef(source, defaultNormalizedSampler, (float2)(destinationPositionNormalized.x + 3.0f * kernelWidth, destinationPositionNormalized.y)) * 0.09f;
849 | 	sum += read_imagef(source, defaultNormalizedSampler, (float2)(destinationPositionNormalized.x + 4.0f * kernelWidth, destinationPositionNormalized.y)) * 0.05f;
850 |  
851 | 	write_imagef(destination, destinationPosition, sum);
852 | }
853 | 
854 | void kernel gaussianBlurY(read_only image2d_t source, write_only image2d_t destination, float2 sizeInv, float kernelWidth) {
855 | 	int2 destinationPosition = (int2)(get_global_id(0), get_global_id(1));
856 | 	float2 destinationPositionNormalized = (float2)(destinationPosition.x * sizeInv.x, destinationPosition.y * sizeInv.y);
857 | 	
858 | 	float4 sum = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
859 | 	
860 | 	sum += read_imagef(source, defaultNormalizedSampler, (float2)(destinationPositionNormalized.x, destinationPositionNormalized.y - 4.0f * kernelWidth)) * 0.05f;
861 | 	sum += read_imagef(source, defaultNormalizedSampler, (float2)(destinationPositionNormalized.x, destinationPositionNormalized.y - 3.0f * kernelWidth)) * 0.09f;
862 | 	sum += read_imagef(source, defaultNormalizedSampler, (float2)(destinationPositionNormalized.x, destinationPositionNormalized.y - 2.0f * kernelWidth)) * 0.12f;
863 | 	sum += read_imagef(source, defaultNormalizedSampler, (float2)(destinationPositionNormalized.x, destinationPositionNormalized.y - kernelWidth)) * 0.15f;
864 | 	sum += read_imagef(source, defaultNormalizedSampler, (float2)(destinationPositionNormalized.x, destinationPositionNormalized.y)) * 0.16f;
865 | 	sum += read_imagef(source, defaultNormalizedSampler, (float2)(destinationPositionNormalized.x, destinationPositionNormalized.y + kernelWidth)) * 0.15f;
866 | 	sum += read_imagef(source, defaultNormalizedSampler, (float2)(destinationPositionNormalized.x, destinationPositionNormalized.y + 2.0f * kernelWidth)) * 0.12f;
867 | 	sum += read_imagef(source, defaultNormalizedSampler, (float2)(destinationPositionNormalized.x, destinationPositionNormalized.y + 3.0f * kernelWidth)) * 0.09f;
868 | 	sum += read_imagef(source, defaultNormalizedSampler, (float2)(destinationPositionNormalized.x, destinationPositionNormalized.y + 4.0f * kernelWidth)) * 0.05f;
869 |  
870 | 	write_imagef(destination, destinationPosition, sum);
871 | }


--------------------------------------------------------------------------------
/ContinuousHTMGPU/source/htm/HTMRL.cpp:
--------------------------------------------------------------------------------
   1 | #include "HTMRL.h"
   2 | 
   3 | #include <iostream>
   4 | 
   5 | using namespace htm;
   6 | 
   7 | void HTMRL::createRandom(sys::ComputeSystem &cs, sys::ComputeProgram &program, int inputWidth, int inputHeight, int reconstructionReceptiveRadius, const std::vector<LayerDesc> &layerDescs, const std::vector<InputType> &inputTypes, float minInitWeight, float maxInitWeight, float minInitCenter, float maxInitCenter, std::mt19937 &generator) {
   8 | 	struct Uint2 {
   9 | 		unsigned int _x, _y;
  10 | 	};
  11 | 
  12 | 	_addReplaySampleStepCounter = 0;
  13 | 	
  14 | 	_inputWidth = inputWidth;
  15 | 	_inputHeight = inputHeight;
  16 | 
  17 | 	_layerDescs = layerDescs;
  18 | 	
  19 | 	_layers.resize(_layerDescs.size());
  20 | 
  21 | 	_inputTypes = inputTypes;
  22 | 
  23 | 	std::uniform_real_distribution<float> weightDist(minInitWeight, maxInitWeight);
  24 | 	std::uniform_real_distribution<float> actionDist(0.0f, 1.0f);
  25 | 
  26 | 	_prevMaxQ = 0.0f;
  27 | 	_prevValue = 0.0f;
  28 | 	_prevPrevValue = 0.0f;
  29 | 	_prevQ = 0.0f;
  30 | 	_prevTDError = 0.0f;
  31 | 
  32 | 	cl::Kernel initPartOneKernel = cl::Kernel(program.getProgram(), "initializePartOne");
  33 | 	cl::Kernel initPartTwoKernel = cl::Kernel(program.getProgram(), "initializePartTwo");
  34 | 	cl::Kernel initPartThreeKernel = cl::Kernel(program.getProgram(), "initializePartThree");
  35 | 
  36 | 	_input.clear();
  37 | 	_input.resize(_inputWidth * _inputHeight);
  38 | 
  39 | 	_output.clear();
  40 | 	_output.assign(_inputWidth * _inputHeight, 0.0f);
  41 | 
  42 | 	_prediction.clear();
  43 | 	_prediction.assign(_inputWidth * _inputHeight, 0.0f);
  44 | 
  45 | 	_exploratoryOutput.clear();
  46 | 	_exploratoryOutput.assign(_inputWidth * _inputHeight, 0.0f);
  47 | 
  48 | 	_prevOutput.clear();
  49 | 	_prevOutput.assign(_inputWidth * _inputHeight, 0.0f);
  50 | 
  51 | 	_prevOutputExploratory.clear();
  52 | 	_prevOutputExploratory.assign(_inputWidth * _inputHeight, 0.0f);
  53 | 
  54 | 	_prevInput.clear();
  55 | 	_prevInput.assign(_inputWidth * _inputHeight, 0.0f);
  56 | 
  57 | 	// Initialize action portions randomly
  58 | 	for (int i = 0; i < _input.size(); i++)
  59 | 	if (_inputTypes[i] == _action) {
  60 | 		float value = actionDist(generator);
  61 | 
  62 | 		_input[i] = value;
  63 | 
  64 | 		_exploratoryOutput[i] = value;
  65 | 
  66 | 		_prevOutput[i] = value;
  67 | 
  68 | 		_prevOutputExploratory[i] = value;
  69 | 
  70 | 		_prevInput[i] = value;
  71 | 	}
  72 | 
  73 | 	_inputImage = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _inputWidth, _inputHeight);
  74 | 
  75 | 	_reconstructedPrediction = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _inputWidth, _inputHeight);
  76 | 
  77 | 	int prevWidth = _inputWidth;
  78 | 	int prevHeight = _inputHeight;
  79 | 	int prevCellsPerColumn = 1;
  80 | 
  81 | 	for (int l = 0; l < _layers.size(); l++) {
  82 | 		initLayer(cs, initPartOneKernel, initPartTwoKernel, initPartThreeKernel, prevWidth, prevHeight, prevCellsPerColumn, _layers[l], _layerDescs[l], l == _layers.size() - 1, minInitWeight, maxInitWeight, minInitCenter, maxInitCenter, minInitWeight, maxInitWeight, generator);
  83 | 
  84 | 		prevWidth = _layerDescs[l]._width;
  85 | 		prevHeight = _layerDescs[l]._height;
  86 | 		prevCellsPerColumn = _layerDescs[l]._cellsInColumn;
  87 | 	}
  88 | 
  89 | 	_layerColumnActivateKernel = cl::Kernel(program.getProgram(), "layerColumnActivate");
  90 | 	_layerColumnInhibitKernel = cl::Kernel(program.getProgram(), "layerColumnInhibit");
  91 | 	_layerCellActivateKernel = cl::Kernel(program.getProgram(), "layerCellActivate");
  92 | 	_layerCellWeightUpdateKernel = cl::Kernel(program.getProgram(), "layerCellWeightUpdate");
  93 | 	_layerCellWeightUpdateLastKernel = cl::Kernel(program.getProgram(), "layerCellWeightUpdateLast");
  94 | 	_layerCellPredictKernel = cl::Kernel(program.getProgram(), "layerCellPredict");
  95 | 	_layerCellPredictLastKernel = cl::Kernel(program.getProgram(), "layerCellPredictLast");
  96 | 	_layerColumnWeightUpdateKernel = cl::Kernel(program.getProgram(), "layerColumnWeightUpdate");
  97 | 	_layerColumnPredictionKernel = cl::Kernel(program.getProgram(), "layerColumnPrediction");
  98 | 	_layerColumnQKernel = cl::Kernel(program.getProgram(), "layerColumnQ");
  99 | 	_layerColumnQLastKernel = cl::Kernel(program.getProgram(), "layerColumnQLast");
 100 | 	_layerAssignQKernel = cl::Kernel(program.getProgram(), "layerAssignQ");
 101 | 
 102 | 	_gaussianBlurXKernel = cl::Kernel(program.getProgram(), "gaussianBlurX");
 103 | 	_gaussianBlurYKernel = cl::Kernel(program.getProgram(), "gaussianBlurY");
 104 | 
 105 | 	_reconstructInputKernel = cl::Kernel(program.getProgram(), "reconstructInput");
 106 | 	_inputBiasUpdateKernel = cl::Kernel(program.getProgram(), "inputBiasUpdate");
 107 | }
 108 | 
 109 | void HTMRL::initLayer(sys::ComputeSystem &cs, cl::Kernel &initPartOneKernel, cl::Kernel &initPartTwoKernel, cl::Kernel &initPartThreeKernel, int inputWidth, int inputHeight, int inputCellsPerColumn, Layer &layer, const LayerDesc &layerDesc, bool isTopmost, float minInitWeight, float maxInitWeight, float minInitCenter, float maxInitCenter, float minInitWidth, float maxInitWidth, std::mt19937 &generator) {
 110 | 	struct Uint2 {
 111 | 		unsigned int _x, _y;
 112 | 	};
 113 | 
 114 | 	struct Float2 {
 115 | 		float _x, _y;
 116 | 	};
 117 | 	
 118 | 	std::uniform_int_distribution<int> uniformDist(0, 10000);
 119 | 
 120 | 	int receptiveFieldSize = std::pow(layerDesc._receptiveFieldRadius * 2 + 1, 2) + 1; // + 1 for bias
 121 | 	int lateralConnectionsSize;
 122 | 
 123 | 	// If not the last layer, add weights for additional context from next layer
 124 | 	if (isTopmost)
 125 | 		lateralConnectionsSize = layerDesc._numSegmentsPerCell * (std::pow(layerDesc._lateralConnectionRadius * 2 + 1, 2) * (layerDesc._cellsInColumn) + 1); // + 1 for bias
 126 | 	else
 127 | 		lateralConnectionsSize = layerDesc._numSegmentsPerCell * (std::pow(layerDesc._lateralConnectionRadius * 2 + 1, 2) * (layerDesc._cellsInColumn + 1) + 1); // + 1 for bias
 128 | 
 129 | 	layer._columnActivations = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height);
 130 | 	
 131 | 	layer._columnStates = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height);
 132 | 	layer._columnStatesPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height);
 133 | 	
 134 | 	layer._columnFeedForwardWeights = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height, receptiveFieldSize);
 135 | 	layer._columnFeedForwardWeightsPrev = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height, receptiveFieldSize);
 136 | 
 137 | 	layer._cellStates = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height, layerDesc._cellsInColumn);
 138 | 	layer._cellStatesPrev = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height, layerDesc._cellsInColumn);
 139 | 
 140 | 	layer._segmentStatesPrev = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height, layerDesc._cellsInColumn * layerDesc._numSegmentsPerCell);
 141 | 	layer._segmentStates = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height, layerDesc._cellsInColumn * layerDesc._numSegmentsPerCell);
 142 | 
 143 | 	//layer._segmentWeightsPrev = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height, layerDesc._cellsInColumn * layerDesc._numSegmentsPerCell);
 144 | 	//layer._segmentWeights = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height, layerDesc._cellsInColumn * layerDesc._numSegmentsPerCell);
 145 | 
 146 | 	layer._cellQValues = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height, layerDesc._cellsInColumn);
 147 | 	layer._cellQValuesPrev = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height, layerDesc._cellsInColumn);
 148 | 
 149 | 	layer._columnQValues = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height);
 150 | 	
 151 | 	layer._columnPrevValues = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height);
 152 | 	layer._columnPrevValuesPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height);
 153 | 
 154 | 	layer._columnTdErrors = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height);
 155 | 
 156 | 	layer._cellPredictions = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height, layerDesc._cellsInColumn);
 157 | 	layer._cellPredictionsPrev = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height, layerDesc._cellsInColumn);
 158 | 
 159 | 	layer._cellWeights = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height * layerDesc._cellsInColumn, lateralConnectionsSize);
 160 | 	layer._cellWeightsPrev = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), layerDesc._width, layerDesc._height * layerDesc._cellsInColumn, lateralConnectionsSize);
 161 | 
 162 | 	layer._columnPredictions = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height);
 163 | 	layer._columnPredictionsPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height);
 164 | 
 165 | 	//layer._blurPing = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height);
 166 | 	//layer._blurPong = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), layerDesc._width, layerDesc._height);
 167 | 
 168 | 	layer._reconstruction = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), inputWidth, inputHeight);
 169 | 	
 170 | 	layer._inputBiases = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), inputWidth, inputHeight);
 171 | 	layer._inputBiasesPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), inputWidth, inputHeight);
 172 | 
 173 | 	{
 174 | 		cl::size_t<3> origin;
 175 | 		cl::size_t<3> region;
 176 | 
 177 | 		origin[0] = 0;
 178 | 		origin[1] = 0;
 179 | 		origin[2] = 0;
 180 | 
 181 | 		region[0] = inputWidth;
 182 | 		region[1] = inputHeight;
 183 | 		region[2] = 1;
 184 | 
 185 | 		cl_uint4 fillColor;
 186 | 
 187 | 		fillColor.x = 0;
 188 | 	
 189 | 		cs.getQueue().enqueueFillImage(layer._reconstruction, fillColor, origin, region);
 190 | 	}
 191 | 	
 192 | 
 193 | 	Uint2 seed1;
 194 | 	seed1._x = uniformDist(generator);
 195 | 	seed1._y = uniformDist(generator);
 196 | 
 197 | 	initPartOneKernel.setArg(0, layer._columnActivations);
 198 | 	initPartOneKernel.setArg(1, layer._columnStates);
 199 | 	initPartOneKernel.setArg(2, layer._columnFeedForwardWeights);
 200 | 	initPartOneKernel.setArg(3, layer._columnPrevValues);
 201 | 	initPartOneKernel.setArg(4, layerDesc._cellsInColumn);
 202 | 	initPartOneKernel.setArg(5, receptiveFieldSize);
 203 | 	initPartOneKernel.setArg(6, lateralConnectionsSize);
 204 | 	initPartOneKernel.setArg(7, seed1);
 205 | 	initPartOneKernel.setArg(8, minInitCenter);
 206 | 	initPartOneKernel.setArg(9, maxInitCenter);
 207 | 
 208 | 	cs.getQueue().enqueueNDRangeKernel(initPartOneKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height));
 209 | 
 210 | 	Uint2 seed2;
 211 | 	seed2._x = uniformDist(generator);
 212 | 	seed2._y = uniformDist(generator);
 213 | 
 214 | 	initPartTwoKernel.setArg(0, layer._cellStates);
 215 | 	initPartTwoKernel.setArg(1, layer._segmentStates);
 216 | 	initPartTwoKernel.setArg(2, layer._cellWeights);
 217 | 	initPartTwoKernel.setArg(3, layer._cellPredictions);
 218 | 	initPartTwoKernel.setArg(4, layer._cellQValues);
 219 | 	initPartTwoKernel.setArg(5, layerDesc._cellsInColumn);
 220 | 	initPartTwoKernel.setArg(6, receptiveFieldSize);
 221 | 	initPartTwoKernel.setArg(7, lateralConnectionsSize);
 222 | 	initPartTwoKernel.setArg(8, layerDesc._numSegmentsPerCell);
 223 | 	initPartTwoKernel.setArg(9, seed2);
 224 | 	initPartTwoKernel.setArg(10, minInitWeight);
 225 | 	initPartTwoKernel.setArg(11, maxInitWeight);
 226 | 
 227 | 	cs.getQueue().enqueueNDRangeKernel(initPartTwoKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height));
 228 | 
 229 | 	Uint2 seed3;
 230 | 	seed3._x = uniformDist(generator);
 231 | 	seed3._y = uniformDist(generator);
 232 | 
 233 | 	initPartThreeKernel.setArg(0, layer._inputBiases);
 234 | 	initPartThreeKernel.setArg(1, seed2);
 235 | 	initPartThreeKernel.setArg(2, minInitWeight);
 236 | 	initPartThreeKernel.setArg(3, maxInitWeight);
 237 | 
 238 | 	cs.getQueue().enqueueNDRangeKernel(initPartThreeKernel, cl::NullRange, cl::NDRange(inputWidth, inputHeight));
 239 | 
 240 | 	{
 241 | 		cl::size_t<3> origin;
 242 | 		cl::size_t<3> region;
 243 | 
 244 | 		origin[0] = 0;
 245 | 		origin[1] = 0;
 246 | 		origin[2] = 0;
 247 | 
 248 | 		region[0] = layerDesc._width;
 249 | 		region[1] = layerDesc._height;
 250 | 		region[2] = 1;
 251 | 
 252 | 		cs.getQueue().enqueueCopyImage(layer._columnStates, layer._columnStatesPrev, origin, origin, region);
 253 | 	}
 254 | 
 255 | 	{
 256 | 		cl::size_t<3> origin;
 257 | 		cl::size_t<3> region;
 258 | 
 259 | 		origin[0] = 0;
 260 | 		origin[1] = 0;
 261 | 		origin[2] = 0;
 262 | 
 263 | 		region[0] = layerDesc._width;
 264 | 		region[1] = layerDesc._height;
 265 | 		region[2] = 1;
 266 | 
 267 | 		cs.getQueue().enqueueCopyImage(layer._columnPredictions, layer._columnPredictionsPrev, origin, origin, region);
 268 | 	}
 269 | 
 270 | 	{
 271 | 		cl::size_t<3> origin;
 272 | 		cl::size_t<3> region;
 273 | 
 274 | 		origin[0] = 0;
 275 | 		origin[1] = 0;
 276 | 		origin[2] = 0;
 277 | 
 278 | 		region[0] = layerDesc._width;
 279 | 		region[1] = layerDesc._height;
 280 | 		region[2] = 1;
 281 | 
 282 | 		cs.getQueue().enqueueCopyImage(layer._columnPrevValues, layer._columnPrevValuesPrev, origin, origin, region);
 283 | 	}
 284 | 
 285 | 	{
 286 | 		cl::size_t<3> origin;
 287 | 		cl::size_t<3> region;
 288 | 
 289 | 		origin[0] = 0;
 290 | 		origin[1] = 0;
 291 | 		origin[2] = 0;
 292 | 
 293 | 		region[0] = layerDesc._width;
 294 | 		region[1] = layerDesc._height;
 295 | 		region[2] = receptiveFieldSize;
 296 | 
 297 | 		cs.getQueue().enqueueCopyImage(layer._columnFeedForwardWeights, layer._columnFeedForwardWeightsPrev, origin, origin, region);
 298 | 	}
 299 | 
 300 | 	{
 301 | 		cl::size_t<3> origin;
 302 | 		cl::size_t<3> region;
 303 | 
 304 | 		origin[0] = 0;
 305 | 		origin[1] = 0;
 306 | 		origin[2] = 0;
 307 | 
 308 | 		region[0] = layerDesc._width;
 309 | 		region[1] = layerDesc._height;
 310 | 		region[2] = layerDesc._cellsInColumn;
 311 | 
 312 | 		cs.getQueue().enqueueCopyImage(layer._cellStates, layer._cellStatesPrev, origin, origin, region);
 313 | 	}
 314 | 
 315 | 	{
 316 | 		cl::size_t<3> origin;
 317 | 		cl::size_t<3> region;
 318 | 
 319 | 		origin[0] = 0;
 320 | 		origin[1] = 0;
 321 | 		origin[2] = 0;
 322 | 
 323 | 		region[0] = layerDesc._width;
 324 | 		region[1] = layerDesc._height;
 325 | 		region[2] = layerDesc._cellsInColumn * layerDesc._numSegmentsPerCell;
 326 | 
 327 | 		cs.getQueue().enqueueCopyImage(layer._segmentStates, layer._segmentStatesPrev, origin, origin, region);
 328 | 	}
 329 | 
 330 | 	/*{
 331 | 		cl::size_t<3> origin;
 332 | 		cl::size_t<3> region;
 333 | 
 334 | 		origin[0] = 0;
 335 | 		origin[1] = 0;
 336 | 		origin[2] = 0;
 337 | 
 338 | 		region[0] = layerDesc._width;
 339 | 		region[1] = layerDesc._height;
 340 | 		region[2] = layerDesc._cellsInColumn * layerDesc._numSegmentsPerCell;
 341 | 
 342 | 		cs.getQueue().enqueueCopyImage(layer._segmentWeights, layer._segmentWeightsPrev, origin, origin, region);
 343 | 	}*/
 344 | 
 345 | 	{
 346 | 		cl::size_t<3> origin;
 347 | 		cl::size_t<3> region;
 348 | 
 349 | 		origin[0] = 0;
 350 | 		origin[1] = 0;
 351 | 		origin[2] = 0;
 352 | 
 353 | 		region[0] = layerDesc._width;
 354 | 		region[1] = layerDesc._height;
 355 | 		region[2] = layerDesc._cellsInColumn;
 356 | 
 357 | 		cs.getQueue().enqueueCopyImage(layer._cellQValues, layer._cellQValuesPrev, origin, origin, region);
 358 | 	}
 359 | 
 360 | 	{
 361 | 		cl::size_t<3> origin;
 362 | 		cl::size_t<3> region;
 363 | 
 364 | 		origin[0] = 0;
 365 | 		origin[1] = 0;
 366 | 		origin[2] = 0;
 367 | 
 368 | 		region[0] = layerDesc._width;
 369 | 		region[1] = layerDesc._height * layerDesc._cellsInColumn;
 370 | 		region[2] = lateralConnectionsSize;
 371 | 
 372 | 		cs.getQueue().enqueueCopyImage(layer._cellWeights, layer._cellWeightsPrev, origin, origin, region);
 373 | 	}
 374 | 
 375 | 	{
 376 | 		cl::size_t<3> origin;
 377 | 		cl::size_t<3> region;
 378 | 
 379 | 		origin[0] = 0;
 380 | 		origin[1] = 0;
 381 | 		origin[2] = 0;
 382 | 
 383 | 		region[0] = layerDesc._width;
 384 | 		region[1] = layerDesc._height;
 385 | 		region[2] = layerDesc._cellsInColumn;
 386 | 
 387 | 		cs.getQueue().enqueueCopyImage(layer._cellPredictions, layer._cellPredictionsPrev, origin, origin, region);
 388 | 	}
 389 | 
 390 | 	{
 391 | 		cl::size_t<3> origin;
 392 | 		cl::size_t<3> region;
 393 | 
 394 | 		origin[0] = 0;
 395 | 		origin[1] = 0;
 396 | 		origin[2] = 0;
 397 | 
 398 | 		region[0] = inputWidth;
 399 | 		region[1] = inputHeight;
 400 | 		region[2] = 1;
 401 | 
 402 | 		cs.getQueue().enqueueCopyImage(layer._inputBiases, layer._inputBiasesPrev, origin, origin, region);
 403 | 	}
 404 | }
 405 | 
 406 | void HTMRL::stepBegin(sys::ComputeSystem &cs, int addReplaySampleSteps, int maxReplayChainSize) {
 407 | 	for (int l = 0; l < _layers.size(); l++) {
 408 | 		std::swap(_layers[l]._columnStates, _layers[l]._columnStatesPrev);	
 409 | 		std::swap(_layers[l]._columnPredictions, _layers[l]._columnPredictionsPrev);
 410 | 		std::swap(_layers[l]._columnFeedForwardWeights, _layers[l]._columnFeedForwardWeightsPrev);
 411 | 		std::swap(_layers[l]._columnPrevValues, _layers[l]._columnPrevValuesPrev);
 412 | 		std::swap(_layers[l]._cellStates, _layers[l]._cellStatesPrev);
 413 | 		std::swap(_layers[l]._segmentStates, _layers[l]._segmentStatesPrev);
 414 | 		//std::swap(_layers[l]._segmentWeights, _layers[l]._segmentWeightsPrev);
 415 | 		std::swap(_layers[l]._cellQValues, _layers[l]._cellQValuesPrev);
 416 | 		std::swap(_layers[l]._cellPredictions, _layers[l]._cellPredictionsPrev);
 417 | 		std::swap(_layers[l]._cellWeights, _layers[l]._cellWeightsPrev);
 418 | 		std::swap(_layers[l]._inputBiases, _layers[l]._inputBiasesPrev);
 419 | 	}
 420 | 
 421 | 	if (_addReplaySampleStepCounter >= addReplaySampleSteps) {
 422 | 		_addReplaySampleStepCounter = 0;
 423 | 
 424 | 		if (_inputReplayChain.size() < maxReplayChainSize) {
 425 | 			cl::size_t<3> origin;
 426 | 			cl::size_t<3> region;
 427 | 
 428 | 			origin[0] = 0;
 429 | 			origin[1] = 0;
 430 | 			origin[2] = 0;
 431 | 
 432 | 			region[0] = _inputWidth;
 433 | 			region[1] = _inputHeight;
 434 | 			region[2] = 1;
 435 | 
 436 | 			cl::Image2D newSample = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _inputWidth, _inputHeight);
 437 | 
 438 | 			cs.getQueue().enqueueCopyImage(_inputImage, newSample, origin, origin, region);
 439 | 
 440 | 			_inputReplayChain.push_back(newSample);
 441 | 		}
 442 | 		else {
 443 | 			cl::size_t<3> origin;
 444 | 			cl::size_t<3> region;
 445 | 
 446 | 			origin[0] = 0;
 447 | 			origin[1] = 0;
 448 | 			origin[2] = 0;
 449 | 
 450 | 			region[0] = _inputWidth;
 451 | 			region[1] = _inputHeight;
 452 | 			region[2] = 1;
 453 | 
 454 | 			cl::Image2D temp = _inputReplayChain.back();
 455 | 
 456 | 			_inputReplayChain.pop_back();
 457 | 
 458 | 			cs.getQueue().enqueueCopyImage(_inputImage, temp, origin, origin, region);
 459 | 
 460 | 			_inputReplayChain.push_back(temp);
 461 | 		}
 462 | 	}
 463 | 	
 464 | 	_addReplaySampleStepCounter++;
 465 | }
 466 | 
 467 | void HTMRL::spatialPoolLayer(sys::ComputeSystem &cs, cl::Image2D &prevLayerOutput, int prevLayerWidth, int prevLayerHeight, Layer &layer, const LayerDesc &layerDesc, float columnDecay, std::mt19937 &generator) {
 468 | 	struct Uint2 {
 469 | 		unsigned int _x, _y;
 470 | 	};
 471 | 
 472 | 	struct Int2 {
 473 | 		int _x, _y;
 474 | 	};
 475 | 
 476 | 	struct Float2 {
 477 | 		float _x, _y;
 478 | 	};
 479 | 
 480 | 	std::uniform_int_distribution<int> uniformDist(0, 10000);
 481 | 
 482 | 	Uint2 seed1;
 483 | 	seed1._x = uniformDist(generator);
 484 | 	seed1._y = uniformDist(generator);
 485 | 
 486 | 	Uint2 seed2;
 487 | 	seed2._x = uniformDist(generator);
 488 | 	seed2._y = uniformDist(generator);
 489 | 
 490 | 	Uint2 seed3;
 491 | 	seed3._x = uniformDist(generator);
 492 | 	seed3._y = uniformDist(generator);
 493 | 
 494 | 	Uint2 seed4;
 495 | 	seed4._x = uniformDist(generator);
 496 | 	seed4._y = uniformDist(generator);
 497 | 
 498 | 	Int2 inputSize;
 499 | 	inputSize._x = prevLayerWidth;
 500 | 	inputSize._y = prevLayerHeight;
 501 | 
 502 | 	Int2 layerSize;
 503 | 	layerSize._x = layerDesc._width;
 504 | 	layerSize._y = layerDesc._height;
 505 | 
 506 | 	Float2 layerSizeMinusOneInv;
 507 | 	layerSizeMinusOneInv._x = 1.0f / (layerDesc._width - 1);
 508 | 	layerSizeMinusOneInv._y = 1.0f / (layerDesc._height - 1);
 509 | 
 510 | 	Float2 layerSizeInv;
 511 | 	layerSizeInv._x = 1.0f / layerDesc._width;
 512 | 	layerSizeInv._y = 1.0f / layerDesc._height;
 513 | 
 514 | 	Int2 inputReceptiveFieldRadius;
 515 | 	inputReceptiveFieldRadius._x = layerDesc._receptiveFieldRadius;
 516 | 	inputReceptiveFieldRadius._y = layerDesc._receptiveFieldRadius;
 517 | 
 518 | 	Int2 inputSizeMinusOne;
 519 | 	inputSizeMinusOne._x = prevLayerWidth - 1;
 520 | 	inputSizeMinusOne._y = prevLayerHeight - 1;
 521 | 
 522 | 	// Activation
 523 | 	_layerColumnActivateKernel.setArg(0, prevLayerOutput);
 524 | 	_layerColumnActivateKernel.setArg(1, layer._columnFeedForwardWeightsPrev);
 525 | 	_layerColumnActivateKernel.setArg(2, layer._columnStatesPrev);
 526 | 	_layerColumnActivateKernel.setArg(3, layer._columnActivations);
 527 | 	_layerColumnActivateKernel.setArg(4, layerSizeMinusOneInv);
 528 | 	_layerColumnActivateKernel.setArg(5, inputReceptiveFieldRadius);
 529 | 	_layerColumnActivateKernel.setArg(6, inputSize);
 530 | 	_layerColumnActivateKernel.setArg(7, inputSizeMinusOne);
 531 | 	_layerColumnActivateKernel.setArg(8, seed1);
 532 | 
 533 | 	cs.getQueue().enqueueNDRangeKernel(_layerColumnActivateKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height));
 534 | 
 535 | 	Int2 layerInhibitionRadius;
 536 | 	layerInhibitionRadius._x = layerDesc._inhibitionRadius;
 537 | 	layerInhibitionRadius._y = layerDesc._inhibitionRadius;
 538 | 
 539 | 	int receptiveFieldSize = std::pow(layerDesc._receptiveFieldRadius * 2 + 1, 2) + 1;
 540 | 
 541 | 	// Inhibition
 542 | 	_layerColumnInhibitKernel.setArg(0, layer._columnActivations);
 543 | 	_layerColumnInhibitKernel.setArg(1, layer._columnStatesPrev);
 544 | 	_layerColumnInhibitKernel.setArg(2, layer._columnFeedForwardWeightsPrev);
 545 | 	_layerColumnInhibitKernel.setArg(3, layer._columnStates);
 546 | 	_layerColumnInhibitKernel.setArg(4, layerSize);
 547 | 	_layerColumnInhibitKernel.setArg(5, layerSizeInv);
 548 | 	_layerColumnInhibitKernel.setArg(6, layerInhibitionRadius);
 549 | 	_layerColumnInhibitKernel.setArg(7, receptiveFieldSize);
 550 | 
 551 | 	cs.getQueue().enqueueNDRangeKernel(_layerColumnInhibitKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height));
 552 | 
 553 | 	Float2 inputSizeMinusOneInv;
 554 | 	inputSizeMinusOneInv._x = 1.0f / (prevLayerWidth - 1);
 555 | 	inputSizeMinusOneInv._y = 1.0f / (prevLayerHeight - 1);
 556 | 
 557 | 	Int2 reconstructionReceptiveFieldRadii;
 558 | 	reconstructionReceptiveFieldRadii._x = std::ceil(static_cast<float>(layerDesc._width) / prevLayerWidth * layerDesc._receptiveFieldRadius);
 559 | 	reconstructionReceptiveFieldRadii._y = std::ceil(static_cast<float>(layerDesc._height) / prevLayerHeight * layerDesc._receptiveFieldRadius);
 560 | 
 561 | 	Int2 layerSizeMinusOne;
 562 | 	layerSizeMinusOne._x = layerDesc._width - 1;
 563 | 	layerSizeMinusOne._y = layerDesc._height - 1;
 564 | 
 565 | 	// Reconstruct
 566 | 	_reconstructInputKernel.setArg(0, layer._columnFeedForwardWeightsPrev);
 567 | 	_reconstructInputKernel.setArg(1, layer._inputBiasesPrev);
 568 | 	_reconstructInputKernel.setArg(2, layer._columnStates);
 569 | 	_reconstructInputKernel.setArg(3, layer._reconstruction);
 570 | 	_reconstructInputKernel.setArg(4, reconstructionReceptiveFieldRadii);
 571 | 	_reconstructInputKernel.setArg(5, inputReceptiveFieldRadius);
 572 | 	_reconstructInputKernel.setArg(6, inputSizeMinusOne);
 573 | 	_reconstructInputKernel.setArg(7, inputSizeMinusOneInv);
 574 | 	_reconstructInputKernel.setArg(8, layerSize);
 575 | 	_reconstructInputKernel.setArg(9, layerSizeMinusOne);
 576 | 	_reconstructInputKernel.setArg(10, layerSizeMinusOneInv);
 577 | 
 578 | 	cs.getQueue().enqueueNDRangeKernel(_reconstructInputKernel, cl::NullRange, cl::NDRange(prevLayerWidth, prevLayerHeight));
 579 | }
 580 | 
 581 | void HTMRL::cellActivateLayer(sys::ComputeSystem &cs, Layer &layer, const LayerDesc &layerDesc, float cellStateDecay, std::mt19937 &generator) {
 582 | 	struct Uint2 {
 583 | 		unsigned int _x, _y;
 584 | 	};
 585 | 
 586 | 	struct Int2 {
 587 | 		int _x, _y;
 588 | 	};
 589 | 
 590 | 	struct Float2 {
 591 | 		float _x, _y;
 592 | 	};
 593 | 
 594 | 	std::uniform_int_distribution<int> uniformDist(0, 10000);
 595 | 
 596 | 	Uint2 seed;
 597 | 	seed._x = uniformDist(generator);
 598 | 	seed._y = uniformDist(generator);
 599 | 
 600 | 	Int2 layerSize;
 601 | 	layerSize._x = layerDesc._width;
 602 | 	layerSize._y = layerDesc._height;
 603 | 
 604 | 	Float2 layerSizeMinusOneInv;
 605 | 	layerSizeMinusOneInv._x = 1.0f / (layerDesc._width - 1);
 606 | 	layerSizeMinusOneInv._y = 1.0f / (layerDesc._height - 1);
 607 | 
 608 | 	Float2 layerSizeInv;
 609 | 	layerSizeInv._x = 1.0f / layerDesc._width;
 610 | 	layerSizeInv._y = 1.0f / layerDesc._height;
 611 | 
 612 | 	Int2 lateralConnectionRadii;
 613 | 	lateralConnectionRadii._x = layerDesc._lateralConnectionRadius;
 614 | 	lateralConnectionRadii._y = layerDesc._lateralConnectionRadius;
 615 | 
 616 | 	// Cell activation
 617 | 	_layerCellActivateKernel.setArg(0, layer._columnStates);
 618 | 	_layerCellActivateKernel.setArg(1, layer._cellStatesPrev);
 619 | 	_layerCellActivateKernel.setArg(2, layer._cellPredictionsPrev);
 620 | 	_layerCellActivateKernel.setArg(3, layer._cellWeightsPrev);
 621 | 	_layerCellActivateKernel.setArg(4, layer._columnPredictionsPrev);
 622 | 	_layerCellActivateKernel.setArg(5, layer._cellStates);
 623 | 	_layerCellActivateKernel.setArg(6, layerDesc._cellsInColumn);
 624 | 	_layerCellActivateKernel.setArg(7, lateralConnectionRadii);
 625 | 	_layerCellActivateKernel.setArg(8, cellStateDecay);
 626 | 	_layerCellActivateKernel.setArg(9, seed);
 627 | 
 628 | 	cs.getQueue().enqueueNDRangeKernel(_layerCellActivateKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height));
 629 | }
 630 | 
 631 | void HTMRL::predictLayer(sys::ComputeSystem &cs, cl::Image2D &nextLayerPrediction, cl::Image2D &nextLayerPredictionPrev, int nextLayerWidth, int nextLayerHeight, Layer &layer, const LayerDesc &layerDesc, std::mt19937 &generator) {
 632 | 	struct Int2 {
 633 | 		int _x, _y;
 634 | 	};
 635 | 
 636 | 	struct Float2 {
 637 | 		float _x, _y;
 638 | 	};
 639 | 	
 640 | 	Int2 layerSize;
 641 | 	layerSize._x = layerDesc._width;
 642 | 	layerSize._y = layerDesc._height;
 643 | 
 644 | 	Float2 layerSizeMinusOneInv;
 645 | 	layerSizeMinusOneInv._x = 1.0f / (layerDesc._width - 1);
 646 | 	layerSizeMinusOneInv._y = 1.0f / (layerDesc._height - 1);
 647 | 
 648 | 	Int2 lateralConnectionRadii;
 649 | 	lateralConnectionRadii._x = layerDesc._lateralConnectionRadius;
 650 | 	lateralConnectionRadii._y = layerDesc._lateralConnectionRadius;
 651 | 
 652 | 	// Cell prediction
 653 | 	Int2 nextLayerSize;
 654 | 	nextLayerSize._x = nextLayerWidth;
 655 | 	nextLayerSize._y = nextLayerHeight;
 656 | 
 657 | 	Int2 nextLayerSizeMinusOne;
 658 | 	nextLayerSizeMinusOne._x = nextLayerWidth - 1;
 659 | 	nextLayerSizeMinusOne._y = nextLayerHeight - 1;
 660 | 
 661 | 	_layerCellPredictKernel.setArg(0, layer._cellStates);
 662 | 	_layerCellPredictKernel.setArg(1, layer._cellStatesPrev);
 663 | 	_layerCellPredictKernel.setArg(2, layer._cellWeights);
 664 | 	_layerCellPredictKernel.setArg(3, nextLayerPrediction);
 665 | 	_layerCellPredictKernel.setArg(4, nextLayerPredictionPrev);
 666 | 	_layerCellPredictKernel.setArg(5, layer._cellPredictions);
 667 | 	_layerCellPredictKernel.setArg(6, layer._segmentStates);
 668 | 	_layerCellPredictKernel.setArg(7, layerDesc._cellsInColumn);
 669 | 	_layerCellPredictKernel.setArg(8, layerSize);
 670 | 	_layerCellPredictKernel.setArg(9, lateralConnectionRadii);
 671 | 	_layerCellPredictKernel.setArg(10, layerDesc._numSegmentsPerCell);
 672 | 	_layerCellPredictKernel.setArg(11, layerSizeMinusOneInv);
 673 | 	_layerCellPredictKernel.setArg(12, nextLayerSize);
 674 | 	_layerCellPredictKernel.setArg(13, nextLayerSizeMinusOne);
 675 | 
 676 | 	cs.getQueue().enqueueNDRangeKernel(_layerCellPredictKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height));
 677 | 
 678 | 	// Column prediction
 679 | 	_layerColumnPredictionKernel.setArg(0, layer._cellPredictions);
 680 | 	_layerColumnPredictionKernel.setArg(1, layer._cellStates);
 681 | 	_layerColumnPredictionKernel.setArg(2, layer._columnPredictions);
 682 | 	_layerColumnPredictionKernel.setArg(3, layerDesc._cellsInColumn);
 683 | 
 684 | 	cs.getQueue().enqueueNDRangeKernel(_layerColumnPredictionKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height));
 685 | }
 686 | 
 687 | void HTMRL::predictLayerLast(sys::ComputeSystem &cs, Layer &layer, const LayerDesc &layerDesc, std::mt19937 &generator) {
 688 | 	struct Int2 {
 689 | 		int _x, _y;
 690 | 	};
 691 | 
 692 | 	struct Float2 {
 693 | 		float _x, _y;
 694 | 	};
 695 | 
 696 | 	Int2 layerSize;
 697 | 	layerSize._x = layerDesc._width;
 698 | 	layerSize._y = layerDesc._height;
 699 | 
 700 | 	Float2 layerSizeInv;
 701 | 	layerSizeInv._x = 1.0f / layerDesc._width;
 702 | 	layerSizeInv._y = 1.0f / layerDesc._height;
 703 | 
 704 | 	Int2 lateralConnectionRadii;
 705 | 	lateralConnectionRadii._x = layerDesc._lateralConnectionRadius;
 706 | 	lateralConnectionRadii._y = layerDesc._lateralConnectionRadius;
 707 | 
 708 | 	// Cell prediction
 709 | 	_layerCellPredictLastKernel.setArg(0, layer._cellStates);
 710 | 	_layerCellPredictLastKernel.setArg(1, layer._cellStatesPrev);
 711 | 	_layerCellPredictLastKernel.setArg(2, layer._cellWeights);
 712 | 	_layerCellPredictLastKernel.setArg(3, layer._cellPredictions);
 713 | 	_layerCellPredictLastKernel.setArg(4, layer._segmentStates);
 714 | 	_layerCellPredictLastKernel.setArg(5, layerDesc._cellsInColumn);
 715 | 	_layerCellPredictLastKernel.setArg(6, layerSize);
 716 | 	_layerCellPredictLastKernel.setArg(7, lateralConnectionRadii);
 717 | 	_layerCellPredictLastKernel.setArg(8, layerDesc._numSegmentsPerCell);
 718 | 
 719 | 	cs.getQueue().enqueueNDRangeKernel(_layerCellPredictLastKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height));
 720 | 
 721 | 	// Column prediction
 722 | 	_layerColumnPredictionKernel.setArg(0, layer._cellPredictions);
 723 | 	_layerColumnPredictionKernel.setArg(1, layer._cellStates);
 724 | 	_layerColumnPredictionKernel.setArg(2, layer._columnPredictions);
 725 | 	_layerColumnPredictionKernel.setArg(3, layerDesc._cellsInColumn);
 726 | 
 727 | 	cs.getQueue().enqueueNDRangeKernel(_layerColumnPredictionKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height));
 728 | }
 729 | 
 730 | void HTMRL::activate(std::vector<float> &input, sys::ComputeSystem &cs, float reward, float alpha, float gamma, float columnDecay, float cellStateDecay, float columnConnectionAlpha, float columnConnectionBeta, float columnConnectionGamma, float cellConnectionAlpha, float cellConnectionBeta, float cellConnectionGamma, float cellConnectionTemperature, float cellWeightEligibilityDecay, int maxReplayChainSize, int numReplaySamples, int addSampleSteps, unsigned long seed) {
 731 | 	// Create buffer from input
 732 | 	{
 733 | 		cl::size_t<3> origin;
 734 | 		origin[0] = 0;
 735 | 		origin[1] = 0;
 736 | 		origin[2] = 0;
 737 | 
 738 | 		cl::size_t<3> region;
 739 | 		region[0] = _inputWidth;
 740 | 		region[1] = _inputHeight;
 741 | 		region[2] = 1;
 742 | 
 743 | 		cs.getQueue().enqueueWriteImage(_inputImage, CL_TRUE, origin, region, 0, 0, &input[0]);
 744 | 	}
 745 | 
 746 | 	std::mt19937 generator(seed);
 747 | 
 748 | 	learnSpatialReplay(cs, cellStateDecay, columnConnectionAlpha, columnConnectionBeta, columnConnectionGamma, maxReplayChainSize, numReplaySamples, seed);
 749 | 
 750 | 	for (int l = 0; l < _layers.size(); l++)
 751 | 		cellActivateLayer(cs, _layers[l], _layerDescs[l], cellStateDecay, generator);
 752 | 
 753 | 	for (int l = _layers.size() - 1; l >= 0; l--) {
 754 | 		if (l == _layers.size() - 1)
 755 | 			determineLayerColumnQLast(cs, _layers[l], _layerDescs[l]);
 756 | 		else
 757 | 			determineLayerColumnQ(cs, _layers[l], _layerDescs[l], _layers[l + 1], _layerDescs[l + 1]);
 758 | 	}
 759 | 
 760 | 	float value = retreiveQ(cs);
 761 | 
 762 | 	float tdError = reward + gamma * value - _prevValue;
 763 | 
 764 | 	std::cout << "R: " << reward << "Q: " << reward + gamma * value << " T: " << tdError << std::endl;
 765 | 
 766 | 	_prevValue = value;
 767 | 
 768 | 	for (int l = _layers.size() - 1; l >= 0; l--)
 769 | 		assignLayerQ(cs, _layers[l], _layerDescs[l], alpha * tdError);
 770 | 
 771 | 	learnTemporal(cs, tdError, cellConnectionAlpha * (tdError > 0.0f ? 1.0f : 0.0f), cellConnectionBeta, cellConnectionGamma, cellConnectionTemperature, cellWeightEligibilityDecay, seed + 1);
 772 | 
 773 | 	for (int l = _layers.size() - 1; l >= 0; l--) {
 774 | 		if (l == _layers.size() - 1)
 775 | 			predictLayerLast(cs, _layers[l], _layerDescs[l], generator);
 776 | 		else
 777 | 			predictLayer(cs, _layers[l + 1]._columnPredictions, _layers[l + 1]._columnPredictionsPrev, _layerDescs[l + 1]._width, _layerDescs[l + 1]._height, _layers[l], _layerDescs[l], generator);
 778 | 	}
 779 | 
 780 | 	/*pPrevLayerOutput = &_inputImage;
 781 | 	prevLayerWidth = _inputWidth;
 782 | 	prevLayerHeight = _inputHeight;
 783 | 
 784 | 	for (int l = 0; l < _layers.size(); l++) {
 785 | 		learnLayerSpatial(cs, _layers[l], *pPrevLayerOutput, prevLayerWidth, prevLayerHeight, _layerDescs[l], columnConnectionAlpha, columnConnectionBeta, columnConnectionGamma, generator);
 786 | 
 787 | 		pPrevLayerOutput = &_layers[l]._columnStates;
 788 | 		prevLayerWidth = _layerDescs[l]._width;
 789 | 		prevLayerHeight = _layerDescs[l]._height;
 790 | 	}*/
 791 | }
 792 | 
 793 | void HTMRL::determineLayerColumnQ(sys::ComputeSystem &cs, Layer &layer, LayerDesc &layerDesc, Layer &nextLayer, LayerDesc &nextLayerDesc) {
 794 | 	struct Int2 {
 795 | 		int _x, _y;
 796 | 	};
 797 | 
 798 | 	struct Float2 {
 799 | 		float _x, _y;
 800 | 	};
 801 | 
 802 | 	Int2 layerSize;
 803 | 	layerSize._x = layerDesc._width;
 804 | 	layerSize._y = layerDesc._height;
 805 | 
 806 | 	Int2 nextLayerSize;
 807 | 	nextLayerSize._x = nextLayerDesc._width;
 808 | 	nextLayerSize._y = nextLayerDesc._height;
 809 | 
 810 | 	Int2 nextLayerSizeMinusOne;
 811 | 	nextLayerSizeMinusOne._x = nextLayerDesc._width - 1;
 812 | 	nextLayerSizeMinusOne._y = nextLayerDesc._height - 1;
 813 | 
 814 | 	Float2 layerSizeMinusOneInv;
 815 | 	layerSizeMinusOneInv._x = 1.0f / (layerDesc._width - 1);
 816 | 	layerSizeMinusOneInv._y = 1.0f / (layerDesc._height - 1);
 817 | 
 818 | 	_layerColumnQKernel.setArg(0, layer._cellQValuesPrev);
 819 | 	_layerColumnQKernel.setArg(1, layer._cellStatesPrev);
 820 | 	_layerColumnQKernel.setArg(2, layer._cellStates);
 821 | 	_layerColumnQKernel.setArg(3, layer._columnStates);
 822 | 	_layerColumnQKernel.setArg(4, nextLayer._columnStates);
 823 | 	_layerColumnQKernel.setArg(5, nextLayer._columnQValues);
 824 | 	_layerColumnQKernel.setArg(6, layer._columnQValues);
 825 | 	_layerColumnQKernel.setArg(7, layerDesc._cellsInColumn);
 826 | 	_layerColumnQKernel.setArg(8, layerSizeMinusOneInv);
 827 | 	_layerColumnQKernel.setArg(9, nextLayerSize);
 828 | 	_layerColumnQKernel.setArg(10, nextLayerSizeMinusOne);
 829 | 
 830 | 	cs.getQueue().enqueueNDRangeKernel(_layerColumnQKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height));
 831 | }
 832 | 
 833 | void HTMRL::determineLayerColumnQLast(sys::ComputeSystem &cs, Layer &layer, LayerDesc &layerDesc) {
 834 | 	struct Int2 {
 835 | 		int _x, _y;
 836 | 	};
 837 | 
 838 | 	struct Float2 {
 839 | 		float _x, _y;
 840 | 	};
 841 | 
 842 | 	_layerColumnQLastKernel.setArg(0, layer._cellQValuesPrev);
 843 | 	_layerColumnQLastKernel.setArg(1, layer._cellStatesPrev);
 844 | 	_layerColumnQLastKernel.setArg(2, layer._cellStates);
 845 | 	_layerColumnQLastKernel.setArg(3, layer._columnQValues);
 846 | 	_layerColumnQLastKernel.setArg(4, layerDesc._cellsInColumn);
 847 | 
 848 | 	cs.getQueue().enqueueNDRangeKernel(_layerColumnQLastKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height));
 849 | }
 850 | 
 851 | float HTMRL::retreiveQ(sys::ComputeSystem &cs) {
 852 | 	float total = 0.0f;
 853 | 
 854 | 	float sum = 0.0f;
 855 | 	float divisor = 0.0f;
 856 | 
 857 | 	for (int l = 0; l < _layers.size(); l++) {
 858 | 		cl::size_t<3> origin;
 859 | 		origin[0] = 0;
 860 | 		origin[1] = 0;
 861 | 		origin[2] = 0;
 862 | 
 863 | 		cl::size_t<3> region;
 864 | 		region[0] = _layerDescs[l]._width;
 865 | 		region[1] = _layerDescs[l]._height;
 866 | 		region[2] = 1;
 867 | 
 868 | 		std::vector<float> layerQ(_layerDescs[l]._width * _layerDescs[l]._height);
 869 | 
 870 | 		cs.getQueue().enqueueReadImage(_layers[l]._columnQValues, CL_TRUE, origin, region, 0, 0, &layerQ[0]);
 871 | 
 872 | 		std::vector<float> layerColumns(_layerDescs[l]._width * _layerDescs.front()._height * 2);
 873 | 
 874 | 		cs.getQueue().enqueueReadImage(_layers[l]._columnStates, CL_TRUE, origin, region, 0, 0, &layerColumns[0]);
 875 | 
 876 | 		for (int i = 0; i < layerQ.size(); i++) {
 877 | 			sum += layerQ[i] * _layerDescs[l]._qImportance * layerColumns[i * 2];
 878 | 			divisor += _layerDescs[l]._qImportance * layerColumns[i * 2];
 879 | 		}
 880 | 	}
 881 | 
 882 | 	if (divisor == 0.0f)
 883 | 		return 0.0f;
 884 | 
 885 | 	return sum / divisor;
 886 | }
 887 | 
 888 | void HTMRL::assignLayerQ(sys::ComputeSystem &cs, Layer &layer, LayerDesc &layerDesc, float alpha) {
 889 | 	struct Int2 {
 890 | 		int _x, _y;
 891 | 	};
 892 | 
 893 | 	struct Float2 {
 894 | 		float _x, _y;
 895 | 	};
 896 | 
 897 | 	Int2 layerSize;
 898 | 	layerSize._x = layerDesc._width;
 899 | 	layerSize._y = layerDesc._height;
 900 | 
 901 | 	Float2 layerSizeMinusOneInv;
 902 | 	layerSizeMinusOneInv._x = 1.0f / (layerDesc._width - 1);
 903 | 	layerSizeMinusOneInv._y = 1.0f / (layerDesc._height - 1);
 904 | 
 905 | 	_layerAssignQKernel.setArg(0, layer._cellQValuesPrev);
 906 | 	_layerAssignQKernel.setArg(1, layer._cellStatesPrev);
 907 | 	_layerAssignQKernel.setArg(2, layer._cellQValues);
 908 | 	_layerAssignQKernel.setArg(3, layerDesc._cellsInColumn);
 909 | 	_layerAssignQKernel.setArg(4, alpha);
 910 | 
 911 | 	cs.getQueue().enqueueNDRangeKernel(_layerAssignQKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height));
 912 | }
 913 | 
 914 | void HTMRL::learnLayerSpatial(sys::ComputeSystem &cs, Layer &layer, cl::Image2D &prevLayerOutput, int prevLayerWidth, int prevLayerHeight, const LayerDesc &layerDesc, float alpha, float beta, float gamma, std::mt19937 &generator) {
 915 | 	struct Uint2 {
 916 | 		unsigned int _x, _y;
 917 | 	};
 918 | 
 919 | 	struct Int2 {
 920 | 		int _x, _y;
 921 | 	};
 922 | 
 923 | 	struct Float2 {
 924 | 		float _x, _y;
 925 | 	};
 926 | 
 927 | 	std::uniform_int_distribution<int> uniformDist(0, 10000);
 928 | 
 929 | 	Uint2 seed;
 930 | 	seed._x = uniformDist(generator);
 931 | 	seed._y = uniformDist(generator);
 932 | 
 933 | 	Int2 inputSize;
 934 | 	inputSize._x = prevLayerWidth;
 935 | 	inputSize._y = prevLayerHeight;
 936 | 
 937 | 	Int2 layerSize;
 938 | 	layerSize._x = layerDesc._width;
 939 | 	layerSize._y = layerDesc._height;
 940 | 
 941 | 	Float2 layerSizeMinusOneInv;
 942 | 	layerSizeMinusOneInv._x = 1.0f / (layerDesc._width - 1);
 943 | 	layerSizeMinusOneInv._y = 1.0f / (layerDesc._height - 1);
 944 | 
 945 | 	Int2 inputReceptiveFieldRadius;
 946 | 	inputReceptiveFieldRadius._x = layerDesc._receptiveFieldRadius;
 947 | 	inputReceptiveFieldRadius._y = layerDesc._receptiveFieldRadius;
 948 | 
 949 | 	int receptiveFieldSize = std::pow(layerDesc._receptiveFieldRadius * 2 + 1, 2) + 1;
 950 | 
 951 | 	Int2 influenceRadius;
 952 | 	influenceRadius._x = layerDesc._columnInfluenceRadius;
 953 | 	influenceRadius._y = layerDesc._columnInfluenceRadius;
 954 | 
 955 | 	Int2 inputSizeMinusOne;
 956 | 	inputSizeMinusOne._x = layerDesc._width - 1;
 957 | 	inputSizeMinusOne._y = layerDesc._height - 1;
 958 | 
 959 | 	Int2 inhibitionRadii;
 960 | 	inhibitionRadii._x = layerDesc._inhibitionRadius;
 961 | 	inhibitionRadii._y = layerDesc._inhibitionRadius;
 962 | 
 963 | 	// Column weight update
 964 | 	_layerColumnWeightUpdateKernel.setArg(0, layer._reconstruction);
 965 | 	_layerColumnWeightUpdateKernel.setArg(1, prevLayerOutput);
 966 | 	_layerColumnWeightUpdateKernel.setArg(2, layer._columnActivations);
 967 | 	_layerColumnWeightUpdateKernel.setArg(3, layer._columnStates);
 968 | 	_layerColumnWeightUpdateKernel.setArg(4, layer._columnPredictions);
 969 | 	_layerColumnWeightUpdateKernel.setArg(5, layer._columnFeedForwardWeightsPrev);
 970 | 	_layerColumnWeightUpdateKernel.setArg(6, layer._columnFeedForwardWeights);
 971 | 	_layerColumnWeightUpdateKernel.setArg(7, layerSize);
 972 | 	_layerColumnWeightUpdateKernel.setArg(8, layerSizeMinusOneInv);
 973 | 	_layerColumnWeightUpdateKernel.setArg(9, inputReceptiveFieldRadius);
 974 | 	_layerColumnWeightUpdateKernel.setArg(10, inhibitionRadii);
 975 | 	_layerColumnWeightUpdateKernel.setArg(11, inputSize);
 976 | 	_layerColumnWeightUpdateKernel.setArg(12, inputSizeMinusOne);
 977 | 	_layerColumnWeightUpdateKernel.setArg(13, receptiveFieldSize);
 978 | 	_layerColumnWeightUpdateKernel.setArg(14, alpha);
 979 | 	_layerColumnWeightUpdateKernel.setArg(15, beta);
 980 | 	_layerColumnWeightUpdateKernel.setArg(16, gamma);
 981 | 	_layerColumnWeightUpdateKernel.setArg(17, seed);
 982 | 
 983 | 	cs.getQueue().enqueueNDRangeKernel(_layerColumnWeightUpdateKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height));
 984 | 
 985 | 	Float2 inputSizeMinusOneInv;
 986 | 	inputSizeMinusOneInv._x = 1.0f / (prevLayerWidth - 1);
 987 | 	inputSizeMinusOneInv._y = 1.0f / (prevLayerHeight - 1);
 988 | 
 989 | 	Int2 reconstructionReceptiveFieldRadii;
 990 | 	reconstructionReceptiveFieldRadii._x = std::ceil(static_cast<float>(layerDesc._width) / prevLayerWidth * layerDesc._receptiveFieldRadius);
 991 | 	reconstructionReceptiveFieldRadii._y = std::ceil(static_cast<float>(layerDesc._height) / prevLayerHeight * layerDesc._receptiveFieldRadius);
 992 | 
 993 | 	Int2 layerSizeMinusOne;
 994 | 	layerSizeMinusOne._x = layerDesc._width - 1;
 995 | 	layerSizeMinusOne._y = layerDesc._height - 1;
 996 | 
 997 | 	// Reconstruct
 998 | 	_inputBiasUpdateKernel.setArg(0, prevLayerOutput);
 999 | 	_inputBiasUpdateKernel.setArg(1, layer._reconstruction);
1000 | 	_inputBiasUpdateKernel.setArg(2, layer._inputBiasesPrev);
1001 | 	_inputBiasUpdateKernel.setArg(3, layer._inputBiases);
1002 | 	_inputBiasUpdateKernel.setArg(4, gamma);
1003 | 
1004 | 	cs.getQueue().enqueueNDRangeKernel(_inputBiasUpdateKernel, cl::NullRange, cl::NDRange(prevLayerWidth, prevLayerHeight));
1005 | }
1006 | 
1007 | void HTMRL::learnLayerTemporal(sys::ComputeSystem &cs, Layer &layer, cl::Image2D &prevLayerOutput, int prevLayerWidth, int prevLayerHeight, cl::Image2D &nextLayerPrediction, int nextLayerWidth, int nextLayerHeight, const LayerDesc &layerDesc, float tdError, float cellConnectionAlpha, float cellConnectionBeta, float cellConnectionGamma, float cellConnectionTemperature, float cellWeightEligibilityDecay, std::mt19937 &generator) {
1008 | 	struct Uint2 {
1009 | 		unsigned int _x, _y;
1010 | 	};
1011 | 
1012 | 	struct Int2 {
1013 | 		int _x, _y;
1014 | 	};
1015 | 
1016 | 	struct Float2 {
1017 | 		float _x, _y;
1018 | 	};
1019 | 
1020 | 	std::uniform_int_distribution<int> uniformDist(0, 10000);
1021 | 
1022 | 	Uint2 seed;
1023 | 	seed._x = uniformDist(generator);
1024 | 	seed._y = uniformDist(generator);
1025 | 
1026 | 	Int2 inputSize;
1027 | 	inputSize._x = prevLayerWidth;
1028 | 	inputSize._y = prevLayerHeight;
1029 | 
1030 | 	Int2 layerSize;
1031 | 	layerSize._x = layerDesc._width;
1032 | 	layerSize._y = layerDesc._height;
1033 | 
1034 | 	Float2 inputSizeInv;
1035 | 	inputSizeInv._x = 1.0f / prevLayerWidth;
1036 | 	inputSizeInv._y = 1.0f / prevLayerHeight;
1037 | 
1038 | 	Float2 layerSizeMinusOneInv;
1039 | 	layerSizeMinusOneInv._x = 1.0f / (layerDesc._width - 1);
1040 | 	layerSizeMinusOneInv._y = 1.0f / (layerDesc._height - 1);
1041 | 
1042 | 	Int2 inputReceptiveFieldRadius;
1043 | 	inputReceptiveFieldRadius._x = layerDesc._receptiveFieldRadius;
1044 | 	inputReceptiveFieldRadius._y = layerDesc._receptiveFieldRadius;
1045 | 
1046 | 	Int2 layerReceptiveFieldRadius;
1047 | 	layerReceptiveFieldRadius._x = layerDesc._receptiveFieldRadius;
1048 | 	layerReceptiveFieldRadius._y = layerDesc._receptiveFieldRadius;
1049 | 
1050 | 	Int2 lateralConnectionRadii;
1051 | 	lateralConnectionRadii._x = layerDesc._lateralConnectionRadius;
1052 | 	lateralConnectionRadii._y = layerDesc._lateralConnectionRadius;
1053 | 
1054 | 	// Lateral weight update
1055 | 	Int2 nextLayerSize;
1056 | 	nextLayerSize._x = nextLayerWidth;
1057 | 	nextLayerSize._y = nextLayerHeight;
1058 | 
1059 | 	Int2 nextLayerSizeMinusOne;
1060 | 	nextLayerSizeMinusOne._x = nextLayerWidth - 1;
1061 | 	nextLayerSizeMinusOne._y = nextLayerHeight - 1;
1062 | 
1063 | 	_layerCellWeightUpdateKernel.setArg(0, layer._columnStates);
1064 | 	_layerCellWeightUpdateKernel.setArg(1, layer._columnPredictionsPrev);
1065 | 	_layerCellWeightUpdateKernel.setArg(2, layer._cellPredictionsPrev);
1066 | 	_layerCellWeightUpdateKernel.setArg(3, layer._cellStates);
1067 | 	_layerCellWeightUpdateKernel.setArg(4, layer._cellStatesPrev);
1068 | 	_layerCellWeightUpdateKernel.setArg(5, nextLayerPrediction);
1069 | 	_layerCellWeightUpdateKernel.setArg(6, layer._segmentStatesPrev);
1070 | 	_layerCellWeightUpdateKernel.setArg(7, layer._cellWeightsPrev);
1071 | 	_layerCellWeightUpdateKernel.setArg(8, layer._cellWeights);
1072 | 	_layerCellWeightUpdateKernel.setArg(9, layerDesc._cellsInColumn);
1073 | 	_layerCellWeightUpdateKernel.setArg(10, layerSize);
1074 | 	_layerCellWeightUpdateKernel.setArg(11, lateralConnectionRadii);
1075 | 	_layerCellWeightUpdateKernel.setArg(12, layerDesc._numSegmentsPerCell);
1076 | 	_layerCellWeightUpdateKernel.setArg(13, layerSizeMinusOneInv);
1077 | 	_layerCellWeightUpdateKernel.setArg(14, nextLayerSize);
1078 | 	_layerCellWeightUpdateKernel.setArg(15, nextLayerSizeMinusOne);
1079 | 	_layerCellWeightUpdateKernel.setArg(16, tdError);
1080 | 	_layerCellWeightUpdateKernel.setArg(17, cellConnectionAlpha);
1081 | 	_layerCellWeightUpdateKernel.setArg(18, cellConnectionBeta);
1082 | 	_layerCellWeightUpdateKernel.setArg(19, cellConnectionGamma);
1083 | 	_layerCellWeightUpdateKernel.setArg(20, cellConnectionTemperature);
1084 | 	_layerCellWeightUpdateKernel.setArg(21, cellWeightEligibilityDecay);
1085 | 
1086 | 	cs.getQueue().enqueueNDRangeKernel(_layerCellWeightUpdateKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height));
1087 | }
1088 | 
1089 | void HTMRL::learnLayerTemporalLast(sys::ComputeSystem &cs, Layer &layer, cl::Image2D &prevLayerOutput, int prevLayerWidth, int prevLayerHeight, const LayerDesc &layerDesc, float tdError, float cellConnectionAlpha, float cellConnectionBeta, float cellConnectionGamma, float cellConnectionTemperature, float cellWeightEligibilityDecay, std::mt19937 &generator) {
1090 | 	struct Uint2 {
1091 | 		unsigned int _x, _y;
1092 | 	};
1093 | 
1094 | 	struct Int2 {
1095 | 		int _x, _y;
1096 | 	};
1097 | 
1098 | 	struct Float2 {
1099 | 		float _x, _y;
1100 | 	};
1101 | 
1102 | 	std::uniform_int_distribution<int> uniformDist(0, 10000);
1103 | 
1104 | 	Uint2 seed;
1105 | 	seed._x = uniformDist(generator);
1106 | 	seed._y = uniformDist(generator);
1107 | 
1108 | 	Int2 inputSize;
1109 | 	inputSize._x = prevLayerWidth;
1110 | 	inputSize._y = prevLayerHeight;
1111 | 
1112 | 	Int2 layerSize;
1113 | 	layerSize._x = layerDesc._width;
1114 | 	layerSize._y = layerDesc._height;
1115 | 
1116 | 	Float2 inputSizeInv;
1117 | 	inputSizeInv._x = 1.0f / prevLayerWidth;
1118 | 	inputSizeInv._y = 1.0f / prevLayerHeight;
1119 | 
1120 | 	Float2 layerSizeInv;
1121 | 	layerSizeInv._x = 1.0f / layerDesc._width;
1122 | 	layerSizeInv._y = 1.0f / layerDesc._height;
1123 | 
1124 | 	Int2 lateralConnectionRadii;
1125 | 	lateralConnectionRadii._x = layerDesc._lateralConnectionRadius;
1126 | 	lateralConnectionRadii._y = layerDesc._lateralConnectionRadius;
1127 | 
1128 | 	// Lateral weight update
1129 | 	_layerCellWeightUpdateLastKernel.setArg(0, layer._columnStates);
1130 | 	_layerCellWeightUpdateLastKernel.setArg(1, layer._columnPredictionsPrev);
1131 | 	_layerCellWeightUpdateLastKernel.setArg(2, layer._cellPredictionsPrev);
1132 | 	_layerCellWeightUpdateLastKernel.setArg(3, layer._cellStates);
1133 | 	_layerCellWeightUpdateLastKernel.setArg(4, layer._cellStatesPrev);
1134 | 	_layerCellWeightUpdateLastKernel.setArg(5, layer._segmentStatesPrev);
1135 | 	_layerCellWeightUpdateLastKernel.setArg(6, layer._cellWeightsPrev);
1136 | 	_layerCellWeightUpdateLastKernel.setArg(7, layer._cellWeights);
1137 | 	_layerCellWeightUpdateLastKernel.setArg(8, layerDesc._cellsInColumn);
1138 | 	_layerCellWeightUpdateLastKernel.setArg(9, layerSize);
1139 | 	_layerCellWeightUpdateLastKernel.setArg(10, lateralConnectionRadii);
1140 | 	_layerCellWeightUpdateLastKernel.setArg(11, layerDesc._numSegmentsPerCell);
1141 | 	_layerCellWeightUpdateLastKernel.setArg(12, tdError);
1142 | 	_layerCellWeightUpdateLastKernel.setArg(13, cellConnectionAlpha);
1143 | 	_layerCellWeightUpdateLastKernel.setArg(14, cellConnectionBeta);
1144 | 	_layerCellWeightUpdateLastKernel.setArg(15, cellConnectionGamma);
1145 | 	_layerCellWeightUpdateLastKernel.setArg(16, cellConnectionTemperature);
1146 | 	_layerCellWeightUpdateLastKernel.setArg(17, cellWeightEligibilityDecay);
1147 | 
1148 | 	cs.getQueue().enqueueNDRangeKernel(_layerCellWeightUpdateLastKernel, cl::NullRange, cl::NDRange(layerDesc._width, layerDesc._height));
1149 | }
1150 | 
1151 | void HTMRL::learnSpatialReplay(sys::ComputeSystem &cs, float cellStateDecay, float alpha, float beta, float gamma, int maxReplayChainSize, int numReplaySamples, unsigned long seed) {
1152 | 	std::mt19937 generator(seed);
1153 | 
1154 | 	std::uniform_int_distribution<int> sampleDist(0, _inputReplayChain.size());
1155 | 
1156 | 	for (int i = 0; i < numReplaySamples; i++) {
1157 | 		int sampleIndex = sampleDist(generator);
1158 | 
1159 | 		if (sampleIndex == 0) {
1160 | 			// Replay input
1161 | 			cl::Image2D* pPrevLayerOutput = &_inputImage;
1162 | 			int prevLayerWidth = _inputWidth;
1163 | 			int prevLayerHeight = _inputHeight;
1164 | 
1165 | 			for (int l = 0; l < _layers.size(); l++) {
1166 | 				spatialPoolLayer(cs, *pPrevLayerOutput, prevLayerWidth, prevLayerHeight, _layers[l], _layerDescs[l], 0.0f, generator);
1167 | 				learnLayerSpatial(cs, _layers[l], *pPrevLayerOutput, prevLayerWidth, prevLayerHeight, _layerDescs[l], alpha, beta, gamma, generator);
1168 | 
1169 | 				pPrevLayerOutput = &_layers[l]._columnStates;
1170 | 				prevLayerWidth = _layerDescs[l]._width;
1171 | 				prevLayerHeight = _layerDescs[l]._height;
1172 | 			}
1173 | 		}
1174 | 		else {
1175 | 			int index = 0;
1176 | 
1177 | 			cl::Image2D* pPrevLayerOutput;
1178 | 
1179 | 			for (std::list<cl::Image2D>::iterator it = _inputReplayChain.begin(); it != _inputReplayChain.end(); it++, index++) {
1180 | 				if (index >= sampleIndex - 1) {
1181 | 					pPrevLayerOutput = &(*it);
1182 | 					break;
1183 | 				}
1184 | 			}
1185 | 
1186 | 			// Replay input
1187 | 			int prevLayerWidth = _inputWidth;
1188 | 			int prevLayerHeight = _inputHeight;
1189 | 
1190 | 			for (int l = 0; l < _layers.size(); l++) {
1191 | 				spatialPoolLayer(cs, *pPrevLayerOutput, prevLayerWidth, prevLayerHeight, _layers[l], _layerDescs[l], 0.0f, generator);
1192 | 				learnLayerSpatial(cs, _layers[l], *pPrevLayerOutput, prevLayerWidth, prevLayerHeight, _layerDescs[l], alpha, beta, gamma, generator);
1193 | 
1194 | 				pPrevLayerOutput = &_layers[l]._columnStates;
1195 | 				prevLayerWidth = _layerDescs[l]._width;
1196 | 				prevLayerHeight = _layerDescs[l]._height;
1197 | 			}
1198 | 		}
1199 | 
1200 | 		for (int l = 0; l < _layers.size(); l++) {
1201 | 			std::swap(_layers[l]._columnFeedForwardWeights, _layers[l]._columnFeedForwardWeightsPrev);
1202 | 			std::swap(_layers[l]._inputBiases, _layers[l]._inputBiasesPrev);
1203 | 		}
1204 | 	}
1205 | 
1206 | 	// Replay input to set state properly
1207 | 	cl::Image2D* pPrevLayerOutput = &_inputImage;
1208 | 	int prevLayerWidth = _inputWidth;
1209 | 	int prevLayerHeight = _inputHeight;
1210 | 
1211 | 	for (int l = 0; l < _layers.size(); l++) {
1212 | 		spatialPoolLayer(cs, *pPrevLayerOutput, prevLayerWidth, prevLayerHeight, _layers[l], _layerDescs[l], 0.0f, generator);
1213 | 		learnLayerSpatial(cs, _layers[l], *pPrevLayerOutput, prevLayerWidth, prevLayerHeight, _layerDescs[l], alpha, beta, gamma, generator);
1214 | 
1215 | 		pPrevLayerOutput = &_layers[l]._columnStates;
1216 | 		prevLayerWidth = _layerDescs[l]._width;
1217 | 		prevLayerHeight = _layerDescs[l]._height;
1218 | 	}
1219 | }
1220 | 
1221 | void HTMRL::learnTemporal(sys::ComputeSystem &cs, float tdError, float cellConnectionAlpha, float cellConnectionBeta, float cellConnectionGamma, float cellConnectionTemperature, float cellWeightEligibilityDecay, unsigned long seed) {
1222 | 	std::mt19937 generator(seed);
1223 | 
1224 | 	cl::Image2D* pPrevLayerOutput = &_inputImage;
1225 | 	int prevLayerWidth = _inputWidth;
1226 | 	int prevLayerHeight = _inputHeight;
1227 | 
1228 | 	for (int l = 0; l < _layers.size(); l++) {
1229 | 		if (l == _layers.size() - 1)
1230 | 			learnLayerTemporalLast(cs, _layers[l], *pPrevLayerOutput, prevLayerWidth, prevLayerHeight, _layerDescs[l], tdError, cellConnectionAlpha, cellConnectionBeta, cellConnectionGamma, cellConnectionTemperature, cellWeightEligibilityDecay, generator);
1231 | 		else
1232 | 			learnLayerTemporal(cs, _layers[l], *pPrevLayerOutput, prevLayerWidth, prevLayerHeight, _layers[l + 1]._columnPredictionsPrev, _layerDescs[l + 1]._width, _layerDescs[l + 1]._width, _layerDescs[l], tdError, cellConnectionAlpha, cellConnectionBeta, cellConnectionGamma, cellConnectionTemperature, cellWeightEligibilityDecay, generator);
1233 | 
1234 | 		pPrevLayerOutput = &_layers[l]._columnStates;
1235 | 		prevLayerWidth = _layerDescs[l]._width;
1236 | 		prevLayerHeight = _layerDescs[l]._height;
1237 | 	}
1238 | }
1239 | 
1240 | void HTMRL::gaussianBlur(sys::ComputeSystem &cs, cl::Image2D &source, cl::Image2D &ping, cl::Image2D &pong, int imageSizeX, int imageSizeY, int passes, float kernelWidth) {
1241 | 	struct Int2 {
1242 | 		int _x, _y;
1243 | 	};
1244 | 
1245 | 	struct Float2 {
1246 | 		float _x, _y;
1247 | 	};
1248 | 
1249 | 	Float2 imageSizeInv;
1250 | 	imageSizeInv._x = 1.0f / imageSizeX;
1251 | 	imageSizeInv._y = 1.0f / imageSizeY;
1252 | 
1253 | 	// Blur source to ping
1254 | 	_gaussianBlurXKernel.setArg(0, source);
1255 | 	_gaussianBlurXKernel.setArg(1, ping);
1256 | 	_gaussianBlurXKernel.setArg(2, imageSizeInv);
1257 | 	_gaussianBlurXKernel.setArg(3, kernelWidth * imageSizeInv._x);
1258 | 
1259 | 	cs.getQueue().enqueueNDRangeKernel(_gaussianBlurXKernel, cl::NullRange, cl::NDRange(imageSizeX, imageSizeY));
1260 | 
1261 | 	for (int p = 0; p < passes - 1; p++) {
1262 | 		_gaussianBlurYKernel.setArg(0, ping);
1263 | 		_gaussianBlurYKernel.setArg(1, pong);
1264 | 		_gaussianBlurYKernel.setArg(2, imageSizeInv);
1265 | 		_gaussianBlurYKernel.setArg(3, kernelWidth * imageSizeInv._y);
1266 | 
1267 | 		cs.getQueue().enqueueNDRangeKernel(_gaussianBlurYKernel, cl::NullRange, cl::NDRange(imageSizeX, imageSizeY));
1268 | 
1269 | 		_gaussianBlurXKernel.setArg(0, pong);
1270 | 		_gaussianBlurXKernel.setArg(1, ping);
1271 | 		_gaussianBlurXKernel.setArg(2, imageSizeInv);
1272 | 		_gaussianBlurXKernel.setArg(3, kernelWidth * imageSizeInv._x);
1273 | 
1274 | 		cs.getQueue().enqueueNDRangeKernel(_gaussianBlurXKernel, cl::NullRange, cl::NDRange(imageSizeX, imageSizeY));
1275 | 	}
1276 | 
1277 | 	_gaussianBlurYKernel.setArg(0, ping);
1278 | 	_gaussianBlurYKernel.setArg(1, pong);
1279 | 	_gaussianBlurYKernel.setArg(2, imageSizeInv);
1280 | 	_gaussianBlurYKernel.setArg(3, kernelWidth * imageSizeInv._y);
1281 | 
1282 | 	cs.getQueue().enqueueNDRangeKernel(_gaussianBlurYKernel, cl::NullRange, cl::NDRange(imageSizeX, imageSizeY));
1283 | }
1284 | 
1285 | void HTMRL::getReconstructedPrediction(std::vector<float> &prediction, sys::ComputeSystem &cs) {
1286 | 	struct Int2 {
1287 | 		int _x, _y;
1288 | 	};
1289 | 
1290 | 	struct Float2 {
1291 | 		float _x, _y;
1292 | 	};
1293 | 
1294 | 	Int2 layerSize;
1295 | 	layerSize._x = _layerDescs.front()._width;
1296 | 	layerSize._y = _layerDescs.front()._height;
1297 | 
1298 | 	Int2 inputSizeMinusOne;
1299 | 	inputSizeMinusOne._x = _inputWidth - 1;
1300 | 	inputSizeMinusOne._y = _inputHeight - 1;
1301 | 
1302 | 	Float2 inputSizeMinusOneInv;
1303 | 	inputSizeMinusOneInv._x = 1.0f / (_inputWidth - 1);
1304 | 	inputSizeMinusOneInv._y = 1.0f / (_inputHeight - 1);
1305 | 
1306 | 	Int2 reconstructionReceptiveFieldRadii;
1307 | 	reconstructionReceptiveFieldRadii._x = std::ceil(static_cast<float>(_layerDescs.front()._width) / _inputWidth * _layerDescs.front()._receptiveFieldRadius);
1308 | 	reconstructionReceptiveFieldRadii._y = std::ceil(static_cast<float>(_layerDescs.front()._height) / _inputHeight * _layerDescs.front()._receptiveFieldRadius);
1309 | 
1310 | 	Int2 sdrReceptiveFieldRadii;
1311 | 	sdrReceptiveFieldRadii._x = _layerDescs.front()._receptiveFieldRadius;
1312 | 	sdrReceptiveFieldRadii._y = _layerDescs.front()._receptiveFieldRadius;
1313 | 
1314 | 	Int2 sdrSizeMinusOne;
1315 | 	sdrSizeMinusOne._x = _layerDescs.front()._width - 1;
1316 | 	sdrSizeMinusOne._y = _layerDescs.front()._height - 1;
1317 | 
1318 | 	Float2 sdrSizeMinusOneInv;
1319 | 	sdrSizeMinusOneInv._x = 1.0f / (_layerDescs.front()._width - 1);
1320 | 	sdrSizeMinusOneInv._y = 1.0f / (_layerDescs.front()._height - 1);
1321 | 
1322 | 	_reconstructInputKernel.setArg(0, _layers.front()._columnFeedForwardWeights);
1323 | 	_reconstructInputKernel.setArg(1, _layers.front()._inputBiases);
1324 | 	_reconstructInputKernel.setArg(2, _layers.front()._columnPredictions);
1325 | 	_reconstructInputKernel.setArg(3, _reconstructedPrediction);
1326 | 	_reconstructInputKernel.setArg(4, reconstructionReceptiveFieldRadii);
1327 | 	_reconstructInputKernel.setArg(5, sdrReceptiveFieldRadii);
1328 | 	_reconstructInputKernel.setArg(6, inputSizeMinusOne);
1329 | 	_reconstructInputKernel.setArg(7, inputSizeMinusOneInv);
1330 | 	_reconstructInputKernel.setArg(8, layerSize);
1331 | 	_reconstructInputKernel.setArg(9, sdrSizeMinusOne);
1332 | 	_reconstructInputKernel.setArg(10, sdrSizeMinusOneInv);
1333 | 
1334 | 	cs.getQueue().enqueueNDRangeKernel(_reconstructInputKernel, cl::NullRange, cl::NDRange(_inputWidth, _inputHeight));
1335 | 
1336 | 	if (prediction.size() != _input.size())
1337 | 		prediction.resize(_input.size());
1338 | 
1339 | 	// Read prediction
1340 | 	{
1341 | 		cl::size_t<3> origin;
1342 | 		origin[0] = 0;
1343 | 		origin[1] = 0;
1344 | 		origin[2] = 0;
1345 | 
1346 | 		cl::size_t<3> region;
1347 | 		region[0] = _inputWidth;
1348 | 		region[1] = _inputHeight;
1349 | 		region[2] = 1;
1350 | 
1351 | 		cs.getQueue().enqueueReadImage(_reconstructedPrediction, CL_TRUE, origin, region, 0, 0, &prediction[0]);
1352 | 	}
1353 | }
1354 | 
1355 | void HTMRL::step(sys::ComputeSystem &cs, float reward, float reconstructionAlpha, float columnDecay, float cellStateDecay, float columnConnectionAlpha, float columnConnectionBeta, float columnConnectionGamma, float cellConnectionAlpha, float cellConnectionBeta, float cellConnectionGamma, float cellConnectionTemperature, float cellWeightEligibilityDecay, float alpha, float gamma, float breakChance, float perturbationStdDev, int maxReplayChainSize, int numReplaySamples, int addReplaySampleSteps, std::mt19937 &generator) {
1356 | 	std::uniform_int_distribution<int> seedDist(0, 10000);
1357 | 
1358 | 	unsigned long seed = seedDist(generator);
1359 | 
1360 | 	stepBegin(cs, addReplaySampleSteps, maxReplayChainSize);
1361 | 
1362 | 	activate(_input, cs, reward, alpha, gamma, columnDecay, cellStateDecay, columnConnectionAlpha, columnConnectionBeta, columnConnectionGamma, cellConnectionAlpha, cellConnectionBeta, cellConnectionGamma, cellConnectionTemperature, cellWeightEligibilityDecay, maxReplayChainSize, numReplaySamples, addReplaySampleSteps, seed);
1363 | 
1364 | 	std::vector<float> output;
1365 | 
1366 | 	getReconstructedPrediction(output, cs);
1367 | 
1368 | 	// Exploratory action
1369 | 	std::uniform_real_distribution<float> dist01(0.0f, 1.0f);
1370 | 	std::normal_distribution<float> pertDist(0.0f, perturbationStdDev);
1371 | 
1372 | 	for (int i = 0; i < _input.size(); i++)
1373 | 	if (_inputTypes[i] == _action) {
1374 | 		if (dist01(generator) < breakChance)
1375 | 			_input[i] = dist01(generator) > 0.5f ? 1.0f : 0.0f;
1376 | 		else
1377 | 			_input[i] = output[i] > 0.5f ? 1.0f : 0.0f;// std::min<float>(1.0f, std::max<float>(0.0f, std::min<float>(1.0f, std::max<float>(0.0f, output[i])) + pertDist(generator)));
1378 | 	}
1379 | 	else if (_inputTypes[i] == _unused)
1380 | 		_input[i] = 0.0f;
1381 | }
1382 | 
1383 | void HTMRL::exportCellData(sys::ComputeSystem &cs, std::vector<std::shared_ptr<sf::Image>> &images, unsigned long seed) const {
1384 | 	std::mt19937 generator(seed);
1385 | 	
1386 | 	int maxWidth = _inputWidth;
1387 | 	int maxHeight = _inputHeight;
1388 | 
1389 | 	for (int l = 0; l < _layers.size(); l++) {
1390 | 		maxWidth = std::max<int>(maxWidth, _layerDescs[l]._width);
1391 | 		maxHeight = std::max<int>(maxHeight, _layerDescs[l]._height);
1392 | 	}
1393 | 	
1394 | 	std::uniform_real_distribution<float> uniformDist(0.0f, 1.0f);
1395 | 
1396 | 	{
1397 | 		std::vector<float> state(_inputWidth * _inputHeight);
1398 | 
1399 | 		cl::size_t<3> origin;
1400 | 		origin[0] = 0;
1401 | 		origin[1] = 0;
1402 | 		origin[2] = 0;
1403 | 
1404 | 		cl::size_t<3> region;
1405 | 		region[0] = _inputWidth;
1406 | 		region[1] = _inputHeight;
1407 | 		region[2] = 1;
1408 | 
1409 | 		cs.getQueue().enqueueReadImage(_layers.front()._reconstruction, CL_TRUE, origin, region, 0, 0, &state[0]);
1410 | 
1411 | 		sf::Color c;
1412 | 		c.r = uniformDist(generator) * 255.0f;
1413 | 		c.g = uniformDist(generator) * 255.0f;
1414 | 		c.b = uniformDist(generator) * 255.0f;
1415 | 
1416 | 		// Convert to colors
1417 | 		std::shared_ptr<sf::Image> image = std::make_shared<sf::Image>();
1418 | 
1419 | 		image->create(maxWidth, maxHeight, sf::Color::Transparent);
1420 | 
1421 | 		for (int x = 0; x < _inputWidth; x++)
1422 | 		for (int y = 0; y < _inputHeight; y++) {
1423 | 			sf::Color color;
1424 | 
1425 | 			color = c;
1426 | 
1427 | 			color.a = std::min<float>(1.0f, std::max<float>(0.0f, state[x + y * _inputWidth])) * (255.0f - 3.0f) + 3;
1428 | 
1429 | 			image->setPixel(x - _inputWidth / 2 + maxWidth / 2, y - _inputHeight / 2 + maxHeight / 2, color);
1430 | 		}
1431 | 
1432 | 		images.push_back(image);
1433 | 	}
1434 | 
1435 | 	/*{
1436 | 		sf::Color c;
1437 | 		c.r = uniformDist(generator) * 255.0f;
1438 | 		c.g = uniformDist(generator) * 255.0f;
1439 | 		c.b = uniformDist(generator) * 255.0f;
1440 | 
1441 | 		// Convert to colors
1442 | 		std::shared_ptr<sf::Image> image = std::make_shared<sf::Image>();
1443 | 
1444 | 		image->create(maxWidth, maxHeight, sf::Color::Transparent);
1445 | 
1446 | 		for (int x = 0; x < _inputWidth; x++)
1447 | 		for (int y = 0; y < _inputHeight; y++) {
1448 | 			sf::Color color;
1449 | 
1450 | 			color = c;
1451 | 
1452 | 			color.a = std::min<float>(1.0f, std::max<float>(0.0f, _exploratoryOutput[x + y * _inputWidth])) * (255.0f - 3.0f) + 3;
1453 | 
1454 | 			image->setPixel(x - _inputWidth / 2 + maxWidth / 2, y - _inputHeight / 2 + maxHeight / 2, color);
1455 | 		}
1456 | 
1457 | 		images.push_back(image);
1458 | 	}*/
1459 | 
1460 | 	if (sf::Keyboard::isKeyPressed(sf::Keyboard::P)) {
1461 | 		for (int l = 0; l < _layers.size(); l++) {
1462 | 			std::vector<float> state(_layerDescs[l]._width * _layerDescs[l]._height * _layerDescs[l]._cellsInColumn * 2);
1463 | 
1464 | 			cl::size_t<3> origin;
1465 | 			origin[0] = 0;
1466 | 			origin[1] = 0;
1467 | 			origin[2] = 0;
1468 | 
1469 | 			cl::size_t<3> region;
1470 | 			region[0] = _layerDescs[l]._width;
1471 | 			region[1] = _layerDescs[l]._height;
1472 | 			region[2] = _layerDescs[l]._cellsInColumn;
1473 | 
1474 | 			cs.getQueue().enqueueReadImage(_layers[l]._cellPredictions, CL_TRUE, origin, region, 0, 0, &state[0]);
1475 | 
1476 | 			sf::Color c;
1477 | 			c.r = uniformDist(generator) * 255.0f;
1478 | 			c.g = uniformDist(generator) * 255.0f;
1479 | 			c.b = uniformDist(generator) * 255.0f;
1480 | 
1481 | 			// Convert to colors
1482 | 			for (int ci = 0; ci < _layerDescs[l]._cellsInColumn; ci++) {
1483 | 				std::shared_ptr<sf::Image> image = std::make_shared<sf::Image>();
1484 | 
1485 | 				image->create(maxWidth, maxHeight, sf::Color::Transparent);
1486 | 
1487 | 				for (int x = 0; x < _layerDescs[l]._width; x++)
1488 | 				for (int y = 0; y < _layerDescs[l]._height; y++) {
1489 | 					sf::Color color;
1490 | 
1491 | 					color = c;
1492 | 
1493 | 					color.a = std::min<float>(1.0f, std::max<float>(0.0f, state[2 * (x + y * _layerDescs[l]._width + ci * _layerDescs[l]._width *_layerDescs[l]._height)])) * (255.0f - 3.0f) + 3;
1494 | 
1495 | 					int wx = x - _layerDescs[l]._width / 2 + maxWidth / 2;
1496 | 					int wy = y - _layerDescs[l]._height / 2 + maxHeight / 2;
1497 | 
1498 | 					assert(wx >= 0 && wy >= 0 && wx < maxWidth && wy < maxHeight);
1499 | 
1500 | 					image->setPixel(wx, wy, color);
1501 | 				}
1502 | 
1503 | 				images.push_back(image);
1504 | 			}
1505 | 		}
1506 | 	}
1507 | 	else {
1508 | 		for (int l = 0; l < _layers.size(); l++) {
1509 | 			std::vector<float> state(_layerDescs[l]._width * _layerDescs[l]._height * _layerDescs[l]._cellsInColumn * 2);
1510 | 
1511 | 			cl::size_t<3> origin;
1512 | 			origin[0] = 0;
1513 | 			origin[1] = 0;
1514 | 			origin[2] = 0;
1515 | 
1516 | 			cl::size_t<3> region;
1517 | 			region[0] = _layerDescs[l]._width;
1518 | 			region[1] = _layerDescs[l]._height;
1519 | 			region[2] = _layerDescs[l]._cellsInColumn;
1520 | 
1521 | 			cs.getQueue().enqueueReadImage(_layers[l]._cellStates, CL_TRUE, origin, region, 0, 0, &state[0]);
1522 | 
1523 | 			sf::Color c;
1524 | 			c.r = uniformDist(generator) * 255.0f;
1525 | 			c.g = uniformDist(generator) * 255.0f;
1526 | 			c.b = uniformDist(generator) * 255.0f;
1527 | 
1528 | 			// Convert to colors
1529 | 			for (int ci = 0; ci < _layerDescs[l]._cellsInColumn; ci++) {
1530 | 				std::shared_ptr<sf::Image> image = std::make_shared<sf::Image>();
1531 | 
1532 | 				image->create(maxWidth, maxHeight, sf::Color::Transparent);
1533 | 
1534 | 				for (int x = 0; x < _layerDescs[l]._width; x++)
1535 | 				for (int y = 0; y < _layerDescs[l]._height; y++) {
1536 | 					sf::Color color;
1537 | 
1538 | 					color = c;
1539 | 
1540 | 					color.a = std::min<float>(1.0f, std::max<float>(0.0f, std::max<float>(0.0f, state[0 + 2 * (x + y * _layerDescs[l]._width + ci * _layerDescs[l]._width *_layerDescs[l]._height)]))) * (255.0f - 3.0f) + 3;
1541 | 
1542 | 					//color.g = std::min<float>(1.0f, std::max<float>(0.0f, std::max<float>(0.0f, state[2 + 4 * (x + y * _layerDescs[l]._width + ci * _layerDescs[l]._width *_layerDescs[l]._height)]))) * (255.0f - 3.0f) + 3;
1543 | 
1544 | 					//color.b = 0;
1545 | 					//color.a = 0.5f * (color.r + color.g);
1546 | 
1547 | 					int wx = x - _layerDescs[l]._width / 2 + maxWidth / 2;
1548 | 					int wy = y - _layerDescs[l]._height / 2 + maxHeight / 2;
1549 | 
1550 | 					assert(wx >= 0 && wy >= 0 && wx < maxWidth && wy < maxHeight);
1551 | 
1552 | 					image->setPixel(wx, wy, color);
1553 | 				}
1554 | 
1555 | 				images.push_back(image);
1556 | 			}
1557 | 		}
1558 | 
1559 | 		/*for (int l = 0; l < _layers.size(); l++) {
1560 | 			std::vector<float> state(_layerDescs[l]._width * _layerDescs[l]._height * 2);
1561 | 
1562 | 			cl::size_t<3> origin;
1563 | 			origin[0] = 0;
1564 | 			origin[1] = 0;
1565 | 			origin[2] = 0;
1566 | 
1567 | 			cl::size_t<3> region;
1568 | 			region[0] = _layerDescs[l]._width;
1569 | 			region[1] = _layerDescs[l]._height;
1570 | 			region[2] = 1;
1571 | 
1572 | 			cs.getQueue().enqueueReadImage(_layers[l]._blurPong, CL_TRUE, origin, region, 0, 0, &state[0]);
1573 | 
1574 | 			sf::Color c;
1575 | 			c.r = uniformDist(generator) * 255.0f;
1576 | 			c.g = uniformDist(generator) * 255.0f;
1577 | 			c.b = uniformDist(generator) * 255.0f;
1578 | 
1579 | 			// Convert to colors
1580 | 			std::shared_ptr<sf::Image> image = std::make_shared<sf::Image>();
1581 | 
1582 | 			image->create(maxWidth, maxHeight, sf::Color::Transparent);
1583 | 
1584 | 			for (int x = 0; x < _layerDescs[l]._width; x++)
1585 | 			for (int y = 0; y < _layerDescs[l]._height; y++) {
1586 | 				sf::Color color;
1587 | 
1588 | 				color = c;
1589 | 
1590 | 				color.a = std::min<float>(1.0f, std::max<float>(0.0f, state[0 + 2 * (x + y * _layerDescs[l]._width)] > 0.0f ? 1.0f : 0.0f)) * (255.0f - 3.0f) + 3;
1591 | 
1592 | 				int wx = x - _layerDescs[l]._width / 2 + maxWidth / 2;
1593 | 				int wy = y - _layerDescs[l]._height / 2 + maxHeight / 2;
1594 | 
1595 | 				assert(wx >= 0 && wy >= 0 && wx < maxWidth && wy < maxHeight);
1596 | 
1597 | 				image->setPixel(wx, wy, color);
1598 | 			}
1599 | 
1600 | 			images.push_back(image);
1601 | 		}*/
1602 | 	}
1603 | }


--------------------------------------------------------------------------------