├── .gitignore
├── LICENSE.md
├── MiniNeoRL.sln
├── MiniNeoRL
│   ├── MiniNeoRL.pyproj
│   ├── MiniNeoRL_Pred_Demo.py
│   ├── MiniNeoRL_RL_Demo.py
│   ├── ball.png
│   ├── neo
│   │   ├── Agent.py
│   │   ├── Hierarchy.py
│   │   ├── Layer.py
│   │   └── LayerRL.py
│   └── paddle.png
├── NeoRL_presentation.odp
└── README.md

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
MiniNeoRL
Copyright (C) 2015 Eric Laukien

This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.

Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not
   claim that you wrote the original software. If you use this software
   in a product, an acknowledgement in the product documentation would be
   appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
   misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.

------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/MiniNeoRL.sln:
--------------------------------------------------------------------------------
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 14
VisualStudioVersion = 14.0.24720.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "MiniNeoRL", "MiniNeoRL\MiniNeoRL.pyproj", "{A42870A8-744B-43F5-83DD-8308A6CB211D}"
EndProject
Global
	GlobalSection(SolutionConfigurationPlatforms) = preSolution
		Debug|Any CPU = Debug|Any CPU
		Release|Any CPU = Release|Any CPU
	EndGlobalSection
	GlobalSection(ProjectConfigurationPlatforms) = postSolution
		{A42870A8-744B-43F5-83DD-8308A6CB211D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{A42870A8-744B-43F5-83DD-8308A6CB211D}.Release|Any CPU.ActiveCfg = Release|Any CPU
	EndGlobalSection
	GlobalSection(SolutionProperties) = preSolution
		HideSolutionNode = FALSE
	EndGlobalSection
EndGlobal

--------------------------------------------------------------------------------
/MiniNeoRL/MiniNeoRL.pyproj:
--------------------------------------------------------------------------------
[Visual Studio Python Tools project file; the XML markup was lost in extraction.
Recoverable settings: Configuration Debug, SchemaVersion 2.0, ProjectGuid
a42870a8-744b-43f5-83dd-8308a6cb211d, StartupFile MiniNeoRL_RL_Demo.py, project
name and root namespace MiniNeoRL, and Code items for the five .py files.]

--------------------------------------------------------------------------------
/MiniNeoRL/MiniNeoRL_Pred_Demo.py:
--------------------------------------------------------------------------------
from neo.Hierarchy import Hierarchy
import numpy as np

sequence = [
    [ 0.0, 0.0, 0.0, 1.0 ],
    [ 0.0, 0.0, 1.0, 1.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 1.0, 0.0 ],
    [ 0.0, 0.0, 0.0, 1.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 1.0, 0.0 ],
    [ 0.0, 0.0, 0.0, 1.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 1.0, 0.0 ],
    [ 0.0, 0.0, 0.0, 1.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 1.0, 0.0 ],
    [ 0.0, 0.0, 0.0, 1.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 1.0, 0.0 ],
    [ 0.0, 0.0, 0.0, 1.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 1.0, 0.0 ],
    [ 0.0, 0.0, 0.0, 1.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ]
]

h = Hierarchy(4, [ 40, 40, 40 ], -0.01, 0.01, 0.1)

averageError = 0

for i in range(0, 10000):
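    # Present the current sequence element as a column vector; the remaining
    # arguments are the encoder, recurrent, decoder, and bias learning rates,
    # followed by the eligibility-trace decay (see Hierarchy.simStep).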
    h.simStep(np.matrix([sequence[i % len(sequence)]]).T, 0.0001, 0.0001, 0.001, 0.001, 0.95)

    error = None

    if np.allclose(np.greater(h.getPrediction(), 0.5), np.matrix([sequence[(i + 1) % len(sequence)]]).T):
        error = 0
    else:
        error = 1

    averageError = 0.99 * averageError + 0.01 * error

    #print(h._layers[0]._states)

    print(str(i % 4) + str(np.matrix([sequence[i % len(sequence)]]).T.ravel()) + " " + str(np.greater(h.getPrediction(), 0.5).ravel()) + " Error: " + str(error) + " Average Error: " + str(averageError))

--------------------------------------------------------------------------------
/MiniNeoRL/MiniNeoRL_RL_Demo.py:
--------------------------------------------------------------------------------
from neo.Agent import Agent
import numpy as np
import pygame

# The environment
displayWidth = 600
displayHeight = 600

ballPosition = np.array([ np.random.rand(), np.random.rand() * 0.5 + 0.5 ])

ballVelocity = np.array([ 0.353, 1.0 ]) * 0.04

paddleX = 0.5

ballRadius = 16.0 / displayWidth
paddleRadius = 64.0 / displayWidth

encoderSize = 10
numInputs = 5
numActions = 1

a = Agent(numInputs * encoderSize, numActions, [ 50, 50 ], -0.1, 0.1, 0.1)

averageReward = 0.0

reward = 0.0
prevReward = 0.0

rewardPunishmentTime = 2.0
punishmentTimer = 0.0
rewardTimer = 0.0

# Resources
ballImage = pygame.image.load("ball.png")
paddleImage = pygame.image.load("paddle.png")

# Game setup
pygame.init()

display = pygame.display.set_mode((displayWidth, displayHeight))
clock = pygame.time.Clock()
done = False

dir = 1.0

timer = 0.0

while not done:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            done = True

        if event.type == pygame.KEYDOWN:
            if event.key == pygame.K_LEFT:
                dir = -1.0
            if event.key == pygame.K_RIGHT:
                dir = 1.0

    # Update physics
    ballPosition += ballVelocity
    #timer += 1.0
    #ballPosition[0] = np.sin(timer * 0.05) * 0.5 + 0.5
    #ballPosition[1] = 0.5

    if ballPosition[0] < 0.0:
        ballPosition[0] = 0.0
        ballVelocity[0] *= -1.0
    elif ballPosition[0] > 1.0:
        ballPosition[0] = 1.0
        ballVelocity[0] *= -1.0

    if ballPosition[1] < 32.0 / displayWidth:
        # If hit paddle
        if ballPosition[0] + ballRadius > paddleX - paddleRadius and ballPosition[0] - ballRadius < paddleX + paddleRadius:
            rewardTimer = rewardPunishmentTime

            # Bounce ball
            ballPosition[1] = 32.0 / displayWidth
            ballVelocity[1] *= -1.0
        else:
            punishmentTimer = rewardPunishmentTime

            # Reset ball
            ballPosition = np.array([ np.random.rand(), np.random.rand() * 0.5 + 0.5 ])

            if np.random.rand() < 0.5:
                ballVelocity = np.array([ 0.353, 1.0 ]) * 0.04
            else:
                ballVelocity = np.array([ -0.353, 1.0 ]) * 0.04

    elif ballPosition[1] > 1.0:
        ballPosition[1] = 1.0
        ballVelocity[1] *= -1.0

    reward = (rewardTimer > 0.0) # - (punishmentTimer > 0.0)

    #reward = reward * 0.5 + 0.5

    averageReward = 0.99 * averageReward + 0.01 * reward

    # Control
    inputs = [ paddleX * 2.0 - 1.0, ballPosition[0] * 2.0 - 1.0, ballPosition[1] * 2.0 - 1.0, ballVelocity[0] * 30.0, ballVelocity[1] * 30.0 ]

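    # Sanity-check the hand-built feature list above; each scalar in `inputs`
    # is then expanded into a coarse one-hot code over encoderSize bins, so the
    # agent receives numInputs * encoderSize binary features.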
    assert(len(inputs) == numInputs)

    inputArr = []

    encoderSharpness = 30.0

    for v in inputs:
        for i in range(0, encoderSize):
            center = i / encoderSize * 2.0 - 1.0
            delta = center - v
            #intensity = np.exp(-delta * delta * encoderSharpness)

            intensity = np.absolute(delta) < 0.5 / encoderSize

            inputArr.append(intensity)

    #reward = dir * paddleX * 0.01

    #reward = np.abs(paddleX - ballPosition[0]) < 0.1

    a.simStep(reward, 0.001, 0.95, 0.05, np.matrix([inputArr]).T, 0.001, 0.001, 0.01, 0.01, 0.92)

    print(a._prevValue)

    prevReward = reward

    if rewardTimer > 0.0:
        rewardTimer -= 1.0
    if punishmentTimer > 0.0:
        punishmentTimer -= 1.0

    paddleX = np.minimum(1.0, np.maximum(0.0, paddleX + 0.2 * np.sum(a.getActions()) / numActions))

    # Render
    display.fill((255,255,255))

    display.blit(paddleImage, (displayWidth * paddleX - 64.0, displayHeight - 32.0))
    display.blit(ballImage, (displayWidth * ballPosition[0] - 16.0, displayHeight * (1.0 - ballPosition[1]) - 16.0))

    pygame.display.flip()
    clock.tick(60)

--------------------------------------------------------------------------------
/MiniNeoRL/ball.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/222464/MiniNeoRL/cf9fd6128913be3603a371766cf2e58c0fca2c2b/MiniNeoRL/ball.png

--------------------------------------------------------------------------------
/MiniNeoRL/neo/Agent.py:
--------------------------------------------------------------------------------
import numpy as np
from neo.LayerRL import LayerRL

class Agent:
    """A hierarchy of fully connected NeoRL layers that functions as a reinforcement learning agent"""

    def __init__(self, numInputs, numActions, layerSizes, initMinWeight, initMaxWeight, activeRatio):
        self._layers = []

        self._numInputs = numInputs
        self._numActions = numActions

        self._actions = np.zeros((numActions, 1))
        self._actionsExploratory = np.zeros((numActions, 1))

        self._qPredictiveWeights = np.random.rand(1, layerSizes[0])
        self._qPredictiveTraces = np.zeros((1, layerSizes[0]))

        self._qFeedBackWeights = np.random.rand(1, layerSizes[0])
        self._qFeedBackTraces = np.zeros((1, layerSizes[0]))

        self._averageAbsTDError = 1.0

        mask = []

        for i in range(0, self._numInputs):
            mask.append(0.0)

        for i in range(0, self._numActions):
            mask.append(1.0)

        self._actionMask = np.matrix([mask]).T

        self._prevValue = 0.0

        # Create layers
        for l in range(0, len(layerSizes)):
            layer = None

            if l == 0:
                if l < len(layerSizes) - 1:
                    layer = LayerRL(numInputs + numActions, layerSizes[l], layerSizes[l], initMinWeight, initMaxWeight, activeRatio)
                else:
                    layer = LayerRL(numInputs + numActions, layerSizes[l], 1, initMinWeight, initMaxWeight, activeRatio)
            else:
                if l < len(layerSizes) - 1:
                    layer = LayerRL(layerSizes[l - 1], layerSizes[l], layerSizes[l], initMinWeight, initMaxWeight, activeRatio)
                else:
                    layer = LayerRL(layerSizes[l - 1], layerSizes[l], 1, initMinWeight, initMaxWeight, activeRatio)

            self._layers.append(layer)

    def simStep(self, reward, qAlpha, qGamma, exploration, input, learnEncoderRate, learnRecurrentRate,
                learnDecoderRate, learnBiasRate, traceDecay):
        assert(len(input) == self._numInputs)

        usedInputArr = []

        for i in range(0, self._numInputs):
            usedInputArr.append(input.item(i))

        for i in range(0, self._numActions):
            usedInputArr.append(self._actionsExploratory.item(i))

        usedInput = np.matrix([ usedInputArr ]).T

        # Up pass
        for l in range(0, len(self._layers)):
            if l == 0:
                self._layers[l].upPass(usedInput)
            else:
                self._layers[l].upPass(self._layers[l - 1]._states)

        # Down pass
        for l in range(0, len(self._layers)):
            rl = len(self._layers) - 1 - l

            if rl < len(self._layers) - 1:
                if rl == 0:
                    self._layers[rl].downPass(self._layers[rl + 1]._predictions, False)
                else:
                    self._layers[rl].downPass(self._layers[rl + 1]._predictions, True)
            else:
                if rl == 0:
                    self._layers[rl].downPass(np.matrix([[ 0 ]]), False)
                else:
                    self._layers[rl].downPass(np.matrix([[ 0 ]]), True)

        # Get Q
        q = 0.0

        if len(self._layers) > 1:
            q = np.dot(self._qPredictiveWeights, self._layers[0]._states) + np.dot(self._qFeedBackWeights, self._layers[1]._predictions)
        else:
            q = np.dot(self._qPredictiveWeights, self._layers[0]._states)

        tdError = reward + qGamma * q.item(0) - self._prevValue

        self._qPredictiveWeights += qAlpha * tdError * self._qPredictiveTraces

        self._qPredictiveTraces = self._qPredictiveTraces * traceDecay + self._layers[0]._states.T

        if len(self._layers) > 1:
            self._qFeedBackWeights += qAlpha * tdError * self._qFeedBackTraces

            self._qFeedBackTraces = self._qFeedBackTraces * traceDecay + self._layers[1]._predictions.T

        predInputExpArr = []

        for i in range(0, self._numInputs):
            predInputExpArr.append(input.item(i))

        for i in range(0, self._numActions):
            predInputExpArr.append(self._actionsExploratory.item(i))

        predInputExp = np.matrix([ predInputExpArr ]).T

        reinforce = np.sign(tdError) * 0.5 + 0.5

        # Learn
        for l in range(0, len(self._layers)):
            if l == 0:
                if l < len(self._layers) - 1:
                    self._layers[l].learn(reinforce, predInputExp, self._layers[l + 1]._predictionsPrev, learnEncoderRate, learnRecurrentRate, learnDecoderRate, learnBiasRate, traceDecay)
                else:
                    self._layers[l].learn(reinforce, predInputExp, np.matrix([[ 0 ]]), learnEncoderRate, learnRecurrentRate, learnDecoderRate, learnBiasRate, traceDecay)
            else:
                if l < len(self._layers) - 1:
                    self._layers[l].learn(reinforce, self._layers[l - 1]._states, self._layers[l + 1]._predictionsPrev, learnEncoderRate, learnRecurrentRate, learnDecoderRate, learnBiasRate, traceDecay)
                else:
                    self._layers[l].learn(reinforce, self._layers[l - 1]._states, np.matrix([[ 0 ]]), learnEncoderRate, learnRecurrentRate, learnDecoderRate, learnBiasRate, traceDecay)

        # Determine action
        for i in range(0, self._numActions):
            self._actions[i] = np.minimum(1.0, np.maximum(-1.0, self.getPrediction().item(self._numInputs + i)))

            if np.random.rand() < exploration:
                self._actionsExploratory[i] = np.random.rand() * 2.0 - 1.0
            else:
                self._actionsExploratory[i] = self._actions[i]

        self._prevValue = q.item(0)

    def getPrediction(self):
        return self._layers[0]._predictions

    def getActions(self):
        return self._actionsExploratory
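
# A minimal usage sketch, mirroring MiniNeoRL_RL_Demo.py (the parameter values
# below are the demo's choices, not canonical defaults):
#
#   a = Agent(50, 1, [ 50, 50 ], -0.1, 0.1, 0.1)
#   obs = np.matrix([ inputArr ]).T   # (numInputs, 1) column of encoded inputs
#   a.simStep(reward, 0.001, 0.95, 0.05, obs, 0.001, 0.001, 0.01, 0.01, 0.92)
#   action = a.getActions()          # exploratory actions in [-1, 1]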

--------------------------------------------------------------------------------
/MiniNeoRL/neo/Hierarchy.py:
--------------------------------------------------------------------------------
import numpy as np
from neo.Layer import Layer

class Hierarchy:
    """A hierarchy of fully connected NeoRL layers"""

    def __init__(self, numInputs, layerSizes, initMinWeight, initMaxWeight, activeRatio):
        self._layers = []

        # Create layers
        for l in range(0, len(layerSizes)):
            layer = None

            if l == 0:
                if l < len(layerSizes) - 1:
                    layer = Layer(numInputs, layerSizes[l], layerSizes[l], initMinWeight, initMaxWeight, activeRatio)
                else:
                    layer = Layer(numInputs, layerSizes[l], 1, initMinWeight, initMaxWeight, activeRatio)
            else:
                if l < len(layerSizes) - 1:
                    layer = Layer(layerSizes[l - 1], layerSizes[l], layerSizes[l], initMinWeight, initMaxWeight, activeRatio)
                else:
                    layer = Layer(layerSizes[l - 1], layerSizes[l], 1, initMinWeight, initMaxWeight, activeRatio)

            self._layers.append(layer)

    def simStep(self, input, learnEncoderRate, learnRecurrentRate, learnDecoderRate, learnBiasRate, traceDecay):
        # Up pass
        for l in range(0, len(self._layers)):
            if l == 0:
                self._layers[l].upPass(input)
            else:
                self._layers[l].upPass(self._layers[l - 1]._states)

        # Down pass
        for l in range(0, len(self._layers)):
            rl = len(self._layers) - 1 - l

            if rl < len(self._layers) - 1:
                self._layers[rl].downPass(self._layers[rl + 1]._predictions, rl != 0)
            else:
                self._layers[rl].downPass(np.matrix([[ 0 ]]), rl != 0)

        # Learn
        for l in range(0, len(self._layers)):
            if l == 0:
                if l < len(self._layers) - 1:
                    self._layers[l].learn(input, self._layers[l + 1]._predictionsPrev, learnEncoderRate, learnRecurrentRate, learnDecoderRate, learnBiasRate, traceDecay)
                else:
                    self._layers[l].learn(input, np.matrix([[ 0 ]]), learnEncoderRate, learnRecurrentRate, learnDecoderRate, learnBiasRate, traceDecay)
            else:
                if l < len(self._layers) - 1:
                    self._layers[l].learn(self._layers[l - 1]._states, self._layers[l + 1]._predictionsPrev, learnEncoderRate, learnRecurrentRate, learnDecoderRate, learnBiasRate, traceDecay)
                else:
                    self._layers[l].learn(self._layers[l - 1]._states, np.matrix([[ 0 ]]), learnEncoderRate, learnRecurrentRate, learnDecoderRate, learnBiasRate, traceDecay)

    def getPrediction(self):
        return self._layers[0]._predictions

--------------------------------------------------------------------------------
/MiniNeoRL/neo/Layer.py:
--------------------------------------------------------------------------------
import numpy as np
from operator import itemgetter

class Layer:
    """A fully-connected NeoRL layer"""

    def __init__(self, numInputs, numHidden, numFeedBack, initMinWeight, initMaxWeight, activeRatio):
        self._input = np.zeros((numInputs, 1))
        self._inputPrev = np.zeros((numInputs, 1))

        self._feedForwardWeights = np.random.rand(numHidden, numInputs) * (initMaxWeight - initMinWeight) + initMinWeight

        self._recurrentWeights = np.random.rand(numHidden, numHidden) * (initMaxWeight - initMinWeight) + initMinWeight

        self._predictiveWeights = np.random.rand(numInputs, numHidden) * (initMaxWeight - initMinWeight) + initMinWeight

        self._feedBackWeights = np.random.rand(numInputs, numFeedBack) * (initMaxWeight - initMinWeight) + initMinWeight
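        # Four weight groups: the feed-forward (encoder) and recurrent weights
        # drive the sparse hidden states on the up pass; the predictive
        # (decoder) and feed-back weights produce the input predictions on the
        # down pass.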

        self._stateTraces = np.zeros((numHidden, 1))

        self._inputTraces = np.zeros((numInputs, 1))

        self._biases = np.zeros((numHidden, 1)) #np.random.rand(numHidden, 1) * (initMaxWeight - initMinWeight) + initMinWeight

        self._states = np.zeros((numHidden, 1))
        self._statesPrev = np.zeros((numHidden, 1))

        self._feedForwardLearn = np.zeros((numHidden, 1))
        self._recurrentLearn = np.zeros((numHidden, 1))

        self._predictions = np.zeros((numInputs, 1))
        self._predictionsPrev = np.zeros((numInputs, 1))

        self._averageSquaredError = np.zeros((numHidden, 1))

        self._activeRatio = activeRatio

    def upPass(self, input):
        self._inputPrev = self._input

        self._input = input

        self._statesPrev = self._states

        numActive = int(self._activeRatio * len(self._states))

        # Activate
        activations = self._biases + np.dot(self._feedForwardWeights, input) + np.dot(self._recurrentWeights, self._statesPrev)

        # Generate tuples for sorting
        activationsPairs = []

        for i in range(0, len(self._states)):
            activationsPairs.append((activations[i], i))

        # Sort
        activationsPairs = sorted(activationsPairs, key=itemgetter(0))

        # Use sorted information for inhibition
        self._states = np.zeros((len(self._states), 1))

        for i in range(0, numActive):
            self._states[activationsPairs[len(activationsPairs) - 1 - i][1]] = 1.0

    def downPass(self, feedBack, thresholdedPred = True):
        self._predictionsPrev = self._predictions

        # Find states
        self._predictions = np.dot(self._predictiveWeights, self._states) + np.dot(self._feedBackWeights, feedBack)

        if thresholdedPred:
            self._predictions[self._predictions > 0.5] = 1.0
            self._predictions[self._predictions <= 0.5] = 0.0

    def learn(self, target, feedBackPrev, learnEncoderRate, learnRecurrentRate, learnDecoderRate, learnBiasRate, traceDecay):
        # Find prediction error
        predError = target - self._predictionsPrev

        # Propagate error
        hiddenError = np.dot(self._predictiveWeights.T, predError)

        hiddenError = np.multiply(hiddenError, self._statesPrev)

        # Update feed forward and recurrent weights
        self._inputTraces = self._inputTraces * traceDecay + self._input
        self._stateTraces = self._stateTraces * traceDecay + self._statesPrev

        self._feedForwardWeights += learnEncoderRate * (np.dot(self._states, self._inputTraces.T) - np.dot(self._states.T, self._feedForwardWeights))
        self._recurrentWeights += learnRecurrentRate * (np.dot(self._states, self._statesPrev.T) - np.dot(self._states.T, self._recurrentWeights))

        # Update predictive and feed back weights
        self._predictiveWeights += learnDecoderRate * np.dot(predError, self._statesPrev.T)
        self._feedBackWeights += learnDecoderRate * np.dot(predError, feedBackPrev.T)

        # Update thresholds
        self._biases += learnBiasRate * (self._activeRatio - self._states)

--------------------------------------------------------------------------------
/MiniNeoRL/neo/LayerRL.py:
--------------------------------------------------------------------------------
import numpy as np
from operator import itemgetter

class LayerRL:
    """A fully-connected NeoRL layer for RL"""

    def __init__(self, numInputs, numHidden, numFeedBack, initMinWeight, initMaxWeight, activeRatio):
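        # Weights start uniform in [initMinWeight, initMaxWeight]; activeRatio
        # sets the fraction of hidden units left active by the sparse
        # inhibition in upPass.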
        self._input = np.zeros((numInputs, 1))

        self._feedForwardWeights = np.random.rand(numHidden, numInputs) * (initMaxWeight - initMinWeight) + initMinWeight

        self._recurrentWeights = np.random.rand(numHidden, numHidden) * (initMaxWeight - initMinWeight) + initMinWeight

        self._stateTraces = np.zeros((numHidden, 1))

        self._inputTraces = np.zeros((numInputs, 1))

        self._predictiveWeights = np.random.rand(numInputs, numHidden) * (initMaxWeight - initMinWeight) + initMinWeight
        self._predictiveTraces = np.zeros((numInputs, numHidden))

        self._feedBackWeights = np.random.rand(numInputs, numFeedBack) * (initMaxWeight - initMinWeight) + initMinWeight
        self._feedBackTraces = np.zeros((numInputs, numFeedBack))

        self._biases = np.zeros((numHidden, 1)) #np.random.rand(numHidden, 1) * (initMaxWeight - initMinWeight) + initMinWeight

        self._statesRecurrent = np.zeros((numHidden, 1))
        self._statesRecurrentPrev = np.zeros((numHidden, 1))

        self._statesFeedForward = np.zeros((numHidden, 1))

        self._states = np.zeros((numHidden, 1))
        self._statesPrev = np.zeros((numHidden, 1))

        self._feedForwardLearn = np.zeros((numHidden, 1))
        self._recurrentLearn = np.zeros((numHidden, 1))

        self._predictions = np.zeros((numInputs, 1))
        self._predictionsPrev = np.zeros((numInputs, 1))

        self._activeRatio = activeRatio

    def upPass(self, input):
        self._input = input
        self._statesPrev = self._states
        self._statesRecurrentPrev = self._statesRecurrent

        numActive = int(self._activeRatio * len(self._states))

        # Activate
        activations = self._biases + np.dot(self._feedForwardWeights, input) + np.dot(self._recurrentWeights, self._statesPrev)

        # Generate tuples for sorting
        activationsPairs = []

        for i in range(0, len(self._states)):
            activationsPairs.append((activations[i], i))

        # Sort
        activationsPairs = sorted(activationsPairs, key=itemgetter(0))

        # Use sorted information for inhibition
        self._states = np.zeros((len(self._states), 1))

        for i in range(0, numActive):
            self._states[activationsPairs[len(activationsPairs) - 1 - i][1]] = 1.0

    def downPass(self, feedBack, thresholdedPred = True):
        self._predictionsPrev = self._predictions

        # Find states
        self._predictions = np.dot(self._predictiveWeights, self._states) + np.dot(self._feedBackWeights, feedBack)

        if thresholdedPred:
            self._predictions[self._predictions > 0.5] = 1.0
            self._predictions[self._predictions <= 0.5] = 0.0
        else:
            self._predictions = np.tanh(self._predictions)

    def learn(self, reinforce, targetExp, feedBackPrev, learnEncoderRate, learnRecurrentRate, learnDecoderRate, learnBiasRate, traceDecay):
        # Find prediction error
        predErrorExp = targetExp - self._predictionsPrev

        # Propagate error
        hiddenError = np.dot(self._predictiveWeights.T, predErrorExp)

        hiddenError = np.multiply(hiddenError, self._statesPrev)

        # Update feed forward and recurrent weights
        self._inputTraces = self._inputTraces * traceDecay + self._input
        self._stateTraces = self._stateTraces * traceDecay + self._statesPrev

        self._feedForwardWeights += learnEncoderRate * (np.dot(self._states, self._inputTraces.T) - np.dot(self._states.T, self._feedForwardWeights))
        self._recurrentWeights += learnRecurrentRate * (np.dot(self._states, self._statesPrev.T) - np.dot(self._states.T, self._recurrentWeights))
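        # The decoder updates below are gated by `reinforce` (derived from the
        # sign of the TD error in Agent.simStep), so the accumulated prediction
        # traces take effect only when the outcome beat the value estimate.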

        # Update predictive and feed back weights
        self._predictiveTraces = self._predictiveTraces * traceDecay + np.dot(predErrorExp, self._statesPrev.T)
        self._feedBackTraces = self._feedBackTraces * traceDecay + np.dot(predErrorExp, feedBackPrev.T)

        self._predictiveWeights += learnDecoderRate * reinforce * self._predictiveTraces
        self._feedBackWeights += learnDecoderRate * reinforce * self._feedBackTraces

        # Update thresholds
        self._biases += learnBiasRate * (self._activeRatio - self._states)

--------------------------------------------------------------------------------
/MiniNeoRL/paddle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/222464/MiniNeoRL/cf9fd6128913be3603a371766cf2e58c0fca2c2b/MiniNeoRL/paddle.png

--------------------------------------------------------------------------------
/NeoRL_presentation.odp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/222464/MiniNeoRL/cf9fd6128913be3603a371766cf2e58c0fca2c2b/NeoRL_presentation.odp

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Tiny Python version of NeoRL

Blog: [http://twistedkeyboardsoftware.com/](http://twistedkeyboardsoftware.com/)

C++ GPU version: [https://github.com/222464/NeoRL](https://github.com/222464/NeoRL)
--------------------------------------------------------------------------------
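Dependencies (judging from the demo imports): `numpy`, plus `pygame` for the RL
demo. Run the demos from inside the `MiniNeoRL` directory so the relative image
paths (`ball.png`, `paddle.png`) resolve, e.g. `python MiniNeoRL_RL_Demo.py` or
`python MiniNeoRL_Pred_Demo.py`.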