├── .gitignore
├── LICENSE.md
├── MiniNeoRL.sln
├── MiniNeoRL
│   ├── MiniNeoRL.pyproj
│   ├── MiniNeoRL_Pred_Demo.py
│   ├── MiniNeoRL_RL_Demo.py
│   ├── ball.png
│   ├── neo
│   │   ├── Agent.py
│   │   ├── Hierarchy.py
│   │   ├── Layer.py
│   │   └── LayerRL.py
│   └── paddle.png
├── NeoRL_presentation.odp
└── README.md

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
MiniNeoRL
Copyright (C) 2015 Eric Laukien

This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.

Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not
   claim that you wrote the original software. If you use this software
   in a product, an acknowledgement in the product documentation would be
   appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
   misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.

------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/MiniNeoRL.sln:
--------------------------------------------------------------------------------
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 14
VisualStudioVersion = 14.0.24720.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "MiniNeoRL", "MiniNeoRL\MiniNeoRL.pyproj", "{A42870A8-744B-43F5-83DD-8308A6CB211D}"
EndProject
Global
	GlobalSection(SolutionConfigurationPlatforms) = preSolution
		Debug|Any CPU = Debug|Any CPU
		Release|Any CPU = Release|Any CPU
	EndGlobalSection
	GlobalSection(ProjectConfigurationPlatforms) = postSolution
		{A42870A8-744B-43F5-83DD-8308A6CB211D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{A42870A8-744B-43F5-83DD-8308A6CB211D}.Release|Any CPU.ActiveCfg = Release|Any CPU
	EndGlobalSection
	GlobalSection(SolutionProperties) = preSolution
		HideSolutionNode = FALSE
	EndGlobalSection
EndGlobal

--------------------------------------------------------------------------------
/MiniNeoRL/MiniNeoRL.pyproj:
--------------------------------------------------------------------------------
[Visual Studio Python Tools project file; the XML markup was lost in extraction.
Recoverable settings: Configuration Debug, SchemaVersion 2.0, ProjectGuid
a42870a8-744b-43f5-83dd-8308a6cb211d, StartupFile MiniNeoRL_RL_Demo.py, project
name and root namespace MiniNeoRL, and Code items for the five .py files.]

--------------------------------------------------------------------------------
/MiniNeoRL/MiniNeoRL_Pred_Demo.py:
--------------------------------------------------------------------------------
from neo.Hierarchy import Hierarchy
import numpy as np

sequence = [
    [ 0.0, 0.0, 0.0, 1.0 ],
    [ 0.0, 0.0, 1.0, 1.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 1.0, 0.0 ],
    [ 0.0, 0.0, 0.0, 1.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 1.0, 0.0 ],
    [ 0.0, 0.0, 0.0, 1.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 1.0, 0.0 ],
    [ 0.0, 0.0, 0.0, 1.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 1.0, 0.0 ],
    [ 0.0, 0.0, 0.0, 1.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 1.0, 0.0 ],
    [ 0.0, 0.0, 0.0, 1.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 1.0, 0.0 ],
    [ 0.0, 0.0, 0.0, 1.0 ],
    [ 1.0, 0.0, 0.0, 0.0 ],
    [ 0.0, 1.0, 0.0, 0.0 ]
]

h = Hierarchy(4, [ 40, 40, 40 ], -0.01, 0.01, 0.1)

averageError = 0

for i in range(0, 10000):
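    # Present the current sequence element as a column vector; the remaining
    # arguments are the encoder, recurrent, decoder, and bias learning rates,
    # followed by the eligibility-trace decay (see Hierarchy.simStep).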
    h.simStep(np.matrix([sequence[i % len(sequence)]]).T, 0.0001, 0.0001, 0.001, 0.001, 0.95)

    error = None

    if np.allclose(np.greater(h.getPrediction(), 0.5), np.matrix([sequence[(i + 1) % len(sequence)]]).T):
        error = 0
    else:
        error = 1

    averageError = 0.99 * averageError + 0.01 * error

    #print(h._layers[0]._states)

    print(str(i % 4) + str(np.matrix([sequence[i % len(sequence)]]).T.ravel()) + " " + str(np.greater(h.getPrediction(), 0.5).ravel()) + " Error: " + str(error) + " Average Error: " + str(averageError))

--------------------------------------------------------------------------------
/MiniNeoRL/MiniNeoRL_RL_Demo.py:
--------------------------------------------------------------------------------
from neo.Agent import Agent
import numpy as np
import pygame

# The environment
displayWidth = 600
displayHeight = 600

ballPosition = np.array([ np.random.rand(), np.random.rand() * 0.5 + 0.5 ])

ballVelocity = np.array([ 0.353, 1.0 ]) * 0.04

paddleX = 0.5

ballRadius = 16.0 / displayWidth
paddleRadius = 64.0 / displayWidth

encoderSize = 10
numInputs = 5
numActions = 1

a = Agent(numInputs * encoderSize, numActions, [ 50, 50 ], -0.1, 0.1, 0.1)

averageReward = 0.0

reward = 0.0
prevReward = 0.0

rewardPunishmentTime = 2.0
punishmentTimer = 0.0
rewardTimer = 0.0

# Resources
ballImage = pygame.image.load("ball.png")
paddleImage = pygame.image.load("paddle.png")

# Game setup
pygame.init()

display = pygame.display.set_mode((displayWidth, displayHeight))
clock = pygame.time.Clock()
done = False

dir = 1.0

timer = 0.0

while not done:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            done = True

        if event.type == pygame.KEYDOWN:
            if event.key == pygame.K_LEFT:
                dir = -1.0
            if event.key == pygame.K_RIGHT:
                dir = 1.0

    # Update physics
    ballPosition += ballVelocity
    #timer += 1.0
    #ballPosition[0] = np.sin(timer * 0.05) * 0.5 + 0.5
    #ballPosition[1] = 0.5

    if ballPosition[0] < 0.0:
        ballPosition[0] = 0.0
        ballVelocity[0] *= -1.0
    elif ballPosition[0] > 1.0:
        ballPosition[0] = 1.0
        ballVelocity[0] *= -1.0

    if ballPosition[1] < 32.0 / displayWidth:
        # If hit paddle
        if ballPosition[0] + ballRadius > paddleX - paddleRadius and ballPosition[0] - ballRadius < paddleX + paddleRadius:
            rewardTimer = rewardPunishmentTime

            # Bounce ball
            ballPosition[1] = 32.0 / displayWidth
            ballVelocity[1] *= -1.0
        else:
            punishmentTimer = rewardPunishmentTime

            # Reset ball
            ballPosition = np.array([ np.random.rand(), np.random.rand() * 0.5 + 0.5 ])

            if np.random.rand() < 0.5:
                ballVelocity = np.array([ 0.353, 1.0 ]) * 0.04
            else:
                ballVelocity = np.array([ -0.353, 1.0 ]) * 0.04

    elif ballPosition[1] > 1.0:
        ballPosition[1] = 1.0
        ballVelocity[1] *= -1.0

    reward = (rewardTimer > 0.0) # - (punishmentTimer > 0.0)

    #reward = reward * 0.5 + 0.5

    averageReward = 0.99 * averageReward + 0.01 * reward

    # Control
    inputs = [ paddleX * 2.0 - 1.0, ballPosition[0] * 2.0 - 1.0, ballPosition[1] * 2.0 - 1.0, ballVelocity[0] * 30.0, ballVelocity[1] * 30.0 ]

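    # Sanity-check the hand-built feature list above; each scalar in `inputs`
    # is then expanded into a coarse one-hot code over encoderSize bins, so the
    # agent receives numInputs * encoderSize binary features.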
    assert(len(inputs) == numInputs)

    inputArr = []

    encoderSharpness = 30.0

    for v in inputs:
        for i in range(0, encoderSize):
            center = i / encoderSize * 2.0 - 1.0
            delta = center - v
            #intensity = np.exp(-delta * delta * encoderSharpness)

            intensity = np.absolute(delta) < 0.5 / encoderSize

            inputArr.append(intensity)

    #reward = dir * paddleX * 0.01

    #reward = np.abs(paddleX - ballPosition[0]) < 0.1

    a.simStep(reward, 0.001, 0.95, 0.05, np.matrix([inputArr]).T, 0.001, 0.001, 0.01, 0.01, 0.92)

    print(a._prevValue)

    prevReward = reward

    if rewardTimer > 0.0:
        rewardTimer -= 1.0
    if punishmentTimer > 0.0:
        punishmentTimer -= 1.0

    paddleX = np.minimum(1.0, np.maximum(0.0, paddleX + 0.2 * np.sum(a.getActions()) / numActions))

    # Render
    display.fill((255,255,255))

    display.blit(paddleImage, (displayWidth * paddleX - 64.0, displayHeight - 32.0))
    display.blit(ballImage, (displayWidth * ballPosition[0] - 16.0, displayHeight * (1.0 - ballPosition[1]) - 16.0))

    pygame.display.flip()
    clock.tick(60)

--------------------------------------------------------------------------------
/MiniNeoRL/ball.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/222464/MiniNeoRL/cf9fd6128913be3603a371766cf2e58c0fca2c2b/MiniNeoRL/ball.png

--------------------------------------------------------------------------------
/MiniNeoRL/neo/Agent.py:
--------------------------------------------------------------------------------
import numpy as np
from neo.LayerRL import LayerRL

class Agent:
    """A hierarchy of fully connected NeoRL layers that functions as a reinforcement learning agent"""

    def __init__(self, numInputs, numActions, layerSizes, initMinWeight, initMaxWeight, activeRatio):
        self._layers = []

        self._numInputs = numInputs
        self._numActions = numActions

        self._actions = np.zeros((numActions, 1))
        self._actionsExploratory = np.zeros((numActions, 1))

        self._qPredictiveWeights = np.random.rand(1, layerSizes[0])
        self._qPredictiveTraces = np.zeros((1, layerSizes[0]))

        self._qFeedBackWeights = np.random.rand(1, layerSizes[0])
        self._qFeedBackTraces = np.zeros((1, layerSizes[0]))

        self._averageAbsTDError = 1.0

        mask = []

        for i in range(0, self._numInputs):
            mask.append(0.0)

        for i in range(0, self._numActions):
            mask.append(1.0)

        self._actionMask = np.matrix([mask]).T

        self._prevValue = 0.0

        # Create layers
        for l in range(0, len(layerSizes)):
            layer = None

            if l == 0:
                if l < len(layerSizes) - 1:
                    layer = LayerRL(numInputs + numActions, layerSizes[l], layerSizes[l], initMinWeight, initMaxWeight, activeRatio)
                else:
                    layer = LayerRL(numInputs + numActions, layerSizes[l], 1, initMinWeight, initMaxWeight, activeRatio)
            else:
                if l < len(layerSizes) - 1:
                    layer = LayerRL(layerSizes[l - 1], layerSizes[l], layerSizes[l], initMinWeight, initMaxWeight, activeRatio)
                else:
                    layer = LayerRL(layerSizes[l - 1], layerSizes[l], 1, initMinWeight, initMaxWeight, activeRatio)

            self._layers.append(layer)

    def simStep(self, reward, qAlpha, qGamma, exploration, input, learnEncoderRate, learnRecurrentRate,
                learnDecoderRate, learnBiasRate, traceDecay):
        assert(len(input) == self._numInputs)

        usedInputArr = []

        for i in range(0, self._numInputs):
            usedInputArr.append(input.item(i))

        for i in range(0, self._numActions):
            usedInputArr.append(self._actionsExploratory.item(i))

        usedInput = np.matrix([ usedInputArr ]).T

        # Up pass
        for l in range(0, len(self._layers)):
            if l == 0:
                self._layers[l].upPass(usedInput)
            else:
                self._layers[l].upPass(self._layers[l - 1]._states)

        # Down pass
        for l in range(0, len(self._layers)):
            rl = len(self._layers) - 1 - l

            if rl < len(self._layers) - 1:
                if rl == 0:
                    self._layers[rl].downPass(self._layers[rl + 1]._predictions, False)
                else:
                    self._layers[rl].downPass(self._layers[rl + 1]._predictions, True)
            else:
                if rl == 0:
                    self._layers[rl].downPass(np.matrix([[ 0 ]]), False)
                else:
                    self._layers[rl].downPass(np.matrix([[ 0 ]]), True)

        # Get Q
        q = 0.0

        if len(self._layers) > 1:
            q = np.dot(self._qPredictiveWeights, self._layers[0]._states) + np.dot(self._qFeedBackWeights, self._layers[1]._predictions)
        else:
            q = np.dot(self._qPredictiveWeights, self._layers[0]._states)

        tdError = reward + qGamma * q.item(0) - self._prevValue

        self._qPredictiveWeights += qAlpha * tdError * self._qPredictiveTraces

        self._qPredictiveTraces = self._qPredictiveTraces * traceDecay + self._layers[0]._states.T

        if len(self._layers) > 1:
            self._qFeedBackWeights += qAlpha * tdError * self._qFeedBackTraces

            self._qFeedBackTraces = self._qFeedBackTraces * traceDecay + self._layers[1]._predictions.T

        predInputExpArr = []

        for i in range(0, self._numInputs):
            predInputExpArr.append(input.item(i))

        for i in range(0, self._numActions):
            predInputExpArr.append(self._actionsExploratory.item(i))

        predInputExp = np.matrix([ predInputExpArr ]).T

        reinforce = np.sign(tdError) * 0.5 + 0.5

        # Learn
        for l in range(0, len(self._layers)):
            if l == 0:
                if l < len(self._layers) - 1:
                    self._layers[l].learn(reinforce, predInputExp, self._layers[l + 1]._predictionsPrev, learnEncoderRate, learnRecurrentRate, learnDecoderRate, learnBiasRate, traceDecay)
                else:
                    self._layers[l].learn(reinforce, predInputExp, np.matrix([[ 0 ]]), learnEncoderRate, learnRecurrentRate, learnDecoderRate, learnBiasRate, traceDecay)
            else:
                if l < len(self._layers) - 1:
                    self._layers[l].learn(reinforce, self._layers[l - 1]._states, self._layers[l + 1]._predictionsPrev, learnEncoderRate, learnRecurrentRate, learnDecoderRate, learnBiasRate, traceDecay)
                else:
                    self._layers[l].learn(reinforce, self._layers[l - 1]._states, np.matrix([[ 0 ]]), learnEncoderRate, learnRecurrentRate, learnDecoderRate, learnBiasRate, traceDecay)

        # Determine action
        for i in range(0, self._numActions):
            self._actions[i] = np.minimum(1.0, np.maximum(-1.0, self.getPrediction().item(self._numInputs + i)))

            if np.random.rand() < exploration:
                self._actionsExploratory[i] = np.random.rand() * 2.0 - 1.0
            else:
                self._actionsExploratory[i] = self._actions[i]

        self._prevValue = q.item(0)

    def getPrediction(self):
        return self._layers[0]._predictions

    def getActions(self):
        return self._actionsExploratory
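
# A minimal usage sketch, mirroring MiniNeoRL_RL_Demo.py (the parameter values
# below are the demo's choices, not canonical defaults):
#
#   a = Agent(50, 1, [ 50, 50 ], -0.1, 0.1, 0.1)
#   obs = np.matrix([ inputArr ]).T   # (numInputs, 1) column of encoded inputs
#   a.simStep(reward, 0.001, 0.95, 0.05, obs, 0.001, 0.001, 0.01, 0.01, 0.92)
#   action = a.getActions()          # exploratory actions in [-1, 1]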

--------------------------------------------------------------------------------
/MiniNeoRL/neo/Hierarchy.py:
--------------------------------------------------------------------------------
import numpy as np
from neo.Layer import Layer

class Hierarchy:
    """A hierarchy of fully connected NeoRL layers"""

    def __init__(self, numInputs, layerSizes, initMinWeight, initMaxWeight, activeRatio):
        self._layers = []

        # Create layers
        for l in range(0, len(layerSizes)):
            layer = None

            if l == 0:
                if l < len(layerSizes) - 1:
                    layer = Layer(numInputs, layerSizes[l], layerSizes[l], initMinWeight, initMaxWeight, activeRatio)
                else:
                    layer = Layer(numInputs, layerSizes[l], 1, initMinWeight, initMaxWeight, activeRatio)
            else:
                if l < len(layerSizes) - 1:
                    layer = Layer(layerSizes[l - 1], layerSizes[l], layerSizes[l], initMinWeight, initMaxWeight, activeRatio)
                else:
                    layer = Layer(layerSizes[l - 1], layerSizes[l], 1, initMinWeight, initMaxWeight, activeRatio)

            self._layers.append(layer)

    def simStep(self, input, learnEncoderRate, learnRecurrentRate, learnDecoderRate, learnBiasRate, traceDecay):
        # Up pass
        for l in range(0, len(self._layers)):
            if l == 0:
                self._layers[l].upPass(input)
            else:
                self._layers[l].upPass(self._layers[l - 1]._states)

        # Down pass
        for l in range(0, len(self._layers)):
            rl = len(self._layers) - 1 - l

            if rl < len(self._layers) - 1:
                self._layers[rl].downPass(self._layers[rl + 1]._predictions, rl != 0)
            else:
                self._layers[rl].downPass(np.matrix([[ 0 ]]), rl != 0)

        # Learn
        for l in range(0, len(self._layers)):
            if l == 0:
                if l < len(self._layers) - 1:
                    self._layers[l].learn(input, self._layers[l + 1]._predictionsPrev, learnEncoderRate, learnRecurrentRate, learnDecoderRate, learnBiasRate, traceDecay)
                else:
                    self._layers[l].learn(input, np.matrix([[ 0 ]]), learnEncoderRate, learnRecurrentRate, learnDecoderRate, learnBiasRate, traceDecay)
            else:
                if l < len(self._layers) - 1:
                    self._layers[l].learn(self._layers[l - 1]._states, self._layers[l + 1]._predictionsPrev, learnEncoderRate, learnRecurrentRate, learnDecoderRate, learnBiasRate, traceDecay)
                else:
                    self._layers[l].learn(self._layers[l - 1]._states, np.matrix([[ 0 ]]), learnEncoderRate, learnRecurrentRate, learnDecoderRate, learnBiasRate, traceDecay)

    def getPrediction(self):
        return self._layers[0]._predictions

--------------------------------------------------------------------------------
/MiniNeoRL/neo/Layer.py:
--------------------------------------------------------------------------------
import numpy as np
from operator import itemgetter

class Layer:
    """A fully-connected NeoRL layer"""

    def __init__(self, numInputs, numHidden, numFeedBack, initMinWeight, initMaxWeight, activeRatio):
        self._input = np.zeros((numInputs, 1))
        self._inputPrev = np.zeros((numInputs, 1))

        self._feedForwardWeights = np.random.rand(numHidden, numInputs) * (initMaxWeight - initMinWeight) + initMinWeight

        self._recurrentWeights = np.random.rand(numHidden, numHidden) * (initMaxWeight - initMinWeight) + initMinWeight

        self._predictiveWeights = np.random.rand(numInputs, numHidden) * (initMaxWeight - initMinWeight) + initMinWeight

        self._feedBackWeights = np.random.rand(numInputs, numFeedBack) * (initMaxWeight - initMinWeight) + initMinWeight
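        # Four weight groups: the feed-forward (encoder) and recurrent weights
        # drive the sparse hidden states on the up pass; the predictive
        # (decoder) and feed-back weights produce the input predictions on the
        # down pass.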

        self._stateTraces = np.zeros((numHidden, 1))

        self._inputTraces = np.zeros((numInputs, 1))

        self._biases = np.zeros((numHidden, 1)) #np.random.rand(numHidden, 1) * (initMaxWeight - initMinWeight) + initMinWeight

        self._states = np.zeros((numHidden, 1))
        self._statesPrev = np.zeros((numHidden, 1))

        self._feedForwardLearn = np.zeros((numHidden, 1))
        self._recurrentLearn = np.zeros((numHidden, 1))

        self._predictions = np.zeros((numInputs, 1))
        self._predictionsPrev = np.zeros((numInputs, 1))

        self._averageSquaredError = np.zeros((numHidden, 1))

        self._activeRatio = activeRatio

    def upPass(self, input):
        self._inputPrev = self._input

        self._input = input

        self._statesPrev = self._states

        numActive = int(self._activeRatio * len(self._states))

        # Activate
        activations = self._biases + np.dot(self._feedForwardWeights, input) + np.dot(self._recurrentWeights, self._statesPrev)

        # Generate tuples for sorting
        activationsPairs = []

        for i in range(0, len(self._states)):
            activationsPairs.append((activations[i], i))

        # Sort
        activationsPairs = sorted(activationsPairs, key=itemgetter(0))

        # Use sorted information for inhibition
        self._states = np.zeros((len(self._states), 1))

        for i in range(0, numActive):
            self._states[activationsPairs[len(activationsPairs) - 1 - i][1]] = 1.0

    def downPass(self, feedBack, thresholdedPred = True):
        self._predictionsPrev = self._predictions

        # Find states
        self._predictions = np.dot(self._predictiveWeights, self._states) + np.dot(self._feedBackWeights, feedBack)

        if thresholdedPred:
            self._predictions[self._predictions > 0.5] = 1.0
            self._predictions[self._predictions <= 0.5] = 0.0

    def learn(self, target, feedBackPrev, learnEncoderRate, learnRecurrentRate, learnDecoderRate, learnBiasRate, traceDecay):
        # Find prediction error
        predError = target - self._predictionsPrev

        # Propagate error
        hiddenError = np.dot(self._predictiveWeights.T, predError)

        hiddenError = np.multiply(hiddenError, self._statesPrev)

        # Update feed forward and recurrent weights
        self._inputTraces = self._inputTraces * traceDecay + self._input
        self._stateTraces = self._stateTraces * traceDecay + self._statesPrev

        self._feedForwardWeights += learnEncoderRate * (np.dot(self._states, self._inputTraces.T) - np.dot(self._states.T, self._feedForwardWeights))
        self._recurrentWeights += learnRecurrentRate * (np.dot(self._states, self._statesPrev.T) - np.dot(self._states.T, self._recurrentWeights))

        # Update predictive and feed back weights
        self._predictiveWeights += learnDecoderRate * np.dot(predError, self._statesPrev.T)
        self._feedBackWeights += learnDecoderRate * np.dot(predError, feedBackPrev.T)

        # Update thresholds
        self._biases += learnBiasRate * (self._activeRatio - self._states)

--------------------------------------------------------------------------------
/MiniNeoRL/neo/LayerRL.py:
--------------------------------------------------------------------------------
import numpy as np
from operator import itemgetter

class LayerRL:
    """A fully-connected NeoRL layer for RL"""

    def __init__(self, numInputs, numHidden, numFeedBack, initMinWeight, initMaxWeight, activeRatio):
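        # Weights start uniform in [initMinWeight, initMaxWeight]; activeRatio
        # sets the fraction of hidden units left active by the sparse
        # inhibition in upPass.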
        self._input = np.zeros((numInputs, 1))

        self._feedForwardWeights = np.random.rand(numHidden, numInputs) * (initMaxWeight - initMinWeight) + initMinWeight

        self._recurrentWeights = np.random.rand(numHidden, numHidden) * (initMaxWeight - initMinWeight) + initMinWeight

        self._stateTraces = np.zeros((numHidden, 1))

        self._inputTraces = np.zeros((numInputs, 1))

        self._predictiveWeights = np.random.rand(numInputs, numHidden) * (initMaxWeight - initMinWeight) + initMinWeight
        self._predictiveTraces = np.zeros((numInputs, numHidden))

        self._feedBackWeights = np.random.rand(numInputs, numFeedBack) * (initMaxWeight - initMinWeight) + initMinWeight
        self._feedBackTraces = np.zeros((numInputs, numFeedBack))

        self._biases = np.zeros((numHidden, 1)) #np.random.rand(numHidden, 1) * (initMaxWeight - initMinWeight) + initMinWeight

        self._statesRecurrent = np.zeros((numHidden, 1))
        self._statesRecurrentPrev = np.zeros((numHidden, 1))

        self._statesFeedForward = np.zeros((numHidden, 1))

        self._states = np.zeros((numHidden, 1))
        self._statesPrev = np.zeros((numHidden, 1))

        self._feedForwardLearn = np.zeros((numHidden, 1))
        self._recurrentLearn = np.zeros((numHidden, 1))

        self._predictions = np.zeros((numInputs, 1))
        self._predictionsPrev = np.zeros((numInputs, 1))

        self._activeRatio = activeRatio

    def upPass(self, input):
        self._input = input
        self._statesPrev = self._states
        self._statesRecurrentPrev = self._statesRecurrent

        numActive = int(self._activeRatio * len(self._states))

        # Activate
        activations = self._biases + np.dot(self._feedForwardWeights, input) + np.dot(self._recurrentWeights, self._statesPrev)

        # Generate tuples for sorting
        activationsPairs = []

        for i in range(0, len(self._states)):
            activationsPairs.append((activations[i], i))

        # Sort
        activationsPairs = sorted(activationsPairs, key=itemgetter(0))

        # Use sorted information for inhibition
        self._states = np.zeros((len(self._states), 1))

        for i in range(0, numActive):
            self._states[activationsPairs[len(activationsPairs) - 1 - i][1]] = 1.0

    def downPass(self, feedBack, thresholdedPred = True):
        self._predictionsPrev = self._predictions

        # Find states
        self._predictions = np.dot(self._predictiveWeights, self._states) + np.dot(self._feedBackWeights, feedBack)

        if thresholdedPred:
            self._predictions[self._predictions > 0.5] = 1.0
            self._predictions[self._predictions <= 0.5] = 0.0
        else:
            self._predictions = np.tanh(self._predictions)

    def learn(self, reinforce, targetExp, feedBackPrev, learnEncoderRate, learnRecurrentRate, learnDecoderRate, learnBiasRate, traceDecay):
        # Find prediction error
        predErrorExp = targetExp - self._predictionsPrev

        # Propagate error
        hiddenError = np.dot(self._predictiveWeights.T, predErrorExp)

        hiddenError = np.multiply(hiddenError, self._statesPrev)

        # Update feed forward and recurrent weights
        self._inputTraces = self._inputTraces * traceDecay + self._input
        self._stateTraces = self._stateTraces * traceDecay + self._statesPrev

        self._feedForwardWeights += learnEncoderRate * (np.dot(self._states, self._inputTraces.T) - np.dot(self._states.T, self._feedForwardWeights))
        self._recurrentWeights += learnRecurrentRate * (np.dot(self._states, self._statesPrev.T) - np.dot(self._states.T, self._recurrentWeights))
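        # The decoder updates below are gated by `reinforce` (derived from the
        # sign of the TD error in Agent.simStep), so the accumulated prediction
        # traces take effect only when the outcome beat the value estimate.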

        # Update predictive and feed back weights
        self._predictiveTraces = self._predictiveTraces * traceDecay + np.dot(predErrorExp, self._statesPrev.T)
        self._feedBackTraces = self._feedBackTraces * traceDecay + np.dot(predErrorExp, feedBackPrev.T)

        self._predictiveWeights += learnDecoderRate * reinforce * self._predictiveTraces
        self._feedBackWeights += learnDecoderRate * reinforce * self._feedBackTraces

        # Update thresholds
        self._biases += learnBiasRate * (self._activeRatio - self._states)

--------------------------------------------------------------------------------
/MiniNeoRL/paddle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/222464/MiniNeoRL/cf9fd6128913be3603a371766cf2e58c0fca2c2b/MiniNeoRL/paddle.png

--------------------------------------------------------------------------------
/NeoRL_presentation.odp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/222464/MiniNeoRL/cf9fd6128913be3603a371766cf2e58c0fca2c2b/NeoRL_presentation.odp

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Tiny Python version of NeoRL

Blog: [http://twistedkeyboardsoftware.com/](http://twistedkeyboardsoftware.com/)

C++ GPU version: [https://github.com/222464/NeoRL](https://github.com/222464/NeoRL)
--------------------------------------------------------------------------------
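Dependencies (judging from the demo imports): `numpy`, plus `pygame` for the RL
demo. Run the demos from inside the `MiniNeoRL` directory so the relative image
paths (`ball.png`, `paddle.png`) resolve, e.g. `python MiniNeoRL_RL_Demo.py` or
`python MiniNeoRL_Pred_Demo.py`.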