├── LICENSE ├── README.md ├── cpp └── src │ ├── .dropbox │ ├── .kdev4 │ └── src.kdev4 │ ├── Agent.h │ ├── AgentManic.cpp │ ├── AgentManic.h │ ├── AgentRandy.cpp │ ├── AgentRandy.h │ ├── ContentmentModel.cpp │ ├── ContentmentModel.h │ ├── DriftingPlatform.cpp │ ├── DriftingPlatform.h │ ├── Makefile │ ├── Mentor.h │ ├── ObservationModel.cpp │ ├── ObservationModel.h │ ├── PlanningSystem.cpp │ ├── PlanningSystem.h │ ├── Test.h │ ├── TransitionModel.cpp │ ├── TransitionModel.h │ └── main.cpp ├── docs └── index.html ├── index.html └── java ├── class └── stub.txt └── src ├── AgentManic.java ├── AgentRandy.java ├── ContentmentModel.java ├── DriftingPlatform.java ├── IAgent.java ├── IMentor.java ├── ITest.java ├── ITutor.java ├── Json.java ├── Layer.java ├── Main.java ├── Makefile ├── Matrix.java ├── NeuralNet.java ├── ObservationModel.java ├── PlanningSystem.java ├── TransitionModel.java └── Vec.java /LICENSE: -------------------------------------------------------------------------------- 1 | CC0 1.0 Universal 2 | 3 | Statement of Purpose 4 | 5 | The laws of most jurisdictions throughout the world automatically confer 6 | exclusive Copyright and Related Rights (defined below) upon the creator and 7 | subsequent owner(s) (each and all, an "owner") of an original work of 8 | authorship and/or a database (each, a "Work"). 9 | 10 | Certain owners wish to permanently relinquish those rights to a Work for the 11 | purpose of contributing to a commons of creative, cultural and scientific 12 | works ("Commons") that the public can reliably and without fear of later 13 | claims of infringement build upon, modify, incorporate in other works, reuse 14 | and redistribute as freely as possible in any form whatsoever and for any 15 | purposes, including without limitation commercial purposes. These owners may 16 | contribute to the Commons to promote the ideal of a free culture and the 17 | further production of creative, cultural and scientific works, or to gain 18 | reputation or greater distribution for their Work in part through the use and 19 | efforts of others. 20 | 21 | For these and/or other purposes and motivations, and without any expectation 22 | of additional consideration or compensation, the person associating CC0 with a 23 | Work (the "Affirmer"), to the extent that he or she is an owner of Copyright 24 | and Related Rights in the Work, voluntarily elects to apply CC0 to the Work 25 | and publicly distribute the Work under its terms, with knowledge of his or her 26 | Copyright and Related Rights in the Work and the meaning and intended legal 27 | effect of CC0 on those rights. 28 | 29 | 1. Copyright and Related Rights. A Work made available under CC0 may be 30 | protected by copyright and related or neighboring rights ("Copyright and 31 | Related Rights"). Copyright and Related Rights include, but are not limited 32 | to, the following: 33 | 34 | i. the right to reproduce, adapt, distribute, perform, display, communicate, 35 | and translate a Work; 36 | 37 | ii. moral rights retained by the original author(s) and/or performer(s); 38 | 39 | iii. publicity and privacy rights pertaining to a person's image or likeness 40 | depicted in a Work; 41 | 42 | iv. rights protecting against unfair competition in regards to a Work, 43 | subject to the limitations in paragraph 4(a), below; 44 | 45 | v. rights protecting the extraction, dissemination, use and reuse of data in 46 | a Work; 47 | 48 | vi. 
database rights (such as those arising under Directive 96/9/EC of the 49 | European Parliament and of the Council of 11 March 1996 on the legal 50 | protection of databases, and under any national implementation thereof, 51 | including any amended or successor version of such directive); and 52 | 53 | vii. other similar, equivalent or corresponding rights throughout the world 54 | based on applicable law or treaty, and any national implementations thereof. 55 | 56 | 2. Waiver. To the greatest extent permitted by, but not in contravention of, 57 | applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and 58 | unconditionally waives, abandons, and surrenders all of Affirmer's Copyright 59 | and Related Rights and associated claims and causes of action, whether now 60 | known or unknown (including existing as well as future claims and causes of 61 | action), in the Work (i) in all territories worldwide, (ii) for the maximum 62 | duration provided by applicable law or treaty (including future time 63 | extensions), (iii) in any current or future medium and for any number of 64 | copies, and (iv) for any purpose whatsoever, including without limitation 65 | commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes 66 | the Waiver for the benefit of each member of the public at large and to the 67 | detriment of Affirmer's heirs and successors, fully intending that such Waiver 68 | shall not be subject to revocation, rescission, cancellation, termination, or 69 | any other legal or equitable action to disrupt the quiet enjoyment of the Work 70 | by the public as contemplated by Affirmer's express Statement of Purpose. 71 | 72 | 3. Public License Fallback. Should any part of the Waiver for any reason be 73 | judged legally invalid or ineffective under applicable law, then the Waiver 74 | shall be preserved to the maximum extent permitted taking into account 75 | Affirmer's express Statement of Purpose. In addition, to the extent the Waiver 76 | is so judged Affirmer hereby grants to each affected person a royalty-free, 77 | non transferable, non sublicensable, non exclusive, irrevocable and 78 | unconditional license to exercise Affirmer's Copyright and Related Rights in 79 | the Work (i) in all territories worldwide, (ii) for the maximum duration 80 | provided by applicable law or treaty (including future time extensions), (iii) 81 | in any current or future medium and for any number of copies, and (iv) for any 82 | purpose whatsoever, including without limitation commercial, advertising or 83 | promotional purposes (the "License"). The License shall be deemed effective as 84 | of the date CC0 was applied by Affirmer to the Work. Should any part of the 85 | License for any reason be judged legally invalid or ineffective under 86 | applicable law, such partial invalidity or ineffectiveness shall not 87 | invalidate the remainder of the License, and in such case Affirmer hereby 88 | affirms that he or she will not (i) exercise any of his or her remaining 89 | Copyright and Related Rights in the Work or (ii) assert any associated claims 90 | and causes of action with respect to the Work, in either case contrary to 91 | Affirmer's express Statement of Purpose. 92 | 93 | 4. Limitations and Disclaimers. 94 | 95 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 96 | surrendered, licensed or otherwise affected by this document. 97 | 98 | b. 
Affirmer offers the Work as-is and makes no representations or warranties 99 | of any kind concerning the Work, express, implied, statutory or otherwise, 100 | including without limitation warranties of title, merchantability, fitness 101 | for a particular purpose, non infringement, or the absence of latent or 102 | other defects, accuracy, or the present or absence of errors, whether or not 103 | discoverable, all to the greatest extent permissible under applicable law. 104 | 105 | c. Affirmer disclaims responsibility for clearing rights of other persons 106 | that may apply to the Work or any use thereof, including without limitation 107 | any person's Copyright and Related Rights in the Work. Further, Affirmer 108 | disclaims responsibility for obtaining any necessary consents, permissions 109 | or other rights required for any use of the Work. 110 | 111 | d. Affirmer understands and acknowledges that Creative Commons is not a 112 | party to this document and has no duty or obligation with respect to this 113 | CC0 or use of the Work. 114 | 115 | For more information, please see 116 | <http://creativecommons.org/publicdomain/zero/1.0/> 117 | 118 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Manic 3 | An implementation of the MANIC cognitive architecture. 4 | Some tests and other agents are also included for comparison. 5 | 6 | # How to build and run the Java version 7 | cd java/src 8 | javac Main.java 9 | java Main 10 | 11 | # How to build and run the C++ version 12 | install Waffles (see below) 13 | cd cpp/src 14 | make opt 15 | ../bin/manic 16 | 17 | # How to install Waffles (only needed for the C++ version) 18 | git clone https://github.com/mikegashler/waffles.git ./waffles 19 | cd waffles/src 20 | sudo make install 21 | 22 | # More documentation 23 | Please view docs/index.html in your favorite browser.
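# Embedding an agent in your own program
The test harnesses in cpp/src and java/src drive an agent with a simple observe/act loop. The following is a minimal, hypothetical C++ sketch of that loop, based on the interface in cpp/src/Agent.h (MyMentor stands in for your own Mentor subclass and is not part of this repository):

    GRand rand(0);
    AgentManic agent(rand);
    MyMentor mentor; // hypothetical class implementing Mentor::evaluatePlan
    agent.reset(mentor,
        2,   // observationDims
        2,   // beliefDims (should be <= observationDims)
        1,   // actionDims
        10); // maxPlanLength
    GVec obs(2);
    while(true)
    {
        // ... fill obs with values in [-1, 1] from your environment ...
        GVec& act = agent.think(obs); // returned values fall in [0, 1]
        // ... apply act to your environment ...
    }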
27 | -------------------------------------------------------------------------------- /cpp/src/.dropbox: -------------------------------------------------------------------------------- 1 | {"tag": "shared", "ns": 1077591357} -------------------------------------------------------------------------------- /cpp/src/.kdev4/src.kdev4: -------------------------------------------------------------------------------- 1 | [Buildset] 2 | BuildItems=@Variant(\x00\x00\x00\t\x00\x00\x00\x00\x01\x00\x00\x00\x0b\x00\x00\x00\x00\x01\x00\x00\x00\x06\x00s\x00r\x00c) 3 | 4 | [Launch] 5 | Launch Configurations=Launch Configuration 0 6 | 7 | [Launch][Launch Configuration 0] 8 | Configured Launch Modes=execute 9 | Configured Launchers=nativeAppLauncher 10 | Name=New Application Launcher 11 | Type=Native Application 12 | 13 | [Launch][Launch Configuration 0][Data] 14 | Arguments= 15 | Dependencies=@Variant(\x00\x00\x00\t\x00\x00\x00\x00\x00) 16 | Dependency Action=Nothing 17 | EnvironmentGroup=default 18 | Executable=file:///home/mike/tmp/manic_cpp/bin/manicdbg 19 | External Terminal=konsole --noclose --workdir %workdir -e %exe 20 | Project Target= 21 | Use External Terminal=false 22 | Working Directory= 23 | isExecutable=true 24 | -------------------------------------------------------------------------------- /cpp/src/Agent.h: -------------------------------------------------------------------------------- 1 | #ifndef AGENT_H 2 | #define AGENT_H 3 | 4 | #include 5 | #include 6 | 7 | using namespace GClasses; 8 | using std::string; 9 | 10 | class Mentor; 11 | class Tutor; 12 | 13 | class Agent 14 | { 15 | public: 16 | Agent() {} 17 | 18 | virtual ~Agent() {} 19 | 20 | /// Returns this agent's name 21 | virtual string getName() = 0; 22 | 23 | /// This method is called to initialize the agent in a new world. 24 | /// oracle is an object that helps the agent learn what to do in this world. 25 | /// observationDims is the number of double values that the agent observes each time step. 26 | /// beliefDims is the number of double values that the agent uses internally to model the state of the world. (It should generally be <= observationDims.) 27 | /// actionDims is the number of double values the agent uses to specify an action. 28 | /// maxPlanLength specifies the maximum number of time-steps into the future that the agent should attempt to plan. 29 | virtual void reset(Mentor& oracle, size_t observationDims, size_t beliefDims, size_t actionDims, size_t maxPlanLength) = 0; 30 | 31 | /// Tells the agent that the next observation passed to learnFromExperience does not follow 32 | /// from the previous one. This should be called when a game is started over, or when the state is 33 | /// adjusted in a manner that the agent is not expected to anticipate. 34 | virtual void teleport() = 0; 35 | 36 | /// Sets the mentor to use with this agent 37 | virtual void setMentor(Mentor* m) = 0; 38 | 39 | /// Sets the tutor to use with this agent. 40 | virtual void setTutor(Tutor* tutor, bool helpWithObservations, bool helpWithTransitions, bool helpWithContentment, bool helpWithPlanning) = 0; 41 | 42 | /// Predict the observation that will occur if plan is executed. 43 | virtual void anticipateObservation(const GMatrix& plan, GVec& obs) = 0; 44 | 45 | /// A vector of observations goes in. All observed values may be expected to fall between -1 and 1. 46 | /// Returns a vector of chosen actions. All returned values should fall between 0 and 1. 
47 | virtual GVec& think(GVec& observations) = 0; 48 | }; 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /cpp/src/AgentManic.cpp: -------------------------------------------------------------------------------- 1 | #include "AgentManic.h" 2 | 3 | AgentManic::AgentManic(GRand& r) 4 | : rand(r) 5 | { 6 | } 7 | 8 | // virtual 9 | AgentManic::~AgentManic() 10 | { 11 | delete(transitionModel); 12 | delete(observationModel); 13 | delete(contentmentModel); 14 | delete(planningSystem); 15 | } 16 | 17 | // virtual 18 | void AgentManic::reset(Mentor& oracle, size_t observationDims, size_t beliefDims, size_t actionDims, size_t maxPlanLength) 19 | { 20 | if(beliefDims > observationDims) 21 | throw Ex("Expected beliefDims to be <= observationDims"); 22 | transitionModel = new TransitionModel( 23 | actionDims + beliefDims, 24 | beliefDims, 25 | 2, // number of layers in the transition model 26 | 500, // size of short term memory for transitions 27 | 1000, // number of training iterations to perform with each new sample 28 | rand); 29 | observationModel = new ObservationModel( 30 | *transitionModel, 31 | observationDims, 32 | beliefDims, 33 | 2, // number of layers in the decoder 34 | 2, // number of layers in the encoder 35 | 500, // size of short term memory for observations 36 | 50, // number of training iterations to perform with each new sample 37 | 500, // number of iterations to calibrate beliefs to correspond with observations 38 | rand); 39 | contentmentModel = new ContentmentModel( 40 | beliefDims, 41 | 2, // number of layers in the contentment model 42 | 500, // size of short term memory for feedback from the mentor 43 | 50, // number of training iterations to perform with each new sample 44 | rand); 45 | planningSystem = new PlanningSystem( 46 | *this, 47 | *transitionModel, 48 | *observationModel, 49 | *contentmentModel, 50 | &oracle, 51 | actionDims, 52 | 30, // population size 53 | 50, // number of iterations to refine each member of the population per time step 54 | 500, // burn-in iterations (the number of times at the start to just pick a random action, so the transition function has a chance to explore its space) 55 | maxPlanLength, 56 | 0.99, // discount factor (to make short plans be preferred over long plans that ultimately arrive at nearly the same state) 57 | 0.0, // exploration rate (the probability that the agent will choose a random action, just to see what happens) 58 | rand); 59 | actions.resize(actionDims); 60 | actions.fill(0.0); 61 | beliefs.resize(beliefDims); 62 | beliefs.fill(0.0); 63 | anticipatedBeliefs.resize(beliefDims); 64 | anticipatedBeliefs.fill(0.0); 65 | teleport(); 66 | } 67 | 68 | 69 | AgentManic::AgentManic(GDomNode* pNode, GRand& r, Mentor& oracle) 70 | : rand(r) 71 | { 72 | transitionModel = new TransitionModel(pNode->field("transition"), r); 73 | observationModel = new ObservationModel(*transitionModel, pNode->field("observation"), r); 74 | contentmentModel = new ContentmentModel(pNode->field("contentment"), r); 75 | planningSystem = new PlanningSystem(pNode->field("planning"), *this, r, *transitionModel, *observationModel, *contentmentModel, &oracle); 76 | actions.resize(transitionModel->actionDims()); 77 | beliefs.deserialize(pNode->field("beliefs")); 78 | anticipatedBeliefs.resize(beliefs.size()); 79 | } 80 | 81 | 82 | GDomNode* AgentManic::marshal(GDom* pDoc) 83 | { 84 | GDomNode* pNode = pDoc->newObj(); 85 | pNode->addField(pDoc, "transition", transitionModel->marshal(pDoc)); 86 | 
pNode->addField(pDoc, "observation", observationModel->marshal(pDoc)); 87 | pNode->addField(pDoc, "contentment", contentmentModel->marshal(pDoc)); 88 | pNode->addField(pDoc, "planning", planningSystem->marshal(pDoc)); 89 | pNode->addField(pDoc, "beliefs", beliefs.serialize(pDoc)); 90 | return pNode; 91 | } 92 | 93 | 94 | void AgentManic::setMentor(Mentor* oracle) 95 | { 96 | planningSystem->setMentor(oracle); 97 | } 98 | 99 | 100 | void AgentManic::setTutor(Tutor* tutor, bool helpObservationFunction, bool helpTransitionFunction, bool helpContentmentModel, bool helpPlanningSystem) 101 | { 102 | observationModel->setTutor(helpObservationFunction ? tutor : nullptr); 103 | transitionModel->setTutor(helpTransitionFunction ? tutor : nullptr); 104 | contentmentModel->setTutor(helpContentmentModel ? tutor : nullptr); 105 | planningSystem->setTutor(helpPlanningSystem ? tutor : nullptr); 106 | } 107 | 108 | 109 | // virtual 110 | void AgentManic::teleport() 111 | { 112 | beliefs[0] = UNKNOWN_REAL_VALUE; 113 | } 114 | 115 | 116 | void AgentManic::learnFromExperience(GVec& observations) 117 | { 118 | // Learn to perceive the world a little better 119 | observationModel->trainIncremental(observations); 120 | 121 | // Refine beliefs to correspond with the new observations better 122 | observationModel->calibrateBeliefs(anticipatedBeliefs, observations); 123 | 124 | // Learn to anticipate consequences a little better 125 | if(beliefs[0] != UNKNOWN_REAL_VALUE) 126 | transitionModel->trainIncremental(beliefs, actions, anticipatedBeliefs); 127 | } 128 | 129 | 130 | GVec& AgentManic::decideWhatToDo() 131 | { 132 | // Make the anticipated beliefs the new beliefs 133 | beliefs.swapContents(anticipatedBeliefs); 134 | 135 | // Drop the first action in every plan 136 | planningSystem->advanceTime(); 137 | 138 | // Try to make the plans better 139 | planningSystem->refinePlans(beliefs); 140 | 141 | // Choose an action that is expected to maximize contentment (with the assistance of the mentor, if available) 142 | planningSystem->chooseNextActions(beliefs, actions); 143 | 144 | // Anticipate how the world will change with time 145 | transitionModel->anticipateNextBeliefs(beliefs, actions, anticipatedBeliefs); 146 | 147 | // Return the selected actions 148 | return actions; 149 | } 150 | 151 | 152 | // virtual 153 | void AgentManic::anticipateObservation(const GMatrix& plan, GVec& obs) 154 | { 155 | transitionModel->getFinalBeliefs(beliefs, plan, buf); 156 | observationModel->beliefsToObservations(buf, obs); 157 | } 158 | 159 | 160 | // virtual 161 | GVec& AgentManic::think(GVec& observations) 162 | { 163 | // Check the observations 164 | for(size_t i = 0; i < observations.size(); i++) { 165 | if(observations[i] < -1.0 || observations[i] > 1.0) 166 | throw Ex("Observed values must be between -1 and 1."); 167 | } 168 | 169 | learnFromExperience(observations); 170 | return decideWhatToDo(); 171 | } 172 | -------------------------------------------------------------------------------- /cpp/src/AgentManic.h: -------------------------------------------------------------------------------- 1 | #ifndef AGENTMANIC_H 2 | #define AGENTMANIC_H 3 | 4 | #include "Agent.h" 5 | #include 6 | #include 7 | #include 8 | #include "TransitionModel.h" 9 | #include "ObservationModel.h" 10 | #include "ContentmentModel.h" 11 | #include "PlanningSystem.h" 12 | #include 13 | 14 | using std::string; 15 | 16 | /// Implements a weak artificial general intelligence. 
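/// MANIC is built from four components that are trained online:
/// a TransitionModel (predicts how beliefs change when actions are taken),
/// an ObservationModel (an encoder/decoder pair that maps between observations and beliefs),
/// a ContentmentModel (estimates the utility of anticipated beliefs), and
/// a PlanningSystem (refines a population of candidate plans and chooses the next actions).
/// Each call to think() first learns from the new observations, then decides what to do.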
17 | class AgentManic : public Agent 18 | { 19 | public: 20 | GRand& rand; 21 | TransitionModel* transitionModel; 22 | ObservationModel* observationModel; 23 | ContentmentModel* contentmentModel; 24 | PlanningSystem* planningSystem; 25 | GVec actions; 26 | GVec beliefs; 27 | GVec anticipatedBeliefs; 28 | GVec buf; 29 | 30 | 31 | // General-purpose constructor. 32 | AgentManic(GRand& r); 33 | 34 | virtual ~AgentManic(); 35 | 36 | string getName() { return "Manic"; } 37 | 38 | virtual void reset(Mentor& oracle, size_t observationDims, size_t beliefDims, size_t actionDims, size_t maxPlanLength); 39 | 40 | /// Unmarshaling constructor 41 | AgentManic(GDomNode* pNode, GRand& r, Mentor& oracle); 42 | 43 | /// Marshals this agent to a JSON DOM. 44 | GDomNode* marshal(GDom* pDoc); 45 | 46 | /// Replaces the mentor with the specified one 47 | virtual void setMentor(Mentor* oracle); 48 | 49 | /// Sets a tutor. (Clears the tutor if tutor is nullptr.) 50 | virtual void setTutor(Tutor* tutor, bool helpObservationFunction, bool helpTransitionFunction, bool helpContentmentModel, bool helpPlanningSystem); 51 | 52 | /// Tells the agent that the next observation passed to learnFromExperience does not follow 53 | /// from the previous one. This should be called when a game is reset, or when the state is 54 | /// adjusted in a manner that the agent is not expected to anticipate. 55 | virtual void teleport(); 56 | 57 | /// Learns from observations 58 | void learnFromExperience(GVec& observations); 59 | 60 | /// Returns an action vector 61 | GVec& decideWhatToDo(); 62 | 63 | /// Returns the observation that would be expected after performing the plan. 64 | virtual void anticipateObservation(const GMatrix& plan, GVec& obs); 65 | 66 | /// A vector of observations goes in. All observed values may be expected to fall between -1 and 1. 67 | /// Returns a vector of chosen actions. All returned values should fall between 0 and 1. 
68 | virtual GVec& think(GVec& observations); 69 | }; 70 | 71 | #endif 72 | -------------------------------------------------------------------------------- /cpp/src/AgentRandy.cpp: -------------------------------------------------------------------------------- 1 | #include "AgentRandy.h" 2 | 3 | AgentRandy::AgentRandy(GRand& r) 4 | : rand(r) 5 | { 6 | } 7 | 8 | // virtual 9 | AgentRandy::~AgentRandy() 10 | { 11 | } 12 | 13 | // virtual 14 | void AgentRandy::reset(Mentor& oracle, size_t observationDims, size_t beliefDims, size_t actionDims, size_t maxPlanLength) 15 | { 16 | actions.resize(actionDims); 17 | actions.fill(0.0); 18 | teleport(); 19 | } 20 | 21 | 22 | AgentRandy::AgentRandy(GDomNode* pNode, GRand& r, Mentor& oracle) 23 | : rand(r) 24 | { 25 | actions.resize(pNode->field("actions")->asInt()); 26 | } 27 | 28 | 29 | GDomNode* AgentRandy::marshal(GDom* pDoc) 30 | { 31 | GDomNode* pNode = pDoc->newObj(); 32 | pNode->addField(pDoc, "actions", pDoc->newInt(actions.size())); 33 | return pNode; 34 | } 35 | 36 | 37 | void AgentRandy::setMentor(Mentor* oracle) 38 | { 39 | } 40 | 41 | 42 | void AgentRandy::setTutor(Tutor* tutor, bool helpObservationFunction, bool helpTransitionFunction, bool helpContentmentModel, bool helpPlanningSystem) 43 | { 44 | } 45 | 46 | 47 | // virtual 48 | void AgentRandy::teleport() 49 | { 50 | } 51 | 52 | 53 | // virtual 54 | void AgentRandy::anticipateObservation(const GMatrix& plan, GVec& obs) 55 | { 56 | obs.copy(recent_obs); 57 | } 58 | 59 | 60 | // virtual 61 | GVec& AgentRandy::think(GVec& observations) 62 | { 63 | recent_obs.copy(observations); 64 | actions.fillUniform(rand); 65 | return actions; 66 | } 67 | -------------------------------------------------------------------------------- /cpp/src/AgentRandy.h: -------------------------------------------------------------------------------- 1 | #ifndef AGENTRANDY_H 2 | #define AGENTRANDY_H 3 | 4 | #include "Agent.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using std::string; 11 | 12 | /// A baseline agent that simply chooses uniformly random actions. It is included for comparison against more capable agents. 13 | class AgentRandy : public Agent 14 | { 15 | public: 16 | GRand& rand; 17 | GVec actions; 18 | GVec recent_obs; 19 | 20 | 21 | // General-purpose constructor. 22 | AgentRandy(GRand& r); 23 | 24 | virtual ~AgentRandy(); 25 | 26 | string getName() { return "Randy"; } 27 | 28 | virtual void reset(Mentor& oracle, size_t observationDims, size_t beliefDims, size_t actionDims, size_t maxPlanLength); 29 | 30 | /// Unmarshaling constructor 31 | AgentRandy(GDomNode* pNode, GRand& r, Mentor& oracle); 32 | 33 | /// Marshals this agent to a JSON DOM.
34 | GDomNode* marshal(GDom* pDoc); 35 | 36 | virtual void setMentor(Mentor* oracle); 37 | 38 | virtual void setTutor(Tutor* tutor, bool helpObservationFunction, bool helpTransitionFunction, bool helpContentmentModel, bool helpPlanningSystem); 39 | 40 | virtual void teleport(); 41 | 42 | virtual void anticipateObservation(const GMatrix& plan, GVec& obs); 43 | 44 | virtual GVec& think(GVec& observations); 45 | }; 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /cpp/src/ContentmentModel.cpp: -------------------------------------------------------------------------------- 1 | #include "ContentmentModel.h" 2 | #include 3 | #include 4 | 5 | // General-purpose constructor 6 | ContentmentModel::ContentmentModel(size_t beliefDims, size_t total_layers, size_t queue_size, size_t trainItersPerPattern, GRand& r) 7 | : rand(r), 8 | samples(queue_size, beliefDims), 9 | contentment(queue_size, 1), 10 | tutor(nullptr), 11 | trainPos(0), 12 | trainSize(0), 13 | trainIters(0), 14 | trainProgress(0), 15 | err(0.0) 16 | { 17 | // Init the model 18 | rand = r; 19 | int hidden = std::min((size_t)30, beliefDims * 10); 20 | model.addLayer(new GLayerClassic(beliefDims, hidden, new GActivationBentIdentity())); 21 | model.addLayer(new GLayerClassic(hidden, 1, new GActivationBentIdentity())); 22 | GUniformRelation relIn(beliefDims); 23 | GUniformRelation relOut(1); 24 | model.beginIncrementalLearning(relIn, relOut); 25 | 26 | // Init the meta-parameters 27 | trainIters = trainItersPerPattern; 28 | model.setLearningRate(0.03); 29 | targBuf.resize(1); 30 | } 31 | 32 | 33 | /// Unmarshaling constructor 34 | ContentmentModel::ContentmentModel(GDomNode* obj, GRand& r) 35 | : rand(r), 36 | model(obj->field("model")), 37 | samples(obj->field("samples")), 38 | contentment(obj->field("contentment")), 39 | tutor(nullptr), 40 | trainPos(obj->field("trainPos")->asInt()), 41 | trainSize(obj->field("trainSize")->asInt()), 42 | trainIters(obj->field("trainIters")->asInt()), 43 | trainProgress(obj->field("trainProgress")->asInt()), 44 | err(obj->field("err")->asDouble()) 45 | { 46 | targBuf.resize(1); 47 | } 48 | 49 | 50 | /// Marshals this model to a JSON DOM. 
51 | GDomNode* ContentmentModel::marshal(GDom* pDoc) 52 | { 53 | GDomNode* pNode = pDoc->newObj(); 54 | pNode->addField(pDoc, "model", model.serialize(pDoc)); 55 | pNode->addField(pDoc, "samples", samples.serialize(pDoc)); 56 | pNode->addField(pDoc, "contentment", contentment.serialize(pDoc)); 57 | pNode->addField(pDoc, "trainPos", pDoc->newInt(trainPos)); 58 | pNode->addField(pDoc, "trainSize", pDoc->newInt(trainSize)); 59 | pNode->addField(pDoc, "trainIters", pDoc->newInt(trainIters)); 60 | pNode->addField(pDoc, "trainProgress", pDoc->newInt(trainProgress)); 61 | pNode->addField(pDoc, "err", pDoc->newDouble(err)); 62 | return pNode; 63 | } 64 | 65 | 66 | /// Performs one pattern-presentation of stochastic gradient descent, and applies a small amount of weight decay for regularization 67 | void ContentmentModel::doSomeTraining() 68 | { 69 | // Present a sample of beliefs and corresponding contentment for training 70 | size_t index = rand.next(trainSize); 71 | double lambda = model.learningRate() * 0.000001; 72 | model.scaleWeights(1.0 - lambda); 73 | model.diminishWeights(lambda); 74 | model.trainIncremental(samples.row(index), contentment.row(index)); 75 | err += contentment.row(index).squaredDistance(model.outputLayer().activation()); 76 | if(++trainProgress >= 1000) 77 | { 78 | trainProgress = 0; 79 | //std::cout << "Contentment error: " << to_str(err / 1000.0) << "\n"; 80 | err = 0.0; 81 | } 82 | } 83 | 84 | 85 | /// Refines this model based on feedback from the mentor 86 | void ContentmentModel::trainIncremental(const GVec& sample_beliefs, double sample_contentment) 87 | { 88 | // Buffer the samples 89 | GVec& dest = samples.row(trainPos); 90 | if(sample_beliefs.size() != dest.size()) 91 | throw Ex("size mismatch"); 92 | dest.copy(sample_beliefs); 93 | contentment.row(trainPos)[0] = sample_contentment; 94 | trainPos++; 95 | trainSize = std::max(trainSize, trainPos); 96 | if(trainPos >= samples.rows()) 97 | trainPos = 0; 98 | 99 | // Do a few iterations of stochastic gradient descent 100 | size_t iters = std::min(trainIters, trainSize); 101 | for(size_t i = 0; i < iters; i++) 102 | doSomeTraining(); 103 | } 104 | 105 | 106 | /// Computes the contentment of a particular belief vector 107 | double ContentmentModel::evaluate(const GVec& beliefs) 108 | { 109 | if(tutor) 110 | return tutor->evaluate_state(beliefs); 111 | else 112 | { 113 | model.forwardProp(beliefs); 114 | return model.outputLayer().activation()[0]; 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /cpp/src/ContentmentModel.h: -------------------------------------------------------------------------------- 1 | #ifndef CONTENTMENTMODEL_H 2 | #define CONTENTMENTMODEL_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "Mentor.h" 9 | 10 | using namespace GClasses; 11 | 12 | 13 | /// A model that maps from anticipated beliefs to contentment (or utility). 14 | /// This model is trained by reinforcement from a mentor.
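/// A rough sketch of one training presentation (see doSomeTraining), where
/// lambda = learningRate * 0.000001:
///   w <- (1 - lambda) * w     // multiplicative (L2-style) decay via scaleWeights
///   w <- shrink(w, lambda)    // magnitude (L1-style) decay via diminishWeights
/// followed by one backprop step toward the buffered contentment value.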
15 | class ContentmentModel 16 | { 17 | public: 18 | GRand& rand; 19 | GNeuralNet model; 20 | GMatrix samples; 21 | GMatrix contentment; 22 | Tutor* tutor; 23 | size_t trainPos; 24 | size_t trainSize; 25 | size_t trainIters; 26 | double learningRate; 27 | size_t trainProgress; 28 | double err; 29 | GVec targBuf; 30 | 31 | 32 | // General-purpose constructor 33 | ContentmentModel(size_t beliefDims, size_t total_layers, size_t queue_size, size_t trainItersPerPattern, GRand& r); 34 | 35 | /// Unmarshaling constructor 36 | ContentmentModel(GDomNode* pNode, GRand& r); 37 | 38 | /// Marshals this model to a JSON DOM. 39 | GDomNode* marshal(GDom* pDoc); 40 | 41 | /// Sets the tutor 42 | void setTutor(Tutor* t) { tutor = t; } 43 | 44 | /// Performs one pattern-presentation of stochastic gradient descent, and dynamically tunes the learning rate 45 | void doSomeTraining(); 46 | 47 | /// Refines this model based on feedback from the mentor 48 | void trainIncremental(const GVec& sample_beliefs, double sample_contentment); 49 | 50 | /// Computes the contentment of a particular belief vector 51 | double evaluate(const GVec& beliefs); 52 | }; 53 | 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /cpp/src/DriftingPlatform.cpp: -------------------------------------------------------------------------------- 1 | #include "DriftingPlatform.h" 2 | #include 3 | 4 | //#include "AgentManic.h" 5 | //#include "GClasses/GImage.h" 6 | 7 | using std::cout; 8 | 9 | 10 | 11 | 12 | 13 | // virtual 14 | void DriftingPlatformTutor::observations_to_state(const GClasses::GVec& observations, GClasses::GVec& state) 15 | { 16 | state.put(0, observations, 0, state.size()); 17 | } 18 | 19 | // virtual 20 | void DriftingPlatformTutor::state_to_observations(const GClasses::GVec& state, GClasses::GVec& observations) 21 | { 22 | world.computeObservations(state, observations); 23 | } 24 | 25 | // virtual 26 | void DriftingPlatformTutor::transition(const GClasses::GVec& current_state, const GClasses::GVec& actions, GClasses::GVec& next_state) 27 | { 28 | world.computeNextState(current_state, actions, next_state); 29 | } 30 | 31 | // virtual 32 | double DriftingPlatformTutor::evaluate_state(const GClasses::GVec& state) 33 | { 34 | state_to_observations(state, obs); 35 | bool oldActive = mentor.active; 36 | mentor.active = true; 37 | double utility = mentor.evaluateObservation(obs); 38 | mentor.active = oldActive; 39 | return utility; 40 | } 41 | 42 | // virtual 43 | void DriftingPlatformTutor::choose_actions(const GClasses::GVec& state, GClasses::GVec& actions) 44 | { 45 | double theta = atan2(state[1], state[0]); 46 | theta -= world.controlOrigin; 47 | theta += M_PI; 48 | theta /= (2.0 * M_PI); 49 | while(theta < 0.0) 50 | theta += 1.0; 51 | while(theta > 1.0) 52 | theta -= 1.0; 53 | actions[0] = theta; 54 | } 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | /* 64 | void visualizeContentment(AgentManic& agent) 65 | { 66 | GImage image; 67 | image.setSize(1000, 1000); 68 | GVec beliefs(2); 69 | 70 | // Find the min and max contentment 71 | ContentmentModel* pContentmentModel = agent.contentmentModel; 72 | double min = 1e300; 73 | double max = -1e300; 74 | for(int y = 0; y < (int)image.height(); y++) 75 | { 76 | beliefs[1] = (double)y / 1000.0 * 2.0 - 1.0; 77 | for(int x = 0; x < (int)image.width(); x++) 78 | { 79 | beliefs[0] = (double)x / 1000.0 * 2.0 - 1.0; 80 | double contentment = pContentmentModel->evaluate(beliefs); 81 | min = std::min(min, contentment); 82 | max = std::max(max, 
contentment); 83 | } 84 | } 85 | 86 | // Plot the contentment contours 87 | for(int y = 0; y < (int)image.height(); y++) 88 | { 89 | beliefs[1] = (double)y / 1000.0 * 2.0 - 1.0; 90 | for(int x = 0; x < (int)image.width(); x++) 91 | { 92 | beliefs[0] = (double)x / 1000.0 * 2.0 - 1.0; 93 | double contentment = pContentmentModel->evaluate(beliefs); 94 | int g = ClipChan((contentment - min) * 256.0 / (max - min)); 95 | int gg = g; 96 | if(g % 5 == 0) 97 | gg = (128 - (int)(tanh((double)(g - 128) * 0.03) * 127.0)); 98 | image.setPixel(x, y, gARGB(0xff, g, g, gg)); 99 | } 100 | } 101 | 102 | // Draw magenta dots at the sample locations for training the contentment function 103 | GVec r; 104 | for(size_t i = 0; i < agent.contentmentModel->trainSize; i++) { 105 | agent.observationModel->beliefsToObservations(agent.contentmentModel->samples.row(i), r); 106 | int x = (int)((r[0] + 1.0) * 500.0); 107 | int y = (int)((r[1] + 1.0) * 500.0); 108 | image.circleFill(x, y, 4.0, 0xffff00ff); 109 | } 110 | 111 | // Draw crosshairs at the origin 112 | image.line(449, 499, 549, 499, 0xff000000); 113 | image.line(499, 449, 499, 549, 0xff000000); 114 | 115 | string s = "min: "; 116 | s += to_str(min); 117 | s += ", max: "; 118 | s += to_str(max); 119 | image.text(s.c_str(), 50, 50, 2.0f, 0xffffff00); 120 | 121 | image.savePpm("contentment.ppm"); 122 | } 123 | */ 124 | double DriftingPlatform::test(Agent& agent) 125 | { 126 | cout << "----------------------\n"; 127 | cout << "Drifting platform test. Agent: " + agent.getName() << "\n"; 128 | cout << "----------------------\n"; 129 | cout << "In this test, the agent is placed on an imaginary 2D platform of infinite size. " << 130 | "The agent's objective is to stay near the origin. Each time-step, the platform " << 131 | "drifts a small amount in a random direction. The agent can step in any direction " << 132 | "(from 0 to 2*PI). Initially, a mentor will help it learn what to do.\n"; 133 | 134 | // Define some constants for this test 135 | double driftSpeed = 0.1; 136 | 137 | // Initialize the agent for this world 138 | DriftingPlatformMentor mentor; 139 | agent.reset(mentor, // This mentor prefers plans that lead closer to the origin 140 | 2, // The agent observes its x,y position (which is the complete state of this world) 141 | 2, // The agent models state with 2 dimensions because it cannot be simplified further 142 | 1, // The agent chooses a direction for travel 143 | 1); // The agent plans just 1 time-step into the future 144 | 145 | // To debug an agent that isn't working, uncomment the following lines and verify that the test works. 146 | // Then, set each "true" to "false" until you find the component that isn't doing its job properly.
147 | //DriftingPlatformTutor tutor(*this, mentor); 148 | //agent.setTutor(&tutor, true/*observation*/, true/*transition*/, true/*contentment*/, true/*planning*/); 149 | 150 | // Train with mentor 151 | cout << "Phase 1 of 3: Supervised learning...\n"; 152 | cout << "|------------------------------------------------|\n"; 153 | GVec state(2); 154 | GVec obs(2); 155 | GVec next_state(2); 156 | GVec drift(2); 157 | for(size_t i = 0; i < 2000; i++) { 158 | 159 | if(i % 40 == 0) 160 | { 161 | cout << ">"; 162 | cout.flush(); 163 | } 164 | 165 | // The platform drifts in a random direction 166 | drift[0] = rand.normal(); 167 | drift[1] = rand.normal(); 168 | drift.normalize(); 169 | drift *= driftSpeed; 170 | state += drift; 171 | state.clip(-1.0, 1.0); 172 | 173 | // The agent takes a step in a direction of its choice 174 | computeObservations(state, obs); 175 | GVec& act = agent.think(obs); 176 | computeNextState(state, act, next_state); 177 | state.copy(next_state); 178 | } 179 | //if(agent.getName().compare("Manic") == 0) 180 | // visualizeContentment(*(AgentManic*)&agent); 181 | 182 | cout << "\n\n\nNow, the mentor is removed, so the agent is on its own.\n"; 183 | mentor.active = false; 184 | 185 | cout << "Also, to make the problem more challenging, the agent's controls " << 186 | "are rotated by 120 degrees. The agent will now have to figure out how to operate " << 187 | "the new controls without a mentor to help it.\n"; 188 | controlOrigin += M_PI * 2.0 / 3.0; 189 | 190 | // Train without mentor 191 | cout << "Phase 2 of 3: Unsupervised learning...\n"; 192 | cout << "|------------------------------------------------|\n"; 193 | for(size_t i = 0; i < 2000; i++) { 194 | 195 | if(i % 40 == 0) 196 | { 197 | cout << ">"; 198 | cout.flush(); 199 | } 200 | 201 | // The platform drifts in a random direction 202 | drift[0] = rand.normal(); 203 | drift[1] = rand.normal(); 204 | drift.normalize(); 205 | drift *= driftSpeed; 206 | state += drift; 207 | state.clip(-1.0, 1.0); 208 | 209 | // The agent takes a step in a direction of its choice 210 | computeObservations(state, obs); 211 | GVec& act = agent.think(obs); 212 | computeNextState(state, act, next_state); 213 | state.copy(next_state); 214 | } 215 | 216 | // Test 217 | cout << "\n\n\nThe agent has had enough time to figure out the new controls, so now we test the agent. " << 218 | "We will let the platform continue to drift randomly for 1000 iterations, and measure the average " << 219 | "distance between the origin and the agent. (If the agent is intelligent, it should achieve a low " << 220 | "average distance, such as 0.2.)
If it is unintelligent, it will achieve a higher average distance, " << 221 | "such as 0.7.\n"; 222 | cout << "Phase 3 of 3: Testing...\n"; 223 | cout << "|------------------------------------------------|\n"; 224 | double sumSqMag = 0.0; 225 | for(size_t i = 0; i < 1000; i++) { 226 | 227 | if(i % 20 == 0) 228 | { 229 | cout << ">"; 230 | cout.flush(); 231 | } 232 | 233 | // The platform drifts in a random direction 234 | drift[0] = rand.normal(); 235 | drift[1] = rand.normal(); 236 | drift.normalize(); 237 | drift *= driftSpeed; 238 | state += drift; 239 | state.clip(-1.0, 1.0); 240 | 241 | // The agent takes a step in a direction of its choice 242 | computeObservations(state, obs); 243 | GVec& act = agent.think(obs); 244 | computeNextState(state, act, next_state); 245 | state.copy(next_state); 246 | 247 | // Sum up how far the agent ever drifts from the origin 248 | sumSqMag += std::sqrt(state.squaredMagnitude()); 249 | } 250 | 251 | double aveDist = sumSqMag / 1000.0; 252 | cout << "\n\nThe agent's average distance from the origin during the testing phase was " << to_str(aveDist) << "\n\n"; 253 | 254 | return -aveDist; // Bigger is supposed to be better, so we negate the average distance 255 | } 256 | -------------------------------------------------------------------------------- /cpp/src/DriftingPlatform.h: -------------------------------------------------------------------------------- 1 | #ifndef DRIFTINGPLATFORM_H 2 | #define DRIFTINGPLATFORM_H 3 | 4 | #include "Mentor.h" 5 | #include "Agent.h" 6 | #include "Test.h" 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | class DriftingPlatform; 13 | 14 | 15 | class DriftingPlatformMentor : public Mentor 16 | { 17 | public: 18 | bool active; 19 | GVec anticipatedObs; 20 | 21 | DriftingPlatformMentor() 22 | { 23 | active = true; 24 | } 25 | 26 | virtual double evaluatePlan(Agent& agent, const GMatrix& plan) 27 | { 28 | if(!active) 29 | return UNKNOWN_REAL_VALUE; 30 | agent.anticipateObservation(plan, anticipatedObs); 31 | return evaluateObservation(anticipatedObs); 32 | } 33 | 34 | // Prefer the fantasy that minimizes the magnitude of the observation vector 35 | static double evaluateObservation(const GVec& anticipatedObservations) 36 | { 37 | double sqMag = anticipatedObservations.squaredMagnitude(); 38 | return exp(-sqMag); 39 | } 40 | }; 41 | 42 | 43 | 44 | class DriftingPlatformTutor : public Tutor 45 | { 46 | public: 47 | DriftingPlatform& world; 48 | DriftingPlatformMentor& mentor; 49 | GClasses::GVec obs; 50 | 51 | DriftingPlatformTutor(DriftingPlatform& w, DriftingPlatformMentor& m) : world(w), mentor(m), obs(2) 52 | { 53 | } 54 | 55 | virtual void observations_to_state(const GClasses::GVec& observations, GClasses::GVec& state); 56 | virtual void state_to_observations(const GClasses::GVec& state, GClasses::GVec& observations); 57 | virtual void transition(const GClasses::GVec& current_state, const GClasses::GVec& actions, GClasses::GVec& next_state); 58 | virtual double evaluate_state(const GClasses::GVec& state); 59 | virtual void choose_actions(const GClasses::GVec& state, GClasses::GVec& actions); 60 | }; 61 | 62 | 63 | 64 | class DriftingPlatform : public Test 65 | { 66 | public: 67 | double controlOrigin; 68 | double stepSize; 69 | GRand& rand; 70 | 71 | 72 | DriftingPlatform(GRand& r) 73 | : controlOrigin(0.0), stepSize(0.05), rand(r) 74 | { 75 | } 76 | 77 | void computeObservations(const GClasses::GVec& state, GClasses::GVec& observations) 78 | { 79 | observations.put(0, state, 0, state.size()); 80 | 
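		// Zero-fill any observation dimensions beyond the state (a no-op in this test, where observations and state have the same size)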
observations.fill(0.0, state.size(), observations.size()); 81 | } 82 | 83 | void computeNextState(const GClasses::GVec& current_state, const GClasses::GVec& actions, GClasses::GVec& next_state) 84 | { 85 | next_state.copy(current_state); 86 | double angle = actions[0] * 2.0 * M_PI + controlOrigin; 87 | next_state[0] += stepSize * std::cos(angle); 88 | next_state[1] += stepSize * std::sin(angle); 89 | next_state.clip(-1.0, 1.0); 90 | } 91 | 92 | 93 | /* 94 | /// Generates an image to visualize what's going on inside an AgentManic's artificial brain for debugging purposes 95 | static BufferedImage visualize(agents.manic.AgentManic agent, double[] state_orig, double[] state_drifted, double[] state); 96 | static void makeVisualization(String suffix, agents.manic.AgentManic agent, double[] state_orig, double[] state_drifted, double[] state); 97 | */ 98 | 99 | double test(Agent& agent); 100 | }; 101 | 102 | #endif 103 | -------------------------------------------------------------------------------- /cpp/src/Makefile: -------------------------------------------------------------------------------- 1 | ################ 2 | # Paths and Flags 3 | ################ 4 | SHELL = /bin/bash 5 | TARGET_PATH = ../bin 6 | TARGET_NAME_OPT = manic 7 | TARGET_NAME_DBG = $(TARGET_NAME_OPT)dbg 8 | OBJ_PATH = ../obj 9 | INSTALL_LOCATION_INCLUDE ?= /usr/local/include 10 | UNAME = $(shell uname -s) 11 | ifeq ($(UNAME),Darwin) 12 | CFLAGS = -Wshadow -std=c++11 -stdlib=libc++ -I/opt/local/include -I/usr/local/include -I/sw/include -I../../../src -I$(INSTALL_LOCATION_INCLUDE) -D_THREAD_SAFE -DDARWIN -no-cpp-precomp 13 | DBG_CFLAGS = $(CFLAGS) -g -D_DEBUG 14 | OPT_CFLAGS = $(CFLAGS) -O3 15 | LFLAGS = -L/opt/local/lib -L/usr/local/lib -L/sw/lib -L../../../lib -framework AppKit 16 | DBG_LFLAGS = $(LFLAGS) -lpthread -lGClassesDbg 17 | OPT_LFLAGS = $(LFLAGS) -lpthread -lGClasses 18 | else 19 | CFLAGS = -Wall -Werror -Wshadow -pedantic -std=c++11 -I$(INSTALL_LOCATION_INCLUDE) -I../../../src 20 | DBG_CFLAGS = $(CFLAGS) -g -D_DEBUG 21 | OPT_CFLAGS = $(CFLAGS) -O3 22 | LFLAGS = -L../../../lib 23 | DBG_LFLAGS = $(LFLAGS) -lGClassesDbg -lpthread 24 | OPT_LFLAGS = $(LFLAGS) -lGClasses -lpthread 25 | endif 26 | 27 | ################ 28 | # Source 29 | ################ 30 | 31 | CPP_FILES =\ 32 | main.cpp\ 33 | AgentManic.cpp\ 34 | AgentRandy.cpp\ 35 | ContentmentModel.cpp\ 36 | DriftingPlatform.cpp\ 37 | ObservationModel.cpp\ 38 | PlanningSystem.cpp\ 39 | TransitionModel.cpp\ 40 | 41 | ################ 42 | # Lists 43 | ################ 44 | 45 | TEMP_LIST_OPT = $(CPP_FILES:%=$(OBJ_PATH)/opt/%) 46 | TEMP_LIST_DBG = $(CPP_FILES:%=$(OBJ_PATH)/dbg/%) 47 | OBJECTS_OPT = $(TEMP_LIST_OPT:%.cpp=%.o) 48 | OBJECTS_DBG = $(TEMP_LIST_DBG:%.cpp=%.o) 49 | DEPS_OPT = $(TEMP_LIST_OPT:%.cpp=%.d) 50 | DEPS_DBG = $(TEMP_LIST_DBG:%.cpp=%.d) 51 | 52 | ################ 53 | # Rules 54 | ################ 55 | 56 | .DELETE_ON_ERROR: 57 | 58 | dbg : $(TARGET_PATH)/$(TARGET_NAME_DBG) 59 | 60 | opt : $(TARGET_PATH)/$(TARGET_NAME_OPT) 61 | 62 | usage: 63 | @echo "" 64 | @echo "Usage:" 65 | @echo " make usage (to see this info)" 66 | @echo " make clean (to delete all the .o files)" 67 | @echo " make dbg (to build a debug version)" 68 | @echo " make opt (to build an optimized version)" 69 | @echo "" 70 | 71 | # This rule makes the optimized binary by using g++ with the optimized ".o" files 72 | $(TARGET_PATH)/$(TARGET_NAME_OPT) : partialcleanopt $(OBJECTS_OPT) 73 | g++ -O3 -o $(TARGET_PATH)/$(TARGET_NAME_OPT) $(OBJECTS_OPT) $(OPT_LFLAGS) 74 | 75 | # This rule 
makes the debug binary by using g++ with the debug ".o" files 76 | $(TARGET_PATH)/$(TARGET_NAME_DBG) : partialcleandbg $(OBJECTS_DBG) 77 | g++ -g -o $(TARGET_PATH)/$(TARGET_NAME_DBG) $(OBJECTS_DBG) $(DBG_LFLAGS) 78 | 79 | # This includes all of the ".d" files. Each ".d" file contains a 80 | # generated rule that tells it how to make .o files. (The reason these are generated is so that 81 | # dependencies for these rules can be generated.) 82 | -include $(DEPS_OPT) 83 | 84 | -include $(DEPS_DBG) 85 | 86 | # This rule makes the optimized ".d" files by using "g++ -MM" with the corresponding ".cpp" file 87 | # The ".d" file will contain a rule that says how to make an optimized ".o" file. 88 | # "$<" refers to the ".cpp" file, and "$@" refers to the ".d" file 89 | $(DEPS_OPT) : $(OBJ_PATH)/opt/%.d : %.cpp 90 | @echo -e "Computing opt dependencies for $<" 91 | @-rm -f $$(dirname $@)/$$(basename $@ .d).o 92 | @if [ ! -d "$$(dirname $@)" ]; then mkdir -p "$$(dirname $@)"; fi 93 | @echo -en "$$(dirname $@)/" > $@ 94 | @g++ $(OPT_CFLAGS) -MM $< >> $@ 95 | @echo -e " g++ $(OPT_CFLAGS) -c $< -o $$(dirname $@)/$$(basename $@ .d).o" >> $@ 96 | 97 | # This rule makes the debug ".d" files by using "g++ -MM" with the corresponding ".cpp" file 98 | # The ".d" file will contain a rule that says how to make a debug ".o" file. 99 | # "$<" refers to the ".cpp" file, and "$@" refers to the ".d" file 100 | $(DEPS_DBG) : $(OBJ_PATH)/dbg/%.d : %.cpp 101 | @echo -e "Computing dbg dependencies for $<" 102 | @-rm -f $$(dirname $@)/$$(basename $@ .d).o 103 | @if [ ! -d "$$(dirname $@)" ]; then mkdir -p "$$(dirname $@)"; fi 104 | @echo -en "$$(dirname $@)/" > $@ 105 | @g++ $(DBG_CFLAGS) -MM $< >> $@ 106 | @echo -e " g++ $(DBG_CFLAGS) -c $< -o $$(dirname $@)/$$(basename $@ .d).o" >> $@ 107 | 108 | partialcleandbg : 109 | @if [ ! -d "$(TARGET_PATH)" ]; then mkdir -p "$(TARGET_PATH)"; fi 110 | @rm -f $(TARGET_PATH)/$(TARGET_NAME_DBG) 111 | 112 | partialcleanopt : 113 | @if [ ! -d "$(TARGET_PATH)" ]; then mkdir -p "$(TARGET_PATH)"; fi 114 | @rm -f $(TARGET_PATH)/$(TARGET_NAME_OPT) 115 | 116 | clean : partialcleandbg partialcleanopt 117 | rm -f $(OBJECTS_OPT) 118 | rm -f $(OBJECTS_DBG) 119 | rm -f $(DEPS_OPT) 120 | rm -f $(DEPS_DBG) 121 | -------------------------------------------------------------------------------- /cpp/src/Mentor.h: -------------------------------------------------------------------------------- 1 | #ifndef MENTOR_H 2 | #define MENTOR_H 3 | 4 | #include 5 | 6 | class Agent; 7 | 8 | /// Helps the agent learn what it should want to do. 9 | /// Does not tell the agent how to do anything. 10 | class Mentor 11 | { 12 | public: 13 | /// Implementations should evaluate the goodness of the plan. 14 | /// return 1 for the best possible plan. 15 | /// return 0 for the worst possible plan. 16 | /// return a value between 0 and 1 for plans that are neither the worst nor best. 17 | /// return UNKNOWN_REAL_VALUE if the mentor cannot determine the goodness of the anticipated observations, 18 | /// or if the mentor is not available, or if the mentor wants to test the agent by letting 19 | /// it decide for itself. 20 | virtual double evaluatePlan(Agent& agent, const GClasses::GMatrix& plan) = 0; 21 | }; 22 | 23 | 24 | /// Helps the agent cheat at performing some of its expected abilities, 25 | /// so you can debug which ones are giving it trouble. 26 | /// A tutor is not typically given to the agent.
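/// The tutor plugs into individual components via Agent::setTutor's boolean flags.
/// For example, when the observation flag is set, the ObservationModel delegates to
/// observations_to_state/state_to_observations instead of its learned encoder and decoder,
/// and when the contentment flag is set, ContentmentModel::evaluate defers to evaluate_state.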
27 | class Tutor 28 | { 29 | protected: 30 | /// The constructor is protected so that the user is forced to make a child class 31 | /// in order to instantiate a tutor. 32 | Tutor() {} 33 | 34 | public: 35 | /// Computes state from observations. 36 | virtual void observations_to_state(const GClasses::GVec& observations, GClasses::GVec& state) = 0; 37 | 38 | /// Computes observations from state. 39 | virtual void state_to_observations(const GClasses::GVec& state, GClasses::GVec& observations) = 0; 40 | 41 | /// Computes how actions will affect state. 42 | virtual void transition(const GClasses::GVec& current_state, const GClasses::GVec& actions, GClasses::GVec& next_state) = 0; 43 | 44 | /// Evaluates the utility of a state. 45 | virtual double evaluate_state(const GClasses::GVec& state) = 0; 46 | 47 | /// Picks the best action for the given state. 48 | virtual void choose_actions(const GClasses::GVec& state, GClasses::GVec& actions) = 0; 49 | }; 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /cpp/src/ObservationModel.cpp: -------------------------------------------------------------------------------- 1 | #include "ObservationModel.h" 2 | #include 3 | 4 | 5 | /// General-purpose constructor 6 | ObservationModel::ObservationModel(TransitionModel& transition, size_t observation_dims, size_t belief_dims, size_t decoder_layers, 7 | size_t encoder_layers, size_t queue_size, size_t trainItersPerPattern, size_t calibrationIterations, GRand& r) 8 | : rand(r), 9 | train(queue_size, observation_dims), 10 | validation(queue_size, observation_dims), 11 | tutor(nullptr), 12 | transitionModel(transition), 13 | trainPos(0), 14 | trainSize(0), 15 | validationPos(0), 16 | validationSize(0), 17 | trainIters(0), 18 | trainProgress(0), 19 | calibrationIters(0) 20 | { 21 | 22 | if(belief_dims > observation_dims) 23 | throw Ex("observation_dims must be >= belief_dims"); 24 | 25 | // Init the encoder 26 | int hidden = std::max((size_t)30, (observation_dims + belief_dims) / 2); 27 | encoder.addLayer(new GLayerClassic(observation_dims, hidden, new GActivationBentIdentity())); 28 | encoder.addLayer(new GLayerClassic(hidden, belief_dims, new GActivationBentIdentity())); 29 | GUniformRelation relInEnc(observation_dims); 30 | GUniformRelation relOutEnc(belief_dims); 31 | encoder.setLearningRate(0.03); 32 | encoder.beginIncrementalLearning(relInEnc, relOutEnc); 33 | 34 | // Init the decoder 35 | decoder.addLayer(new GLayerClassic(belief_dims, hidden, new GActivationBentIdentity())); 36 | decoder.addLayer(new GLayerClassic(hidden, observation_dims, new GActivationBentIdentity())); 37 | GUniformRelation relInDec(belief_dims); 38 | GUniformRelation relOutDec(observation_dims); 39 | decoder.setLearningRate(0.03); 40 | decoder.beginIncrementalLearning(relInDec, relOutDec); 41 | 42 | // Make the experimental nets 43 | decoderExperimental.copyStructure(&decoder); 44 | encoderExperimental.copyStructure(&encoder); 45 | 46 | // Init the meta-parameters 47 | trainIters = trainItersPerPattern; 48 | calibrationIters = calibrationIterations; 49 | } 50 | 51 | 52 | /// Unmarshaling constructor 53 | ObservationModel::ObservationModel(TransitionModel& transition, GDomNode* pNode, GRand& r) 54 | : rand(r), 55 | decoder(pNode->field("decoder")), 56 | encoder(pNode->field("encoder")), 57 | decoderExperimental(pNode->field("decoderExperimental")), 58 | encoderExperimental(pNode->field("encoderExperimental")), 59 | train(pNode->field("train")), 60 | 
validation(pNode->field("validation")), 61 | tutor(nullptr), 62 | transitionModel(transition), 63 | trainPos(pNode->field("trainPos")->asInt()), 64 | trainSize(pNode->field("trainSize")->asInt()), 65 | validationPos(pNode->field("validationPos")->asInt()), 66 | validationSize(pNode->field("validationSize")->asInt()), 67 | trainIters(pNode->field("trainIters")->asInt()), 68 | trainProgress(pNode->field("trainProgress")->asInt()), 69 | calibrationIters(pNode->field("calibrationIters")->asInt()) 70 | { 71 | } 72 | 73 | 74 | /// Marshals this model to a JSON DOM. 75 | GDomNode* ObservationModel::marshal(GDom* pDoc) 76 | { 77 | GDomNode* pNode = pDoc->newObj(); 78 | pNode->addField(pDoc, "decoder", decoder.serialize(pDoc)); 79 | pNode->addField(pDoc, "encoder", encoder.serialize(pDoc)); 80 | pNode->addField(pDoc, "decoderExperimental", decoderExperimental.serialize(pDoc)); 81 | pNode->addField(pDoc, "encoderExperimental", encoderExperimental.serialize(pDoc)); 82 | pNode->addField(pDoc, "train", train.serialize(pDoc)); 83 | pNode->addField(pDoc, "validation", validation.serialize(pDoc)); 84 | pNode->addField(pDoc, "trainPos", pDoc->newInt(trainPos)); 85 | pNode->addField(pDoc, "trainSize", pDoc->newInt(trainSize)); 86 | pNode->addField(pDoc, "validationPos", pDoc->newInt(validationPos)); 87 | pNode->addField(pDoc, "validationSize", pDoc->newInt(validationSize)); 88 | pNode->addField(pDoc, "trainIters", pDoc->newInt(trainIters)); 89 | pNode->addField(pDoc, "trainProgress", pDoc->newInt(trainProgress)); 90 | pNode->addField(pDoc, "calibrationIters", pDoc->newInt(calibrationIters)); 91 | return pNode; 92 | } 93 | 94 | 95 | /// Performs one pattern-presentation of stochastic gradient descent and dynamically tunes the learning rate 96 | void ObservationModel::doSomeTraining() 97 | { 98 | // Train the decoderExperimental and encoderExperimental together as an autoencoder 99 | double lambda = decoder.learningRate() * 0.00001; 100 | decoderExperimental.scaleWeights(1.0 - lambda); 101 | decoderExperimental.diminishWeights(lambda); 102 | encoderExperimental.scaleWeights(1.0 - lambda); 103 | encoderExperimental.diminishWeights(lambda); 104 | size_t index = rand.next(trainSize); 105 | GVec& observation = train.row(index); 106 | encoderExperimental.forwardProp(observation); 107 | GVec& belief = encoderExperimental.outputLayer().activation(); 108 | decoderExperimental.forwardProp(belief); 109 | decoderExperimental.backpropagate(observation); 110 | encoderExperimental.backpropagateFromLayer(&decoderExperimental.layer(0)); 111 | encoderExperimental.descendGradient(observation, encoderExperimental.learningRate(), 0.0); 112 | decoderExperimental.descendGradient(belief, decoderExperimental.learningRate(), 0.0); 113 | 114 | // Since changing the observation function resets the training data for the transition function, 115 | // we only want to change our perception when it will lead to big improvements. 116 | // Here, we test whether our experimental model is significantly better than the one we have been using. 117 | // If so, then the experimental model becomes the new model. 
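	// Concretely, the code below runs after every train.rows() presentations: it measures the
	// root-mean-squared reconstruction error of both autoencoders on the validation buffer, and
	// only adopts the experimental weights (also resetting the transition model's training
	// window) if the experimental error is at least 15% lower.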
118 | trainProgress++; 119 | if(trainProgress >= train.rows()) 120 | { 121 | // Measure mean squared error 122 | trainProgress = 0; 123 | double err1 = 0.0; 124 | double err2 = 0.0; 125 | for(size_t i = 0; i < validationSize; i++) 126 | { 127 | GVec& targ = validation.row(i); 128 | encoder.forwardProp(targ); 129 | decoder.forwardProp(encoder.outputLayer().activation()); 130 | GVec& pred1 = decoder.outputLayer().activation(); 131 | encoderExperimental.forwardProp(targ); 132 | decoderExperimental.forwardProp(encoderExperimental.outputLayer().activation()); 133 | GVec& pred2 = decoderExperimental.outputLayer().activation(); 134 | for(size_t j = 0; j < targ.size(); j++) 135 | { 136 | err1 += (targ[j] - pred1[j]) * (targ[j] - pred1[j]); 137 | err2 += (targ[j] - pred2[j]) * (targ[j] - pred2[j]); 138 | } 139 | } 140 | err1 = std::sqrt(err1 / validationSize); 141 | err2 = std::sqrt(err2 / validationSize); 142 | if(err2 < 0.85 * err1) 143 | { 144 | // Update the observation model and reset the training data for the transition function 145 | encoder.copyWeights(&encoderExperimental); 146 | decoder.copyWeights(&decoderExperimental); 147 | transitionModel.trainPos = 0; 148 | transitionModel.trainSize = 0; 149 | } 150 | else if(err1 < 0.85 * err2) 151 | { 152 | // This should really never happen 153 | encoderExperimental.copyWeights(&encoder); 154 | decoderExperimental.copyWeights(&decoder); 155 | } 156 | //std::cout << "Observation error:" << to_str(err1) << ", " << to_str(err2) << "\n"; 157 | } 158 | } 159 | 160 | 161 | /// Refines the encoder and decoder based on the new observation. 162 | void ObservationModel::trainIncremental(const GVec& observation) 163 | { 164 | // Buffer the pattern 165 | GVec* dest; 166 | if(validationPos < trainPos) { 167 | dest = &validation.row(validationPos); 168 | if(++validationPos >= validation.rows()) 169 | validationPos = 0; 170 | validationSize = std::max(validationSize, validationPos); 171 | } else { 172 | dest = &train.row(trainPos); 173 | trainPos++; 174 | trainSize = std::max(trainSize, trainPos); 175 | if(trainPos >= train.rows()) 176 | trainPos = 0; 177 | } 178 | dest->copy(observation); 179 | 180 | // Train 181 | size_t iters = std::min(trainIters, trainSize); 182 | for(size_t i = 0; i < iters; i++) 183 | doSomeTraining(); 184 | } 185 | 186 | 187 | /// Refines the beliefs to correspond with actual observations 188 | void ObservationModel::calibrateBeliefs(GVec& beliefs, const GVec& observations) 189 | { 190 | if(tutor) 191 | tutor->observations_to_state(observations, beliefs); 192 | else 193 | { 194 | GNeuralNetLayer& layIn = encoder.outputLayer(); 195 | for(size_t i = 0; i < calibrationIters; i++) { 196 | decoder.forwardProp(beliefs); 197 | decoder.backpropagate(observations); 198 | decoder.layer(0).backPropError(&layIn); 199 | beliefs.addScaled(decoder.learningRate(), layIn.error()); 200 | beliefs.clip(-1.0, 1.0); 201 | } 202 | } 203 | } 204 | 205 | 206 | /// Decodes beliefs to predict observations 207 | void ObservationModel::beliefsToObservations(const GVec& beliefs, GVec& observations) 208 | { 209 | observations.resize(decoder.outputLayer().outputs()); 210 | if(tutor) 211 | tutor->state_to_observations(beliefs, observations); 212 | else 213 | { 214 | decoder.forwardProp(beliefs); 215 | observations.copy(decoder.outputLayer().activation()); 216 | } 217 | } 218 | 219 | 220 | /// Encodes observations to predict beliefs 221 | void ObservationModel::observationsToBeliefs(const GVec& observations, GVec& beliefs) 222 | { 223 | 
beliefs.resize(encoder.outputLayer().outputs()); 224 | if(tutor) 225 | tutor->observations_to_state(observations, beliefs); 226 | else 227 | { 228 | beliefs.put(0, observations, 0, beliefs.size()); 229 | encoder.forwardProp(observations); 230 | beliefs.copy(encoder.outputLayer().activation()); 231 | } 232 | } 233 | -------------------------------------------------------------------------------- /cpp/src/ObservationModel.h: -------------------------------------------------------------------------------- 1 | #ifndef OBSERVATION_H 2 | #define OBSERVATION_H 3 | 4 | #include "TransitionModel.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "Mentor.h" 10 | 11 | using namespace GClasses; 12 | 13 | 14 | /// A bidirectional model that maps between beliefs and observations. 15 | /// Mapping from observations to beliefs is done by the encoder. 16 | /// Mapping from beliefs to observations is done by the decoder. 17 | /// These two components are trained together in an unsupervised manner as an autoencoder. 18 | class ObservationModel 19 | { 20 | public: 21 | GRand& rand; 22 | GNeuralNet decoder; 23 | GNeuralNet encoder; 24 | GNeuralNet decoderExperimental; 25 | GNeuralNet encoderExperimental; 26 | GMatrix train; 27 | GMatrix validation; 28 | Tutor* tutor; 29 | TransitionModel& transitionModel; 30 | size_t trainPos; 31 | size_t trainSize; 32 | size_t validationPos; 33 | size_t validationSize; 34 | size_t trainIters; 35 | size_t trainProgress; 36 | size_t calibrationIters; 37 | 38 | 39 | /// General-purpose constructor 40 | ObservationModel(TransitionModel& transition, size_t observation_dims, size_t belief_dims, size_t decoder_layers, 41 | size_t encoder_layers, size_t queue_size, size_t trainItersPerPattern, size_t calibrationIterations, GRand& r); 42 | 43 | /// Unmarshaling constructor 44 | ObservationModel(TransitionModel& transition, GDomNode* pNode, GRand& r); 45 | 46 | /// Marshals this model to a JSON DOM. 47 | GDomNode* marshal(GDom* pDoc); 48 | 49 | /// Sets the tutor 50 | void setTutor(Tutor* t) { tutor = t; } 51 | 52 | /// Performs one pattern-presentation of stochastic gradient descent and dynamically tunes the learning rate 53 | void doSomeTraining(); 54 | 55 | /// Refines the encoder and decoder based on the new observation. 
56 | void trainIncremental(const GVec& observation); 57 | 58 | /// Refines the beliefs to correspond with actual observations 59 | void calibrateBeliefs(GVec& beliefs, const GVec& observations); 60 | 61 | /// Decodes beliefs to predict observations 62 | void beliefsToObservations(const GVec& beliefs, GVec& observations); 63 | 64 | /// Encodes observations to predict beliefs 65 | void observationsToBeliefs(const GVec& observations, GVec& beliefs); 66 | }; 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /cpp/src/PlanningSystem.cpp: -------------------------------------------------------------------------------- 1 | #include "PlanningSystem.h" 2 | #include 3 | 4 | using std::cout; 5 | 6 | // General-purpose constructor 7 | PlanningSystem::PlanningSystem(Agent& agent, TransitionModel& transition, ObservationModel& observation, ContentmentModel& contentment, Mentor* oracle, 8 | size_t actionDimensions, size_t populationSize, size_t planRefinementIters, size_t burnInIters, size_t maxPlanLen, double discount, double explore, GRand& r) 9 | : self(agent), 10 | transitionModel(transition), 11 | observationModel(observation), 12 | contentmentModel(contentment), 13 | mentor(oracle), 14 | tutor(nullptr), 15 | actionDims(actionDimensions), 16 | burnIn(burnInIters), 17 | discountFactor(discount), 18 | explorationRate(explore), 19 | rand(r), 20 | randomPlan(1, actionDims) 21 | { 22 | GAssert(randomPlan[0].size() == actionDims); 23 | if(populationSize < 2) 24 | throw Ex("The population size must be at least 2"); 25 | refinementIters = populationSize * planRefinementIters; 26 | maxPlanLength = maxPlanLen; 27 | for(size_t i = 0; i < populationSize; i++) { 28 | GMatrix* p = new GMatrix(0, actionDims); 29 | plans.push_back(p); 30 | for(size_t j = std::min(maxPlanLen, rand.next(maxPlanLen) + 2); j > 0; j--) { 31 | // Add a random action vector to the end 32 | GVec& newActions = p->newRow(); 33 | newActions.fillUniform(rand); 34 | } 35 | } 36 | } 37 | 38 | 39 | /// Unmarshaling constructor 40 | PlanningSystem::PlanningSystem(GDomNode* pNode, Agent& agent, GRand& r, TransitionModel& transition, ObservationModel& observation, ContentmentModel& contentment, Mentor* oracle) 41 | : self(agent), 42 | transitionModel(transition), 43 | observationModel(observation), 44 | contentmentModel(contentment), 45 | mentor(oracle), 46 | tutor(nullptr), 47 | maxPlanLength(pNode->field("maxPlanLength")->asInt()), 48 | refinementIters(pNode->field("refinementIters")->asInt()), 49 | actionDims(pNode->field("actionDims")->asInt()), 50 | burnIn(pNode->field("burnIn")->asInt()), 51 | discountFactor(pNode->field("discount")->asDouble()), 52 | explorationRate(pNode->field("explore")->asDouble()), 53 | rand(r), 54 | randomPlan(1, actionDims) 55 | { 56 | GDomListIterator it(pNode->field("plans")); 57 | plans.resize(it.remaining()); 58 | for(size_t i = 0; it.current(); i++) 59 | { 60 | plans[i] = new GMatrix(it.current()); 61 | it.advance(); 62 | } 63 | } 64 | 65 | PlanningSystem::~PlanningSystem() 66 | { 67 | for(size_t i = 0; i < plans.size(); i++) 68 | delete(plans[i]); 69 | } 70 | 71 | /// Marshals this model to a JSON DOM. 
72 | GDomNode* PlanningSystem::marshal(GDom* pDoc) 73 | { 74 | GDomNode* pNode = pDoc->newObj(); 75 | GDomNode* pPlans = pNode->addField(pDoc, "plans", pDoc->newList()); 76 | for(size_t i = 0; i < plans.size(); i++) 77 | pPlans->addItem(pDoc, plans[i]->serialize(pDoc)); 78 | pNode->addField(pDoc, "maxPlanLength", pDoc->newInt(maxPlanLength)); 79 | pNode->addField(pDoc, "discount", pDoc->newDouble(discountFactor)); 80 | pNode->addField(pDoc, "explore", pDoc->newDouble(explorationRate)); 81 | pNode->addField(pDoc, "refinementIters", pDoc->newInt(refinementIters)); 82 | pNode->addField(pDoc, "burnIn", pDoc->newInt(burnIn)); 83 | pNode->addField(pDoc, "actionDims", pDoc->newInt(actionDims)); 84 | return pNode; 85 | } 86 | 87 | 88 | /// Replaces the mentor with the specified one 89 | void PlanningSystem::setMentor(Mentor* oracle) 90 | { 91 | mentor = oracle; 92 | } 93 | 94 | 95 | /// Prints a representation of all the plans to stdout 96 | void PlanningSystem::printPlans() 97 | { 98 | for(size_t i = 0; i < plans.size(); i++) 99 | plans[i]->print(cout); 100 | } 101 | 102 | 103 | /// Perturbs a random plan 104 | void PlanningSystem::mutate() 105 | { 106 | double d = rand.uniform(); 107 | GMatrix& p = *plans[rand.next(plans.size())]; 108 | if(d < 0.1) { // lengthen the plan 109 | if(p.rows() < maxPlanLength) { 110 | GVec* newActions = new GVec(actionDims); 111 | newActions->fillUniform(rand); 112 | p.takeRow(newActions, rand.next(p.rows() + 1)); 113 | } 114 | } 115 | else if(d < 0.2) { // shorten the plan 116 | if(p.rows() > 1) { 117 | p.deleteRow(rand.next(p.rows())); 118 | } 119 | } 120 | else if(d < 0.7) { // perturb a single element of an action vector 121 | GVec& actions = p[rand.next(p.rows())]; 122 | size_t i = rand.next(actions.size()); 123 | actions[i] = std::max(0.0, std::min(1.0, actions[i] + 0.03 * rand.normal())); 124 | } 125 | else if(d < 0.9) { // perturb a whole action vector 126 | GVec& actions = p[rand.next(p.rows())]; 127 | for(size_t i = 0; i < actions.size(); i++) { 128 | actions[i] = std::max(0.0, std::min(1.0, actions[i] + 0.02 * rand.normal())); 129 | } 130 | } 131 | else { // perturb the whole plan 132 | for(size_t j = 0; j < p.rows(); j++) { 133 | GVec& actions = p[j]; 134 | for(size_t i = 0; i < actions.size(); i++) { 135 | actions[i] = std::max(0.0, std::min(1.0, actions[i] + 0.01 * rand.normal())); 136 | } 137 | } 138 | } 139 | } 140 | 141 | 142 | /// Replaces the specified plan with a new one. 
143 | void PlanningSystem::replace(size_t childIndex) 144 | { 145 | double d = rand.uniform(); 146 | if(d < 0.2) { 147 | // Clone a random parent (asexual reproduction) 148 | size_t randomPlanIndex = rand.next(plans.size() - 1); 149 | if(randomPlanIndex >= childIndex) 150 | randomPlanIndex++; 151 | GMatrix& randPlan = *plans[randomPlanIndex]; 152 | GMatrix* pPlanCopy = new GMatrix(randPlan); 153 | delete(plans[childIndex]); 154 | plans[childIndex] = pPlanCopy; 155 | } else if(d < 0.7) { 156 | // Cross-over (sexual reproduction) 157 | GMatrix& mother = *plans[rand.next(plans.size())]; 158 | GMatrix& father = *plans[rand.next(plans.size())]; 159 | size_t crossOverPoint = rand.next(mother.rows()); 160 | GMatrix* pChild = new GMatrix(0, mother.cols()); 161 | for(size_t i = 0; i < crossOverPoint; i++) 162 | pChild->newRow().copy(mother[i]); 163 | for(size_t i = crossOverPoint; i < father.rows(); i++) 164 | pChild->newRow().copy(father[i]); 165 | delete(plans[childIndex]); 166 | plans[childIndex] = pChild; 167 | } else { 168 | // Interpolation/extrapolation 169 | GMatrix& mother = *plans[rand.next(plans.size())]; 170 | GMatrix& father = *plans[rand.next(plans.size())]; 171 | size_t len = std::min(mother.rows(), father.rows()); 172 | GMatrix* pChild = new GMatrix(len, mother.cols()); 173 | double alpha = rand.uniform() * 2.0; 174 | for(size_t i = 0; i < len; i++) 175 | { 176 | GVec& a = mother[i]; 177 | GVec& b = father[i]; 178 | GVec& c = (*pChild)[i]; 179 | for(size_t j = 0; j < c.size(); j++) 180 | c[j] = alpha * a[j] + (1.0 - alpha) * b[j]; 181 | c.clip(0.0, 1.0); 182 | } 183 | delete(plans[childIndex]); 184 | plans[childIndex] = pChild; 185 | } 186 | } 187 | 188 | 189 | /// Returns the expected contentment at the end of the plan 190 | double PlanningSystem::evaluatePlan(const GVec& beliefs, GMatrix& plan) 191 | { 192 | transitionModel.getFinalBeliefs(beliefs, plan, buf); 193 | return contentmentModel.evaluate(buf) * std::pow(discountFactor, plan.rows()); 194 | } 195 | 196 | 197 | /// Performs a tournament between two randomly-selected plans. 198 | /// One of them, usually the winner, is replaced. 199 | void PlanningSystem::tournament(const GVec& beliefs) 200 | { 201 | size_t a = rand.next(plans.size()); 202 | size_t b = rand.next(plans.size()); 203 | bool a_prevails; 204 | if(rand.uniform() < 0.3) 205 | a_prevails = true; // Let a random plan prevail 206 | else { 207 | // Let the better plan prevail 208 | double fitnessA = evaluatePlan(beliefs, *plans[a]); 209 | double fitnessB = evaluatePlan(beliefs, *plans[b]); 210 | if(fitnessA >= fitnessB) 211 | a_prevails = true; 212 | else 213 | a_prevails = false; 214 | } 215 | replace(a_prevails ? 
b : a); 216 | } 217 | 218 | 219 | /// Performs several iterations of plan refinement 220 | void PlanningSystem::refinePlans(const GVec& beliefs) 221 | { 222 | 223 | // If we are still burning in, then the models are probably not even reliable enough to make refining plans worthwhile 224 | if(burnIn > 0) 225 | return; 226 | 227 | for(size_t i = 0; i < refinementIters; i++) { 228 | double d = rand.uniform(); 229 | if(d < 0.65) 230 | mutate(); 231 | else 232 | tournament(beliefs); 233 | } 234 | } 235 | 236 | /* 237 | void PlanningSystem::checkPlans() 238 | { 239 | for(size_t i = 0; i < plans.size(); i++) 240 | { 241 | GMatrix& p = *plans[i]; 242 | for(size_t j = 0; j < p.rows(); j++) 243 | { 244 | if(p[j].size() != p.cols()) 245 | throw Ex("found the problem"); 246 | } 247 | } 248 | } 249 | */ 250 | 251 | /// Drops the first action in every plan 252 | void PlanningSystem::advanceTime() 253 | { 254 | for(size_t i = 0; i < plans.size(); i++) 255 | { 256 | GMatrix& p = *plans[i]; 257 | if(p.rows() > 0) 258 | { 259 | // Move the first action vector in each plan to the end 260 | GVec* tmp = p.releaseRowPreserveOrder(0); 261 | p.takeRow(tmp); 262 | } 263 | } 264 | } 265 | 266 | 267 | /// Asks the mentor to evaluate the plan, given our current beliefs, and learn from it 268 | void PlanningSystem::askMentorToEvaluatePlan(const GVec& beliefs, GMatrix& plan) 269 | { 270 | transitionModel.getFinalBeliefs(beliefs, plan, buf); 271 | observationModel.beliefsToObservations(buf, buf2); 272 | double feedback = mentor->evaluatePlan(self, plan); 273 | if(feedback != UNKNOWN_REAL_VALUE) 274 | contentmentModel.trainIncremental(buf, feedback); 275 | } 276 | 277 | 278 | /// Finds the best plan and copies its first step 279 | void PlanningSystem::chooseNextActions(const GVec& beliefs, GVec& actions) 280 | { 281 | if(tutor) 282 | tutor->choose_actions(beliefs, actions); 283 | else 284 | { 285 | // Find the best plan (according to the contentment model) and ask the mentor to evaluate it 286 | size_t planBestIndex = 0; 287 | double bestContentment = -1e300; 288 | for(size_t i = 0; i < plans.size(); i++) 289 | { 290 | double d = evaluatePlan(beliefs, *plans[i]); 291 | if(d > bestContentment) 292 | { 293 | bestContentment = d; 294 | planBestIndex = i; 295 | } 296 | } 297 | //std::cout << "Best contentment: " << to_str(bestContentment) << "\n"; 298 | GMatrix& bestPlan = *plans[planBestIndex]; 299 | askMentorToEvaluatePlan(beliefs, bestPlan); 300 | 301 | // Pick a random plan from the population and ask the mentor to evaluate it (for contrast) 302 | size_t planBindex = rand.next(plans.size() - 1); 303 | if(planBindex >= planBestIndex) 304 | planBindex++; 305 | askMentorToEvaluatePlan(beliefs, *plans[planBindex]); 306 | 307 | // Make a random one-step plan, and ask the mentor to evaluate it (for contrast) 308 | GVec& action = randomPlan[0]; 309 | action.fillUniform(rand); 310 | askMentorToEvaluatePlan(beliefs, randomPlan); 311 | 312 | // Copy the first action vector of the best plan for our chosen action 313 | GVec* bestActions = &bestPlan[0]; 314 | if(burnIn > 0 || rand.uniform() < explorationRate) 315 | bestActions = &randomPlan[0]; 316 | if(burnIn > 0) 317 | burnIn--; 318 | GAssert(bestActions->size() == actionDims); 319 | actions.copy(*bestActions); 320 | } 321 | } 322 | -------------------------------------------------------------------------------- /cpp/src/PlanningSystem.h: -------------------------------------------------------------------------------- 1 | #ifndef PLANNINGSYSTEM_H 2 | #define PLANNINGSYSTEM_H 3 | 4 
| #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "Mentor.h" 10 | #include "TransitionModel.h" 11 | #include "ObservationModel.h" 12 | #include "ContentmentModel.h" 13 | 14 | using namespace GClasses; 15 | 16 | 17 | /// A genetic algorithm that sequences actions to form a plan intended to maximize contentment. 18 | class PlanningSystem 19 | { 20 | public: 21 | Agent& self; 22 | std::vector plans; 23 | TransitionModel& transitionModel; 24 | ObservationModel& observationModel; 25 | ContentmentModel& contentmentModel; 26 | Mentor* mentor; 27 | Tutor* tutor; 28 | size_t maxPlanLength; 29 | size_t refinementIters; 30 | size_t actionDims; 31 | size_t burnIn; 32 | double discountFactor; 33 | double explorationRate; 34 | GRand& rand; 35 | GMatrix randomPlan; 36 | GVec buf; 37 | GVec buf2; 38 | 39 | 40 | // General-purpose constructor 41 | PlanningSystem(Agent& agent, TransitionModel& transition, ObservationModel& observation, ContentmentModel& contentment, Mentor* oracle, 42 | size_t actionDimensions, size_t populationSize, size_t planRefinementIters, size_t burnInIters, size_t maxPlanLen, double discount, double explore, GRand& r); 43 | 44 | /// Unmarshaling constructor 45 | PlanningSystem(GDomNode* pNode, Agent& agent, GRand& r, TransitionModel& transition, ObservationModel& observation, ContentmentModel& contentment, Mentor* oracle); 46 | 47 | ~PlanningSystem(); 48 | 49 | /// Marshals this model to a JSON DOM. 50 | GDomNode* marshal(GDom* pDoc); 51 | 52 | /// Replaces the mentor with the specified one 53 | void setMentor(Mentor* oracle); 54 | 55 | /// Sets the tutor 56 | void setTutor(Tutor* t) { tutor = t; } 57 | 58 | /// Prints a representation of all the plans to stdout 59 | void printPlans(); 60 | 61 | /// Perturbs a random plan 62 | void mutate(); 63 | 64 | /// Replaces the specified plan with a new one. 65 | void replace(size_t childIndex); 66 | 67 | /// Returns the expected contentment at the end of the plan 68 | double evaluatePlan(const GVec& beliefs, GMatrix& plan); 69 | 70 | /// Performs a tournament between two randomly-selected plans. 71 | /// One of them, usually the winner, is replaced. 72 | void tournament(const GVec& beliefs); 73 | 74 | /// Performs several iterations of plan refinement 75 | void refinePlans(const GVec& beliefs); 76 | 77 | /// Drops the first action in every plan 78 | void advanceTime(); 79 | 80 | /// Asks the mentor to evaluate the plan, given our current beliefs, and learn from it 81 | void askMentorToEvaluatePlan(const GVec& beliefs, GMatrix& plan); 82 | 83 | /// Finds the best plan and copies its first step 84 | void chooseNextActions(const GVec& beliefs, GVec& actions); 85 | }; 86 | 87 | #endif 88 | -------------------------------------------------------------------------------- /cpp/src/Test.h: -------------------------------------------------------------------------------- 1 | #ifndef TEST_H 2 | #define TEST_H 3 | 4 | class Agent; 5 | 6 | class Test 7 | { 8 | public: 9 | virtual ~Test() {} 10 | 11 | /// Evaluates the general intelligence of the agent with some task. 12 | /// Returns a number that represents the intelligence of the agent. 13 | /// (More intelligent agents should achieve a higher score. 14 | /// Less intelligent agents should achieve a lower score. 15 | /// The scores may be span any range, even negative values.) 
16 | virtual double test(Agent& agent) = 0; 17 | }; 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /cpp/src/TransitionModel.cpp: -------------------------------------------------------------------------------- 1 | #include "TransitionModel.h" 2 | 3 | /// General-purpose constructor 4 | TransitionModel::TransitionModel(size_t input_dims, size_t output_dims, size_t total_layers, size_t queue_size, size_t trainItersPerPattern, GRand& r) 5 | : rand(r), 6 | trainInput(queue_size, input_dims), 7 | trainOutput(queue_size, output_dims), 8 | tutor(nullptr), 9 | trainPos(0), 10 | trainSize(0), 11 | trainIters(trainItersPerPattern), 12 | trainProgress(0), 13 | err(0), 14 | prevErr(0) 15 | { 16 | size_t hidden = std::max((size_t)30, output_dims); 17 | model.addLayer(new GLayerClassic(input_dims, hidden)); 18 | model.addLayer(new GLayerClassic(hidden, output_dims)); 19 | GUniformRelation relIn(input_dims); 20 | GUniformRelation relOut(output_dims); 21 | model.beginIncrementalLearning(relIn, relOut); 22 | model.setLearningRate(0.03); 23 | } 24 | 25 | 26 | /// Unmarshaling constructor 27 | TransitionModel::TransitionModel(GDomNode* pNode, GRand& r) 28 | : rand(r), 29 | model(pNode->field("model")), 30 | trainInput(pNode->field("trainInput")), 31 | trainOutput(pNode->field("trainOutput")), 32 | tutor(nullptr), 33 | trainPos(pNode->field("trainPos")->asInt()), 34 | trainSize(pNode->field("trainSize")->asInt()), 35 | trainIters(pNode->field("trainIters")->asInt()), 36 | trainProgress(pNode->field("trainProgress")->asInt()), 37 | err(pNode->field("err")->asDouble()), 38 | prevErr(pNode->field("prevErr")->asDouble()) 39 | { 40 | } 41 | 42 | 43 | /// Marshals this model to a JSON DOM. 44 | GDomNode* TransitionModel::marshal(GDom* pDoc) 45 | { 46 | GDomNode* pNode = pDoc->newObj(); 47 | pNode->addField(pDoc, "model", model.serialize(pDoc)); 48 | pNode->addField(pDoc, "trainPos", pDoc->newInt(trainPos)); 49 | pNode->addField(pDoc, "trainSize", pDoc->newInt(trainSize)); 50 | pNode->addField(pDoc, "trainIters", pDoc->newInt(trainIters)); 51 | pNode->addField(pDoc, "trainInput", trainInput.serialize(pDoc)); 52 | pNode->addField(pDoc, "trainOutput", trainOutput.serialize(pDoc)); 53 | pNode->addField(pDoc, "trainProgress", pDoc->newInt(trainProgress)); 54 | pNode->addField(pDoc, "err", pDoc->newDouble(err)); 55 | pNode->addField(pDoc, "prevErr", pDoc->newDouble(prevErr)); 56 | return pNode; 57 | } 58 | 59 | 60 | /// Returns the number of action dims 61 | size_t TransitionModel::actionDims() 62 | { 63 | return model.layer(0).inputs() - model.layer(model.layerCount() - 1).outputs(); 64 | } 65 | 66 | 67 | /// Performs one pattern-presentation of stochastic gradient descent, and dynamically tunes the learning rate 68 | void TransitionModel::doSomeTraining() 69 | { 70 | // Present one pattern 71 | double lambda = model.learningRate() * 0.0000001; 72 | model.scaleWeights(1.0 - lambda); 73 | model.diminishWeights(lambda); 74 | size_t index = rand.next(trainSize); 75 | model.trainIncremental(trainInput.row(index), trainOutput.row(index)); 76 | err += trainOutput.row(index).squaredDistance(model.outputLayer().activation()); 77 | 78 | // Measure how we are doing 79 | trainProgress++; 80 | if(trainProgress >= trainInput.rows()) { 81 | trainProgress = 0; 82 | prevErr = std::sqrt(err / trainInput.rows()); 83 | err = 0.0; 84 | //std::cout << "Transition error:" << to_str(prevErr) << "\n"; 85 | } 86 | } 87 | 88 | 89 | /// Refines this model based on a recently performed action 
and change in beliefs 90 | void TransitionModel::trainIncremental(const GVec& beliefs, const GVec& actions, const GVec& nextBeliefs) 91 | { 92 | // Buffer the pattern 93 | GVec& destIn = trainInput.row(trainPos); 94 | GVec& destOut = trainOutput.row(trainPos); 95 | trainPos++; 96 | trainSize = std::max(trainSize, trainPos); 97 | if(trainPos >= trainInput.rows()) 98 | trainPos = 0; 99 | if(beliefs.size() + actions.size() != destIn.size() || beliefs.size() != destOut.size()) 100 | throw Ex("size mismatch"); 101 | destIn.put(0, beliefs); 102 | destIn.put(beliefs.size(), actions); 103 | for(size_t i = 0; i < destOut.size(); i++) 104 | destOut[i] = 0.5 * (nextBeliefs[i] - beliefs[i]); 105 | /* 106 | destIn.print(); 107 | std::cout << "->"; 108 | destOut.print(); 109 | std::cout << "\n"; 110 | std::cout << to_str(0.5 * cos(destIn[2])) << ", " << to_str(0.5 * sin(destIn[2])) << "\n"; 111 | */ 112 | // Refine the model 113 | size_t iters = std::min(trainIters, 1000 * trainSize); 114 | for(size_t i = 0; i < iters; i++) 115 | doSomeTraining(); 116 | } 117 | 118 | 119 | /// Predict the belief vector that will result if the specified action is performed 120 | void TransitionModel::anticipateNextBeliefs(const GVec& beliefs, const GVec& actions, GVec& anticipatedBeliefs) 121 | { 122 | if(tutor) 123 | tutor->transition(beliefs, actions, anticipatedBeliefs); 124 | else 125 | { 126 | GAssert(beliefs.size() + actions.size() == model.layer(0).inputs()); 127 | buf.resize(beliefs.size() + actions.size()); 128 | buf.put(0, beliefs); 129 | buf.put(beliefs.size(), actions); 130 | model.forwardProp(buf); 131 | anticipatedBeliefs.copy(beliefs); 132 | anticipatedBeliefs.addScaled(2.0, model.outputLayer().activation()); 133 | anticipatedBeliefs.clip(-1.0, 1.0); 134 | } 135 | } 136 | 137 | 138 | /// Compute the anticipated belief vector that will result if the specified plan is executed. 139 | void TransitionModel::getFinalBeliefs(const GVec& beliefs, const GMatrix& plan, GVec& outFinalBeliefs) 140 | { 141 | if(plan.rows() > 0) 142 | anticipateNextBeliefs(beliefs, plan[0], outFinalBeliefs); 143 | for(size_t i = 1; i < plan.rows(); i++) { 144 | anticipateNextBeliefs(outFinalBeliefs, plan[i], outFinalBeliefs); 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /cpp/src/TransitionModel.h: -------------------------------------------------------------------------------- 1 | #ifndef TRANSITIONMODEL_H 2 | #define TRANSITIONMODEL_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "Mentor.h" 10 | 11 | using namespace GClasses; 12 | 13 | 14 | /// A model that maps from current beliefs and actions to anticipated beliefs. 15 | /// This model is trained in a supervised manner. 16 | class TransitionModel { 17 | public: 18 | GRand& rand; 19 | GNeuralNet model; 20 | GMatrix trainInput; 21 | GMatrix trainOutput; 22 | Tutor* tutor; 23 | size_t trainPos; 24 | size_t trainSize; 25 | size_t trainIters; 26 | size_t trainProgress; 27 | double err; 28 | double prevErr; 29 | GVec buf; 30 | 31 | 32 | /// General-purpose constructor 33 | TransitionModel(size_t input_dims, size_t output_dims, size_t total_layers, size_t queue_size, size_t trainItersPerPattern, GRand& r); 34 | 35 | /// Unmarshaling constructor 36 | TransitionModel(GDomNode* pNode, GRand& r); 37 | 38 | /// Marshals this model to a JSON DOM. 
39 | GDomNode* marshal(GDom* pDoc); 40 | 41 | /// Returns the number of action dims 42 | size_t actionDims(); 43 | 44 | /// Sets the tutor 45 | void setTutor(Tutor* t) { tutor = t; } 46 | 47 | /// Performs one pattern-presentation of stochastic gradient descent, and dynamically tunes the learning rate 48 | void doSomeTraining(); 49 | 50 | /// Refines this model based on a recently performed action and change in beliefs 51 | void trainIncremental(const GVec& beliefs, const GVec& actions, const GVec& nextBeliefs); 52 | 53 | /// Predict the belief vector that will result if the specified action is performed 54 | void anticipateNextBeliefs(const GVec& beliefs, const GVec& actions, GVec& anticipatedBeliefs); 55 | 56 | /// Compute the anticipated belief vector that will result if the specified plan is executed. 57 | void getFinalBeliefs(const GVec& beliefs, const GMatrix& plan, GVec& outFinalBeliefs); 58 | }; 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /cpp/src/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | The contents of this file are dedicated by all of its authors, including 3 | 4 | Michael S. Gashler, 5 | anonymous contributors, 6 | 7 | to the public domain (http://creativecommons.org/publicdomain/zero/1.0/). 8 | 9 | Note that some moral obligations still exist in the absence of legal ones. 10 | For example, it would still be dishonest to deliberately misrepresent the 11 | origin of a work. Although we impose no legal requirements to obtain a 12 | license, it is beseeming for those who build on the works of others to 13 | give back useful improvements, or pay it forward in their own field. If 14 | you would like to cite us, a published paper about Waffles can be found 15 | at http://jmlr.org/papers/volume12/gashler11a/gashler11a.pdf. If you find 16 | our code to be useful, the Waffles team would love to hear how you use it. 
17 | */ 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include "Agent.h" 26 | #include "Test.h" 27 | #include 28 | #include "AgentRandy.h" 29 | #include "AgentManic.h" 30 | #include "DriftingPlatform.h" 31 | 32 | 33 | using namespace GClasses; 34 | using std::cerr; 35 | using std::cout; 36 | using std::vector; 37 | 38 | 39 | void gauntlet(std::vector& agents, std::vector& tests) 40 | { 41 | GMatrix results(tests.size(), agents.size()); 42 | 43 | // Evaluate every agent against every test 44 | for(size_t i = 0; i < tests.size(); i++) 45 | { 46 | Test& challenge = *tests[i]; 47 | for(size_t j = 0; j < agents.size(); j++) 48 | { 49 | Agent& agent = *agents[j]; 50 | double result = challenge.test(agent); 51 | results.row(i)[j] = result; 52 | } 53 | } 54 | 55 | cout << "\n\n"; 56 | cout << "-------------\n"; 57 | cout << "Final results\n"; 58 | cout << "-------------\n"; 59 | cout << "[" << agents[0]->getName(); 60 | for(size_t i = 1; i < agents.size(); i++) { 61 | cout << "," << agents[i]->getName(); 62 | } 63 | cout << "]\n"; 64 | results.print(cout); 65 | } 66 | 67 | void doit() 68 | { 69 | 70 | GRand r(1234); 71 | 72 | // Make a list of agents 73 | vector agents; 74 | agents.push_back(new AgentRandy(r)); 75 | agents.push_back(new AgentManic(r)); 76 | 77 | // Make a list of tests 78 | vector tests; 79 | tests.push_back(new DriftingPlatform(r)); 80 | 81 | // Run the agents through the gauntlet 82 | gauntlet(agents, tests); 83 | 84 | for(size_t i = 0; i < agents.size(); i++) 85 | delete(agents[i]); 86 | for(size_t i = 0; i < tests.size(); i++) 87 | delete(tests[i]); 88 | } 89 | 90 | int main(int argc, char *argv[]) 91 | { 92 | #ifdef _DEBUG 93 | GApp::enableFloatingPointExceptions(); 94 | #endif 95 | int nRet = 0; 96 | try 97 | { 98 | GArgReader args(argc, argv); 99 | doit(); 100 | } 101 | catch(const std::exception& e) 102 | { 103 | cerr << e.what() << "\n"; 104 | nRet = 1; 105 | } 106 | 107 | return nRet; 108 | } 109 | 110 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 |
25 | 26 |
27 | 28 |

A Java implementation of the MANIC cognitive architecture

29 | 30 |



31 |

Q and A:

32 | 33 |
    34 |
  • What the heck is this? A cognitive architecture is a design for a machine 35 | that attempts to achieve human-like thinking abilities. 36 | A cognitive architecture called MANIC 37 | was designed at the University of Arkansas. This is an implementation of that architecture. 38 |


  • 39 | 40 |
  • Does it work? It passes some tests. More testing is still needed.


  • 41 | 42 |
  • How does it work? You could start by reading the paper. 43 | After that, the code fills in the rest of the details. 44 |


  • 45 | 46 |
  • Can you give me an overview of the structure of this code? 47 |
     48 | manic
     49 |   |
     50 |   +--> docs   (You are here.)
     51 |   |
     52 |   +--> java   (The Java version.)
     53 |   |     |
     54 |   |     +--> src
     55 |   |           |
     56 |   |           +--> agents    (Implementations of agents, including MANIC.)
     57 |   |           |      |
     58 |   |           |      +---> manic  (The implementation of MANIC.
     59 |   |           |      |             ...what this project is all about.)
     60 |   |           |      |
     61 |   |           |      +---> randy  (An agent that makes random decisions.
     62 |   |           |                    Basically, this is a straw man for MANIC
     63 |   |           |                    to destroy.)
     64 |   |           |
     65 |   |           +---> tests    (Tests for evaluating the agents.)
     66 |   |           |
     67 |   |           +---> common   (Various interfaces and classes that are
     68 |   |                           used throughout the project.)
     69 |   |
     70 |   +--> cpp   (The C++ version.)
     71 |         |
     72 |         +--> src   (The C++ source code.)
     73 |         |
     74 |         +--> bin   (The C++ binaries.)
     75 | 
     76 | 
    77 |


  • 78 | 79 |
  • What do I need to know to add a new test to your collection? 80 | First, take a look at the ITest interface in manic/java/src. 81 | To make a test, you just need to write a class that implements this interface. 82 | Next, take a look at manic/java/src/Main.java. 83 | To add your test to the collection, just add it here. 84 | Perhaps the best way to get started is to copy one of the existing tests, 85 | then modify it to do what you want. 86 | Every test will do the following things: 87 |
      88 |
    1. Instantiate a mentor
    2. 89 |
    3. Reset the agent
    4. 90 |
    5. Call "IAgent.think" several thousand times
    6. 91 |
    7. Evaluate how well the agent did (usually without any feedback from the mentor)
    8. 92 |
    93 |


  • 94 | 95 |
  • How can you say MANIC is intelligent if it needs a mentor? 96 | You can think of the mentor as part of each test challenge. 97 | The role of the mentor is to help the agent know what it is supposed to do, 98 | not to tell the agent exactly how to do it. Of course, that can be a fuzzy line. 99 | That's why it takes a lot of challenges to make a good test. 100 |


  • 101 | 102 |
  • How do I make a mentor? 103 | Make a class that implements the IMentor interface in manic/java/src. 104 |


  • 105 | 106 |
  • How do I reset the agent, and what does resetting the agent do? 107 | You call the "IAgent.reset" method. This tells the agent what it needs to know to begin performing your test: 108 |
      109 |
    • Which mentor will show it what to do?
    • 110 |
    • How many values will it sense or observe each time-step?
    • 111 |
    • How many values does it need to represent state in your world?
    • 112 |
    • How many values do you expect it to provide for its actions?
    • 113 |
    • How many time steps into the future do you expect it to plan?
    • 114 |
    115 |


  • 116 | 117 |
  • What does calling "IAgent.think" do? 118 | You call "IAgent.think" to give the agent "life". 119 | When you call this method, you pass in a vector of observations, 120 | and it returns a vector of actions. 121 | Each observed value should be a continuous value between -1 and 1. 122 | Each action value will be a continuous value between 0 and 1. 123 | What do all those values mean? Well, that's up to your test. 124 | Your job is to write a test that uses those vectors in some meaningful way. 125 | The agent's job is to figure out what the values mean and use them intelligently. 126 |


  • 127 | 128 |
  • How do I evaluate the agent? 129 | Usually, when you are evaluating the agent, you will have the mentor always 130 | return NO_FEEDBACK. This means it is no longer providing the agent with any 131 | useful information. How you evaluate the agent is really up to you. 132 | Your test will return a number that represents how well the agent performed 133 | at your test. Larger numbers are better. Smaller numbers are worse. 134 | Scores should only be used for ranking, so there is no established range for the scores. 135 |


  • 136 | 137 |
  • What if MANIC doesn't pass my test? 138 | Could a human pass the test? Could a human pass it without utilizing 139 | any knowledge obtained outside the test? If not, it is probably not 140 | reasonable to expect a computer to pass it either. If a human could pass it, 141 | please contribute your test, so we can work to improve our agents until they pass it. 142 | Those are the tests we want most of all! 143 |


  • 144 | 145 |
  • Can MANIC be persisted to a file? Yes: 146 |
    147 | JSONObject obj = agent.marshal();
    148 | FileWriter file = new FileWriter("agent.json");
    149 | file.write(obj.toJSONString());
    150 | file.close();
    151 | 
    152 | And, you can restore it from a file too: 153 |
    154 | JSONParser parser = new JSONParser();
    155 | JSONObject obj2 = (JSONObject)parser.parse(new FileReader("agent.json"));
    156 | AgentManic agent2 = new AgentManic(obj2, new Random(1234), new MyMentor());
    157 | 
    158 |


  • 159 | 160 |
  • What is the license of this code? 161 | The code in manic/src/common/json contains code under the Apache 2.0 license. 162 | The rest was written by me and other contributors, who all agree to release their code under the Creative Commons Zero public domain dedication. 163 | In other words, you can do pretty much whatever you want with this code. 164 |


  • 165 | 166 |
  • How would one debug this thing with Eclipse? 167 |
      168 |
    1. Launch Eclipse
    2. 169 |
    3. If it asks you, choose a default workspace.
    4. 170 |
    5. Close the welcome page
    6. 171 |
    7. File->New->Project->Java Project->Next
    8. 172 |
    9. Uncheck "Use default location", and click Browse
    10. 173 |
    11. Find the "manic/src" folder, and click OK, then Finish
    12. 174 |
    13. If it asks to open the associated perspective, choose Yes
    14. 175 |
    15. In the "Package Explorer" window, right-click on "src", then click "Properties"
    16. 176 |
    17. Click Java Compiler->Enable project specific settings, and set the Compiler compliance level to at least 1.7
    18. 177 |
    19. Click OK. Yes, it is okay for it to rebuild. Now, it should build without errors.
    20. 178 |
    21. Click on the bug icon. Choose Java Application->OK.
    22. 179 |
    23. Set a breakpoint somewhere. Yes, it is okay to change the perspective again.
    24. 180 |
    25. Rearrange your windows so you can actually see what you are doing.
    26. 181 |
    27. Now you know why Eclipse is dying.
    28. 182 |
    183 |


  • 184 | 185 |
  • Why is MANIC so slow? 186 | It does a lot of stuff. The neurons in your brain think in parallel. 187 | This code is doing it all in serial on the Java Virtual Machine. 188 |


  • 189 | 190 |
  • Does this implementation include the functional equivalence of sentience that the paper conjectures about? 191 | No. Why not? No one knows how to test for sentience, anyway, so what would be the purpose 192 | in implementing it? 193 |


  • 194 | 195 | 196 | 197 |
198 | 199 | 200 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 |
25 | 26 |
27 | 28 |

A Java implementation of the MANIC cognitive architecture

29 | 30 |



31 |

Q and A:

32 | 33 |
    34 |
  • What the heck is this? A cognitive architecture is a design for a machine 35 | that attempts to achieve human-like thinking abilities. 36 | A cognitive architecture called MANIC 37 | was designed at the University of Arkansas. This is an implementation of that architecture. 38 |


  • 39 | 40 |
  • Does it work? It passes some tests. More testing is still needed.


  • 41 | 42 |
  • How does it work? You could start by reading the paper. 43 | After that, the code fills in the rest of the details. 44 |
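In outline, each call to "IAgent.think" in AgentManic does the following (condensed from AgentManic.java; the swap of current and anticipated beliefs and the NO_FEEDBACK guard are omitted here):

    // Learn from the new observations...
    observationModel.trainIncremental(observations);                        // learn to perceive a little better
    observationModel.calibrateBeliefs(anticipatedBeliefs, observations);    // reconcile beliefs with what was actually seen
    transitionModel.trainIncremental(beliefs, actions, anticipatedBeliefs); // learn the consequences of the last action

    // ...then decide what to do.
    planningSystem.advanceTime();                       // drop the first action of every plan
    planningSystem.refinePlans(beliefs);                // evolve the population of candidate plans
    planningSystem.chooseNextActions(beliefs, actions); // pick the next action (or explore randomly)
    transitionModel.anticipateNextBeliefsInPlace(beliefs, actions, anticipatedBeliefs); // predict what comes next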


  • 45 | 46 |
  • Can you give me an overview of the structure of this code? 47 |
     48 | manic
     49 |   |
     50 |   +--> docs   (You are here.)
     51 |   |
     52 |   +--> java   (For Java programmers)
     53 |   |     |
     54 |   |     +--> class (Where compiled bytecode goes)
     55 |   |     |
     56 |   |     +--> src   (The Java source code)
     57 |   |           |
     58 |   |           +--> agents    (Implementations of various agents, including MANIC.)
     59 |   |           |      |
     60 |   |           |      +---> manic  (The implementation of MANIC.
     61 |   |           |      |             ...what this project is all about.)
     62 |   |           |      |
     63 |   |           |      +---> randy  (An agent that makes random decisions. Basically,
     64 |   |           |                    this is a straw man for MANIC to destroy.)
     65 |   |           |
     66 |   |           +---> tests    (A collection of tests for evaluating the agents.)
     67 |   |           |
     68 |   |           +---> common   (Various interfaces and classes that are
     69 |   |                           used throughout the project.)
     70 |   |
     71 |   +--> cpp   (for C++ programmers)
     72 |         |
     73 |         +--> bin   (Where the binaries go)
     74 |         |
     75 |         +--> src   (The C++ source code)
     76 |                       
     77 | 
    78 |


  • 79 | 80 |
  • What do I need to know to add a new test to the Java version of your collection? 81 | First, take a look at the ITest interface in manic/java/src. 82 | To make a test, you just need to write a class that implements this interface. 83 | Next, take a look at manic/java/src/Main.java. 84 | To add your test to the collection, just add it here. 85 | Perhaps the best way to get started is to copy one of the existing tests, 86 | then modify it to do what you want. 87 | Every test will do the following things (a minimal sketch follows this list): 88 |
      89 |
    1. Instantiate a mentor
    2. 90 |
    3. Reset the agent
    4. 91 |
    5. Call "IAgent.think" several thousand times
    6. 92 |
    7. Evaluate how well the agent did (usually without any feedback from the mentor)
    8. 93 |
    94 |
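For instance, a bare-bones test might look like the sketch below. Everything here is invented for illustration (the ToyTest and ToyMentor names, the toy world, and the iteration counts), and it assumes ITest declares a single "double test(IAgent agent)" method, mirroring the C++ Test.h in this repository:

    import java.util.Random;

    // Hypothetical test: the agent is rewarded for steering a 2-D position toward the origin.
    public class ToyTest implements ITest {
        Random rand = new Random(1234);

        // A mentor that prefers plans whose anticipated observations have a small magnitude.
        static class ToyMentor implements IMentor {
            public double evaluatePlan(IAgent agent, Matrix plan) {
                double[] anticipatedObs = agent.anticipateObservation(plan);
                return Math.exp(-Vec.squaredMagnitude(anticipatedObs));
            }
        }

        public double test(IAgent agent) {
            agent.reset(new ToyMentor(), 2, 2, 1, 8); // mentor, observation dims, belief dims, action dims, max plan length
            double[] state = { rand.nextDouble() * 2.0 - 1.0, rand.nextDouble() * 2.0 - 1.0 };
            for(int i = 0; i < 5000; i++) {
                double[] actions = agent.think(Vec.copy(state)); // observations in, actions out
                double angle = actions[0] * 2.0 * Math.PI;       // interpret the action as a direction
                state[0] = Math.max(-1.0, Math.min(1.0, state[0] + 0.05 * Math.cos(angle)));
                state[1] = Math.max(-1.0, Math.min(1.0, state[1] + 0.05 * Math.sin(angle)));
            }
            return -Vec.squaredMagnitude(state); // closer to the origin scores higher
        }
    }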


  • 95 | 96 |
  • How can you say MANIC is intelligent if it needs a mentor? 97 | You can think of the mentor as part of each test challenge. 98 | The role of the mentor is to help the agent know what it is supposed to do, 99 | not to tell the agent how to do it. Of course, that can be a fuzzy line. 100 | That's why it takes a lot of challenges to make a good test. 101 |


  • 102 | 103 |
  • How do I make a mentor? 104 | Make a class that implements the IMentor interface in manic/java/src. 105 |
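Here is a minimal sketch (not part of the repository), modeled on DriftingPlatformMentor in DriftingPlatform.java. It assumes only the members that the rest of this code already uses: the "double evaluatePlan(IAgent agent, Matrix plan)" method and the NO_FEEDBACK constant:

    // A hypothetical mentor that likes plans which keep the anticipated observations small.
    public class MyMentor implements IMentor {
        boolean givingFeedback = true; // set to false during the evaluation phase

        public double evaluatePlan(IAgent agent, Matrix plan) {
            if(!givingFeedback)
                return NO_FEEDBACK; // tell the agent nothing
            double[] anticipatedObs = agent.anticipateObservation(plan);
            return Math.exp(-Vec.squaredMagnitude(anticipatedObs)); // bigger means happier
        }
    }

Note that the mentor only scores whole plans; it never chooses actions for the agent.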


  • 106 | 107 |
  • How do I reset the agent, and what does resetting the agent do? 108 | You call the "IAgent.reset" method. This tells the agent what it needs to know to begin performing your test (see the sketch after this list): 109 |
      110 |
    • Which mentor will show it what to do?
    • 111 |
    • How many values will it sense or observe each time-step?
    • 112 |
    • How many values does it need to represent state in your world?
    • 113 |
    • How many values do you expect it to provide for its actions?
    • 114 |
    • How many time steps into the future do you expect it to plan?
    • 115 |
    116 |
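In code, that might look like the sketch below. The dimensions are borrowed from a commented-out example in AgentManic.java, and MyMentor stands in for whatever mentor your test provides:

    IAgent agent = new AgentManic(new Random(1234));
    agent.reset(
        new MyMentor(), // which mentor will show it what to do
        8,              // how many values it will observe each time-step
        3,              // how many values it uses to represent state (must be <= the observation count)
        2,              // how many values it provides for its actions
        10);            // how many time-steps into the future it should plan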


  • 117 | 118 |
  • What does calling "IAgent.think" do? 119 | You call "IAgent.think" to give the agent "life". 120 | When you call this method, you pass in a vector of observations, 121 | and it returns a vector of actions. 122 | Each observed value should be a continuous value between -1 and 1. 123 | Each action value will be a continuous value between 0 and 1. 124 | What do all those values mean? Well, that's up to your test. 125 | Your job is to write a test that uses those vectors in some meaningful way. 126 | The agent's job is to figure out what the values mean and use them intelligently. 127 |
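A typical test wraps "IAgent.think" in a loop like this sketch (the agent variable is assumed to have been reset as above):

    double[] observations = new double[8]; // every value must be in [-1, 1]
    for(int t = 0; t < 1000; t++) {
        double[] actions = agent.think(observations); // every returned value is in [0, 1]
        // Apply 'actions' to your world here, then refill 'observations'
        // from the world's new state before the next call to think.
    }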


  • 128 | 129 |
  • How do I evaluate the agent? 130 | Usually, when you are evaluating the agent, you will have the mentor always 131 | return NO_FEEDBACK. This means it is no longer providing the agent with any 132 | useful information. How you evaluate the agent is really up to you. 133 | Your test will return a number that represents how well the agent performed 134 | at your test. Larger numbers are better. Smaller numbers are worse. 135 | Scores should only be used for ranking, so there is no established range for the scores. 136 |


  • 137 | 138 |
  • What if MANIC doesn't pass my test? 139 | Could a human pass the test? Could a human pass it without utilizing 140 | any knowledge obtained outside the test? If not, it is probably not 141 | reasonable to expect a computer to pass it either. If a human could pass it, 142 | please contribute your test, so we can work to improve our agents until they pass it. 143 | Those are the tests we want most of all! 144 |


  • 145 | 146 |
  • Can MANIC be persisted to a file? Yes: 147 |
    148 | JSONObject obj = agent.marshal();
    149 | FileWriter file = new FileWriter("agent.json");
    150 | file.write(obj.toJSONString());
    151 | file.close();
    152 | 
    153 | And, you can restore it from a file too: 154 |
    155 | JSONParser parser = new JSONParser();
    156 | JSONObject obj2 = (JSONObject)parser.parse(new FileReader("agent.json"));
    157 | AgentManic agent2 = new AgentManic(obj2, new Random(1234), new MyMentor());
    158 | 
    159 |


  • 160 | 161 |
  • What is the license of this code? 162 | The code in manic/src/common/json contains code under the Apache 2.0 license. 163 | The rest was written by me and other contributors, who all agree to release their code under the Creative Commons Zero public domain dedication. 164 | In other words, you can do pretty much whatever you want with this code. 165 |


  • 166 | 167 |
  • How would one debug the Java version with Eclipse? 168 |
      169 |
    1. Launch Eclipse
    2. 170 |
    3. If it asks you, choose a default workspace.
    4. 171 |
    5. Close the welcome page
    6. 172 |
    7. File->New->Project->Java Project->Next
    8. 173 |
    9. Uncheck "Use default location", and click Browse
    10. 174 |
    11. Find the "manic/src" folder, and click OK, then Finish
    12. 175 |
    13. If it asks to open the associated perspective, choose Yes
    14. 176 |
    15. In the "Package Explorer" window, right-click on "src", then click "Properties"
    16. 177 |
    17. Click Java Compiler->Enable project specific settings, and set the Compiler compliance level to at least 1.7
    18. 178 |
    19. Click OK. Yes, it is okay for it to rebuild. Now, it should build without errors.
    20. 179 |
    21. Click on the bug icon. Choose Java Application->OK.
    22. 180 |
    23. Set a breakpoint somewhere. Yes, it is okay to change the perspective again.
    24. 181 |
    25. Rearrange your windows so you can actually see what you are doing.
    26. 182 |
    27. Now you know why Eclipse is dying.
    28. 183 |
    184 |


  • 185 | 186 |
  • Why is MANIC so slow? 187 | It does a lot of stuff. Your brain has about 100 billion neurons that all run in parallel. 188 | This code is doing it all in serial on the Java Virtual Machine. 189 |


  • 190 | 191 |
  • Does this implementation include the functional equivalence of sentience that the paper conjectures about? 192 | No. Why not? No one knows how to test for sentience, anyway, so what would be the purpose in implementing it? 193 |


  • 194 | 195 | 196 | 197 |
198 | 199 | 200 | -------------------------------------------------------------------------------- /java/class/stub.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mikegashler/manic/d4cf0cb3eed7d6cb956dda8554ee9faa177636a9/java/class/stub.txt -------------------------------------------------------------------------------- /java/src/AgentManic.java: -------------------------------------------------------------------------------- 1 | import java.util.Random; 2 | 3 | /// Implements a weak artificial general intelligence. 4 | public class AgentManic implements IAgent { 5 | public Random rand; 6 | public TransitionModel transitionModel; 7 | public ObservationModel observationModel; 8 | public ContentmentModel contentmentModel; 9 | public PlanningSystem planningSystem; 10 | public double[] actions; 11 | public double[] beliefs; 12 | public double[] anticipatedBeliefs; 13 | 14 | 15 | // General-purpose constructor. 16 | public AgentManic(Random r) { 17 | rand = r; 18 | } 19 | 20 | public String getName() { return "Manic"; } 21 | 22 | // This method is called to initialize the agent in a new world. 23 | // mentor is an object that helps the agent learn what to do in this world. 24 | // observationDims is the number of double values that the agent observes each time step. 25 | // beliefDims is the number of double values that the agent uses internally to model the state of the world. (It should generally be <= observationDims.) 26 | // actionDims is the number of double values the agent uses to specify an action. 27 | // maxPlanLength specifies the maximum number of time-steps into the future that the agent should attempt to plan. 28 | public void reset(IMentor mentor, int observationDims, int beliefDims, int actionDims, int maxPlanLength) { 29 | if(beliefDims > observationDims) 30 | throw new IllegalArgumentException("Expected beliefDims to be <= observationDims"); 31 | transitionModel = new TransitionModel( 32 | actionDims + beliefDims, 33 | beliefDims, 34 | 2, // number of layers in the transition model 35 | 500, // size of short term memory for transitions 36 | 1000, // number of training iterations to perform with each new sample 37 | rand); 38 | observationModel = new ObservationModel( 39 | transitionModel, 40 | observationDims, 41 | beliefDims, 42 | 2, // number of layers in the decoder 43 | 2, // number of layers in the encoder 44 | 500, // size of short term memory for observations 45 | 50, // number of training iterations to perform with each new sample 46 | 500, // number of iterations to calibrate beliefs to correspond with observations 47 | rand); 48 | contentmentModel = new ContentmentModel( 49 | beliefDims, 50 | 2, // number of layers in the contentment model 51 | 500, // size of short term memory for feedback from the mentor 52 | 50, // number of training iterations to perform with each new sample 53 | rand); 54 | planningSystem = new PlanningSystem( 55 | this, 56 | transitionModel, 57 | observationModel, 58 | contentmentModel, 59 | mentor, 60 | actionDims, 61 | 30, // population size 62 | 50, // number of iterations to refine each member of the population per time step 63 | 500, // burn-in iterations (the number of times at the start to just pick a random action, so the transition function has a chance to explore its space) 64 | maxPlanLength, 65 | 0.99, // discount factor (to make short plans be preferred over long plans that ultimately arrive at nearly the same state) 66 | 0.0, // exploration rate (the 
probability that the agent will choose a random action, just to see what happens) 67 | rand); 68 | actions = new double[actionDims]; 69 | beliefs = new double[beliefDims]; 70 | anticipatedBeliefs = new double[beliefDims]; 71 | teleport(); 72 | } 73 | 74 | 75 | /// Unmarshaling constructor 76 | public AgentManic(Json obj, Random r, IMentor mentor) { 77 | rand = r; 78 | transitionModel = new TransitionModel(obj.get("transition"), r); 79 | observationModel = new ObservationModel(transitionModel, obj.get("observation"), r); 80 | contentmentModel = new ContentmentModel(obj.get("contentment"), r); 81 | planningSystem = new PlanningSystem(obj.get("planning"), this, r, transitionModel, observationModel, contentmentModel, mentor); 82 | actions = new double[transitionModel.actionDims()]; 83 | beliefs = Vec.unmarshal(obj.get("beliefs")); 84 | anticipatedBeliefs = new double[beliefs.length]; 85 | } 86 | 87 | 88 | /// Marshals this agent to a JSON DOM. 89 | public Json marshal() { 90 | Json obj = Json.newObject(); 91 | obj.add("transition", transitionModel.marshal()); 92 | obj.add("observation", observationModel.marshal()); 93 | obj.add("contentment", contentmentModel.marshal()); 94 | obj.add("planning", planningSystem.marshal()); 95 | obj.add("beliefs", Vec.marshal(beliefs)); 96 | return obj; 97 | } 98 | 99 | 100 | /// Replaces the mentor with the specified one 101 | public void setMentor(IMentor mentor) { 102 | planningSystem.setMentor(mentor); 103 | } 104 | 105 | 106 | /// Sets the tutor to use with this agent 107 | public void setTutor(ITutor tutor, boolean helpObservationFunction, boolean helpTransitionFunction, boolean helpContentmentModel, boolean helpPlanningSystem) { 108 | observationModel.setTutor(helpObservationFunction ? tutor : null); 109 | transitionModel.setTutor(helpTransitionFunction ? tutor : null); 110 | contentmentModel.setTutor(helpContentmentModel ? tutor : null); 111 | planningSystem.setTutor(helpPlanningSystem ? tutor : null); 112 | } 113 | 114 | 115 | /// Tells the agent that the next observation passed to learnFromExperience does not follow 116 | /// from the previous one. This should be called when a game is reset, or when the state is 117 | /// adjusted in a manner that the agent is not expected to anticipate. 
118 | public void teleport() { 119 | beliefs[0] = IMentor.NO_FEEDBACK; 120 | } 121 | 122 | 123 | /// Learns from observations 124 | void learnFromExperience(double[] observations) { 125 | 126 | // Learn to perceive the world a little better 127 | observationModel.trainIncremental(observations); 128 | 129 | // Refine beliefs to correspond with the new observations better 130 | observationModel.calibrateBeliefs(anticipatedBeliefs, observations); 131 | 132 | // Learn to anticipate consequences a little better 133 | if(beliefs[0] != IMentor.NO_FEEDBACK) 134 | transitionModel.trainIncremental(beliefs, actions, anticipatedBeliefs); 135 | } 136 | 137 | 138 | /// Returns an action vector 139 | double[] decideWhatToDo() { 140 | 141 | // Make the anticipated beliefs the new beliefs 142 | double[] tmp = beliefs; 143 | beliefs = anticipatedBeliefs; 144 | anticipatedBeliefs = tmp; 145 | 146 | // Drop the first action in every plan 147 | planningSystem.advanceTime(); 148 | 149 | // Try to make the plans better 150 | planningSystem.refinePlans(beliefs); 151 | 152 | // Choose an action that is expected to maximize contentment (with the assistance of the mentor, if available) 153 | planningSystem.chooseNextActions(beliefs, actions); 154 | 155 | // Anticipate how the world will change with time 156 | transitionModel.anticipateNextBeliefsInPlace(beliefs, actions, anticipatedBeliefs); 157 | 158 | // Return the selected actions 159 | return actions; 160 | } 161 | 162 | 163 | /// Anticipates what this agent will observe if the specified plan is performed. 164 | public double[] anticipateObservation(Matrix plan) 165 | { 166 | double[] anticipatedBeliefs = transitionModel.getFinalBeliefs(beliefs, plan); 167 | double[] anticipatedObs = observationModel.beliefsToObservations(anticipatedBeliefs); 168 | return anticipatedObs; 169 | } 170 | 171 | 172 | /// A vector of observations goes in. All observed values may be expected to fall between -1 and 1. 173 | /// Returns a vector of chosen actions. All returned values should fall between 0 and 1. 
174 | public double[] think(double[] observations) { 175 | 176 | // Check the observations 177 | for(int i = 0; i < observations.length; i++) { 178 | if(observations[i] < -1.0 || observations[i] > 1.0) 179 | throw new IllegalArgumentException("Observed values must be between -1 and 1."); 180 | } 181 | 182 | learnFromExperience(observations); 183 | return decideWhatToDo(); 184 | } 185 | 186 | /* 187 | public static void testMarshaling() throws Exception { 188 | // Make an agent 189 | AgentManic agent = new AgentManic( 190 | new Random(1234), 191 | new MyMentor(), 192 | 8, // observation dims 193 | 3, // belief dims 194 | 2, // action dims 195 | 10); // max plan length 196 | 197 | // Write it to a file 198 | JSONObject obj = agent.marshal(); 199 | FileWriter file = new FileWriter("test.json"); 200 | file.write(obj.toJSONString()); 201 | file.close(); 202 | 203 | // Read it from a file 204 | JSONParser parser = new JSONParser(); 205 | JSONObject obj2 = (JSONObject)parser.parse(new FileReader("test.json")); 206 | AgentManic agent2 = new AgentManic(obj2, new Random(1234), new MyMentor()); 207 | 208 | System.out.println("passed"); 209 | } 210 | */ 211 | } 212 | -------------------------------------------------------------------------------- /java/src/AgentRandy.java: -------------------------------------------------------------------------------- 1 | import java.util.Random; 2 | 3 | 4 | // A poor agent that just picks random actions 5 | public class AgentRandy implements IAgent { 6 | double[] recentObservation; 7 | double[] actions; 8 | Random rand; 9 | 10 | 11 | // General-purpose constructor. 12 | public AgentRandy(Random r) { 13 | rand = r; 14 | } 15 | 16 | public String getName() { return "Randy"; } 17 | 18 | // This method is called to initialize the agent in a new world. 19 | // mentor is an object that helps the agent learn what to do in this world. 20 | // observationDims is the number of double values that the agent observes each time step. 21 | // beliefDims is the number of double values that the agent uses internally to model the state of the world. (It should generally be <= observationDims.) 22 | // actionDims is the number of double values the agent uses to specify an action. 23 | // maxPlanLength specifies the maximum number of time-steps into the future that the agent should attempt to plan. 24 | public void reset(IMentor mentor, int observationDims, int beliefDims, int actionDims, int maxPlanLength) { 25 | actions = new double[actionDims]; 26 | } 27 | 28 | 29 | /// Unmarshaling constructor 30 | public AgentRandy(Json obj, Random r, IMentor mentor) { 31 | rand = r; 32 | int actionDims = (int)obj.getLong("actionDims"); 33 | actions = new double[actionDims]; 34 | } 35 | 36 | 37 | /// Marshals this agent to a JSON DOM. 
38 | public Json marshal() { 39 | Json obj = Json.newObject(); 40 | obj.add("actionDims", actions.length); 41 | return obj; 42 | } 43 | 44 | 45 | /// Replaces the mentor with the specified one 46 | public void setMentor(IMentor mentor) { 47 | } 48 | 49 | 50 | /// Sets the tutor to use with this agent 51 | public void setTutor(ITutor tutor, boolean helpObservationFunction, boolean helpTransitionFunction, boolean helpContentmentModel, boolean helpPlanningSystem) { 52 | } 53 | 54 | 55 | /// Does nothing, since this agent has no memory anyway 56 | public void teleport() { 57 | } 58 | 59 | 60 | /// Ignores the plan and anticipates that the most recent observation will occur again 61 | public double[] anticipateObservation(Matrix plan) 62 | { 63 | return recentObservation; 64 | } 65 | 66 | 67 | /// Ignores the observations and picks random actions 68 | public double[] think(double[] observations) { 69 | recentObservation = observations; 70 | for(int i = 0; i < actions.length; i++) { 71 | actions[i] = rand.nextDouble(); 72 | } 73 | 74 | return actions; 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /java/src/ContentmentModel.java: -------------------------------------------------------------------------------- 1 | import java.util.Random; 2 | 3 | /// A model that maps from anticipated beliefs to contentment (or utility). 4 | /// This model is trained by reinforcement from a mentor. 5 | public class ContentmentModel { 6 | public Random rand; 7 | public NeuralNet model; 8 | public Matrix samples; 9 | public Matrix contentment; 10 | ITutor tutor; 11 | public int trainPos; 12 | public int trainSize; 13 | public int trainIters; 14 | public double learningRate; 15 | public int trainProgress; 16 | public double err; 17 | double[] targBuf; 18 | 19 | 20 | // General-purpose constructor 21 | ContentmentModel(int beliefDims, int total_layers, int queue_size, int trainItersPerPattern, Random r) { 22 | 23 | // Init the model 24 | rand = r; 25 | model = new NeuralNet(); 26 | int hidden = Math.min(30, beliefDims * 10); 27 | model.layers.add(new LayerLinear(beliefDims, hidden)); 28 | model.layers.add(new LayerTanh(hidden)); 29 | model.layers.add(new LayerLinear(hidden, 1)); 30 | model.layers.add(new LayerTanh(1)); 31 | model.init(rand); 32 | 33 | // Init the buffers 34 | samples = new Matrix(queue_size, beliefDims); 35 | contentment = new Matrix(queue_size, 1); 36 | 37 | // Init the meta-parameters 38 | trainIters = trainItersPerPattern; 39 | learningRate = 0.03; 40 | targBuf = new double[1]; 41 | } 42 | 43 | 44 | /// Unmarshaling constructor 45 | ContentmentModel(Json obj, Random r) { 46 | rand = r; 47 | model = new NeuralNet(obj.get("model")); 48 | samples = new Matrix(obj.get("samples")); 49 | contentment = new Matrix(obj.get("contentment")); 50 | trainPos = (int)obj.getLong("trainPos"); 51 | trainSize = (int)obj.getLong("trainSize"); 52 | trainIters = (int)obj.getLong("trainIters"); 53 | learningRate = obj.getDouble("learningRate"); 54 | trainProgress = (int)obj.getLong("trainProgress"); 55 | err = obj.getDouble("err"); 56 | targBuf = new double[1]; 57 | } 58 | 59 | 60 | /// Marshals this model to a JSON DOM. 
--------------------------------------------------------------------------------
/java/src/ContentmentModel.java:
--------------------------------------------------------------------------------
1 | import java.util.Random;
2 |
3 | /// A model that maps from anticipated beliefs to contentment (or utility).
4 | /// This model is trained by reinforcement from a mentor.
5 | public class ContentmentModel {
6 | public Random rand;
7 | public NeuralNet model;
8 | public Matrix samples;
9 | public Matrix contentment;
10 | ITutor tutor;
11 | public int trainPos;
12 | public int trainSize;
13 | public int trainIters;
14 | public double learningRate;
15 | public int trainProgress;
16 | public double err;
17 | double[] targBuf;
18 |
19 |
20 | // General-purpose constructor. (Note: the total_layers parameter is currently unused; the network always gets one hidden layer.)
21 | ContentmentModel(int beliefDims, int total_layers, int queue_size, int trainItersPerPattern, Random r) {
22 |
23 | // Init the model
24 | rand = r;
25 | model = new NeuralNet();
26 | int hidden = Math.min(30, beliefDims * 10);
27 | model.layers.add(new LayerLinear(beliefDims, hidden));
28 | model.layers.add(new LayerTanh(hidden));
29 | model.layers.add(new LayerLinear(hidden, 1));
30 | model.layers.add(new LayerTanh(1));
31 | model.init(rand);
32 |
33 | // Init the buffers
34 | samples = new Matrix(queue_size, beliefDims);
35 | contentment = new Matrix(queue_size, 1);
36 |
37 | // Init the meta-parameters
38 | trainIters = trainItersPerPattern;
39 | learningRate = 0.03;
40 | targBuf = new double[1];
41 | }
42 |
43 |
44 | /// Unmarshaling constructor
45 | ContentmentModel(Json obj, Random r) {
46 | rand = r;
47 | model = new NeuralNet(obj.get("model"));
48 | samples = new Matrix(obj.get("samples"));
49 | contentment = new Matrix(obj.get("contentment"));
50 | trainPos = (int)obj.getLong("trainPos");
51 | trainSize = (int)obj.getLong("trainSize");
52 | trainIters = (int)obj.getLong("trainIters");
53 | learningRate = obj.getDouble("learningRate");
54 | trainProgress = (int)obj.getLong("trainProgress");
55 | err = obj.getDouble("err");
56 | targBuf = new double[1];
57 | }
58 |
59 |
60 | /// Marshals this model to a JSON DOM.
61 | Json marshal() {
62 | Json obj = Json.newObject();
63 | obj.add("model", model.marshal());
64 | obj.add("samples", samples.marshal());
65 | obj.add("contentment", contentment.marshal());
66 | obj.add("trainPos", trainPos);
67 | obj.add("trainSize", trainSize);
68 | obj.add("trainIters", trainIters);
69 | obj.add("learningRate", learningRate);
70 | obj.add("trainProgress", trainProgress);
71 | obj.add("err", err);
72 | return obj;
73 | }
74 |
75 |
76 | void setTutor(ITutor t) {
77 | tutor = t;
78 | }
79 |
80 |
81 | /// Performs one pattern-presentation of stochastic gradient descent
82 | void doSomeTraining() {
83 |
84 | // Present a sample of beliefs and corresponding contentment for training
85 | int index = rand.nextInt(trainSize);
86 | model.regularize(learningRate * 0.000001);
87 | model.trainIncremental(samples.row(index), contentment.row(index), learningRate);
88 | err += Vec.squaredDistance(model.layers.get(model.layers.size() - 1).activation, contentment.row(index));
89 | if(++trainProgress >= 1000) {
90 | trainProgress = 0;
91 | //System.out.println("Contentment error: " + Double.toString(err / 1000.0));
92 | err = 0.0;
93 | }
94 | }
95 |
96 |
97 | /// Refines this model based on feedback from the mentor
98 | void trainIncremental(double[] sample_beliefs, double sample_contentment) {
99 |
100 | // Buffer the samples
101 | double[] dest = samples.row(trainPos);
102 | if(sample_beliefs.length != dest.length)
103 | throw new IllegalArgumentException("size mismatch");
104 | for(int i = 0; i < dest.length; i++)
105 | dest[i] = sample_beliefs[i];
106 | contentment.row(trainPos)[0] = sample_contentment;
107 | trainPos++;
108 | trainSize = Math.max(trainSize, trainPos);
109 | if(trainPos >= samples.rows())
110 | trainPos = 0;
111 |
112 | // Do a few iterations of stochastic gradient descent
113 | int iters = Math.min(trainIters, trainSize);
114 | for(int i = 0; i < iters; i++)
115 | doSomeTraining();
116 | }
117 |
118 |
119 | /// Computes the contentment of a particular belief vector
120 | public double evaluate(double[] beliefs) {
121 | if(tutor != null)
122 | return tutor.evaluateState(beliefs);
123 | double[] output = model.forwardProp(beliefs);
124 | return output[0];
125 | }
126 | }
127 |
128 |
129 |
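A sketch of exercising ContentmentModel on its own (the argument values are illustrative; as noted above, the second constructor parameter is currently ignored):

    Random r = new Random(0);
    ContentmentModel cm = new ContentmentModel(2, 3, 500, 50, r);
    double[] beliefs = new double[] { 0.1, -0.2 };
    cm.trainIncremental(beliefs, 0.9);     // the mentor rated this state highly
    double utility = cm.evaluate(beliefs); // tends toward 0.9 as more feedback accumulates
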
--------------------------------------------------------------------------------
/java/src/DriftingPlatform.java:
--------------------------------------------------------------------------------
1 | import java.util.Random;
2 | import java.awt.image.BufferedImage;
3 | import java.io.File;
4 | import javax.imageio.ImageIO;
5 | import java.awt.Color;
6 | import java.awt.Graphics2D;
7 |
8 |
9 | // The mentor's job is to evaluate the plans that an agent makes.
10 | // (It's like the hot-and-cold game.) The mentor cannot tell the agent what to do.
11 | // It can only tell the agent when it is on the right track.
12 | class DriftingPlatformMentor implements IMentor {
13 | boolean alive;
14 |
15 | DriftingPlatformMentor() {
16 | alive = true;
17 | }
18 |
19 | // Prefer the plan that minimizes the magnitude of the observation vector
20 | public double evaluatePlan(IAgent agent, Matrix plan) {
21 | if(!alive)
22 | return NO_FEEDBACK;
23 | double[] anticipatedObs = agent.anticipateObservation(plan);
24 | return evaluateObservation(anticipatedObs);
25 | }
26 |
27 | static double evaluateObservation(double[] anticipatedObs) {
28 | double sqMag = Vec.squaredMagnitude(anticipatedObs);
29 | return Math.exp(-sqMag);
30 | }
31 | }
32 |
33 |
34 |
35 | // The tutor's job is to help the agent cheat.
36 | // A tutor should be used only for debugging.
37 | class DriftingPlatformTutor implements ITutor {
38 | DriftingPlatform world;
39 | DriftingPlatformMentor mentor;
40 |
41 | DriftingPlatformTutor(DriftingPlatform w, DriftingPlatformMentor m)
42 | {
43 | world = w;
44 | mentor = m;
45 | }
46 |
47 | public double[] observationsToState(double[] observations) {
48 | return Vec.copy(observations);
49 | }
50 |
51 | public double[] stateToObservations(double[] state) {
52 | return world.computeObservations(state);
53 | }
54 |
55 | public void transition(double[] current_state, double[] actions, double[] next_state) {
56 | world.computeNextState(current_state, actions, next_state);
57 | }
58 |
59 | public double evaluateState(double[] state) {
60 | double[] obs = stateToObservations(state);
61 | return DriftingPlatformMentor.evaluateObservation(obs);
62 | }
63 |
64 | public void chooseActions(double[] state, double[] actions) {
65 | double theta = Math.atan2(state[1], state[0]);
66 | theta -= world.controlOrigin;
67 | theta += Math.PI; // step in the opposite direction, back toward the origin
68 | while(theta < 0.0)
69 | theta += 2.0 * Math.PI;
70 | while(theta > 2.0 * Math.PI)
71 | theta -= 2.0 * Math.PI;
72 | theta /= (2.0 * Math.PI); // normalize the angle to an action value in [0, 1]
73 | actions[0] = theta;
74 | }
75 | }
76 |
77 |
78 |
79 |
80 | public class DriftingPlatform implements ITest {
81 |
82 | double stepSize;
83 | double controlOrigin;
84 | Random rand;
85 |
86 |
87 | public DriftingPlatform(Random r) {
88 | stepSize = 0.05;
89 | controlOrigin = 0.0;
90 | rand = r;
91 | }
92 |
93 | double[] computeObservations(double[] state) {
94 | return Vec.copy(state);
95 | }
96 |
97 | void computeNextState(double[] current_state, double[] actions, double[] next_state) {
98 | Vec.copy(next_state, current_state);
99 | double angle = actions[0] * 2.0 * Math.PI + controlOrigin;
100 | next_state[0] += stepSize * Math.cos(angle);
101 | next_state[1] += stepSize * Math.sin(angle);
102 | Vec.clip(next_state, -1.0, 1.0);
103 | }
104 |
105 | /*
106 | /// Generates an image to visualize what's going on inside an AgentManic's artificial brain for debugging purposes
107 | static BufferedImage visualize(agents.manic.AgentManic agent, double[] state_orig, double[] state_drifted, double[] state) {
108 | if(agent.beliefs.length != 2)
109 | throw new IllegalArgumentException("Sorry, this method only works with 2D belief spaces");
110 |
111 | // Find the min and max locations
112 | double[] in = new double[2];
113 | double mi = Double.MAX_VALUE;
114 | double ma = -Double.MAX_VALUE;
115 | double[] min_loc = new double[2];
116 | double[] max_loc = new double[2];
117 | for(int y = 0; y < 1000; y++) {
118 | for(int x = 0; x < 1000; x++) {
119 | in[0] = ((double)x) / 1000.0 * 2.0 - 1.0;
120 | in[1] = ((double)y) / 1000.0 * 2.0 - 1.0;
121 | double out = agent.contentmentModel.evaluate(agent.observationModel.observationsToBeliefs(in));
122 | if(out < mi) {
123 | mi = out;
124 |
min_loc[0] = in[0]; 125 | min_loc[1] = in[1]; 126 | } 127 | if(out > ma) { 128 | ma = out; 129 | max_loc[0] = in[0]; 130 | max_loc[1] = in[1]; 131 | } 132 | } 133 | } 134 | 135 | // Draw the contours of the contentment function 136 | BufferedImage image = new BufferedImage(1000, 1000, BufferedImage.TYPE_INT_ARGB); 137 | for(int y = 0; y < 1000; y++) { 138 | for(int x = 0; x < 1000; x++) { 139 | in[0] = ((double)x) * 0.002 - 1.0; 140 | in[1] = ((double)y) * 0.002 - 1.0; 141 | double out = (agent.contentmentModel.evaluate(agent.observationModel.observationsToBeliefs(in)) - mi) * 256.0 / (ma - mi); 142 | int g = Math.max(0, Math.min(255, (int)out)); 143 | int gg = g; 144 | if(g % 5 == 0) 145 | gg = (128 - (int)(Math.tanh((double)(g - 128) * 0.03) * 127.0)); 146 | image.setRGB(x, y, new Color(g, g, gg).getRGB()); 147 | } 148 | } 149 | 150 | // Draw magenta dots at the sample locations for training the contentment function 151 | Graphics2D g = image.createGraphics(); 152 | g.setColor(new Color(255, 0, 255)); 153 | for(int i = 0; i < agent.contentmentModel.trainSize; i++) { 154 | double[] r = agent.observationModel.beliefsToObservations(agent.contentmentModel.samples.row(i)); 155 | int x = (int)((r[0] + 1.0) * 500.0); 156 | int y = (int)((r[1] + 1.0) * 500.0); 157 | g.fillOval(x - 2, y - 2, 4, 4); 158 | } 159 | 160 | // Draw the circle of transitions. (If the agent has learned transitions well, these will form a circle around the point of beliefs.) 161 | double[] tmp_act = new double[1]; 162 | for(double d = 0; d <= 1.0; d += 0.03125) { 163 | if(d == 0) 164 | g.setColor(new Color(255, 0, 0)); 165 | else if(d == 0.25) 166 | g.setColor(new Color(255, 255, 0)); 167 | else if(d == 0.5) 168 | g.setColor(new Color(0, 255, 0)); 169 | else if(d == 0.75) 170 | g.setColor(new Color(0, 255, 255)); 171 | tmp_act[0] = d; 172 | double[] next = agent.transitionModel.anticipateNextBeliefs(agent.beliefs, tmp_act); 173 | double[] next_obs = agent.observationModel.beliefsToObservations(next); 174 | g.fillOval((int)((next_obs[0] + 1.0) * 500.0) - 4, (int)((next_obs[1] + 1.0) * 500.0) - 4, 8, 8); 175 | } 176 | 177 | // Draw an orange circle to represent the agent's beliefs 178 | g.setColor(new Color(255, 128, 0)); 179 | double[] exp_obs = agent.observationModel.beliefsToObservations(agent.beliefs); 180 | g.fillOval((int)((exp_obs[0] + 1.0) * 500.0) - 4, (int)((exp_obs[1] + 1.0) * 500.0) - 4, 8, 8); 181 | 182 | // Draw orange lines to represent the plans 183 | for(int i = 0; i < agent.planningSystem.plans.size(); i++) { 184 | agents.manic.Plan plan = agent.planningSystem.plans.get(i); 185 | double[] prev = agent.beliefs; 186 | double[] prev_obs = exp_obs; 187 | for(int j = 0; j < plan.steps.size(); j++) { 188 | double[] next = agent.transitionModel.anticipateNextBeliefs(prev, plan.steps.get(j)); 189 | double[] next_obs = agent.observationModel.beliefsToObservations(next); 190 | g.drawLine((int)((prev_obs[0] + 1.0) * 500.0), (int)((prev_obs[1] + 1.0) * 500.0), (int)((next_obs[0] + 1.0) * 500.0), (int)((next_obs[1] + 1.0) * 500.0)); 191 | prev = next; 192 | prev_obs = next_obs; 193 | } 194 | } 195 | 196 | // Draw the chosen action in dark green 197 | g.setColor(new Color(0, 128, 0)); 198 | double[] ant_obs = agent.observationModel.beliefsToObservations(agent.anticipatedBeliefs); 199 | g.drawLine((int)((exp_obs[0] + 1.0) * 500.0), (int)((exp_obs[1] + 1.0) * 500.0), (int)((ant_obs[0] + 1.0) * 500.0), (int)((ant_obs[1] + 1.0) * 500.0)); 200 | 201 | // Draw the actual action in bright green 202 | g.setColor(new Color(0, 
255, 0));
203 | g.drawLine((int)((state_drifted[0] + 1.0) * 500.0), (int)((state_drifted[1] + 1.0) * 500.0), (int)((state[0] + 1.0) * 500.0), (int)((state[1] + 1.0) * 500.0));
204 |
205 | // Draw the drift in cyan
206 | g.setColor(new Color(0, 255, 255));
207 | g.drawLine((int)((state_orig[0] + 1.0) * 500.0), (int)((state_orig[1] + 1.0) * 500.0), (int)((state_drifted[0] + 1.0) * 500.0), (int)((state_drifted[1] + 1.0) * 500.0));
208 |
209 | return image;
210 | }
211 |
212 | static void makeVisualization(String suffix, agents.manic.AgentManic agent, double[] state_orig, double[] state_drifted, double[] state) {
213 | BufferedImage image = visualize(agent, state_orig, state_drifted, state);
214 | String filename = "viz" + suffix + ".png";
215 | try {
216 | ImageIO.write(image, "png", new File(filename));
217 | }catch(Exception e) {
218 | throw new RuntimeException("got an exception while trying to write file " + filename);
219 | }
220 | }
221 | */
222 |
223 |
224 | public double test(IAgent agent) {
225 |
226 | System.out.println("----------------------");
227 | System.out.println("Drifting platform test. Agent: " + agent.getName());
228 | System.out.println("----------------------");
229 | System.out.println("In this test, the agent is placed on an imaginary 2D platform of infinite size. " +
230 | "The agent's objective is to stay near the origin. Each time-step, the platform " +
231 | "drifts a small amount in a random direction. The agent can step in any direction " +
232 | "(from 0 to 2*PI). Initially, a mentor will help it learn what to do.\n");
233 |
234 | // Define some constants for this test
235 | double driftSpeed = 0.1;
236 |
237 | // Set up the agent
238 | DriftingPlatformMentor mentor = new DriftingPlatformMentor();
239 | agent.reset(mentor, // This mentor prefers plans that lead closer to the origin
240 | 2, // The agent observes its x,y position (which is the complete state of this world)
241 | 2, // The agent models state with 2 dimensions because it cannot be simplified further
242 | 1, // The agent chooses a direction for travel
243 | 1); // The agent plans up to 1 time-step into the future
244 |
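// For illustration (an editorial sketch of the control convention): computeNextState
// maps act[0] in [0, 1] to the angle act[0] * 2 * PI + controlOrigin. With
// controlOrigin == 0.0 and stepSize == 0.05, an action of 0.25 corresponds to the
// angle PI / 2, so from the origin the agent would step to approximately (0.0, 0.05):
//   double[] demo = new double[2];
//   computeNextState(new double[] { 0.0, 0.0 }, new double[] { 0.25 }, demo);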
245 | // To debug an agent that isn't working, uncomment the following two lines and verify that it works.
246 | // Then, set each "true" to "false" until you find the component that isn't doing its job properly.
247 | //DriftingPlatformTutor tutor = new DriftingPlatformTutor(this, mentor);
248 | //agent.setTutor(tutor, true/*observation*/, true/*transition*/, true/*contentment*/, true/*planning*/);
249 |
250 | // Train with mentor
251 | System.out.println("Phase 1 of 3: Learn the objective from the mentor...");
252 | System.out.println("|------------------------------------------------|");
253 | double[] state = new double[2];
254 | double[] next_state = new double[2];
255 | double[] drift = new double[2];
256 | for(int i = 0; i < 2000; i++) {
257 |
258 | if(i % 40 == 0)
259 | System.out.print(">");
260 |
261 | // The platform drifts in a random direction
262 | drift[0] = rand.nextGaussian();
263 | drift[1] = rand.nextGaussian();
264 | Vec.normalize(drift);
265 | Vec.scale(drift, driftSpeed);
266 | Vec.add(state, drift);
267 | Vec.clip(state, -1.0, 1.0);
268 |
269 | // The agent takes a step in a direction of its choice
270 | double[] obs = computeObservations(state);
271 | double[] act = agent.think(obs);
272 | computeNextState(state, act, next_state);
273 | Vec.copy(state, next_state);
274 | }
275 |
276 | System.out.println("\n\nNow, the mentor dies, so the agent is on its own.");
277 | mentor.alive = false;
278 |
279 | System.out.println("Also, to make the problem more challenging, the agent's controls " +
280 | "are changed by 120 degrees. The agent will now have to figure out how to operate " +
281 | "the new controls without a mentor to help it.\n");
282 | controlOrigin += Math.PI * 2.0 / 3.0;
283 |
284 | // Train without mentor
285 | System.out.println("Phase 2 of 3: Figure out new controls (without mentor)...");
286 | System.out.println("|------------------------------------------------|");
287 | for(int i = 0; i < 2000; i++) {
288 |
289 | if(i % 40 == 0)
290 | System.out.print(">");
291 | // if(i % 80 == 0)
292 | // makeVisualization(Integer.toString(i), (agents.manic.AgentManic)agent, state_orig, state_drifted, state);
293 |
294 | // The platform drifts in a random direction
295 | drift[0] = rand.nextGaussian();
296 | drift[1] = rand.nextGaussian();
297 | Vec.normalize(drift);
298 | Vec.scale(drift, driftSpeed);
299 | Vec.add(state, drift);
300 | Vec.clip(state, -1.0, 1.0);
301 |
302 | // The agent takes a step in a direction of its choice
303 | double[] obs = computeObservations(state);
304 | double[] act = agent.think(obs);
305 | computeNextState(state, act, next_state);
306 | Vec.copy(state, next_state);
307 | }
308 |
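// For intuition about the numbers quoted below, a rough editorial sketch of an
// "oracle" baseline: an agent that always steps straight toward the origin.
// With drift magnitude 0.1 per step and a step size of only 0.05, even this
// oracle cannot pin the platform to the origin, so average distances around 0.2
// are near the practical floor, while a random policy lets the state wander much farther:
//   double[] s = new double[2];
//   double sum = 0.0;
//   Random rng = new Random(42);
//   for(int t = 0; t < 1000; t++) {
//       double[] d = new double[] { rng.nextGaussian(), rng.nextGaussian() };
//       Vec.normalize(d);
//       Vec.scale(d, 0.1);
//       Vec.add(s, d);
//       Vec.clip(s, -1.0, 1.0);
//       double mag = Math.sqrt(Vec.squaredMagnitude(s));
//       double step = Math.min(0.05, mag);
//       if(mag > 0.0) {
//           s[0] -= s[0] / mag * step;
//           s[1] -= s[1] / mag * step;
//       }
//       sum += Math.sqrt(Vec.squaredMagnitude(s));
//   }
//   System.out.println("oracle average distance: " + (sum / 1000.0));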
309 | // Test
310 | System.out.println("\n\nThe agent has had enough time to figure out the new controls, so now we test the agent. " +
311 | "We will let the platform continue to drift randomly for 1000 iterations, and measure the average " +
312 | "distance between the origin and the agent. (If the agent is intelligent, it should achieve a low " +
313 | "average distance, such as 0.2. If it is unintelligent, it will achieve a higher average distance, " +
314 | "such as 0.7.)\n");
315 | System.out.println("Phase 3 of 3: Testing (without mentor)...");
316 | System.out.println("|------------------------------------------------|");
317 | double sumDist = 0.0;
318 | for(int i = 0; i < 1000; i++) {
319 |
320 | if(i % 20 == 0)
321 | System.out.print(">");
322 |
323 | // if(i % 100 == 0)
324 | // makeVisualization(Integer.toString(i), (agents.manic.AgentManic)agent, state_orig, state_drifted, state);
325 |
326 | // The platform drifts in a random direction
327 | drift[0] = rand.nextGaussian();
328 | drift[1] = rand.nextGaussian();
329 | Vec.normalize(drift);
330 | Vec.scale(drift, driftSpeed);
331 | Vec.add(state, drift);
332 | Vec.clip(state, -1.0, 1.0);
333 |
334 | // The agent takes a step in a direction of its choice
335 | double[] obs = computeObservations(state);
336 | double[] act = agent.think(obs);
337 | computeNextState(state, act, next_state);
338 | Vec.copy(state, next_state);
339 |
340 | // Accumulate the agent's distance from the origin at each time-step
341 | sumDist += Math.sqrt(Vec.squaredMagnitude(state));
342 | }
343 |
344 | double aveDist = sumDist / 1000.0;
345 | System.out.println("\n\nThe agent's average distance from the origin during the testing phase was " + Double.toString(aveDist));
346 |
347 | return -aveDist; // Bigger is supposed to be better, so we negate the average distance
348 | }
349 | }
350 |
--------------------------------------------------------------------------------
/java/src/IAgent.java:
--------------------------------------------------------------------------------
1 |
2 | public interface IAgent {
3 |
4 | /// Returns this agent's name
5 | String getName();
6 |
7 | /// This method is called to initialize the agent in a new world.
8 | /// mentor is an object that helps the agent learn what to do in this world.
9 | /// observationDims is the number of double values that the agent observes each time step.
10 | /// beliefDims is the number of double values that the agent uses internally to model the state of the world. (It should generally be <= observationDims.)
11 | /// actionDims is the number of double values the agent uses to specify an action.
12 | /// maxPlanLength specifies the maximum number of time-steps into the future that the agent should attempt to plan.
13 | void reset(IMentor mentor, int observationDims, int beliefDims, int actionDims, int maxPlanLength);
14 |
15 | /// Tells the agent that the next observation passed to think does not follow
16 | /// from the previous one. This should be called when a game is started over, or when the state is
17 | /// adjusted in a manner that the agent is not expected to anticipate.
18 | void teleport();
19 |
20 | /// Sets the mentor to use with this agent
21 | void setMentor(IMentor mentor);
22 |
23 | /// Sets the tutor to use with this agent.
24 | void setTutor(ITutor tutor, boolean helpWithObservations, boolean helpWithTransitions, boolean helpWithContentment, boolean helpWithPlanning);
25 |
26 | /// Asks the agent what it anticipates observing if it performs the specified plan.
27 | /// (null should be treated as a plan with zero steps.)
28 | double[] anticipateObservation(Matrix plan);
29 |
30 | /// A vector of observations goes in. All observed values may be expected to fall between -1 and 1.
31 | /// Returns a vector of chosen actions. All returned values should fall between 0 and 1.
32 | double[] think(double[] observations);
33 | }
34 |
--------------------------------------------------------------------------------
/java/src/IMentor.java:
--------------------------------------------------------------------------------
1 |
2 | /// A mentor helps the agent learn what to do.
3 | /// It does not help the agent learn how to do anything.
4 | public interface IMentor {
5 |
6 | public static final double NO_FEEDBACK = -Double.MIN_VALUE;
7 |
8 | /// Implementations should evaluate the goodness of the plan.
9 | /// return 1 for the best possible plan.
10 | /// return 0 for the worst possible plan.
11 | /// return a value between 0 and 1 for plans that are neither the worst nor best.
12 | /// return NO_FEEDBACK if the mentor cannot determine the goodness of the plan,
13 | /// or if the mentor is not available, or if the mentor wants to test the
14 | /// agent by letting the agent decide for itself.
15 | double evaluatePlan(IAgent agent, Matrix plan);
16 | }
17 |
--------------------------------------------------------------------------------
/java/src/ITest.java:
--------------------------------------------------------------------------------
1 |
2 | public interface ITest {
3 |
4 | /// Evaluates the general intelligence of the agent with some task.
5 | /// Returns a number that represents the intelligence of the agent.
6 | /// (More intelligent agents should achieve a higher score.
7 | /// Less intelligent agents should achieve a lower score.
8 | /// The scores may span any range, even negative values.)
9 | double test(IAgent agent);
10 | }
11 |
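The IMentor interface above is the only channel through which an objective reaches an agent. For illustration, a sketch of a custom mentor (hypothetical, not part of this repository) that would instead steer an agent toward the point (0.5, 0.0), following the same pattern as DriftingPlatformMentor:

    class TargetMentor implements IMentor {
        public double evaluatePlan(IAgent agent, Matrix plan) {
            double[] obs = agent.anticipateObservation(plan);
            double dx = obs[0] - 0.5;
            double dy = obs[1];
            return Math.exp(-(dx * dx + dy * dy)); // near 1 at the target, falls toward 0 away from it
        }
    }
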
--------------------------------------------------------------------------------
/java/src/ITutor.java:
--------------------------------------------------------------------------------
1 |
2 | /// A tutor helps the agent do certain parts of its job.
3 | /// Using a tutor is typically considered cheating.
4 | /// The main purpose of a tutor is to help debug an agent that is failing to learn some problem.
5 | /// When you find the minimal subset of jobs that the tutor must perform to make the agent successful, you have isolated the bug.
6 | public interface ITutor {
7 |
8 | /// Computes the state from the observations.
9 | double[] observationsToState(double[] observations);
10 |
11 | /// Computes the observations from the state.
12 | double[] stateToObservations(double[] state);
13 |
14 | /// Computes how actions will affect state.
15 | void transition(double[] current_state, double[] actions, double[] next_state);
16 |
17 | /// Computes a near-optimal evaluation of state.
18 | double evaluateState(double[] state);
19 |
20 | /// Chooses the best actions to perform in the given state.
21 | void chooseActions(double[] state, double[] actions);
22 | }
23 |
--------------------------------------------------------------------------------
/java/src/Json.java:
--------------------------------------------------------------------------------
1 | import java.util.ArrayList;
2 | import java.lang.StringBuilder;
3 | import java.io.BufferedWriter;
4 | import java.io.FileWriter;
5 | import java.nio.file.Paths;
6 | import java.nio.file.Files;
7 |
8 | abstract class Json
9 | {
10 | abstract void write(StringBuilder sb);
11 |
12 | public static Json newObject()
13 | {
14 | return new JObject();
15 | }
16 |
17 | public static Json newList()
18 | {
19 | return new JList();
20 | }
21 |
22 | public static Json parseNode(StringParser p)
23 | {
24 | p.skipWhitespace();
25 | if(p.remaining() == 0)
26 | throw new RuntimeException("Unexpected end of JSON file");
27 | char c = p.peek();
28 | if(c == '"')
29 | return new JString(JString.parseString(p));
30 | else if(c == '{')
31 | return JObject.parseObject(p);
32 | else if(c == '[')
33 | return JList.parseList(p);
34 | else if(c == 't')
35 | {
36 | p.expect("true");
37 | return new JBool(true);
38 | }
39 | else if(c == 'f')
40 | {
41 | p.expect("false");
42 | return new JBool(false);
43 | }
44 | else if(c == 'n')
45 | {
46 | p.expect("null");
47 | return new JNull();
48 | }
49 | else if((c >= '0' && c <= '9') || c == '-')
50 | return JDouble.parseNumber(p);
51 | else
52 | throw new RuntimeException("Unexpected token at " + p.str.substring(p.pos, Math.min(p.str.length(), p.pos + 50)));
53 | }
54 |
55 | public int size()
56 | {
57 | return this.asList().size();
58 | }
59 |
60 | public Json get(String name)
61 | {
62 | return this.asObject().field(name);
63 | }
64 |
65 | public Json get(int index)
66 | {
67 | return this.asList().get(index);
68 | }
69 |
70 | public boolean getBool(String name)
71 | {
72 | return get(name).asBool();
73 | }
74 |
75 | public boolean getBool(int index)
76 | {
77 | return get(index).asBool();
78 | }
79 |
80 | public long getLong(String name)
81 | {
82 | return get(name).asLong();
83 | }
84 |
85 | public long getLong(int index)
86 | {
87 | return get(index).asLong();
88 | }
89 |
90 | public double getDouble(String name)
91 | {
92 | return get(name).asDouble();
93 | }
94 |
95 | public double getDouble(int index)
96 | {
97 | return get(index).asDouble();
98 | }
99 |
100 | public String getString(String name)
101 | {
102 | return get(name).asString();
103 | }
104 |
105 | public String getString(int index)
106 | {
107 | return get(index).asString();
108 | }
109 |
110 | public void add(String name, Json val)
111 | {
112 | this.asObject().add(name, val);
113 | }
114 |
115 | public void add(String name, boolean val)
116 | {
117 | this.asObject().add(name, new Json.JBool(val));
118 | }
119 |
120 | public void add(String name, long val)
121 | {
122 | this.asObject().add(name, new Json.JLong(val));
123 | }
124 |
125 | public void add(String name, double val)
126 | {
127 | this.asObject().add(name, new Json.JDouble(val));
128 | }
129 |
130 | public void add(String name, String val)
131 | {
132 | this.asObject().add(name, new Json.JString(val));
133 | }
134 |
135 | public void add(Json item)
136 | {
137 | this.asList().add(item);
138 | }
139 |
140 | public void add(boolean val)
141 | {
142 | this.asList().add(new Json.JBool(val));
143 | }
144 |
145 | public void add(long val)
146 | {
147 | this.asList().add(new Json.JLong(val));
148 | }
149 |
150 | public void add(double val)
151 | {
152 | this.asList().add(new Json.JDouble(val));
153 | }
154
| 155 | public void add(String val) 156 | { 157 | this.asList().add(new Json.JString(val)); 158 | } 159 | 160 | public boolean asBool() 161 | { 162 | return ((JBool)this).value; 163 | } 164 | 165 | public long asLong() 166 | { 167 | return ((JLong)this).value; 168 | } 169 | 170 | public double asDouble() 171 | { 172 | return ((JDouble)this).value; 173 | } 174 | 175 | public String asString() 176 | { 177 | return ((JString)this).value; 178 | } 179 | 180 | public String toString() 181 | { 182 | StringBuilder sb = new StringBuilder(); 183 | write(sb); 184 | return sb.toString(); 185 | } 186 | 187 | private JObject asObject() 188 | { 189 | return (JObject)this; 190 | } 191 | 192 | private JList asList() 193 | { 194 | return (JList)this; 195 | } 196 | 197 | public void save(String filename) 198 | { 199 | try 200 | { 201 | BufferedWriter out = new BufferedWriter(new FileWriter(filename)); 202 | out.write(toString()); 203 | out.close(); 204 | } 205 | catch(Exception e) 206 | { 207 | throw new RuntimeException(e); 208 | } 209 | } 210 | 211 | public static Json parse(String s) 212 | { 213 | StringParser p = new StringParser(s); 214 | return Json.parseNode(p); 215 | } 216 | 217 | public static Json load(String filename) 218 | { 219 | String contents; 220 | try 221 | { 222 | contents = new String(Files.readAllBytes(Paths.get(filename))); 223 | } 224 | catch(Exception e) 225 | { 226 | throw new RuntimeException(e); 227 | } 228 | return parse(contents); 229 | } 230 | 231 | private static class StringParser 232 | { 233 | String str; 234 | int pos; 235 | 236 | StringParser(String s) 237 | { 238 | str = s; 239 | pos = 0; 240 | } 241 | 242 | int remaining() 243 | { 244 | return str.length() - pos; 245 | } 246 | 247 | char peek() 248 | { 249 | return str.charAt(pos); 250 | } 251 | 252 | void advance(int n) 253 | { 254 | pos += n; 255 | } 256 | 257 | void skipWhitespace() 258 | { 259 | while(pos < str.length() && str.charAt(pos) <= ' ') 260 | pos++; 261 | } 262 | 263 | void expect(String s) 264 | { 265 | if(!str.substring(pos, Math.min(str.length(), pos + s.length())).equals(s)) 266 | throw new RuntimeException("Expected \"" + s + "\", Got \"" + str.substring(pos, Math.min(str.length(), pos + s.length())) + "\""); 267 | pos += s.length(); 268 | } 269 | 270 | String until(char c) 271 | { 272 | int i = pos; 273 | while(i < str.length() && str.charAt(i) != c) 274 | i++; 275 | String s = str.substring(pos, i); 276 | pos = i; 277 | return s; 278 | } 279 | 280 | String until(char a, char b) 281 | { 282 | int i = pos; 283 | while(i < str.length() && str.charAt(i) != a && str.charAt(i) != b) 284 | i++; 285 | String s = str.substring(pos, i); 286 | pos = i; 287 | return s; 288 | } 289 | 290 | String whileReal() 291 | { 292 | int i = pos; 293 | while(i < str.length()) 294 | { 295 | char c = str.charAt(i); 296 | if((c >= '0' && c <= '9') || 297 | c == '-' || 298 | c == '+' || 299 | c == '.' 
||
300 | c == 'e' ||
301 | c == 'E')
302 | i++;
303 | else
304 | break;
305 | }
306 | String s = str.substring(pos, i);
307 | pos = i;
308 | return s;
309 | }
310 | }
311 |
312 | private static class NameVal
313 | {
314 | String name;
315 | Json value;
316 |
317 | NameVal(String nam, Json val)
318 | {
319 | if(nam == null)
320 | throw new IllegalArgumentException("The name cannot be null");
321 | if(val == null)
322 | val = new JNull();
323 | name = nam;
324 | value = val;
325 | }
326 | }
327 |
328 | private static class JObject extends Json
329 | {
330 | ArrayList<NameVal> fields;
331 |
332 | JObject()
333 | {
334 | fields = new ArrayList<NameVal>();
335 | }
336 |
337 | public void add(String name, Json val)
338 | {
339 | fields.add(new NameVal(name, val));
340 | }
341 |
342 | Json fieldIfExists(String name)
343 | {
344 | for(NameVal nv : fields)
345 | {
346 | if(nv.name.equals(name))
347 | return nv.value;
348 | }
349 | return null;
350 | }
351 |
352 | Json field(String name)
353 | {
354 | Json n = fieldIfExists(name);
355 | if(n == null)
356 | throw new RuntimeException("No field named \"" + name + "\" found.");
357 | return n;
358 | }
359 |
360 | void write(StringBuilder sb)
361 | {
362 | sb.append("{");
363 | for(int i = 0; i < fields.size(); i++)
364 | {
365 | if(i > 0)
366 | sb.append(",");
367 | NameVal nv = fields.get(i);
368 | JString.write(sb, nv.name);
369 | sb.append(":");
370 | nv.value.write(sb);
371 | }
372 | sb.append("}");
373 | }
374 |
375 | static JObject parseObject(StringParser p)
376 | {
377 | p.expect("{");
378 | JObject newOb = new JObject();
379 | boolean readyForField = true;
380 | while(p.remaining() > 0)
381 | {
382 | char c = p.peek();
383 | if(c <= ' ')
384 | {
385 | p.advance(1);
386 | }
387 | else if(c == '}')
388 | {
389 | p.advance(1);
390 | return newOb;
391 | }
392 | else if(c == ',')
393 | {
394 | if(readyForField)
395 | throw new RuntimeException("Unexpected ','");
396 | p.advance(1);
397 | readyForField = true;
398 | }
399 | else if(c == '\"')
400 | {
401 | if(!readyForField)
402 | throw new RuntimeException("Expected a ',' before the next field in JSON file");
403 | p.skipWhitespace();
404 | String name = JString.parseString(p);
405 | p.skipWhitespace();
406 | p.expect(":");
407 | Json value = Json.parseNode(p);
408 | newOb.add(name, value);
409 | readyForField = false;
410 | }
Got " + p.str.substring(p.pos, p.pos + 10)); 413 | } 414 | throw new RuntimeException("Expected a matching '}' in JSON file"); 415 | } 416 | } 417 | 418 | private static class JList extends Json 419 | { 420 | ArrayList list; 421 | 422 | JList() 423 | { 424 | list = new ArrayList(); 425 | } 426 | 427 | public void add(Json item) 428 | { 429 | if(item == null) 430 | item = new JNull(); 431 | list.add(item); 432 | } 433 | 434 | public int size() 435 | { 436 | return list.size(); 437 | } 438 | 439 | public Json get(int index) 440 | { 441 | return list.get(index); 442 | } 443 | 444 | void write(StringBuilder sb) 445 | { 446 | sb.append("["); 447 | for(int i = 0; i < list.size(); i++) 448 | { 449 | if(i > 0) 450 | sb.append(","); 451 | list.get(i).write(sb); 452 | } 453 | sb.append("]"); 454 | } 455 | 456 | static JList parseList(StringParser p) 457 | { 458 | p.expect("["); 459 | JList newList = new JList(); 460 | boolean readyForValue = true; 461 | while(p.remaining() > 0) 462 | { 463 | p.skipWhitespace(); 464 | char c = p.peek(); 465 | if(c == ']') 466 | { 467 | p.advance(1); 468 | return newList; 469 | } 470 | else if(c == ',') 471 | { 472 | if(readyForValue) 473 | throw new RuntimeException("Unexpected ',' in JSON file"); 474 | p.advance(1); 475 | readyForValue = true; 476 | } 477 | else 478 | { 479 | if(!readyForValue) 480 | throw new RuntimeException("Expected a ',' or ']' in JSON file"); 481 | newList.list.add(Json.parseNode(p)); 482 | readyForValue = false; 483 | } 484 | } 485 | throw new RuntimeException("Expected a matching ']' in JSON file"); 486 | } 487 | } 488 | 489 | private static class JBool extends Json 490 | { 491 | boolean value; 492 | 493 | JBool(boolean val) 494 | { 495 | value = val; 496 | } 497 | 498 | void write(StringBuilder sb) 499 | { 500 | sb.append(value ? 
"true" : "false"); 501 | } 502 | } 503 | 504 | private static class JLong extends Json 505 | { 506 | long value; 507 | 508 | JLong(long val) 509 | { 510 | value = val; 511 | } 512 | 513 | void write(StringBuilder sb) 514 | { 515 | sb.append(value); 516 | } 517 | } 518 | 519 | private static class JDouble extends Json 520 | { 521 | double value; 522 | 523 | JDouble(double val) 524 | { 525 | value = val; 526 | } 527 | 528 | void write(StringBuilder sb) 529 | { 530 | sb.append(value); 531 | } 532 | 533 | static Json parseNumber(StringParser p) 534 | { 535 | String s = p.whileReal(); 536 | if(s.indexOf('.') >= 0) 537 | return new JDouble(Double.parseDouble(s)); 538 | else 539 | return new JLong(Long.parseLong(s)); 540 | } 541 | } 542 | 543 | private static class JString extends Json 544 | { 545 | String value; 546 | 547 | JString(String val) 548 | { 549 | value = val; 550 | } 551 | 552 | static void write(StringBuilder sb, String value) 553 | { 554 | sb.append('"'); 555 | for(int i = 0; i < value.length(); i++) 556 | { 557 | char c = value.charAt(i); 558 | if(c < ' ') 559 | { 560 | switch(c) 561 | { 562 | case '\b': sb.append("\\b"); break; 563 | case '\f': sb.append("\\f"); break; 564 | case '\n': sb.append("\\n"); break; 565 | case '\r': sb.append("\\r"); break; 566 | case '\t': sb.append("\\t"); break; 567 | default: 568 | sb.append(c); 569 | } 570 | } 571 | else if(c == '\\') 572 | sb.append("\\\\"); 573 | else if(c == '"') 574 | sb.append("\\\""); 575 | else 576 | sb.append(c); 577 | } 578 | sb.append('"'); 579 | } 580 | 581 | void write(StringBuilder sb) 582 | { 583 | write(sb, value); 584 | } 585 | 586 | static String parseString(StringParser p) 587 | { 588 | StringBuilder sb = new StringBuilder(); 589 | p.expect("\""); 590 | while(p.remaining() > 0) 591 | { 592 | char c = p.peek(); 593 | if(c == '\"') 594 | { 595 | p.advance(1); 596 | return sb.toString(); 597 | } 598 | else if(c == '\\') 599 | { 600 | p.advance(1); 601 | c = p.peek(); 602 | p.advance(1); 603 | switch(c) 604 | { 605 | case '"': sb.append('"'); break; 606 | case '\\': sb.append('\\'); break; 607 | case '/': sb.append('/'); break; 608 | case 'b': sb.append('\b'); break; 609 | case 'f': sb.append('\f'); break; 610 | case 'n': sb.append('\n'); break; 611 | case 'r': sb.append('\r'); break; 612 | case 't': sb.append('\t'); break; 613 | case 'u': throw new RuntimeException("Sorry, unicode characters are not yet supported"); 614 | default: throw new RuntimeException("Unrecognized escape sequence"); 615 | } 616 | } 617 | else 618 | { 619 | sb.append(c); 620 | p.advance(1); 621 | } 622 | } 623 | throw new RuntimeException("No closing \""); 624 | } 625 | } 626 | 627 | private static class JNull extends Json 628 | { 629 | JNull() 630 | { 631 | } 632 | 633 | void write(StringBuilder sb) 634 | { 635 | sb.append("null"); 636 | } 637 | } 638 | } 639 | -------------------------------------------------------------------------------- /java/src/Layer.java: -------------------------------------------------------------------------------- 1 | import java.util.Random; 2 | 3 | abstract class Layer 4 | { 5 | double[] activation; 6 | double[] error; 7 | 8 | static final int t_linear = 0; 9 | static final int t_tanh = 1; 10 | 11 | 12 | /// General-purpose constructor 13 | Layer(int outputs) 14 | { 15 | activation = new double[outputs]; 16 | error = new double[outputs]; 17 | } 18 | 19 | 20 | /// Copy constructor 21 | Layer(Layer that) 22 | { 23 | activation = Vec.copy(that.activation); 24 | error = Vec.copy(that.error); 25 | } 26 | 27 | 28 | /// 
--------------------------------------------------------------------------------
/java/src/Layer.java:
--------------------------------------------------------------------------------
1 | import java.util.Random;
2 |
3 | abstract class Layer
4 | {
5 | double[] activation;
6 | double[] error;
7 |
8 | static final int t_linear = 0;
9 | static final int t_tanh = 1;
10 |
11 |
12 | /// General-purpose constructor
13 | Layer(int outputs)
14 | {
15 | activation = new double[outputs];
16 | error = new double[outputs];
17 | }
18 |
19 |
20 | /// Copy constructor
21 | Layer(Layer that)
22 | {
23 | activation = Vec.copy(that.activation);
24 | error = Vec.copy(that.error);
25 | }
26 |
27 |
28 | /// Unmarshal from a JSON DOM
29 | Layer(Json n)
30 | {
31 | int units = (int)n.getLong("units");
32 | activation = new double[units];
33 | error = new double[units];
34 | }
35 |
36 |
37 | void computeError(double[] target)
38 | {
39 | if(target.length != activation.length)
40 | throw new IllegalArgumentException("size mismatch. " + Integer.toString(target.length) + " != " + Integer.toString(activation.length));
41 | for(int i = 0; i < activation.length; i++)
42 | {
43 | error[i] = target[i] - activation[i];
44 | }
45 | }
46 |
47 |
48 | int outputCount()
49 | {
50 | return activation.length;
51 | }
52 |
53 |
54 | static Layer unmarshal(Json n)
55 | {
56 | int t = (int)n.getLong("type");
57 | switch(t)
58 | {
59 | case t_linear: return new LayerLinear(n);
60 | case t_tanh: return new LayerTanh(n);
61 | default: throw new RuntimeException("Unrecognized type");
62 | }
63 | }
64 |
65 |
66 | protected abstract Layer clone();
67 | abstract Json marshal();
68 | abstract int type();
69 | abstract int inputCount();
70 | abstract void initWeights(Random r);
71 | abstract double[] forwardProp(double[] in);
72 | abstract void backProp(Layer upStream);
73 | abstract void scaleGradient(double momentum);
74 | abstract void updateGradient(double[] in);
75 | abstract void step(double stepSize);
76 | abstract int countWeights();
77 | abstract int setWeights(double[] w, int start);
78 | abstract void regularizeWeights(double lambda);
79 | }
80 |
81 |
82 |
83 | class LayerLinear extends Layer
84 | {
85 | Matrix weights; // rows are inputs, cols are outputs
86 | Matrix weightsGrad;
87 | double[] bias;
88 | double[] biasGrad;
89 |
90 |
91 | /// General-purpose constructor
92 | LayerLinear(int inputs, int outputs)
93 | {
94 | super(outputs);
95 | weights = new Matrix();
96 | weights.setSize(inputs, outputs);
97 | weightsGrad = new Matrix();
98 | weightsGrad.setSize(inputs, outputs);
99 | bias = new double[outputs];
100 | biasGrad = new double[outputs];
101 | }
102 |
103 |
104 | /// Copy constructor
105 | LayerLinear(LayerLinear that)
106 | {
107 | super(that);
108 | weights = new Matrix(that.weights);
109 | bias = Vec.copy(that.bias);
110 | // The gradients are not copied; they are cleared so that
111 | // training can resume cleanly on the copy.
112 | weightsGrad = new Matrix();
113 | weightsGrad.setSize(weights.rows(), weights.cols());
114 | weightsGrad.setAll(0.0);
115 | biasGrad = new double[weights.cols()];
116 | Vec.setAll(biasGrad, 0.0);
117 | }
118 |
119 |
120 | /// Unmarshal from a JSON DOM
121 | LayerLinear(Json n)
122 | {
123 | super(n);
124 | weights = new Matrix(n.get("weights"));
125 | bias = Vec.unmarshal(n.get("bias"));
126 | weightsGrad = new Matrix(); weightsGrad.setSize(weights.rows(), weights.cols()); biasGrad = new double[weights.cols()]; // start with zero gradients, so training can continue after unmarshaling
127 | }
128 |
129 | protected LayerLinear clone()
130 | {
131 | return new LayerLinear(this);
132 | }
133 |
134 |
135 | /// Marshal into a JSON DOM
136 | Json marshal()
137 | {
138 | Json ob = Json.newObject();
139 | ob.add("units", (long)outputCount()); // required in all layers
140 | ob.add("weights", weights.marshal());
141 | ob.add("bias", Vec.marshal(bias));
142 | return ob;
143 | }
144 |
145 |
146 | void copy(LayerLinear src)
147 | {
148 | if(src.weights.rows() != weights.rows() || src.weights.cols() != weights.cols())
149 | throw new IllegalArgumentException("mismatching sizes");
150 | weights.copyBlock(0, 0, src.weights, 0, 0, src.weights.rows(), src.weights.cols());
151 | for(int i = 0; i < bias.length; i++)
152 | {
153 | bias[i] = src.bias[i];
154 | }
155 | }
156 |
157 |
158 | int type() { return t_linear; }
159 | int inputCount() { return weights.rows(); }
160 |
161 |
162 | void
initWeights(Random r) 163 | { 164 | double dev = Math.max(0.3, 1.0 / weights.rows()); 165 | for(int i = 0; i < weights.rows(); i++) 166 | { 167 | double[] row = weights.row(i); 168 | for(int j = 0; j < weights.cols(); j++) 169 | { 170 | row[j] = dev * r.nextGaussian(); 171 | } 172 | } 173 | for(int j = 0; j < weights.cols(); j++) { 174 | bias[j] = dev * r.nextGaussian(); 175 | } 176 | weightsGrad.setAll(0.0); 177 | Vec.setAll(biasGrad, 0.0); 178 | } 179 | 180 | 181 | int countWeights() 182 | { 183 | return weights.rows() * weights.cols() + bias.length; 184 | } 185 | 186 | 187 | int setWeights(double[] w, int start) 188 | { 189 | int oldStart = start; 190 | for(int i = 0; i < bias.length; i++) 191 | bias[i] = w[start++]; 192 | for(int i = 0; i < weights.rows(); i++) 193 | { 194 | double[] row = weights.row(i); 195 | for(int j = 0; j < weights.cols(); j++) 196 | row[j] = w[start++]; 197 | } 198 | return start - oldStart; 199 | } 200 | 201 | 202 | double[] forwardProp(double[] in) 203 | { 204 | if(in.length != weights.rows()) 205 | throw new IllegalArgumentException("size mismatch. " + Integer.toString(in.length) + " != " + Integer.toString(weights.rows())); 206 | for(int i = 0; i < activation.length; i++) 207 | activation[i] = bias[i]; 208 | for(int j = 0; j < weights.rows(); j++) 209 | { 210 | double v = in[j]; 211 | double[] w = weights.row(j); 212 | for(int i = 0; i < weights.cols(); i++) 213 | activation[i] += v * w[i]; 214 | } 215 | return activation; 216 | } 217 | 218 | 219 | double[] forwardProp2(double[] in1, double[] in2) 220 | { 221 | if(in1.length + in2.length != weights.rows()) 222 | throw new IllegalArgumentException("size mismatch. " + Integer.toString(in1.length) + " + " + Integer.toString(in2.length) + " != " + Integer.toString(weights.rows())); 223 | for(int i = 0; i < activation.length; i++) 224 | activation[i] = bias[i]; 225 | for(int j = 0; j < in1.length; j++) 226 | { 227 | double v = in1[j]; 228 | double[] w = weights.row(j); 229 | for(int i = 0; i < weights.cols(); i++) 230 | activation[i] += v * w[i]; 231 | } 232 | for(int j = 0; j < in2.length; j++) 233 | { 234 | double v = in2[j]; 235 | double[] w = weights.row(in1.length + j); 236 | for(int i = 0; i < weights.cols(); i++) 237 | activation[i] += v * w[i]; 238 | } 239 | return activation; 240 | } 241 | 242 | 243 | void backProp(Layer upStream) 244 | { 245 | if(upStream.outputCount() != weights.rows()) 246 | throw new IllegalArgumentException("size mismatch"); 247 | for(int j = 0; j < weights.rows(); j++) 248 | { 249 | double[] w = weights.row(j); 250 | double d = 0.0; 251 | for(int i = 0; i < weights.cols(); i++) 252 | { 253 | d += error[i] * w[i]; 254 | } 255 | upStream.error[j] = d; 256 | } 257 | } 258 | 259 | 260 | void refineInputs(double[] inputs, double learningRate) 261 | { 262 | if(inputs.length != weights.rows()) 263 | throw new IllegalArgumentException("size mismatch"); 264 | for(int j = 0; j < weights.rows(); j++) 265 | { 266 | double[] w = weights.row(j); 267 | double d = 0.0; 268 | for(int i = 0; i < weights.cols(); i++) 269 | { 270 | d += error[i] * w[i]; 271 | } 272 | inputs[j] += learningRate * d; 273 | } 274 | } 275 | 276 | 277 | void scaleGradient(double momentum) 278 | { 279 | weightsGrad.scale(momentum); 280 | Vec.scale(biasGrad, momentum); 281 | } 282 | 283 | 284 | void updateGradient(double[] in) 285 | { 286 | for(int i = 0; i < bias.length; i++) 287 | { 288 | biasGrad[i] += error[i]; 289 | } 290 | for(int j = 0; j < weights.rows(); j++) 291 | { 292 | double[] w = weightsGrad.row(j); 293 | 
double x = in[j]; 294 | for(int i = 0; i < weights.cols(); i++) 295 | { 296 | w[i] += x * error[i]; 297 | } 298 | } 299 | } 300 | 301 | 302 | void step(double stepSize) 303 | { 304 | weights.addScaled(weightsGrad, stepSize); 305 | Vec.addScaled(bias, biasGrad, stepSize); 306 | } 307 | 308 | 309 | // Applies both L2 and L1 regularization to the weights and bias values 310 | void regularizeWeights(double lambda) 311 | { 312 | for(int i = 0; i < weights.rows(); i++) 313 | { 314 | double[] row = weights.row(i); 315 | for(int j = 0; j < row.length; j++) 316 | { 317 | row[j] *= (1.0 - lambda); 318 | if(row[j] < 0.0) 319 | row[j] += lambda; 320 | else 321 | row[j] -= lambda; 322 | } 323 | } 324 | for(int j = 0; j < bias.length; j++) 325 | { 326 | bias[j] *= (1.0 - lambda); 327 | if(bias[j] < 0.0) 328 | bias[j] += lambda; 329 | else 330 | bias[j] -= lambda; 331 | } 332 | } 333 | } 334 | 335 | 336 | 337 | 338 | 339 | class LayerTanh extends Layer 340 | { 341 | /// General-purpose constructor 342 | LayerTanh(int nodes) 343 | { 344 | super(nodes); 345 | } 346 | 347 | 348 | /// Copy constructor 349 | LayerTanh(LayerTanh that) 350 | { 351 | super(that); 352 | } 353 | 354 | 355 | /// Unmarshal from a JSON DOM 356 | LayerTanh(Json n) 357 | { 358 | super(n); 359 | } 360 | 361 | 362 | protected LayerTanh clone() 363 | { 364 | return new LayerTanh(this); 365 | } 366 | 367 | 368 | /// Marshal into a JSON DOM 369 | Json marshal() 370 | { 371 | Json ob = Json.newObject(); 372 | ob.add("units", (long)outputCount()); // required in all layers 373 | return ob; 374 | } 375 | 376 | 377 | void copy(LayerTanh src) 378 | { 379 | } 380 | 381 | 382 | int type() { return t_tanh; } 383 | int inputCount() { return activation.length; } 384 | 385 | 386 | void initWeights(Random r) 387 | { 388 | } 389 | 390 | 391 | int countWeights() 392 | { 393 | return 0; 394 | } 395 | 396 | 397 | int setWeights(double[] w, int start) 398 | { 399 | if(w.length != 0) 400 | throw new IllegalArgumentException("size mismatch"); 401 | return 0; 402 | } 403 | 404 | 405 | double[] forwardProp(double[] in) 406 | { 407 | if(in.length != outputCount()) 408 | throw new IllegalArgumentException("size mismatch. 
" + Integer.toString(in.length) + " != " + Integer.toString(outputCount())); 409 | for(int i = 0; i < activation.length; i++) 410 | { 411 | activation[i] = Math.tanh(in[i]); 412 | } 413 | return activation; 414 | } 415 | 416 | 417 | void backProp(Layer upStream) 418 | { 419 | if(upStream.outputCount() != outputCount()) 420 | throw new IllegalArgumentException("size mismatch"); 421 | for(int i = 0; i < activation.length; i++) 422 | { 423 | upStream.error[i] = error[i] * (1.0 - activation[i] * activation[i]); 424 | } 425 | } 426 | 427 | 428 | void scaleGradient(double momentum) 429 | { 430 | } 431 | 432 | 433 | void updateGradient(double[] in) 434 | { 435 | } 436 | 437 | 438 | void step(double stepSize) 439 | { 440 | } 441 | 442 | 443 | // Applies both L2 and L1 regularization to the weights and bias values 444 | void regularizeWeights(double lambda) 445 | { 446 | } 447 | } 448 | 449 | -------------------------------------------------------------------------------- /java/src/Main.java: -------------------------------------------------------------------------------- 1 | import java.util.Random; 2 | import java.util.ArrayList; 3 | 4 | public class Main { 5 | 6 | static void gauntlet(ArrayList agents, ArrayList tests) { 7 | 8 | Matrix results = new Matrix(tests.size(), agents.size()); 9 | 10 | // Evaluate every agent against every test 11 | for(int i = 0; i < tests.size(); i++) { 12 | ITest challenge = tests.get(i); 13 | for(int j = 0; j < agents.size(); j++) { 14 | IAgent agent = agents.get(j); 15 | double result = challenge.test(agent); 16 | results.row(i)[j] = result; 17 | } 18 | } 19 | 20 | System.out.println("\n\n"); 21 | System.out.println("-------------"); 22 | System.out.println("Final results"); 23 | System.out.println("-------------"); 24 | System.out.print("[" + agents.get(0).getName()); 25 | for(int i = 1; i < agents.size(); i++) { 26 | System.out.print("," + agents.get(i).getName()); 27 | } 28 | System.out.println("]"); 29 | System.out.println(results.toString()); 30 | } 31 | 32 | public static void main(String[] args) throws Exception { 33 | 34 | Random r = new Random(0); 35 | 36 | // Make a list of agents 37 | ArrayList agents = new ArrayList(); 38 | agents.add(new AgentRandy(r)); 39 | agents.add(new AgentManic(r)); 40 | 41 | // Make a list of tests 42 | ArrayList tests = new ArrayList(); 43 | tests.add(new DriftingPlatform(r)); 44 | 45 | // Run the agents through the gauntlet 46 | gauntlet(agents, tests); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /java/src/Makefile: -------------------------------------------------------------------------------- 1 | build: clean 2 | echo;echo;echo;echo;echo;echo 3 | javac -d ../class -Xmaxerrs 3 Main.java 4 | 5 | clean: 6 | find . -name "*.class" -exec rm {} \; 7 | find ../class -name "*.class" -exec rm {} \; 8 | 9 | run: 10 | java -cp ../class Main 11 | -------------------------------------------------------------------------------- /java/src/Matrix.java: -------------------------------------------------------------------------------- 1 | // ---------------------------------------------------------------- 2 | // The contents of this file are distributed under the CC0 license. 
--------------------------------------------------------------------------------
/java/src/Matrix.java:
--------------------------------------------------------------------------------
1 | // ----------------------------------------------------------------
2 | // The contents of this file are distributed under the CC0 license.
3 | // See http://creativecommons.org/publicdomain/zero/1.0/
4 | // ----------------------------------------------------------------
5 |
6 | import java.io.File;
7 | import java.io.FileNotFoundException;
8 | import java.io.PrintWriter;
9 | import java.util.HashMap;
10 | import java.util.Map;
11 | import java.util.Scanner;
12 | import java.util.ArrayList;
13 | import java.util.Iterator;
14 | import java.lang.StringBuilder;
15 |
16 |
17 | /// This stores a matrix, A.K.A. data set, A.K.A. table. Each element is
18 | /// represented as a double value. Nominal values are represented using their
19 | /// corresponding zero-indexed enumeration value. For convenience,
20 | /// the matrix also stores some meta-data which describes the columns (or attributes)
21 | /// in the matrix.
22 | public class Matrix
23 | {
24 | /// Used to represent elements in the matrix for which the value is not known.
25 | public static final double UNKNOWN_VALUE = -1e308;
26 |
27 | // Data
28 | private ArrayList<double[]> m_data = new ArrayList<double[]>(); // matrix elements
29 |
30 | // Meta-data
31 | private String m_filename; // the name of the file
32 | private ArrayList<String> m_attr_name; // the name of each attribute (or column)
33 | private ArrayList<HashMap<String, Integer>> m_str_to_enum; // value to enumeration
34 | private ArrayList<HashMap<Integer, String>> m_enum_to_str; // enumeration to value
35 |
36 |
37 | /// Creates a 0x0 matrix. (Next, to give this matrix some dimensions, you should call:
38 | /// loadARFF,
39 | /// setSize,
40 | /// newColumn, or
41 | /// copyMetaData.)
42 | @SuppressWarnings("unchecked")
43 | public Matrix()
44 | {
45 | this.m_filename = "";
46 | this.m_attr_name = new ArrayList<String>();
47 | this.m_str_to_enum = new ArrayList<HashMap<String, Integer>>();
48 | this.m_enum_to_str = new ArrayList<HashMap<Integer, String>>();
49 | }
50 |
51 |
52 | public Matrix(int rows, int cols)
53 | {
54 | this.m_filename = "";
55 | this.m_attr_name = new ArrayList<String>();
56 | this.m_str_to_enum = new ArrayList<HashMap<String, Integer>>();
57 | this.m_enum_to_str = new ArrayList<HashMap<Integer, String>>();
58 | setSize(rows, cols);
59 | }
60 |
61 |
62 | public Matrix(Matrix that)
63 | {
64 | m_filename = that.m_filename;
65 | m_attr_name = new ArrayList<String>();
66 | m_str_to_enum = new ArrayList<HashMap<String, Integer>>();
67 | m_enum_to_str = new ArrayList<HashMap<Integer, String>>();
68 | setSize(that.rows(), that.cols());
69 | copyBlock(0, 0, that, 0, 0, that.rows(), that.cols()); // (copies the meta data too)
70 | }
71 |
72 |
73 | public Matrix(Json n)
74 | {
75 | int rowCount = n.size();
76 | int colCount = n.get(0).size();
77 | setSize(rowCount, colCount);
78 | for(int i = 0; i < rowCount; i++)
79 | {
80 | double[] mrow = row(i);
81 | Json jrow = n.get(i);
82 | for(int j = 0; j < colCount; j++)
83 | {
84 | mrow[j] = jrow.getDouble(j);
85 | }
86 | }
87 | }
88 |
89 |
90 | public Json marshal()
91 | {
92 | Json list = Json.newList();
93 | for(int i = 0; i < rows(); i++)
94 | list.add(Vec.marshal(row(i)));
95 | return list;
96 | }
97 |
98 |
99 | /// Loads the matrix from an ARFF file
100 | public void loadARFF(String filename)
101 | {
102 | HashMap<String, Integer> tempMap = new HashMap<String, Integer>(); // temp map from attribute value to enumeration index
103 | HashMap<Integer, String> tempMapS = new HashMap<Integer, String>(); // temp map from enumeration index to attribute value
104 | int attrCount = 0; // Count number of attributes
105 | int lineNum = 0; // Used for exception messages
106 | Scanner s = null;
107 | m_str_to_enum.clear();
108 | m_enum_to_str.clear();
109 | m_attr_name.clear();
110 |
111 | try
112 | {
113 | s = new Scanner(new File(filename));
114 | while (s.hasNextLine())
115 | {
116 | lineNum++;
117 | String line = s.nextLine().trim();
118 | String upper = line.toUpperCase();
119 |
120 | if (upper.startsWith("@RELATION"))
121 | m_filename = line.split(" ")[1];
122 | else if (upper.startsWith("@ATTRIBUTE"))
123 | {
124 | String[] pieces = line.split("\\s+");
125 | m_attr_name.add(pieces[1]);
126 |
127 | tempMap.clear();
128 | tempMapS.clear();
129 |
130 | // If the attribute is nominal
131 | if (pieces[2].startsWith("{"))
132 | {
133 | // Splits this string based on curly brackets or commas
134 | String[] attributeNames = pieces[2].split("[{},]");
135 | int valCount = 0;
136 |
137 | for (String attribute : attributeNames)
138 | {
139 | if (!attribute.equals("")) // Ignore empty strings
140 | {
141 | tempMapS.put(valCount, attribute);
142 | tempMap.put(attribute, valCount++);
143 | }
144 | }
145 | }
146 |
147 | // The attribute is continuous if it wasn't picked up in the previous "if" statement
148 |
149 | m_str_to_enum.add(new HashMap<String, Integer>(tempMap));
150 | m_enum_to_str.add(new HashMap<Integer, String>(tempMapS));
151 |
152 | attrCount++;
153 | }
154 | else if (upper.startsWith("@DATA"))
155 | {
156 | m_data.clear();
157 |
158 | while (s.hasNextLine())
159 | {
160 | double[] temp = new double[attrCount];
161 |
162 | lineNum++;
163 | line = s.nextLine().trim();
164 |
165 | if (line.startsWith("%") || line.isEmpty()) continue;
166 | String[] pieces = line.split(",");
167 |
168 | if (pieces.length < attrCount) throw new IllegalArgumentException("Expected more elements on line: " + lineNum + ".");
169 |
170 | for (int i = 0; i < attrCount; i++)
171 | {
172 | int vals = valueCount(i);
173 | String val = pieces[i];
174 |
175 | // Unknown values are always set to UNKNOWN_VALUE
176 | if (val.equals("?"))
177 | {
178 | temp[i] = UNKNOWN_VALUE;
179 | continue;
180 | }
181 |
182 | // If the attribute is nominal
183 | if (vals > 0)
184 | {
185 | HashMap<String, Integer> enumMap = m_str_to_enum.get(i);
186 | if (!enumMap.containsKey(val))
187 | throw new IllegalArgumentException("Unrecognized enumeration value " + val + " on line: " + lineNum + ".");
188 |
189 | temp[i] = (double)enumMap.get(val);
190 | }
191 | else
192 | temp[i] = Double.parseDouble(val); // The attribute is continuous
193 | }
194 |
195 | m_data.add(temp);
196 | }
197 | }
198 | }
199 | }
200 | catch (FileNotFoundException e)
201 | {
202 | throw new IllegalArgumentException("Failed to open file: " + filename + ".");
203 | }
204 | finally
205 | {
206 | if (s != null) s.close();
207 | }
208 | }
209 |
210 |
211 | public String toString()
212 | {
213 | StringBuilder sb = new StringBuilder();
214 | for(int j = 0; j < rows(); j++)
215 | {
216 | if(j > 0)
217 | sb.append("\n");
218 | sb.append(Vec.toString(row(j)));
219 | }
220 | return sb.toString();
221 | }
222 |
223 |
224 | /// Saves the matrix to an ARFF file
225 | public void saveARFF(String filename)
226 | {
227 | PrintWriter os = null;
228 |
229 | try
230 | {
231 | os = new PrintWriter(filename);
232 | // Print the relation name, if one has been provided ('x' is default)
233 | os.print("@RELATION ");
234 | os.println(m_filename.isEmpty() ? "x" : m_filename);
235 |
236 | // Print each attribute in order
237 | for (int i = 0; i < m_attr_name.size(); i++)
238 | {
239 | os.print("@ATTRIBUTE ");
240 |
241 | String attributeName = m_attr_name.get(i);
242 | os.print(attributeName.isEmpty() ? "x" : attributeName);
"x" : attributeName); 243 | 244 | int vals = valueCount(i); 245 | 246 | if (vals == 0) os.println(" REAL"); 247 | else 248 | { 249 | os.print(" {"); 250 | for (int j = 0; j < vals; j++) 251 | { 252 | os.print(attrValue(i, j)); 253 | if (j + 1 < vals) os.print(","); 254 | } 255 | os.println("}"); 256 | } 257 | } 258 | 259 | // Print the data 260 | os.println("@DATA"); 261 | for (int i = 0; i < rows(); i++) 262 | { 263 | double[] row = m_data.get(i); 264 | for (int j = 0; j < cols(); j++) 265 | { 266 | if (row[j] == UNKNOWN_VALUE) 267 | os.print("?"); 268 | else 269 | { 270 | int vals = valueCount(j); 271 | if (vals == 0) os.print(row[j]); 272 | else 273 | { 274 | int val = (int)row[j]; 275 | if (val >= vals) throw new IllegalArgumentException("Value out of range."); 276 | os.print(attrValue(j, val)); 277 | } 278 | } 279 | 280 | if (j + 1 < cols()) os.print(","); 281 | } 282 | os.println(); 283 | } 284 | } 285 | catch (FileNotFoundException e) 286 | { 287 | throw new IllegalArgumentException("Error creating file: " + filename + "."); 288 | } 289 | finally 290 | { 291 | os.close(); 292 | } 293 | } 294 | 295 | /// Makes a rows-by-columns matrix of *ALL CONTINUOUS VALUES*. 296 | /// This method wipes out any data currently in the matrix. It also 297 | /// wipes out any meta-data. 298 | public void setSize(int rows, int cols) 299 | { 300 | m_data.clear(); 301 | 302 | // Set the meta-data 303 | m_filename = ""; 304 | m_attr_name.clear(); 305 | m_str_to_enum.clear(); 306 | m_enum_to_str.clear(); 307 | 308 | // Make space for each of the columns, then each of the rows 309 | newColumns(cols); 310 | newRows(rows); 311 | } 312 | 313 | /// Clears this matrix and copies the meta-data from that matrix. 314 | /// In other words, it makes a zero-row matrix with the same number 315 | /// of columns as "that" matrix. You will need to call newRow or newRows 316 | /// to give the matrix some rows. 317 | @SuppressWarnings("unchecked") 318 | public void copyMetaData(Matrix that) 319 | { 320 | m_data.clear(); 321 | m_attr_name = new ArrayList(that.m_attr_name); 322 | 323 | // Make a deep copy of that.m_str_to_enum 324 | m_str_to_enum = new ArrayList>(); 325 | for (HashMap map : that.m_str_to_enum) 326 | { 327 | HashMap temp = new HashMap(); 328 | for (Map.Entry entry : map.entrySet()) 329 | temp.put(entry.getKey(), entry.getValue()); 330 | 331 | m_str_to_enum.add(temp); 332 | } 333 | 334 | // Make a deep copy of that.m_enum_to_string 335 | m_enum_to_str = new ArrayList>(); 336 | for (HashMap map : that.m_enum_to_str) 337 | { 338 | HashMap temp = new HashMap(); 339 | for (Map.Entry entry : map.entrySet()) 340 | temp.put(entry.getKey(), entry.getValue()); 341 | 342 | m_enum_to_str.add(temp); 343 | } 344 | } 345 | 346 | /// Adds a column to this matrix with the specified number of values. (Use 0 for 347 | /// a continuous attribute.) This method also sets the number of rows to 0, so 348 | /// you will need to call newRow or newRows when you are done adding columns. 
349 | public void newColumn(int vals)
350 | {
351 | m_data.clear();
352 | String name = "col_" + cols();
353 |
354 | m_attr_name.add(name);
355 |
356 | HashMap<String, Integer> temp_str_to_enum = new HashMap<String, Integer>();
357 | HashMap<Integer, String> temp_enum_to_str = new HashMap<Integer, String>();
358 |
359 | for (int i = 0; i < vals; i++)
360 | {
361 | String sVal = "val_" + i;
362 | temp_str_to_enum.put(sVal, i);
363 | temp_enum_to_str.put(i, sVal);
364 | }
365 |
366 | m_str_to_enum.add(temp_str_to_enum);
367 | m_enum_to_str.add(temp_enum_to_str);
368 | }
369 |
370 |
371 | /// Adds a column to this matrix with 0 values (continuous data).
372 | public void newColumn()
373 | {
374 | this.newColumn(0);
375 | }
376 |
377 |
378 | /// Adds n columns to this matrix, each with 0 values (continuous data).
379 | public void newColumns(int n)
380 | {
381 | for (int i = 0; i < n; i++)
382 | newColumn();
383 | }
384 |
385 |
386 | /// Adds one new row to this matrix. Returns a reference to the new row.
387 | public double[] newRow()
388 | {
389 | int c = cols();
390 | if (c == 0)
391 | throw new IllegalArgumentException("You must add some columns before you add any rows.");
392 | double[] newRow = new double[c];
393 | m_data.add(newRow);
394 | return newRow;
395 | }
396 |
397 |
398 | /// Adds one new row to this matrix at the specified location. Returns a reference to the new row.
399 | public double[] insertRow(int i)
400 | {
401 | int c = cols();
402 | if (c == 0)
403 | throw new IllegalArgumentException("You must add some columns before you add any rows.");
404 | double[] newRow = new double[c];
405 | m_data.add(i, newRow);
406 | return newRow;
407 | }
408 |
409 |
410 | /// Removes the specified row from this matrix. Returns a reference to the removed row.
411 | public double[] removeRow(int i)
412 | {
413 | return m_data.remove(i);
414 | }
415 |
416 |
417 | /// Appends the specified row to this matrix.
418 | public void takeRow(double[] row)
419 | {
420 | if(row.length != cols())
421 | throw new IllegalArgumentException("Row size differs from the number of columns in this matrix.");
422 | m_data.add(row);
423 | }
424 |
425 |
426 | /// Adds 'n' new rows to this matrix
427 | public void newRows(int n)
428 | {
429 | for (int i = 0; i < n; i++)
430 | newRow();
431 | }
432 |
433 |
434 | /// Returns the number of rows in the matrix
435 | public int rows() { return m_data.size(); }
436 |
437 | /// Returns the number of columns (or attributes) in the matrix
438 | public int cols() { return m_attr_name.size(); }
439 |
440 | /// Returns the name of the specified attribute
441 | public String attrName(int col) { return m_attr_name.get(col); }
442 |
443 | /// Returns the name of the specified value
444 | public String attrValue(int attr, int val)
445 | {
446 | String value = m_enum_to_str.get(attr).get(val);
447 | if (value == null)
448 | throw new IllegalArgumentException("No name.");
449 | else return value;
450 | }
451 |
452 | /// Returns a reference to the specified row
453 | public double[] row(int index) { return m_data.get(index); }
454 |
455 | /// Swaps the positions of the two specified rows
456 | public void swapRows(int a, int b)
457 | {
458 | double[] temp = m_data.get(a);
459 | m_data.set(a, m_data.get(b));
460 | m_data.set(b, temp);
461 | }
462 |
463 | /// Returns the number of values associated with the specified attribute (or column)
464 | /// 0 = continuous, 2 = binary, 3 = trinary, etc.
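/// For example, an attribute loaded from "@ATTRIBUTE class {yes,no}" reports a
/// valueCount of 2, while one loaded from "@ATTRIBUTE width REAL" reports 0.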
465 | public int valueCount(int attr) { return m_enum_to_str.get(attr).size(); }
466 |
467 | /// Copies that matrix
468 | void copy(Matrix that)
469 | {
470 | setSize(that.rows(), that.cols());
471 | copyBlock(0, 0, that, 0, 0, that.rows(), that.cols());
472 | }
473 |
474 | /// Returns the mean of the elements in the specified column. (Elements with the value UNKNOWN_VALUE are ignored.)
475 | public double columnMean(int col)
476 | {
477 | double sum = 0.0;
478 | int count = 0;
479 | for (double[] list : m_data)
480 | {
481 | double val = list[col];
482 | if (val != UNKNOWN_VALUE)
483 | {
484 | sum += val;
485 | count++;
486 | }
487 | }
488 |
489 | return sum / count;
490 | }
491 |
492 | /// Returns the minimum element in the specified column. (Elements with the value UNKNOWN_VALUE are ignored.)
493 | public double columnMin(int col)
494 | {
495 | double min = Double.MAX_VALUE;
496 | for (double[] list : m_data)
497 | {
498 | double val = list[col];
499 | if (val != UNKNOWN_VALUE)
500 | min = Math.min(min, val);
501 | }
502 |
503 | return min;
504 | }
505 |
506 | /// Returns the maximum element in the specified column. (Elements with the value UNKNOWN_VALUE are ignored.)
507 | public double columnMax(int col)
508 | {
509 | double max = -Double.MAX_VALUE;
510 | for (double[] list : m_data)
511 | {
512 | double val = list[col];
513 | if (val != UNKNOWN_VALUE)
514 | max = Math.max(max, val);
515 | }
516 |
517 | return max;
518 | }
519 |
520 | /// Returns the most common value in the specified column. (Elements with the value UNKNOWN_VALUE are ignored.)
521 | public double mostCommonValue(int col)
522 | {
523 | HashMap<Double, Integer> counts = new HashMap<Double, Integer>();
524 | for (double[] list : m_data)
525 | {
526 | double val = list[col];
527 | if (val != UNKNOWN_VALUE)
528 | {
529 | Integer result = counts.get(val);
530 | if (result == null) result = 0;
531 |
532 | counts.put(val, result + 1);
533 | }
534 | }
535 |
536 | int valueCount = 0;
537 | double value = 0;
538 | for (Map.Entry<Double, Integer> entry : counts.entrySet())
539 | {
540 | if (entry.getValue() > valueCount)
541 | {
542 | value = entry.getKey();
543 | valueCount = entry.getValue();
544 | }
545 | }
546 |
547 | return value;
548 | }
549 |
550 | /// Copies the specified rectangular portion of that matrix, and puts it in the specified location in this matrix.
551 | public void copyBlock(int destRow, int destCol, Matrix that, int rowBegin, int colBegin, int rowCount, int colCount)
552 | {
553 | if (destRow + rowCount > this.rows() || destCol + colCount > this.cols())
554 | throw new IllegalArgumentException("Out of range for destination matrix.");
555 | if (rowBegin + rowCount > that.rows() || colBegin + colCount > that.cols())
556 | throw new IllegalArgumentException("Out of range for source matrix.");
557 |
558 | // Copy the specified region of meta-data
559 | for (int i = 0; i < colCount; i++)
560 | {
561 | m_attr_name.set(destCol + i, that.m_attr_name.get(colBegin + i));
562 | m_str_to_enum.set(destCol + i, new HashMap<String, Integer>(that.m_str_to_enum.get(colBegin + i)));
563 | m_enum_to_str.set(destCol + i, new HashMap<Integer, String>(that.m_enum_to_str.get(colBegin + i)));
564 | }
565 |
566 | // Copy the specified region of data
567 | for (int i = 0; i < rowCount; i++)
568 | {
569 | double[] source = that.row(rowBegin + i);
570 | double[] dest = this.row(destRow + i);
571 | for(int j = 0; j < colCount; j++)
572 | dest[destCol + j] = source[colBegin + j];
573 | }
574 | }
575 |
576 | /// Sets every element in the matrix to the specified value.
577 | public void setAll(double val)
578 | {
579 | for (double[] vec : m_data)
580 | {
581 | for(int i = 0; i < vec.length; i++)
582 | vec[i] = val;
583 | }
584 | }
585 |
586 |
587 | /// Multiplies every element in the matrix by the specified scalar.
588 | public void scale(double scalar)
589 | {
590 | for (double[] vec : m_data)
591 | {
592 | for(int i = 0; i < vec.length; i++)
593 | vec[i] *= scalar;
594 | }
595 | }
596 |
597 |
598 | /// Adds a scaled copy of that matrix to this one
599 | public void addScaled(Matrix that, double scalar)
600 | {
601 | if(that.rows() != this.rows() || that.cols() != this.cols())
602 | throw new IllegalArgumentException("Mismatching size");
603 | for (int i = 0; i < rows(); i++)
604 | {
605 | double[] dest = this.row(i);
606 | double[] src = that.row(i);
607 | Vec.addScaled(dest, src, scalar);
608 | }
609 | }
610 |
611 |
612 | /// Sets this to the identity matrix.
613 | public void setToIdentity()
614 | {
615 | setAll(0.0);
616 | int m = Math.min(cols(), rows());
617 | for(int i = 0; i < m; i++)
618 | row(i)[i] = 1.0;
619 | }
620 |
621 | /// Throws an exception if that has a different number of columns than
622 | /// this, or if one of its columns has a different number of values.
623 | public void checkCompatibility(Matrix that)
624 | {
625 | int c = cols();
626 | if (that.cols() != c)
627 | throw new IllegalArgumentException("Matrices have different number of columns.");
628 |
629 | for (int i = 0; i < c; i++)
630 | {
631 | if (valueCount(i) != that.valueCount(i))
632 | throw new IllegalArgumentException("Column " + i + " has mis-matching number of values.");
633 | }
634 | }
635 | }
636 |
--------------------------------------------------------------------------------
/java/src/NeuralNet.java:
--------------------------------------------------------------------------------
1 | import java.util.Random;
2 | import java.util.ArrayList;
3 | import java.util.Iterator;
4 | import java.awt.image.BufferedImage;
5 | import java.awt.Color;
6 | import java.io.File;
7 | import javax.imageio.ImageIO;
8 |
9 |
10 | public class NeuralNet
11 | {
12 | public ArrayList<Layer> layers;
13 |
14 |
15 | /// General-purpose constructor. (Starts with no layers. You must add at least one.)
16 | NeuralNet()
17 | {
18 | layers = new ArrayList<Layer>();
19 | }
20 |
21 |
22 | /// Copy constructor
23 | NeuralNet(NeuralNet that)
24 | {
25 | layers = new ArrayList<Layer>();
26 | for(int i = 0; i < that.layers.size(); i++)
27 | {
28 | layers.add(that.layers.get(i).clone());
29 | }
30 | }
31 |
32 |
33 | /// Unmarshals from a JSON DOM.
34 | NeuralNet(Json n)
35 | {
36 | layers = new ArrayList<Layer>();
37 | Json l = n.get("layers");
38 | for(int i = 0; i < l.size(); i++)
39 | layers.add(Layer.unmarshal(l.get(i)));
40 | }
41 |
42 |
43 | /// Marshal this neural network into a JSON DOM.
44 | Json marshal()
45 | {
46 | Json ob = Json.newObject();
47 | Json l = Json.newList();
48 | ob.add("layers", l);
49 | for(int i = 0; i < layers.size(); i++)
50 | l.add(layers.get(i).marshal());
51 | return ob;
52 | }
53 |
54 |
55 | /// Initializes the weights and biases with small random values
56 | void init(Random r)
57 | {
58 | for(int i = 0; i < layers.size(); i++)
59 | {
60 | layers.get(i).initWeights(r);
61 | }
62 | }
63 |
64 |
65 | /// Feeds "in" into this neural network and propagates it forward to compute predicted outputs.
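/// A minimal usage sketch (the layer sizes here are illustrative; see testMath
/// below for the canonical worked example in this file):
///
///   NeuralNet nn = new NeuralNet();
///   nn.layers.add(new LayerLinear(2, 3));   // 2 inputs -> 3 hidden units
///   nn.layers.add(new LayerTanh(3));
///   nn.layers.add(new LayerLinear(3, 2));   // 3 hidden -> 2 outputs
///   nn.layers.add(new LayerTanh(2));
///   nn.init(new Random(0));
///   double[] out = nn.forwardProp(new double[] {0.3, -0.2});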
66 | double[] forwardProp(double[] in)
67 | {
68 | for(int i = 0; i < layers.size(); i++)
69 | {
70 | in = layers.get(i).forwardProp(in);
71 | }
72 | return in;
73 | }
74 |
75 |
76 | /// Feeds the concatenation of "in1" and "in2" into this neural network and propagates it forward to compute predicted outputs.
77 | double[] forwardProp2(double[] in1, double[] in2)
78 | {
79 | double[] in = ((LayerLinear)layers.get(0)).forwardProp2(in1, in2);
80 | for(int i = 1; i < layers.size(); i++)
81 | {
82 | in = layers.get(i).forwardProp(in);
83 | }
84 | return in;
85 | }
86 |
87 |
88 | /// Backpropagates the error to the upstream layer.
89 | void backProp(double[] target)
90 | {
91 | int i = layers.size() - 1;
92 | Layer l = layers.get(i);
93 | l.computeError(target);
94 | for(i--; i >= 0; i--)
95 | {
96 | Layer upstream = layers.get(i);
97 | l.backProp(upstream);
98 | l = upstream;
99 | }
100 | }
101 |
102 |
103 | /// Backpropagates the error from another neural network. (This is used when training autoencoders.)
104 | void backPropFromDecoder(NeuralNet decoder)
105 | {
106 | int i = layers.size() - 1;
107 | Layer l = decoder.layers.get(0);
108 | Layer upstream = layers.get(i);
109 | l.backProp(upstream);
110 | l = upstream;
111 | for(i--; i >= 0; i--)
112 | {
113 | upstream = layers.get(i);
114 | l.backProp(upstream);
115 | l = upstream;
116 | }
117 | }
118 |
119 |
120 | /// Updates the weights and biases
121 | void descendGradient(double[] in, double learningRate)
122 | {
123 | for(int i = 0; i < layers.size(); i++)
124 | {
125 | Layer l = layers.get(i);
126 | l.scaleGradient(0.0);
127 | l.updateGradient(in);
128 | l.step(learningRate);
129 | in = l.activation;
130 | }
131 | }
132 |
133 |
134 | /// Keeps the weights and biases from getting too big
135 | void regularize(double amount)
136 | {
137 | for(int i = 0; i < layers.size(); i++)
138 | {
139 | Layer lay = layers.get(i);
140 | lay.regularizeWeights(amount);
141 | }
142 | }
143 |
144 |
145 | /// Refines the weights and biases with one iteration of stochastic gradient descent.
146 | void trainIncremental(double[] in, double[] target, double learningRate)
147 | {
148 | forwardProp(in);
149 | backProp(target);
150 | //backPropAndBendHinge(target, learningRate);
151 | descendGradient(in, learningRate);
152 | }
153 |
154 |
155 | /// Refines "in" with one iteration of stochastic gradient descent.
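/// (ObservationModel.calibrateBeliefs uses this to nudge a belief vector until
/// the decoder reproduces the actual observations.)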
156 | void refineInputs(double[] in, double[] target, double learningRate) 157 | { 158 | forwardProp(in); 159 | backProp(target); 160 | ((LayerLinear)layers.get(0)).refineInputs(in, learningRate); 161 | } 162 | 163 | 164 | static void testMath() 165 | { 166 | NeuralNet nn = new NeuralNet(); 167 | LayerLinear l1 = new LayerLinear(2, 3); 168 | l1.weights.row(0)[0] = 0.1; 169 | l1.weights.row(0)[1] = 0.0; 170 | l1.weights.row(0)[2] = 0.1; 171 | l1.weights.row(1)[0] = 0.1; 172 | l1.weights.row(1)[1] = 0.0; 173 | l1.weights.row(1)[2] = -0.1; 174 | l1.bias[0] = 0.1; 175 | l1.bias[1] = 0.1; 176 | l1.bias[2] = 0.0; 177 | nn.layers.add(l1); 178 | nn.layers.add(new LayerTanh(3)); 179 | 180 | LayerLinear l2 = new LayerLinear(3, 2); 181 | l2.weights.row(0)[0] = 0.1; 182 | l2.weights.row(0)[1] = 0.1; 183 | l2.weights.row(1)[0] = 0.1; 184 | l2.weights.row(1)[1] = 0.3; 185 | l2.weights.row(2)[0] = 0.1; 186 | l2.weights.row(2)[1] = -0.1; 187 | l2.bias[0] = 0.1; 188 | l2.bias[1] = -0.2; 189 | nn.layers.add(l2); 190 | nn.layers.add(new LayerTanh(2)); 191 | 192 | System.out.println("l1 weights:" + l1.weights.toString()); 193 | System.out.println("l1 bias:" + Vec.toString(l1.bias)); 194 | System.out.println("l2 weights:" + l2.weights.toString()); 195 | System.out.println("l2 bias:" + Vec.toString(l2.bias)); 196 | 197 | System.out.println("----Forward prop"); 198 | double in[] = new double[2]; 199 | in[0] = 0.3; 200 | in[1] = -0.2; 201 | double[] out = nn.forwardProp(in); 202 | System.out.println("activation:" + Vec.toString(out)); 203 | 204 | System.out.println("----Back prop"); 205 | double targ[] = new double[2]; 206 | targ[0] = 0.1; 207 | targ[1] = 0.0; 208 | nn.backProp(targ); 209 | System.out.println("error 2:" + Vec.toString(l2.error)); 210 | System.out.println("error 1:" + Vec.toString(l1.error)); 211 | 212 | nn.descendGradient(in, 0.1); 213 | System.out.println("----Descending gradient"); 214 | System.out.println("l1 weights:" + l1.weights.toString()); 215 | System.out.println("l1 bias:" + Vec.toString(l1.bias)); 216 | System.out.println("l2 weights:" + l2.weights.toString()); 217 | System.out.println("l2 bias:" + Vec.toString(l2.bias)); 218 | 219 | if(Math.abs(l1.weights.row(0)[0] - 0.10039573704287) > 0.0000000001) 220 | throw new IllegalArgumentException("failed"); 221 | if(Math.abs(l1.weights.row(0)[1] - 0.0013373814241446) > 0.0000000001) 222 | throw new IllegalArgumentException("failed"); 223 | if(Math.abs(l1.bias[1] - 0.10445793808048) > 0.0000000001) 224 | throw new IllegalArgumentException("failed"); 225 | System.out.println("passed"); 226 | } 227 | } 228 | -------------------------------------------------------------------------------- /java/src/ObservationModel.java: -------------------------------------------------------------------------------- 1 | import java.util.Random; 2 | 3 | /// A bidirectional model that maps between beliefs and observations. 4 | /// Mapping from observations to beliefs is done by the encoder. 5 | /// Mapping from beliefs to observations is done by the decoder. 6 | /// These two components are trained together in an unsupervised manner as an autoencoder. 
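/// A sketch of typical use (every dimension and constructor argument below is
/// illustrative, not canonical; 'observation' is a hypothetical double[]):
///
///   Random rand = new Random(0);
///   TransitionModel tm = new TransitionModel(3 + 2, 3, 4, 500, 50, rand);  // 3 belief + 2 action dims
///   ObservationModel om = new ObservationModel(tm, 8, 3, 4, 4, 500, 50, 30, rand);
///   om.trainIncremental(observation);                          // refine the autoencoder
///   double[] beliefs = om.observationsToBeliefs(observation);  // encode
///   double[] predicted = om.beliefsToObservations(beliefs);    // decode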
7 | public class ObservationModel { 8 | public Random rand; 9 | public NeuralNet decoder; 10 | public NeuralNet encoder; 11 | NeuralNet decoderExperimental; 12 | NeuralNet encoderExperimental; 13 | public Matrix train; 14 | public Matrix validation; 15 | ITutor tutor; 16 | TransitionModel transitionModel; 17 | public int trainPos; 18 | public int trainSize; 19 | int validationPos; 20 | int validationSize; 21 | int trainIters; 22 | int trainProgress; 23 | int calibrationIters; 24 | public double learningRate; 25 | 26 | 27 | /// General-purpose constructor 28 | ObservationModel(TransitionModel transition, int observation_dims, int belief_dims, int decoder_layers, 29 | int encoder_layers, int queue_size, int trainItersPerPattern, int calibrationIterations, Random r) { 30 | 31 | if(belief_dims > observation_dims) 32 | throw new IllegalArgumentException("observation_dims must be >= belief_dims"); 33 | 34 | // Init the encoder 35 | rand = r; 36 | int hidden = Math.max(30, (observation_dims + belief_dims) / 2); 37 | encoder = new NeuralNet(); 38 | encoder.layers.add(new LayerLinear(observation_dims, hidden)); 39 | encoder.layers.add(new LayerTanh(hidden)); 40 | encoder.layers.add(new LayerLinear(hidden, belief_dims)); 41 | encoder.layers.add(new LayerTanh(belief_dims)); 42 | encoder.init(rand); 43 | 44 | // Init the decoder 45 | decoder = new NeuralNet(); 46 | decoder.layers.add(new LayerLinear(belief_dims, hidden)); 47 | decoder.layers.add(new LayerTanh(hidden)); 48 | decoder.layers.add(new LayerLinear(hidden, observation_dims)); 49 | decoder.layers.add(new LayerTanh(observation_dims)); 50 | decoder.init(rand); 51 | 52 | // Make the experimental nets 53 | decoderExperimental = new NeuralNet(decoder); 54 | encoderExperimental = new NeuralNet(encoder); 55 | 56 | // Init the buffers 57 | train = new Matrix(queue_size, observation_dims); 58 | validation = new Matrix(queue_size, observation_dims); 59 | 60 | // Init the meta-parameters 61 | transitionModel = transition; 62 | trainIters = trainItersPerPattern; 63 | calibrationIters = calibrationIterations; 64 | learningRate = 0.03; 65 | } 66 | 67 | 68 | /// Unmarshaling constructor 69 | ObservationModel(TransitionModel transition, Json obj, Random r) { 70 | rand = r; 71 | decoder = new NeuralNet(obj.get("decoder")); 72 | encoder = new NeuralNet(obj.get("encoder")); 73 | decoderExperimental = new NeuralNet(obj.get("decoderExperimental")); 74 | encoderExperimental = new NeuralNet(obj.get("encoderExperimental")); 75 | train = new Matrix(obj.get("train")); 76 | validation = new Matrix(obj.get("validation")); 77 | trainPos = (int)obj.getLong("trainPos"); 78 | trainSize = (int)obj.getLong("trainSize"); 79 | validationPos = (int)obj.getLong("validationPos"); 80 | validationSize = (int)obj.getLong("validationSize"); 81 | trainIters = (int)obj.getLong("trainIters"); 82 | trainProgress = (int)obj.getLong("trainProgress"); 83 | calibrationIters = (int)obj.getLong("calibrationIters"); 84 | learningRate = obj.getDouble("learningRate"); 85 | transitionModel = transition; 86 | } 87 | 88 | 89 | /// Marshals this model to a JSON DOM. 
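/// A round trip might look like this (a sketch; tm and rand are assumed to exist):
///
///   Json dom = om.marshal();                                      // serialize
///   ObservationModel restored = new ObservationModel(tm, dom, rand);  // unmarshaling constructor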
90 | Json marshal() {
91 | Json obj = Json.newObject();
92 | obj.add("decoder", decoder.marshal());
93 | obj.add("encoder", encoder.marshal());
94 | obj.add("decoderExperimental", decoderExperimental.marshal());
95 | obj.add("encoderExperimental", encoderExperimental.marshal());
96 | obj.add("train", train.marshal());
97 | obj.add("validation", validation.marshal());
98 | obj.add("trainPos", trainPos);
99 | obj.add("trainSize", trainSize);
100 | obj.add("validationPos", validationPos);
101 | obj.add("validationSize", validationSize);
102 | obj.add("trainIters", trainIters);
103 | obj.add("trainProgress", trainProgress);
104 | obj.add("calibrationIters", calibrationIters);
105 | obj.add("learningRate", learningRate);
106 | return obj;
107 | }
108 |
109 |
110 | void setTutor(ITutor t) {
111 | tutor = t;
112 | }
113 |
114 |
115 | /// Performs one pattern-presentation of stochastic gradient descent. (The learning rate stays fixed; the experimental networks are promoted only when they clearly outperform the current ones.)
116 | void doSomeTraining() {
117 |
118 | // Train the decoderExperimental and encoderExperimental together as an autoencoder
119 | decoderExperimental.regularize(learningRate * 0.00001);
120 | encoderExperimental.regularize(learningRate * 0.00001);
121 | int index = rand.nextInt(trainSize);
122 | double[] observation = train.row(index);
123 | double[] belief = encoderExperimental.forwardProp(observation);
124 | double[] prediction = decoderExperimental.forwardProp(belief);
125 | decoderExperimental.backProp(observation);
126 | encoderExperimental.backPropFromDecoder(decoderExperimental);
127 | encoderExperimental.descendGradient(observation, learningRate);
128 | decoderExperimental.descendGradient(belief, learningRate);
129 |
130 | // Since changing the observation function resets the training data for the transition function,
131 | // we only want to change our perception when it will lead to big improvements.
132 | // Here, we test whether our experimental model is significantly better than the one we have been using.
133 | // If so, then the experimental model becomes the new model.
134 | trainProgress++;
135 | if(trainProgress >= train.rows()) {
136 | // Measure root-mean-squared error on the validation set
137 | trainProgress = 0;
138 | double err1 = 0.0;
139 | double err2 = 0.0;
140 | for(int i = 0; i < validationSize; i++) {
141 | double[] targ = validation.row(i);
142 | double[] pred1 = decoder.forwardProp(encoder.forwardProp(targ));
143 | double[] pred2 = decoderExperimental.forwardProp(encoderExperimental.forwardProp(targ));
144 | for(int j = 0; j < targ.length; j++) {
145 | err1 += (targ[j] - pred1[j]) * (targ[j] - pred1[j]);
146 | err2 += (targ[j] - pred2[j]) * (targ[j] - pred2[j]);
147 | }
148 | }
149 | err1 = Math.sqrt(err1 / validationSize);
150 | err2 = Math.sqrt(err2 / validationSize);
151 | if(err2 < 0.85 * err1) {
152 | // Update the observation model and reset the training data for the transition function
153 |
154 | encoder = new NeuralNet(encoderExperimental);
155 | decoder = new NeuralNet(decoderExperimental);
156 | transitionModel.trainPos = 0;
157 | transitionModel.trainSize = 0;
158 | }
159 | else if(err1 < 0.85 * err2) {
160 | // This should really never happen
161 | encoderExperimental = new NeuralNet(encoder);
162 | decoderExperimental = new NeuralNet(decoder);
163 | }
164 | //System.out.println("Observation error: " + Double.toString(err1) + ", " + Double.toString(err2));
165 | }
166 | }
167 |
168 |
169 | /// Refines the encoder and decoder based on the new observation.
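/// Each call buffers the observation (roughly alternating between the training
/// and validation queues) and then performs up to trainIters presentations of SGD.
/// Typically called once per time step, e.g. om.trainIncremental(currentObservation);
/// (the names om and currentObservation are illustrative).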
170 | void trainIncremental(double[] observation) {
171 |
172 | // Buffer the pattern
173 | double[] dest;
174 | if(validationPos < trainPos) {
175 | dest = validation.row(validationPos);
176 | if(++validationPos >= validation.rows())
177 | validationPos = 0;
178 | validationSize = Math.max(validationSize, validationPos);
179 | } else {
180 | dest = train.row(trainPos);
181 | trainPos++;
182 | trainSize = Math.max(trainSize, trainPos);
183 | if(trainPos >= train.rows())
184 | trainPos = 0;
185 | }
186 | for(int i = 0; i < dest.length; i++)
187 | dest[i] = observation[i];
188 |
189 | // Train
190 | int iters = Math.min(trainIters, trainSize);
191 | for(int i = 0; i < iters; i++)
192 | doSomeTraining();
193 | }
194 |
195 |
196 | /// Refines the beliefs to correspond with actual observations
197 | public void calibrateBeliefs(double[] beliefs, double[] observations) {
198 | if(tutor != null)
199 | { Vec.copy(beliefs, tutor.observationsToState(observations)); return; } // the tutor supplies the state directly, so skip decoder-based refinement
200 | for(int i = 0; i < calibrationIters; i++) {
201 | decoder.refineInputs(beliefs, observations, learningRate);
202 | for(int j = 0; j < beliefs.length; j++)
203 | beliefs[j] = Math.max(-1.0, Math.min(1.0, beliefs[j]));
204 | }
205 | }
206 |
207 |
208 | /// Decodes beliefs to predict observations
209 | public double[] beliefsToObservations(double[] beliefs) {
210 | if(tutor != null)
211 | return tutor.stateToObservations(beliefs);
212 | double[] obs = decoder.forwardProp(beliefs);
213 | double[] ret = new double[obs.length];
214 | for(int i = 0; i < obs.length; i++) {
215 | ret[i] = obs[i];
216 | }
217 | return ret;
218 | }
219 |
220 |
221 | /// Encodes observations to predict beliefs
222 | public double[] observationsToBeliefs(double[] observations) {
223 | if(tutor != null)
224 | return tutor.observationsToState(observations);
225 | double[] bel = encoder.forwardProp(observations);
226 | double[] ret = new double[bel.length];
227 | for(int i = 0; i < bel.length; i++) {
228 | ret[i] = bel[i];
229 | }
230 | return ret;
231 | }
232 | }
233 |
--------------------------------------------------------------------------------
/java/src/PlanningSystem.java:
--------------------------------------------------------------------------------
1 | import java.util.Random;
2 | import java.util.ArrayList;
3 | import java.util.Iterator;
4 |
5 |
6 | /// A genetic algorithm that sequences actions to form a plan intended to maximize contentment.
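/// Each plan is a Matrix whose rows are action vectors with elements in [0, 1].
/// One agent step might look like this (a sketch; ps, beliefs, and actions are
/// illustrative names):
///
///   ps.refinePlans(beliefs);                 // evolve the population of plans
///   ps.chooseNextActions(beliefs, actions);  // copy the best plan's first action
///   ps.advanceTime();                        // rotate that action to the end of each plan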
7 | public class PlanningSystem {
8 | IAgent self;
9 | Matrix randomPlan;
10 | public ArrayList<Matrix> plans;
11 | TransitionModel transitionModel;
12 | ObservationModel observationModel;
13 | ContentmentModel contentmentModel;
14 | IMentor mentor;
15 | ITutor tutor;
16 | int maxPlanLength;
17 | int refinementIters;
18 | int actionDims;
19 | int burnIn;
20 | double discountFactor;
21 | double explorationRate;
22 | Random rand;
23 |
24 |
25 | /// General-purpose constructor
26 | PlanningSystem(IAgent agent, TransitionModel transition, ObservationModel observation, ContentmentModel contentment, IMentor _mentor,
27 | int actionDimensions, int populationSize, int planRefinementIters, int burnInIters, int maxPlanLen, double discount, double explore, Random r) {
28 | self = agent;
29 | transitionModel = transition;
30 | observationModel = observation;
31 | contentmentModel = contentment;
32 | mentor = _mentor;
33 | rand = r;
34 | plans = new ArrayList<Matrix>();
35 | if(populationSize < 2)
36 | throw new IllegalArgumentException("The population size must be at least 2");
37 | refinementIters = populationSize * planRefinementIters;
38 | burnIn = burnInIters;
39 | actionDims = actionDimensions;
40 | maxPlanLength = maxPlanLen;
41 | discountFactor = discount;
42 | explorationRate = explore;
43 | for(int i = 0; i < populationSize; i++) {
44 | Matrix p = new Matrix(0, actionDimensions);
45 | for(int j = Math.min(maxPlanLen, rand.nextInt(maxPlanLen) + 2); j > 0; j--) {
46 | // Add a random action vector to the end
47 | double[] newActions = p.newRow();
48 | for(int k = 0; k < actionDims; k++) {
49 | newActions[k] = rand.nextDouble();
50 | }
51 | }
52 | plans.add(p);
53 | }
54 | randomPlan = new Matrix(0, actionDimensions);
55 | randomPlan.newRow();
56 | }
57 |
58 |
59 | /// Unmarshaling constructor
60 | PlanningSystem(Json obj, IAgent agent, Random r, TransitionModel transition, ObservationModel observation, ContentmentModel contentment, IMentor _mentor) {
61 | self = agent;
62 | transitionModel = transition;
63 | observationModel = observation;
64 | contentmentModel = contentment;
65 | mentor = _mentor;
66 | rand = r;
67 | Json plansArr = obj.get("plans");
68 | plans = new ArrayList<Matrix>();
69 | for(int i = 0; i < plansArr.size(); i++)
70 | {
71 | plans.add(new Matrix(plansArr.get(i)));
72 | }
73 | maxPlanLength = (int)obj.getLong("maxPlanLength");
74 | discountFactor = obj.getDouble("discount");
75 | explorationRate = obj.getDouble("explore");
76 | refinementIters = (int)obj.getLong("refinementIters");
77 | burnIn = (int)obj.getLong("burnIn");
78 | actionDims = (int)obj.getLong("actionDims");
79 | randomPlan = new Matrix(0, actionDims);
80 | randomPlan.newRow();
81 | }
82 |
83 |
84 | /// Marshals this model to a JSON DOM.
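/// For example (a sketch; the surrounding objects agent, rand, tm, om, cm, and
/// mentor are assumed to already exist):
///
///   Json dom = ps.marshal();
///   PlanningSystem restored = new PlanningSystem(dom, agent, rand, tm, om, cm, mentor);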
85 | Json marshal() { 86 | Json obj = Json.newObject(); 87 | Json plansArr = Json.newList(); 88 | for(int i = 0; i < plans.size(); i++) 89 | { 90 | plansArr.add(plans.get(i).marshal()); 91 | } 92 | obj.add("plans", plansArr); 93 | obj.add("maxPlanLength", maxPlanLength); 94 | obj.add("discount", discountFactor); 95 | obj.add("explore", explorationRate); 96 | obj.add("refinementIters", refinementIters); 97 | obj.add("burnIn", burnIn); 98 | obj.add("actionDims", actionDims); 99 | return obj; 100 | } 101 | 102 | 103 | /// Replaces the mentor with the specified one 104 | void setMentor(IMentor _mentor) { 105 | mentor = _mentor; 106 | } 107 | 108 | 109 | void setTutor(ITutor t) { 110 | tutor = t; 111 | } 112 | 113 | 114 | /// Prints a representation of all the plans to stdout 115 | void printPlans() { 116 | for(int i = 0; i < plans.size(); i++) 117 | System.out.println(plans.get(i).toString()); 118 | } 119 | 120 | 121 | /// Perturbs a random plan 122 | void mutate() { 123 | double d = rand.nextDouble(); 124 | Matrix p = plans.get(rand.nextInt(plans.size())); 125 | if(d < 0.1) { // lengthen the plan 126 | if(p.rows() < maxPlanLength) { 127 | double[] newActions = p.insertRow(rand.nextInt(p.rows() + 1)); 128 | for(int i = 0; i < actionDims; i++) { 129 | newActions[i] = rand.nextDouble(); 130 | } 131 | } 132 | } 133 | else if(d < 0.2) { // shorten the plan 134 | if(p.rows() > 1) { 135 | p.removeRow(rand.nextInt(p.rows())); 136 | } 137 | } 138 | else if(d < 0.7) { // perturb a single element of an action vector 139 | double[] actions = p.row(rand.nextInt(p.rows())); 140 | int i = rand.nextInt(actions.length); 141 | actions[i] = Math.max(0.0, Math.min(1.0, actions[i] + 0.03 * rand.nextGaussian())); 142 | } 143 | else if(d < 0.9) { // perturb a whole action vector 144 | double[] actions = p.row(rand.nextInt(p.rows())); 145 | for(int i = 0; i < actions.length; i++) { 146 | actions[i] = Math.max(0.0, Math.min(1.0, actions[i] + 0.02 * rand.nextGaussian())); 147 | } 148 | } 149 | else { // perturb the whole plan 150 | for(int j = 0; j < p.rows(); j++) { 151 | double[] actions = p.row(j); 152 | for(int i = 0; i < actions.length; i++) { 153 | actions[i] = Math.max(0.0, Math.min(1.0, actions[i] + 0.01 * rand.nextGaussian())); 154 | } 155 | } 156 | } 157 | } 158 | 159 | 160 | /// Replaces the specified plan with a new one. 
161 | void replace(int childIndex) { 162 | double d = rand.nextDouble(); 163 | if(d < 0.2) { 164 | // Clone a random parent (asexual reproduction) 165 | plans.set(childIndex, new Matrix(plans.get(rand.nextInt(plans.size())))); 166 | } else if(d < 0.7) { 167 | // Cross-over (sexual reproduction) 168 | Matrix mother = plans.get(rand.nextInt(plans.size())); 169 | Matrix father = plans.get(rand.nextInt(plans.size())); 170 | int crossOverPoint = rand.nextInt(mother.rows()); 171 | Matrix child = new Matrix(0, actionDims); 172 | for(int i = 0; i < crossOverPoint; i++) 173 | Vec.copy(child.newRow(), mother.row(i)); 174 | for(int i = crossOverPoint; i < father.rows(); i++) 175 | Vec.copy(child.newRow(), father.row(i)); 176 | plans.set(childIndex, child); 177 | } else { 178 | // Interpolation/extrapolation 179 | Matrix mother = plans.get(rand.nextInt(plans.size())); 180 | Matrix father = plans.get(rand.nextInt(plans.size())); 181 | int len = Math.min(mother.rows(), father.rows()); 182 | Matrix child = new Matrix(0, actionDims); 183 | double alpha = rand.nextDouble() * 2.0; 184 | for(int i = 0; i < len; i++) { 185 | double[] a = mother.row(i); 186 | double[] b = father.row(i); 187 | double[] c = child.newRow(); 188 | for(int j = 0; j < c.length; j++) { 189 | c[j] = Math.max(0.0, Math.min(1.0, alpha * a[j] + (1.0 - alpha) * b[j])); 190 | } 191 | } 192 | plans.set(childIndex, child); 193 | } 194 | } 195 | 196 | 197 | /// Returns the expected contentment at the end of the plan 198 | double evaluatePlan(double[] beliefs, Matrix plan) { 199 | return contentmentModel.evaluate(transitionModel.getFinalBeliefs(beliefs, plan)) * Math.pow(discountFactor, plan.rows()); 200 | } 201 | 202 | 203 | /// Performs a tournament between two randomly-selected plans. 204 | /// One of them, usually the winner, is replaced. 205 | void tournament(double[] beliefs) { 206 | int a = rand.nextInt(plans.size()); 207 | int b = rand.nextInt(plans.size()); 208 | boolean a_prevails; 209 | if(rand.nextDouble() < 0.3) 210 | a_prevails = true; // Let a random plan prevail 211 | else { 212 | // Let the better plan prevail 213 | double fitnessA = evaluatePlan(beliefs, plans.get(a)); 214 | double fitnessB = evaluatePlan(beliefs, plans.get(b)); 215 | if(fitnessA >= fitnessB) 216 | a_prevails = true; 217 | else 218 | a_prevails = false; 219 | } 220 | replace(a_prevails ? 
b : a);
221 | }
222 |
223 |
224 | /// Performs several iterations of plan refinement
225 | void refinePlans(double[] beliefs) {
226 |
227 | // If we are still burning in, then the models are probably not even reliable enough to make refining plans worthwhile
228 | if(burnIn > 0)
229 | return;
230 |
231 | for(int i = 0; i < refinementIters; i++) {
232 | double d = rand.nextDouble();
233 | if(d < 0.65)
234 | mutate();
235 | else
236 | tournament(beliefs);
237 | }
238 | }
239 |
240 |
241 | /// Moves the first action in every plan to the end (because that action has just been performed)
242 | void advanceTime() {
243 | for(int i = 0; i < plans.size(); i++) {
244 | Matrix p = plans.get(i);
245 | if(p.rows() > 0)
246 | {
247 | // Move the first action vector in each plan to the end
248 | double[] tmp = p.removeRow(0);
249 | p.takeRow(tmp);
250 | }
251 | }
252 | }
253 |
254 |
255 | /// Asks the mentor to evaluate the plan, given our current beliefs, and learn from it
256 | void askMentorToEvaluatePlan(double[] beliefs, Matrix plan) {
257 | double feedback = mentor.evaluatePlan(self, plan);
258 | if(feedback != IMentor.NO_FEEDBACK)
259 | {
260 | if(feedback < -1.0 || feedback > 1.0)
261 | throw new IllegalArgumentException("The mentor returned an evaluation that was out of range.");
262 | double[] anticipatedBeliefs = transitionModel.getFinalBeliefs(beliefs, plan);
263 | contentmentModel.trainIncremental(anticipatedBeliefs, feedback);
264 | }
265 | }
266 |
267 |
268 | /// Finds the best plan and copies its first step
269 | void chooseNextActions(double[] beliefs, double[] actions) {
270 |
271 | if(tutor != null) {
272 | tutor.chooseActions(beliefs, actions);
273 | return;
274 | }
275 |
276 | // Find the best plan (according to the contentment model) and ask the mentor to evaluate it
277 | int planBestIndex = 0;
278 | double bestContentment = -Double.MAX_VALUE;
279 | for(int i = 0; i < plans.size(); i++) {
280 | double d = evaluatePlan(beliefs, plans.get(i));
281 | if(d > bestContentment) {
282 | bestContentment = d;
283 | planBestIndex = i;
284 | }
285 | }
286 | //System.out.println("Best contentment: " + Double.toString(bestContentment));
287 | Matrix bestPlan = plans.get(planBestIndex);
288 | askMentorToEvaluatePlan(beliefs, bestPlan);
289 |
290 | // Pick a random plan from the population and ask the mentor to evaluate it (for contrast)
291 | int planBindex = rand.nextInt(plans.size() - 1);
292 | if(planBindex >= planBestIndex)
293 | planBindex++;
294 | askMentorToEvaluatePlan(beliefs, plans.get(planBindex));
295 |
296 | // Make a random one-step plan, and ask the mentor to evaluate it (for contrast)
297 | double[] action = randomPlan.row(0);
298 | for(int i = 0; i < action.length; i++)
299 | action[i] = rand.nextDouble();
300 | askMentorToEvaluatePlan(beliefs, randomPlan);
301 |
302 | // Copy the first action vector of the best plan for our chosen action
303 | double[] bestActions = bestPlan.row(0);
304 | if(burnIn > 0 || rand.nextDouble() < explorationRate)
305 | bestActions = randomPlan.row(0);
306 | burnIn = Math.max(0, burnIn - 1);
307 | for(int i = 0; i < bestActions.length; i++) {
308 | actions[i] = bestActions[i];
309 | }
310 | }
311 | }
312 |
--------------------------------------------------------------------------------
/java/src/TransitionModel.java:
--------------------------------------------------------------------------------
1 | import java.util.Random;
2 |
3 | /// A model that maps from current beliefs and actions to anticipated beliefs.
4 | /// This model is trained in a supervised manner.
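/// A sketch of one update/predict cycle (variable names are illustrative):
///
///   tm.trainIncremental(beliefs, actions, nextBeliefs);                 // learn from the observed transition
///   double[] anticipated = tm.anticipateNextBeliefs(beliefs, actions);  // predict the next beliefs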
5 | public class TransitionModel {
6 | Random rand;
7 | NeuralNet model;
8 | Matrix trainInput;
9 | Matrix trainOutput;
10 | ITutor tutor;
11 | int trainPos;
12 | public int trainSize;
13 | int trainIters;
14 | int trainProgress;
15 | double learningRate;
16 | double err;
17 | double prevErr;
18 |
19 |
20 | /// General-purpose constructor
21 | TransitionModel(int input_dims, int output_dims, int total_layers, int queue_size, int trainItersPerPattern, Random r) {
22 |
23 | // Init the model
24 | rand = r;
25 | model = new NeuralNet();
26 | int hidden = Math.max(30, output_dims);
27 | model.layers.add(new LayerLinear(input_dims, hidden));
28 | model.layers.add(new LayerTanh(hidden));
29 | model.layers.add(new LayerLinear(hidden, output_dims));
30 | model.layers.add(new LayerTanh(output_dims));
31 | model.init(rand);
32 |
33 | // Init the buffers
34 | trainInput = new Matrix(queue_size, input_dims);
35 | trainOutput = new Matrix(queue_size, output_dims);
36 |
37 | // Init the meta-parameters
38 | trainIters = trainItersPerPattern;
39 | learningRate = 0.03;
40 | }
41 |
42 |
43 | /// Unmarshaling constructor
44 | TransitionModel(Json obj, Random r) {
45 | rand = r;
46 | model = new NeuralNet(obj.get("model"));
47 | trainInput = new Matrix(obj.get("trainInput"));
48 | trainOutput = new Matrix(obj.get("trainOutput"));
49 | trainPos = (int)obj.getLong("trainPos");
50 | trainSize = (int)obj.getLong("trainSize");
51 | trainIters = (int)obj.getLong("trainIters");
52 | trainProgress = (int)obj.getLong("trainProgress");
53 | learningRate = obj.getDouble("learningRate");
54 | err = obj.getDouble("err");
55 | prevErr = obj.getDouble("prevErr");
56 | }
57 |
58 |
59 | /// Marshals this model to a JSON DOM.
60 | Json marshal() {
61 | Json obj = Json.newObject();
62 | obj.add("model", model.marshal());
63 | obj.add("trainInput", trainInput.marshal());
64 | obj.add("trainOutput", trainOutput.marshal());
65 | obj.add("trainPos", trainPos);
66 | obj.add("trainSize", trainSize);
67 | obj.add("trainIters", trainIters);
68 | obj.add("trainProgress", trainProgress);
69 | obj.add("learningRate", learningRate);
70 | obj.add("err", err);
71 | obj.add("prevErr", prevErr);
72 | return obj;
73 | }
74 |
75 |
76 | /// Returns the number of action dims
77 | int actionDims() { return model.layers.get(0).inputCount() - model.layers.get(model.layers.size() - 1).outputCount(); }
78 |
79 |
80 | void setTutor(ITutor t) {
81 | tutor = t;
82 | }
83 |
84 |
85 | /// Performs one pattern-presentation of stochastic gradient descent. (The learning rate stays fixed; prevErr just tracks training progress.)
86 | void doSomeTraining() {
87 |
88 | // Present one pattern
89 | model.regularize(learningRate * 0.0000001);
90 | int index = rand.nextInt(trainSize);
91 | model.trainIncremental(trainInput.row(index), trainOutput.row(index), learningRate);
92 | err += Vec.squaredDistance(model.layers.get(model.layers.size() - 1).activation, trainOutput.row(index));
93 |
94 | // Measure how we are doing
95 | trainProgress++;
96 | if(trainProgress >= trainInput.rows()) {
97 | trainProgress = 0;
98 | prevErr = Math.sqrt(err / trainInput.rows());
99 | err = 0.0;
100 | //System.out.println("Transition error: " + Double.toString(prevErr));
101 | }
102 | }
103 |
104 |
105 | /// Refines this model based on a recently performed action and change in beliefs
106 | void trainIncremental(double[] beliefs, double[] actions, double[] nextBeliefs) {
107 |
108 | // Buffer the pattern
109 | double[] destIn = trainInput.row(trainPos);
110 | double[] destOut = trainOutput.row(trainPos);
111 | trainPos++;
112 | trainSize = Math.max(trainSize, trainPos);
113 | if(trainPos >= trainInput.rows())
114 | trainPos = 0;
115 | if(beliefs.length + actions.length != destIn.length)
116 | throw new IllegalArgumentException("size mismatch");
117 | for(int i = 0; i < beliefs.length; i++)
118 | destIn[i] = beliefs[i];
119 | for(int i = 0; i < actions.length; i++)
120 | destIn[beliefs.length + i] = actions[i];
121 | for(int i = 0; i < destOut.length; i++)
122 | destOut[i] = 0.5 * (nextBeliefs[i] - beliefs[i]);
123 |
124 | // Refine the model
125 | int iters = Math.min(trainIters, 1000 * trainSize);
126 | for(int i = 0; i < iters; i++)
127 | doSomeTraining();
128 | }
129 |
130 |
131 | /// Predict the belief vector that will result if the specified action is performed
132 | public void anticipateNextBeliefsInPlace(double[] beliefs, double[] actions, double[] anticipatedBeliefs) {
133 | if(tutor != null)
134 | { tutor.transition(beliefs, actions, anticipatedBeliefs); return; } // the tutor's answer overrides the learned model
135 | double[] pred = model.forwardProp2(beliefs, actions);
136 | for(int i = 0; i < pred.length; i++) {
137 | anticipatedBeliefs[i] = Math.max(-1.0, Math.min(1.0, beliefs[i] + 2.0 * pred[i]));
138 | }
139 | }
140 |
141 |
142 | /// Predict the belief vector that will result if the specified action is performed
143 | public double[] anticipateNextBeliefs(double[] beliefs, double[] actions) {
144 | double[] anticipatedBeliefs = new double[beliefs.length];
145 | anticipateNextBeliefsInPlace(beliefs, actions, anticipatedBeliefs);
146 | return anticipatedBeliefs;
147 | }
148 |
149 |
150 | /// Compute the anticipated belief vector that will result if the specified plan is executed.
151 | public double[] getFinalBeliefs(double[] beliefs, Matrix plan) {
152 | if(plan != null)
153 | {
154 | for(int i = 0; i < plan.rows(); i++) {
155 | beliefs = anticipateNextBeliefs(beliefs, plan.row(i));
156 | }
157 | }
158 | return beliefs;
159 | }
160 | }
161 |
--------------------------------------------------------------------------------
/java/src/Vec.java:
--------------------------------------------------------------------------------
1 | // ----------------------------------------------------------------
2 | // The contents of this file are distributed under the CC0 license.
3 | // See http://creativecommons.org/publicdomain/zero/1.0/ 4 | // ---------------------------------------------------------------- 5 | 6 | import java.util.Iterator; 7 | import java.lang.StringBuilder; 8 | 9 | /// Provides several useful static methods for operating on arrays of doubles 10 | public class Vec 11 | { 12 | public static Json marshal(double[] vec) 13 | { 14 | Json list = Json.newList(); 15 | for(int i = 0; i < vec.length; i++) 16 | list.add(vec[i]); 17 | return list; 18 | } 19 | 20 | public static double[] unmarshal(Json n) 21 | { 22 | double[] vec = new double[n.size()]; 23 | for(int i = 0; i < n.size(); i++) 24 | vec[i] = n.getDouble(i); 25 | return vec; 26 | } 27 | 28 | public static String toString(double[] vec) 29 | { 30 | StringBuilder sb = new StringBuilder(); 31 | if(vec.length > 0) 32 | { 33 | sb.append(Double.toString(vec[0])); 34 | for(int i = 1; i < vec.length; i++) 35 | { 36 | sb.append(","); 37 | sb.append(Double.toString(vec[i])); 38 | } 39 | } 40 | return sb.toString(); 41 | } 42 | 43 | public static void setAll(double[] vec, double val) 44 | { 45 | for(int i = 0; i < vec.length; i++) 46 | vec[i] = val; 47 | } 48 | 49 | public static double squaredMagnitude(double[] vec) 50 | { 51 | double d = 0.0; 52 | for(int i = 0; i < vec.length; i++) 53 | d += vec[i] * vec[i]; 54 | return d; 55 | } 56 | 57 | public static void normalize(double[] vec) 58 | { 59 | double mag = squaredMagnitude(vec); 60 | if(mag <= 0.0) { 61 | setAll(vec, 0.0); 62 | vec[0] = 1.0; 63 | } else { 64 | double s = 1.0 / Math.sqrt(mag); 65 | for(int i = 0; i < vec.length; i++) 66 | vec[i] *= s; 67 | } 68 | } 69 | 70 | public static void copy(double[] dest, double[] src) 71 | { 72 | if(dest.length != src.length) 73 | throw new IllegalArgumentException("mismatching sizes"); 74 | for(int i = 0; i < src.length; i++) 75 | { 76 | dest[i] = src[i]; 77 | } 78 | } 79 | 80 | public static double[] copy(double[] src) 81 | { 82 | double[] dest = new double[src.length]; 83 | for(int i = 0; i < src.length; i++) 84 | { 85 | dest[i] = src[i]; 86 | } 87 | return dest; 88 | } 89 | 90 | public static void add(double[] dest, double[] src) 91 | { 92 | if(dest.length != src.length) 93 | throw new IllegalArgumentException("mismatching sizes"); 94 | for(int i = 0; i < dest.length; i++) 95 | { 96 | dest[i] += src[i]; 97 | } 98 | } 99 | 100 | public static void addScaled(double[] dest, double[] src, double scalar) 101 | { 102 | if(dest.length != src.length) 103 | throw new IllegalArgumentException("mismatching sizes"); 104 | for(int i = 0; i < dest.length; i++) 105 | { 106 | dest[i] += scalar * src[i]; 107 | } 108 | } 109 | 110 | public static void scale(double[] dest, double scalar) 111 | { 112 | for(int i = 0; i < dest.length; i++) 113 | { 114 | dest[i] *= scalar; 115 | } 116 | } 117 | 118 | public static double dotProduct(double[] a, double[] b) 119 | { 120 | if(a.length != b.length) 121 | throw new IllegalArgumentException("mismatching sizes"); 122 | double d = 0.0; 123 | for(int i = 0; i < a.length; i++) 124 | d += a[i] * b[i]; 125 | return d; 126 | } 127 | 128 | public static double squaredDistance(double[] a, double[] b) 129 | { 130 | if(a.length != b.length) 131 | throw new IllegalArgumentException("mismatching sizes"); 132 | double d = 0.0; 133 | for(int i = 0; i < a.length; i++) 134 | { 135 | double t = a[i] - b[i]; 136 | d += t * t; 137 | } 138 | return d; 139 | } 140 | 141 | public static void clip(double[] vec, double min, double max) 142 | { 143 | if(max < min) 144 | throw new IllegalArgumentException("max 
must be >= min"); 145 | for(int i = 0; i < vec.length; i++) 146 | { 147 | vec[i] = Math.max(min, Math.min(max, vec[i])); 148 | } 149 | } 150 | 151 | public static double[] concatenate(double[] a, double[] b) 152 | { 153 | double[] c = new double[a.length + b.length]; 154 | for(int i = 0; i < a.length; i++) 155 | c[i] = a[i]; 156 | for(int i = 0; i < b.length; i++) 157 | c[a.length + i] = b[i]; 158 | return c; 159 | } 160 | 161 | } 162 | --------------------------------------------------------------------------------