├── Assignment1
├── README.txt
├── letter.arff
├── phishing.arff
├── result.xlsx
├── table.docx
└── ycai87-analysis.pdf
├── Assignment2
├── ContinuousPeaks.java
├── Knapsack.java
├── Knapsack_mimic.java
├── LetterRecognition.java
├── README.txt
├── TravelingSalesman.java
├── phishing.csv
├── phishing_ga.java
├── phishing_rhc.java
├── phishing_sa.java
├── phishingwebsite.java
├── travellingsales_ga.java
└── ycai87-analysis.pdf
├── Assignment3
├── BRInformationGain.java
├── BR_ICAWithFilter.java
├── BR_PCAWithFilter.java
├── LRInformationGain.java
├── LR_DR.java
├── LR_ICAWithFilter.java
├── Lr_PCAWithFilter.java
├── README.txt
├── Randomprojection.java
├── bc_kmeans.py
├── br_dr_em.py
├── br_dr_kmeans.py
├── clustertesters
│ ├── ExpectationMaximizationTestCluster.py
│ ├── ExpectationMaximizationTestCluster.pyc
│ ├── KMeansTestCluster.py
│ ├── KMeansTestCluster.pyc
│ ├── __init__.py
│ ├── __init__.pyc
│ ├── lr_ExpectationMaximizationTestCluster.py
│ ├── lr_ExpectationMaximizationTestCluster.pyc
│ ├── lr_KMeansTestCluster.py
│ └── lr_KMeansTestCluster.pyc
├── letter.arff
├── lr_dr_em.py
├── lr_dr_kmeans.py
├── lr_em.py
├── lr_kmeans.py
├── wisconsin.arff
└── ycai87-analysis.pdf
├── Assignment4
├── README.txt
├── easyGW.py
├── hardGW.py
├── src
│ └── burlap
│ │ ├── assignment4
│ │ ├── BasicGridWorld.java
│ │ ├── EasyGridWorldLauncher.java
│ │ ├── HardGridWorldLauncher.java
│ │ └── util
│ │ │ ├── AgentPainter.java
│ │ │ ├── AnalysisAggregator.java
│ │ │ ├── AnalysisRunner.java
│ │ │ ├── AtLocation.java
│ │ │ ├── BasicRewardFunction.java
│ │ │ ├── BasicTerminalFunction.java
│ │ │ ├── LocationPainter.java
│ │ │ ├── MapPrinter.java
│ │ │ ├── Movement.java
│ │ │ └── WallPainter.java
│ │ ├── behavior
│ │ ├── learningrate
│ │ │ ├── ConstantLR.java
│ │ │ ├── ExponentialDecayLR.java
│ │ │ ├── LearningRate.java
│ │ │ └── SoftTimeInverseDecayLR.java
│ │ ├── policy
│ │ │ ├── BeliefPolicyToPOMDPPolicy.java
│ │ │ ├── BoltzmannQPolicy.java
│ │ │ ├── CachedPolicy.java
│ │ │ ├── DomainMappedPolicy.java
│ │ │ ├── EpsilonGreedy.java
│ │ │ ├── GreedyDeterministicQPolicy.java
│ │ │ ├── GreedyQPolicy.java
│ │ │ ├── Policy.java
│ │ │ ├── RandomPolicy.java
│ │ │ └── SolverDerivedPolicy.java
│ │ ├── singleagent
│ │ │ ├── EpisodeAnalysis.java
│ │ │ ├── MDPSolver.java
│ │ │ ├── MDPSolverInterface.java
│ │ │ ├── auxiliary
│ │ │ │ ├── EpisodeSequenceVisualizer.java
│ │ │ │ ├── StateEnumerator.java
│ │ │ │ ├── StateGridder.java
│ │ │ │ ├── StateReachability.java
│ │ │ │ ├── performance
│ │ │ │ │ ├── ExperimentalEnvironment.java
│ │ │ │ │ ├── LearningAlgorithmExperimenter.java
│ │ │ │ │ ├── PerformanceMetric.java
│ │ │ │ │ ├── PerformancePlotter.java
│ │ │ │ │ └── TrialMode.java
│ │ │ │ └── valuefunctionvis
│ │ │ │ │ ├── PolicyRenderLayer.java
│ │ │ │ │ ├── StatePolicyPainter.java
│ │ │ │ │ ├── StateValuePainter.java
│ │ │ │ │ ├── StaticDomainPainter.java
│ │ │ │ │ ├── ValueFunctionRenderLayer.java
│ │ │ │ │ ├── ValueFunctionVisualizerGUI.java
│ │ │ │ │ └── common
│ │ │ │ │ ├── ActionGlyphPainter.java
│ │ │ │ │ ├── ArrowActionGlyph.java
│ │ │ │ │ ├── ColorBlend.java
│ │ │ │ │ ├── LandmarkColorBlendInterpolation.java
│ │ │ │ │ ├── PolicyGlyphPainter2D.java
│ │ │ │ │ └── StateValuePainter2D.java
│ │ │ ├── interfaces
│ │ │ │ └── rlglue
│ │ │ │ │ ├── RLGlueDomain.java
│ │ │ │ │ └── RLGlueEnvironmentInterface.java
│ │ │ ├── learnfromdemo
│ │ │ │ ├── IRLRequest.java
│ │ │ │ ├── RewardValueProjection.java
│ │ │ │ ├── apprenticeship
│ │ │ │ │ ├── ApprenticeshipLearning.java
│ │ │ │ │ └── ApprenticeshipLearningRequest.java
│ │ │ │ └── mlirl
│ │ │ │ │ ├── MLIRL.java
│ │ │ │ │ ├── MLIRLRequest.java
│ │ │ │ │ ├── MultipleIntentionsMLIRL.java
│ │ │ │ │ ├── MultipleIntentionsMLIRLRequest.java
│ │ │ │ │ ├── commonrfs
│ │ │ │ │ ├── LinearStateActionDifferentiableRF.java
│ │ │ │ │ └── LinearStateDifferentiableRF.java
│ │ │ │ │ ├── differentiableplanners
│ │ │ │ │ ├── DifferentiableDP.java
│ │ │ │ │ ├── DifferentiableSparseSampling.java
│ │ │ │ │ ├── DifferentiableVI.java
│ │ │ │ │ └── diffvinit
│ │ │ │ │ │ ├── DiffVFRF.java
│ │ │ │ │ │ ├── DifferentiableVInit.java
│ │ │ │ │ │ ├── LinearDiffRFVInit.java
│ │ │ │ │ │ ├── LinearStateDiffVF.java
│ │ │ │ │ │ └── VanillaDiffVinit.java
│ │ │ │ │ └── support
│ │ │ │ │ ├── BoltzmannPolicyGradient.java
│ │ │ │ │ ├── DifferentiableRF.java
│ │ │ │ │ ├── QGradientPlanner.java
│ │ │ │ │ ├── QGradientPlannerFactory.java
│ │ │ │ │ └── QGradientTuple.java
│ │ │ ├── learning
│ │ │ │ ├── LearningAgent.java
│ │ │ │ ├── LearningAgentFactory.java
│ │ │ │ ├── actorcritic
│ │ │ │ │ ├── Actor.java
│ │ │ │ │ ├── ActorCritic.java
│ │ │ │ │ ├── Critic.java
│ │ │ │ │ ├── CritiqueResult.java
│ │ │ │ │ ├── actor
│ │ │ │ │ │ └── BoltzmannActor.java
│ │ │ │ │ └── critics
│ │ │ │ │ │ ├── TDLambda.java
│ │ │ │ │ │ └── TimeIndexedTDLambda.java
│ │ │ │ ├── lspi
│ │ │ │ │ ├── LSPI.java
│ │ │ │ │ ├── SARSCollector.java
│ │ │ │ │ └── SARSData.java
│ │ │ │ ├── modellearning
│ │ │ │ │ ├── Model.java
│ │ │ │ │ ├── ModelLearningPlanner.java
│ │ │ │ │ ├── ModeledDomainGenerator.java
│ │ │ │ │ ├── artdp
│ │ │ │ │ │ └── ARTDP.java
│ │ │ │ │ ├── modelplanners
│ │ │ │ │ │ └── VIModelLearningPlanner.java
│ │ │ │ │ ├── models
│ │ │ │ │ │ └── TabularModel.java
│ │ │ │ │ └── rmax
│ │ │ │ │ │ ├── PotentialShapedRMax.java
│ │ │ │ │ │ └── UnmodeledFavoredPolicy.java
│ │ │ │ └── tdmethods
│ │ │ │ │ ├── QLearning.java
│ │ │ │ │ ├── QLearningStateNode.java
│ │ │ │ │ ├── SarsaLam.java
│ │ │ │ │ └── vfa
│ │ │ │ │ └── GradientDescentSarsaLam.java
│ │ │ ├── options
│ │ │ │ ├── DeterministicTerminationOption.java
│ │ │ │ ├── MacroAction.java
│ │ │ │ ├── Option.java
│ │ │ │ ├── PolicyDefinedSubgoalOption.java
│ │ │ │ └── support
│ │ │ │ │ ├── DirectOptionTerminateMapper.java
│ │ │ │ │ ├── EnvironmentOptionOutcome.java
│ │ │ │ │ ├── LocalSubgoalRF.java
│ │ │ │ │ ├── LocalSubgoalTF.java
│ │ │ │ │ └── OptionEvaluatingRF.java
│ │ │ ├── planning
│ │ │ │ ├── Planner.java
│ │ │ │ ├── deterministic
│ │ │ │ │ ├── DDPlannerPolicy.java
│ │ │ │ │ ├── DeterministicPlanner.java
│ │ │ │ │ ├── MultiStatePrePlanner.java
│ │ │ │ │ ├── SDPlannerPolicy.java
│ │ │ │ │ ├── SearchNode.java
│ │ │ │ │ ├── informed
│ │ │ │ │ │ ├── BestFirst.java
│ │ │ │ │ │ ├── Heuristic.java
│ │ │ │ │ │ ├── NullHeuristic.java
│ │ │ │ │ │ ├── PrioritizedSearchNode.java
│ │ │ │ │ │ └── astar
│ │ │ │ │ │ │ ├── AStar.java
│ │ │ │ │ │ │ ├── DynamicWeightedAStar.java
│ │ │ │ │ │ │ ├── IDAStar.java
│ │ │ │ │ │ │ ├── StaticWeightedAStar.java
│ │ │ │ │ │ │ └── WeightedGreedy.java
│ │ │ │ │ └── uninformed
│ │ │ │ │ │ ├── bfs
│ │ │ │ │ │ └── BFS.java
│ │ │ │ │ │ └── dfs
│ │ │ │ │ │ ├── DFS.java
│ │ │ │ │ │ └── LimitedMemoryDFS.java
│ │ │ │ ├── stochastic
│ │ │ │ │ ├── ActionTransitions.java
│ │ │ │ │ ├── DynamicProgramming.java
│ │ │ │ │ ├── HashedTransitionProbability.java
│ │ │ │ │ ├── montecarlo
│ │ │ │ │ │ └── uct
│ │ │ │ │ │ │ ├── UCT.java
│ │ │ │ │ │ │ ├── UCTActionNode.java
│ │ │ │ │ │ │ ├── UCTStateNode.java
│ │ │ │ │ │ │ └── UCTTreeWalkPolicy.java
│ │ │ │ │ ├── policyiteration
│ │ │ │ │ │ └── PolicyIteration.java
│ │ │ │ │ ├── rtdp
│ │ │ │ │ │ ├── BFSRTDP.java
│ │ │ │ │ │ ├── BoundedRTDP.java
│ │ │ │ │ │ └── RTDP.java
│ │ │ │ │ ├── sparsesampling
│ │ │ │ │ │ └── SparseSampling.java
│ │ │ │ │ └── valueiteration
│ │ │ │ │ │ ├── PrioritizedSweeping.java
│ │ │ │ │ │ └── ValueIteration.java
│ │ │ │ └── vfa
│ │ │ │ │ └── fittedvi
│ │ │ │ │ ├── FittedVI.java
│ │ │ │ │ ├── SupervisedVFA.java
│ │ │ │ │ └── WekaVFATrainer.java
│ │ │ ├── pomdp
│ │ │ │ ├── BeliefPolicyAgent.java
│ │ │ │ ├── qmdp
│ │ │ │ │ └── QMDP.java
│ │ │ │ └── wrappedmdpalgs
│ │ │ │ │ └── BeliefSparseSampling.java
│ │ │ ├── shaping
│ │ │ │ ├── ShapedRewardFunction.java
│ │ │ │ └── potential
│ │ │ │ │ ├── PotentialFunction.java
│ │ │ │ │ └── PotentialShapedRF.java
│ │ │ └── vfa
│ │ │ │ ├── ActionApproximationResult.java
│ │ │ │ ├── ActionFeaturesQuery.java
│ │ │ │ ├── ApproximationResult.java
│ │ │ │ ├── FeatureDatabase.java
│ │ │ │ ├── FunctionWeight.java
│ │ │ │ ├── StateFeature.java
│ │ │ │ ├── StateToFeatureVectorGenerator.java
│ │ │ │ ├── ValueFunctionApproximation.java
│ │ │ │ ├── WeightGradient.java
│ │ │ │ ├── cmac
│ │ │ │ ├── AttributeTileSpecification.java
│ │ │ │ ├── CMACFeatureDatabase.java
│ │ │ │ ├── FVCMACFeatureDatabase.java
│ │ │ │ ├── FVTiling.java
│ │ │ │ └── Tiling.java
│ │ │ │ ├── common
│ │ │ │ ├── ConcatenatedObjectFeatureVectorGenerator.java
│ │ │ │ ├── FDFeatureVectorGenerator.java
│ │ │ │ ├── FVToFeatureDatabase.java
│ │ │ │ ├── LinearFVVFA.java
│ │ │ │ ├── LinearVFA.java
│ │ │ │ └── PFFeatureVectorGenerator.java
│ │ │ │ ├── fourier
│ │ │ │ ├── FourierBasis.java
│ │ │ │ └── FourierBasisLearningRateWrapper.java
│ │ │ │ └── rbf
│ │ │ │ ├── DistanceMetric.java
│ │ │ │ ├── FVDistanceMetric.java
│ │ │ │ ├── FVRBF.java
│ │ │ │ ├── FVRBFFeatureDatabase.java
│ │ │ │ ├── RBF.java
│ │ │ │ ├── RBFFeatureDatabase.java
│ │ │ │ ├── functions
│ │ │ │ ├── FVGaussianRBF.java
│ │ │ │ └── GaussianRBF.java
│ │ │ │ └── metrics
│ │ │ │ ├── EuclideanDistance.java
│ │ │ │ └── FVEuclideanDistance.java
│ │ ├── stochasticgames
│ │ │ ├── GameAnalysis.java
│ │ │ ├── JointPolicy.java
│ │ │ ├── PolicyFromJointPolicy.java
│ │ │ ├── agents
│ │ │ │ ├── RandomSGAgent.java
│ │ │ │ ├── SetStrategySGAgent.java
│ │ │ │ ├── interfacing
│ │ │ │ │ └── singleagent
│ │ │ │ │ │ ├── LearningAgentToSGAgentInterface.java
│ │ │ │ │ │ └── SGToSADomain.java
│ │ │ │ ├── madp
│ │ │ │ │ ├── MADPPlanAgentFactory.java
│ │ │ │ │ ├── MADPPlannerFactory.java
│ │ │ │ │ └── MultiAgentDPPlanningAgent.java
│ │ │ │ ├── maql
│ │ │ │ │ ├── MAQLFactory.java
│ │ │ │ │ └── MultiAgentQLearning.java
│ │ │ │ ├── naiveq
│ │ │ │ │ ├── SGNaiveQFactory.java
│ │ │ │ │ ├── SGNaiveQLAgent.java
│ │ │ │ │ └── history
│ │ │ │ │ │ ├── ActionIdMap.java
│ │ │ │ │ │ ├── ParameterNaiveActionIdMap.java
│ │ │ │ │ │ ├── SGQWActionHistory.java
│ │ │ │ │ │ └── SGQWActionHistoryFactory.java
│ │ │ │ └── twoplayer
│ │ │ │ │ ├── repeatedsinglestage
│ │ │ │ │ ├── GrimTrigger.java
│ │ │ │ │ └── TitForTat.java
│ │ │ │ │ └── singlestage
│ │ │ │ │ └── equilibriumplayer
│ │ │ │ │ ├── BimatrixEquilibriumSolver.java
│ │ │ │ │ ├── EquilibriumPlayingSGAgent.java
│ │ │ │ │ └── equilibriumsolvers
│ │ │ │ │ ├── CorrelatedEquilibrium.java
│ │ │ │ │ ├── MaxMax.java
│ │ │ │ │ ├── MinMax.java
│ │ │ │ │ └── Utilitarian.java
│ │ │ ├── auxiliary
│ │ │ │ ├── GameSequenceVisualizer.java
│ │ │ │ └── performance
│ │ │ │ │ ├── AgentFactoryAndType.java
│ │ │ │ │ ├── MultiAgentExperimenter.java
│ │ │ │ │ └── MultiAgentPerformancePlotter.java
│ │ │ ├── madynamicprogramming
│ │ │ │ ├── AgentQSourceMap.java
│ │ │ │ ├── JAQValue.java
│ │ │ │ ├── MADynamicProgramming.java
│ │ │ │ ├── MAQSourcePolicy.java
│ │ │ │ ├── MultiAgentQSourceProvider.java
│ │ │ │ ├── QSourceForSingleAgent.java
│ │ │ │ ├── SGBackupOperator.java
│ │ │ │ ├── backupOperators
│ │ │ │ │ ├── CoCoQ.java
│ │ │ │ │ ├── CorrelatedQ.java
│ │ │ │ │ ├── MaxQ.java
│ │ │ │ │ └── MinMaxQ.java
│ │ │ │ ├── dpplanners
│ │ │ │ │ └── MAValueIteration.java
│ │ │ │ └── policies
│ │ │ │ │ ├── ECorrelatedQJointPolicy.java
│ │ │ │ │ ├── EGreedyJointPolicy.java
│ │ │ │ │ ├── EGreedyMaxWellfare.java
│ │ │ │ │ └── EMinMaxPolicy.java
│ │ │ └── solvers
│ │ │ │ ├── CorrelatedEquilibriumSolver.java
│ │ │ │ ├── GeneralBimatrixSolverTools.java
│ │ │ │ └── MinMaxSolver.java
│ │ └── valuefunction
│ │ │ ├── QFunction.java
│ │ │ ├── QValue.java
│ │ │ ├── ValueFunction.java
│ │ │ └── ValueFunctionInitialization.java
│ │ ├── datastructures
│ │ ├── AlphanumericSorting.java
│ │ ├── BoltzmannDistribution.java
│ │ ├── CommandLineOptions.java
│ │ ├── HashIndexedHeap.java
│ │ ├── HashedAggregator.java
│ │ ├── StochasticTree.java
│ │ └── WekaInterfaces.java
│ │ ├── debugtools
│ │ ├── DPrint.java
│ │ ├── DebugFlags.java
│ │ ├── MyTimer.java
│ │ └── RandomFactory.java
│ │ ├── domain
│ │ ├── singleagent
│ │ │ ├── blockdude
│ │ │ │ ├── BlockDude.java
│ │ │ │ ├── BlockDudeLevelConstructor.java
│ │ │ │ ├── BlockDudeTF.java
│ │ │ │ └── BlockDudeVisualizer.java
│ │ │ ├── blocksworld
│ │ │ │ ├── BlocksWorld.java
│ │ │ │ └── BlocksWorldVisualizer.java
│ │ │ ├── cartpole
│ │ │ │ ├── CartPoleDomain.java
│ │ │ │ ├── CartPoleVisualizer.java
│ │ │ │ ├── InvertedPendulum.java
│ │ │ │ ├── InvertedPendulumVisualizer.java
│ │ │ │ ├── SerializableCartPoleStateFactory.java
│ │ │ │ └── SerializableInvertedPendulumStateFactory.java
│ │ │ ├── frostbite
│ │ │ │ ├── FrostbiteDomain.java
│ │ │ │ ├── FrostbiteRF.java
│ │ │ │ ├── FrostbiteTF.java
│ │ │ │ ├── FrostbiteVisualizer.java
│ │ │ │ └── SerializableFrostbiteStateFactory.java
│ │ │ ├── graphdefined
│ │ │ │ ├── GraphDefinedDomain.java
│ │ │ │ ├── GraphRF.java
│ │ │ │ └── GraphTF.java
│ │ │ ├── gridworld
│ │ │ │ ├── GridWorldDomain.java
│ │ │ │ ├── GridWorldRewardFunction.java
│ │ │ │ ├── GridWorldTerminalFunction.java
│ │ │ │ ├── GridWorldVisualizer.java
│ │ │ │ ├── SerializableGridWorldStateFactory.java
│ │ │ │ └── macro
│ │ │ │ │ ├── MacroCellGridWorld.java
│ │ │ │ │ └── MacroCellVisualizer.java
│ │ │ ├── lunarlander
│ │ │ │ ├── LLVisualizer.java
│ │ │ │ ├── LunarLanderDomain.java
│ │ │ │ ├── LunarLanderRF.java
│ │ │ │ ├── LunarLanderTF.java
│ │ │ │ └── SerializableLunarLanderStateFactory.java
│ │ │ ├── mountaincar
│ │ │ │ ├── MCRandomStateGenerator.java
│ │ │ │ ├── MountainCar.java
│ │ │ │ ├── MountainCarVisualizer.java
│ │ │ │ └── SerializableMountainCarStateFactory.java
│ │ │ ├── pomdp
│ │ │ │ └── tiger
│ │ │ │ │ └── TigerDomain.java
│ │ │ └── tabularized
│ │ │ │ └── TabulatedDomainWrapper.java
│ │ └── stochasticgames
│ │ │ ├── gridgame
│ │ │ ├── GGVisualizer.java
│ │ │ ├── GridGame.java
│ │ │ └── GridGameStandardMechanics.java
│ │ │ └── normalform
│ │ │ └── SingleStageNormalFormGame.java
│ │ ├── oomdp
│ │ ├── auxiliary
│ │ │ ├── DomainGenerator.java
│ │ │ ├── StateAbstraction.java
│ │ │ ├── StateGenerator.java
│ │ │ ├── StateMapping.java
│ │ │ ├── common
│ │ │ │ ├── ConstantStateGenerator.java
│ │ │ │ ├── GoalConditionTF.java
│ │ │ │ ├── NullAbstraction.java
│ │ │ │ ├── NullAbstractionNoCopy.java
│ │ │ │ ├── NullTermination.java
│ │ │ │ ├── RandomStartStateGenerator.java
│ │ │ │ └── SinglePFTF.java
│ │ │ └── stateconditiontest
│ │ │ │ ├── SinglePFSCT.java
│ │ │ │ ├── StateConditionTest.java
│ │ │ │ ├── StateConditionTestIterable.java
│ │ │ │ └── TFGoalCondition.java
│ │ ├── core
│ │ │ ├── AbstractGroundedAction.java
│ │ │ ├── AbstractObjectParameterizedGroundedAction.java
│ │ │ ├── Attribute.java
│ │ │ ├── Domain.java
│ │ │ ├── GroundedProp.java
│ │ │ ├── ObjectClass.java
│ │ │ ├── PropositionalFunction.java
│ │ │ ├── TerminalFunction.java
│ │ │ ├── TransitionProbability.java
│ │ │ ├── objects
│ │ │ │ ├── ImmutableObjectInstance.java
│ │ │ │ ├── MutableObjectInstance.java
│ │ │ │ ├── OOMDPObjectInstance.java
│ │ │ │ └── ObjectInstance.java
│ │ │ ├── states
│ │ │ │ ├── FixedSizeImmutableState.java
│ │ │ │ ├── ImmutableState.java
│ │ │ │ ├── ImmutableStateInterface.java
│ │ │ │ ├── MutableState.java
│ │ │ │ ├── OOMDPState.java
│ │ │ │ └── State.java
│ │ │ └── values
│ │ │ │ ├── DiscreteValue.java
│ │ │ │ ├── DoubleArrayValue.java
│ │ │ │ ├── IntArrayValue.java
│ │ │ │ ├── IntValue.java
│ │ │ │ ├── MultiTargetRelationalValue.java
│ │ │ │ ├── OOMDPValue.java
│ │ │ │ ├── RealValue.java
│ │ │ │ ├── RelationalValue.java
│ │ │ │ ├── StringValue.java
│ │ │ │ ├── UnsetValueException.java
│ │ │ │ └── Value.java
│ │ ├── legacy
│ │ │ ├── StateJSONParser.java
│ │ │ ├── StateParser.java
│ │ │ └── StateYAMLParser.java
│ │ ├── singleagent
│ │ │ ├── Action.java
│ │ │ ├── ActionObserver.java
│ │ │ ├── FullActionModel.java
│ │ │ ├── GroundedAction.java
│ │ │ ├── ObjectParameterizedAction.java
│ │ │ ├── RewardFunction.java
│ │ │ ├── SADomain.java
│ │ │ ├── common
│ │ │ │ ├── GoalBasedRF.java
│ │ │ │ ├── NullAction.java
│ │ │ │ ├── NullRewardFunction.java
│ │ │ │ ├── SimpleAction.java
│ │ │ │ ├── SimpleGroundedAction.java
│ │ │ │ ├── SingleGoalPFRF.java
│ │ │ │ ├── UniformCostRF.java
│ │ │ │ └── VisualActionObserver.java
│ │ │ ├── environment
│ │ │ │ ├── Environment.java
│ │ │ │ ├── EnvironmentObserver.java
│ │ │ │ ├── EnvironmentOutcome.java
│ │ │ │ ├── EnvironmentServer.java
│ │ │ │ ├── SimulatedEnvironment.java
│ │ │ │ ├── StateSettableEnvironment.java
│ │ │ │ └── TaskSettableEnvironment.java
│ │ │ ├── explorer
│ │ │ │ ├── SpecialExplorerAction.java
│ │ │ │ ├── StateResetSpecialAction.java
│ │ │ │ ├── TerminalExplorer.java
│ │ │ │ └── VisualExplorer.java
│ │ │ ├── interfaces
│ │ │ │ └── rlglue
│ │ │ │ │ └── RLGlueEnvironment.java
│ │ │ └── pomdp
│ │ │ │ ├── BeliefAgent.java
│ │ │ │ ├── BeliefMDPGenerator.java
│ │ │ │ ├── ObservationFunction.java
│ │ │ │ ├── PODomain.java
│ │ │ │ ├── SimulatedPOEnvironment.java
│ │ │ │ └── beliefstate
│ │ │ │ ├── BeliefState.java
│ │ │ │ ├── DenseBeliefVector.java
│ │ │ │ ├── EnumerableBeliefState.java
│ │ │ │ └── tabular
│ │ │ │ ├── HashableTabularBeliefStateFactory.java
│ │ │ │ └── TabularBeliefState.java
│ │ ├── statehashing
│ │ │ ├── DiscretizingHashableStateFactory.java
│ │ │ ├── DiscretizingMaskedHashableStateFactory.java
│ │ │ ├── FixedSizeStateHashableStateFactory.java
│ │ │ ├── HashableObject.java
│ │ │ ├── HashableObjectFactory.java
│ │ │ ├── HashableState.java
│ │ │ ├── HashableStateFactory.java
│ │ │ ├── HashableValue.java
│ │ │ ├── HashableValueFactory.java
│ │ │ ├── ImmutableHashableObjectFactory.java
│ │ │ ├── ImmutableStateHashableStateFactory.java
│ │ │ ├── MaskedHashableStateFactory.java
│ │ │ └── SimpleHashableStateFactory.java
│ │ ├── stateserialization
│ │ │ ├── SerializableState.java
│ │ │ ├── SerializableStateFactory.java
│ │ │ └── simple
│ │ │ │ ├── SimpleSerializableState.java
│ │ │ │ ├── SimpleSerializableStateFactory.java
│ │ │ │ ├── SimpleSerializedObjectInstance.java
│ │ │ │ └── SimpleSerializedValue.java
│ │ ├── stochasticgames
│ │ │ ├── AgentFactory.java
│ │ │ ├── InvalidActionException.java
│ │ │ ├── JointAction.java
│ │ │ ├── JointActionModel.java
│ │ │ ├── JointReward.java
│ │ │ ├── SGAgent.java
│ │ │ ├── SGAgentType.java
│ │ │ ├── SGDomain.java
│ │ │ ├── SGStateGenerator.java
│ │ │ ├── World.java
│ │ │ ├── WorldGenerator.java
│ │ │ ├── WorldObserver.java
│ │ │ ├── agentactions
│ │ │ │ ├── GroundedSGAgentAction.java
│ │ │ │ ├── ObParamSGAgentAction.java
│ │ │ │ ├── SGAgentAction.java
│ │ │ │ ├── SimpleGroundedSGAgentAction.java
│ │ │ │ └── SimpleSGAgentAction.java
│ │ │ ├── common
│ │ │ │ ├── AgentFactoryWithSubjectiveReward.java
│ │ │ │ ├── ConstantSGStateGenerator.java
│ │ │ │ ├── StaticRepeatedGameActionModel.java
│ │ │ │ └── VisualWorldObserver.java
│ │ │ ├── explorers
│ │ │ │ ├── HardStateResetSpecialAction.java
│ │ │ │ ├── SGTerminalExplorer.java
│ │ │ │ └── SGVisualExplorer.java
│ │ │ └── tournament
│ │ │ │ ├── MatchEntry.java
│ │ │ │ ├── MatchSelector.java
│ │ │ │ ├── Tournament.java
│ │ │ │ └── common
│ │ │ │ ├── AllPairWiseSameTypeMS.java
│ │ │ │ └── ConstantWorldGenerator.java
│ │ └── visualizer
│ │ │ ├── MultiLayerRenderer.java
│ │ │ ├── ObjectPainter.java
│ │ │ ├── RenderLayer.java
│ │ │ ├── StateActionRenderLayer.java
│ │ │ ├── StateRenderLayer.java
│ │ │ ├── StaticPainter.java
│ │ │ └── Visualizer.java
│ │ └── tutorials
│ │ ├── bd
│ │ └── ExampleGridWorld.java
│ │ ├── bpl
│ │ └── BasicBehavior.java
│ │ ├── cpl
│ │ ├── QLTutorial.java
│ │ └── VITutorial.java
│ │ ├── hgw
│ │ ├── HelloGridWorld.java
│ │ └── PlotTest.java
│ │ ├── scd
│ │ └── ContinuousDomainTutorial.java
│ │ └── video
│ │ └── mc
│ │ └── MCVideo.java
└── ycai87-analysis.pdf
└── README.md
--------------------------------------------------------------------------------
/Assignment1/README.txt:
--------------------------------------------------------------------------------
All of the analysis is done in the Weka GUI. For data preprocessing, the Resample and RemovePercentage filters
are used with the no-replacement option to split the original dataset 70/30 into training and test data.
Training subsets of 10, 20, 30, ..., 90% of the training data are generated with the RemovePercentage filter.
Training and test results can be accessed through the Weka Explorer and Experimenter. J48 is used for the decision
tree, IBk for k-nearest neighbors, AdaBoostM1 for boosting, LibSVM for SVM, and MultilayerPerceptron for the ANN.
Learning-curve results are generated by the Weka Experimenter and plotted in Microsoft Excel.

The results spreadsheet and the accuracy table for the different classifiers are in this folder. The two datasets are also included.
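Note: the split described above can also be scripted instead of done in the GUI. The following is a minimal Jython sketch of the same idea, not part of the original submission; it assumes weka.jar is on the classpath, omits the Resample step, and uses illustrative file names.

# Jython sketch of the 70/30 split and a J48 evaluation, as described above.
# Assumes weka.jar on the classpath; file names are illustrative.
from weka.core.converters import ConverterUtils
from weka.filters import Filter
from weka.filters.unsupervised.instance import RemovePercentage
from weka.classifiers.trees import J48
from weka.classifiers import Evaluation

data = ConverterUtils.DataSource.read("phishing.arff")
data.setClassIndex(data.numAttributes() - 1)

# Remove 30% of the instances, keeping 70% for training.
rp = RemovePercentage()
rp.setPercentage(30.0)
rp.setInputFormat(data)
train = Filter.useFilter(data, rp)

# Invert the selection to keep the removed 30% as the test set.
rp2 = RemovePercentage()
rp2.setPercentage(30.0)
rp2.setInvertSelection(True)
rp2.setInputFormat(data)
test = Filter.useFilter(data, rp2)

tree = J48()
tree.buildClassifier(train)
ev = Evaluation(train)
ev.evaluateModel(tree, test)
print ev.pctCorrect()
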
--------------------------------------------------------------------------------
/Assignment1/result.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment1/result.xlsx
--------------------------------------------------------------------------------
/Assignment1/table.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment1/table.docx
--------------------------------------------------------------------------------
/Assignment1/ycai87-analysis.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment1/ycai87-analysis.pdf
--------------------------------------------------------------------------------
/Assignment2/README.txt:
--------------------------------------------------------------------------------
The data file is phishing.csv.
Download the ABAGAIL package from https://github.com/pushkar/ABAGAIL.
Copy the source code to src/opt/ycai87/.
Compile the ABAGAIL jar together with the code and run the tests.
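Note: once the ABAGAIL jar is built, its optimizers can be driven from Jython as well as from the Java test classes above. This is a hedged sketch modeled on ABAGAIL's own Continuous Peaks example, not the repository's code; it assumes ABAGAIL.jar on the classpath and the constants (N, T, iteration count) are illustrative.

# Jython sketch of randomized hill climbing on Continuous Peaks with ABAGAIL.
from array import array
import opt.example.ContinuousPeaksEvaluationFunction as ContinuousPeaksEvaluationFunction
import dist.DiscreteUniformDistribution as DiscreteUniformDistribution
import opt.DiscreteChangeOneNeighbor as DiscreteChangeOneNeighbor
import opt.GenericHillClimbingProblem as GenericHillClimbingProblem
import opt.RandomizedHillClimbing as RandomizedHillClimbing
import shared.FixedIterationTrainer as FixedIterationTrainer

N, T = 60, 6
ranges = array('i', [2] * N)            # each position is a 0/1 bit
ef = ContinuousPeaksEvaluationFunction(T)
odd = DiscreteUniformDistribution(ranges)
nf = DiscreteChangeOneNeighbor(ranges)
hcp = GenericHillClimbingProblem(ef, odd, nf)

rhc = RandomizedHillClimbing(hcp)
fit = FixedIterationTrainer(rhc, 200000)
fit.train()
print ef.value(rhc.getOptimal())
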
--------------------------------------------------------------------------------
/Assignment2/ycai87-analysis.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment2/ycai87-analysis.pdf
--------------------------------------------------------------------------------
/Assignment3/README.txt:
--------------------------------------------------------------------------------
The experiments are run in Weka and scikit-learn.
Use the Python files to run the clustering analysis.
Use the Java code to run the dimensionality-reduction analysis, which runs J48 with forward search, adding lower-ranked components one by one.
The datasets are attached.
Use the Weka GUI to do PCA, ICA, Random Projection, and Information Gain, and use the corresponding filter to transform the data.
Save the transformed data to ARFF and CSV files to feed the Java or Python code for the dimensionality-reduction and clustering analysis.
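Note: the Weka filtering step above can also be scripted. A hedged sketch for the PCA case only, not part of the original submission; it assumes weka.jar on the classpath, and the file names and variance threshold are illustrative.

# Jython sketch of the Weka PCA transformation described in the README.
from weka.core.converters import ConverterUtils
from weka.filters import Filter
from weka.filters.unsupervised.attribute import PrincipalComponents

data = ConverterUtils.DataSource.read("wisconsin.arff")
data.setClassIndex(data.numAttributes() - 1)

pca = PrincipalComponents()
pca.setVarianceCovered(0.95)   # keep components covering 95% of the variance
pca.setInputFormat(data)
transformed = Filter.useFilter(data, pca)

ConverterUtils.DataSink.write("wisconsin_pca.arff", transformed)
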
--------------------------------------------------------------------------------
/Assignment3/bc_kmeans.py:
--------------------------------------------------------------------------------
from sklearn import datasets

from clustertesters import KMeansTestCluster as kmtc

if __name__ == "__main__":
    breast_cancer = datasets.load_breast_cancer()
    #print breast_cancer
    X, y = breast_cancer.data, breast_cancer.target
    #print X

    tester = kmtc.KMeansTestCluster(X, y, clusters=range(1, 10), plot=True, targetcluster=3, stats=True)
    tester.run()
--------------------------------------------------------------------------------
/Assignment3/br_dr_em.py:
--------------------------------------------------------------------------------
from sklearn import datasets, metrics
from clustertesters import ExpectationMaximizationTestCluster as emtc
import pandas as pd

def encode_target(df, target_column):
    """Add column to df with integers for the target.

    Args
    ----
    df -- pandas DataFrame.
    target_column -- column to map to int, producing
                     new Target column.

    Returns
    -------
    df_mod -- modified DataFrame.
    map_to_int -- mapping of target names to integers.
    """
    df_mod = df.copy()
    targets = df_mod[target_column].unique()
    map_to_int = {name: n for n, name in enumerate(targets)}
    df_mod[target_column].replace(map_to_int, inplace=True)
    return (df_mod, map_to_int)

if __name__ == "__main__":
    letter_recognition = pd.read_csv("wisconsin_ig_reduced.csv")
    dft, mapping = encode_target(letter_recognition, "class")

    X = dft.iloc[:, :-1]
    y = dft.iloc[:, -1]

    tester = emtc.ExpectationMaximizationTestCluster(X, y, clusters=range(1, 11), plot=True, targetcluster=3, stats=True)
    tester.run()
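Note: the KMeansTestCluster / ExpectationMaximizationTestCluster sources themselves are not reproduced in this section of the dump. The following is a minimal sketch of the kind of cluster-count sweep such a tester runs, using the modern scikit-learn API (the original code targets an older Python 2-era sklearn); all names and the random data are illustrative.

# Sketch of a cluster-count sweep like the clustertesters classes perform.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.metrics import silhouette_score

def sweep(X, ks):
    for k in ks:
        km = KMeans(n_clusters=k, n_init=10, random_state=0).fit(X)
        gm = GaussianMixture(n_components=k, random_state=0).fit(X)
        sil = silhouette_score(X, km.labels_) if k > 1 else float("nan")
        # inertia tracks the k-means elbow; BIC scores the mixture model
        print(k, km.inertia_, gm.bic(X), sil)

if __name__ == "__main__":
    X = np.random.rand(200, 5)   # stand-in for the real feature matrix
    sweep(X, range(2, 11))
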
--------------------------------------------------------------------------------
/Assignment3/br_dr_kmeans.py:
--------------------------------------------------------------------------------
import pandas as pd

from clustertesters import KMeansTestCluster as kmtc


def encode_target(df, target_column):
    """Add column to df with integers for the target.

    Args
    ----
    df -- pandas DataFrame.
    target_column -- column to map to int, producing
                     new Target column.

    Returns
    -------
    df_mod -- modified DataFrame.
    map_to_int -- mapping of target names to integers.
    """
    df_mod = df.copy()
    targets = df_mod[target_column].unique()
    map_to_int = {name: n for n, name in enumerate(targets)}
    df_mod[target_column].replace(map_to_int, inplace=True)
    return (df_mod, map_to_int)

if __name__ == "__main__":
    letter_recognition = pd.read_csv("wisconsin_ig_reduced.csv")
    dft, mapping = encode_target(letter_recognition, "class")
    #dft.to_csv('letternew.csv')
    #print dft
    #dft2 = pd.read_csv("phishing.csv")
    X = dft.iloc[:, :-1]
    y = dft.iloc[:, -1]
    #print X
    #print y
    tester = kmtc.KMeansTestCluster(X, y, clusters=range(1, 11), plot=True, targetcluster=2, stats=True)
    tester.run()
--------------------------------------------------------------------------------
/Assignment3/clustertesters/ExpectationMaximizationTestCluster.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment3/clustertesters/ExpectationMaximizationTestCluster.pyc
--------------------------------------------------------------------------------
/Assignment3/clustertesters/KMeansTestCluster.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment3/clustertesters/KMeansTestCluster.pyc
--------------------------------------------------------------------------------
/Assignment3/clustertesters/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment3/clustertesters/__init__.py
--------------------------------------------------------------------------------
/Assignment3/clustertesters/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment3/clustertesters/__init__.pyc
--------------------------------------------------------------------------------
/Assignment3/clustertesters/lr_ExpectationMaximizationTestCluster.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment3/clustertesters/lr_ExpectationMaximizationTestCluster.pyc
--------------------------------------------------------------------------------
/Assignment3/clustertesters/lr_KMeansTestCluster.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment3/clustertesters/lr_KMeansTestCluster.pyc
--------------------------------------------------------------------------------
/Assignment3/lr_dr_em.py:
--------------------------------------------------------------------------------
from sklearn import datasets, metrics
from clustertesters import lr_ExpectationMaximizationTestCluster as emtc
import pandas as pd

def encode_target(df, target_column):
    """Add column to df with integers for the target.

    Args
    ----
    df -- pandas DataFrame.
    target_column -- column to map to int, producing
                     new Target column.

    Returns
    -------
    df_mod -- modified DataFrame.
    map_to_int -- mapping of target names to integers.
    """
    df_mod = df.copy()
    targets = df_mod[target_column].unique()
    map_to_int = {name: n for n, name in enumerate(targets)}
    df_mod[target_column].replace(map_to_int, inplace=True)
    return (df_mod, map_to_int)

if __name__ == "__main__":
    letter_recognition = pd.read_csv("letter_ig_reduced.csv")
    dft, mapping = encode_target(letter_recognition, "class")

    X = dft.iloc[:, :-1]
    y = dft.iloc[:, -1]

    tester = emtc.ExpectationMaximizationTestCluster(X, y, clusters=range(1, 31), plot=True, targetcluster=3, stats=True)
    tester.run()
--------------------------------------------------------------------------------
/Assignment3/lr_dr_kmeans.py:
--------------------------------------------------------------------------------
import pandas as pd

from clustertesters import lr_KMeansTestCluster as kmtc


def encode_target(df, target_column):
    """Add column to df with integers for the target.

    Args
    ----
    df -- pandas DataFrame.
    target_column -- column to map to int, producing
                     new Target column.

    Returns
    -------
    df_mod -- modified DataFrame.
    map_to_int -- mapping of target names to integers.
    """
    df_mod = df.copy()
    targets = df_mod[target_column].unique()
    map_to_int = {name: n for n, name in enumerate(targets)}
    df_mod[target_column].replace(map_to_int, inplace=True)
    return (df_mod, map_to_int)

if __name__ == "__main__":
    letter_recognition = pd.read_csv("letter_ig_reduced.csv")
    dft, mapping = encode_target(letter_recognition, "class")
    #dft.to_csv('letternew.csv')
    #print dft
    #dft2 = pd.read_csv("phishing.csv")
    X = dft.iloc[:, :-1]
    y = dft.iloc[:, -1]
    #print X
    #print y
    tester = kmtc.KMeansTestCluster(X, y, clusters=range(1, 31), plot=True, targetcluster=2, stats=True)
    tester.run()
--------------------------------------------------------------------------------
/Assignment3/lr_em.py:
--------------------------------------------------------------------------------
from sklearn import datasets, metrics
from clustertesters import lr_ExpectationMaximizationTestCluster as emtc
import pandas as pd

def encode_target(df, target_column):
    """Add column to df with integers for the target.

    Args
    ----
    df -- pandas DataFrame.
    target_column -- column to map to int, producing
                     new Target column.

    Returns
    -------
    df_mod -- modified DataFrame.
    map_to_int -- mapping of target names to integers.
    """
    df_mod = df.copy()
    targets = df_mod[target_column].unique()
    map_to_int = {name: n for n, name in enumerate(targets)}
    df_mod[target_column].replace(map_to_int, inplace=True)
    return (df_mod, map_to_int)

if __name__ == "__main__":
    letter_recognition = pd.read_csv("letter.csv")
    dft, mapping = encode_target(letter_recognition, "class")

    X = dft.iloc[:, :-1]
    y = dft.iloc[:, -1]

    tester = emtc.ExpectationMaximizationTestCluster(X, y, clusters=range(1, 31), plot=True, targetcluster=3, stats=True)
    tester.run()
--------------------------------------------------------------------------------
/Assignment3/lr_kmeans.py:
--------------------------------------------------------------------------------
import pandas as pd

from clustertesters import lr_KMeansTestCluster as kmtc


def encode_target(df, target_column):
    """Add column to df with integers for the target.

    Args
    ----
    df -- pandas DataFrame.
    target_column -- column to map to int, producing
                     new Target column.

    Returns
    -------
    df_mod -- modified DataFrame.
    map_to_int -- mapping of target names to integers.
    """
    df_mod = df.copy()
    targets = df_mod[target_column].unique()
    map_to_int = {name: n for n, name in enumerate(targets)}
    df_mod[target_column].replace(map_to_int, inplace=True)
    return (df_mod, map_to_int)

if __name__ == "__main__":
    letter_recognition = pd.read_csv("letter.csv")
    dft, mapping = encode_target(letter_recognition, "class")
    dft.to_csv('letternew.csv')
    #print dft
    #dft2 = pd.read_csv("phishing.csv")
    X = dft.iloc[:, :-1]
    y = dft.iloc[:, -1]
    #print X
    #print y
    tester = kmtc.KMeansTestCluster(X, y, clusters=range(1, 31), plot=True, targetcluster=2, stats=True)
    tester.run()
--------------------------------------------------------------------------------
/Assignment3/ycai87-analysis.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment3/ycai87-analysis.pdf
--------------------------------------------------------------------------------
/Assignment4/README.txt:
--------------------------------------------------------------------------------
The Python files are from Jon Tay, shared on the OMSCS CS 7641 Slack channel.

1. Install Jython.
2. Compile the BURLAP source to a jar file.
3. Use Jython to run the easyGW and hardGW Python files:
   C:\jython2.7.0\bin\jython easyGW.py
   C:\jython2.7.0\bin\jython hardGW.py
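Note: easyGW.py and hardGW.py themselves are not reproduced in this dump. The sketch below shows the general shape of such a Jython driver; it is hedged, assumes the compiled BURLAP jar is on the classpath, and uses stock BURLAP grid-world classes rather than the assignment's BasicGridWorld launcher.

# Jython sketch of a BURLAP value-iteration run, roughly what the GW drivers do.
from burlap.domain.singleagent.gridworld import GridWorldDomain
from burlap.behavior.singleagent.planning.stochastic.valueiteration import ValueIteration
from burlap.oomdp.statehashing import SimpleHashableStateFactory
from burlap.oomdp.singleagent.common import UniformCostRF
from burlap.oomdp.auxiliary.common import NullTermination

gwd = GridWorldDomain(11, 11)
gwd.setMapToFourRooms()
domain = gwd.generateDomain()
s = GridWorldDomain.getOneAgentNoLocationState(domain)
GridWorldDomain.setAgent(s, 0, 0)

# gamma, hashing factory, convergence delta, and max iterations are illustrative
vi = ValueIteration(domain, UniformCostRF(), NullTermination(),
                    0.99, SimpleHashableStateFactory(), 0.001, 100)
policy = vi.planFromState(s)
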
--------------------------------------------------------------------------------
/Assignment4/src/burlap/assignment4/util/AgentPainter.java:
--------------------------------------------------------------------------------
package burlap.assignment4.util;

import java.awt.Color;
import java.awt.Graphics2D;
import java.awt.geom.Ellipse2D;

import burlap.assignment4.BasicGridWorld;
import burlap.oomdp.core.objects.ObjectInstance;
import burlap.oomdp.core.states.State;
import burlap.oomdp.visualizer.ObjectPainter;

public class AgentPainter implements ObjectPainter {

    protected int[][] map;

    public AgentPainter(int[][] map){
        this.map = map;
    }

    @Override
    public void paintObject(Graphics2D g2, State s, ObjectInstance ob,
            float cWidth, float cHeight) {

        //agent will be filled in gray
        g2.setColor(Color.GRAY);

        //set up floats for the width and height of our domain
        float fWidth = this.map.length;
        float fHeight = this.map[0].length;

        //determine the width of a single cell on our canvas
        //such that the whole map can be painted
        float width = cWidth / fWidth;
        float height = cHeight / fHeight;

        int ax = ob.getIntValForAttribute(BasicGridWorld.ATTX);
        int ay = ob.getIntValForAttribute(BasicGridWorld.ATTY);

        //left coordinate of cell on our canvas
        float rx = ax*width;

        //top coordinate of cell on our canvas
        //coordinate system adjustment because the java canvas
        //origin is in the top left instead of the bottom left
        float ry = cHeight - height - ay*height;

        //paint the agent as a filled ellipse
        g2.fill(new Ellipse2D.Float(rx, ry, width, height));

    }
}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/assignment4/util/AtLocation.java:
--------------------------------------------------------------------------------
package burlap.assignment4.util;

import burlap.assignment4.BasicGridWorld;
import burlap.oomdp.core.Domain;
import burlap.oomdp.core.PropositionalFunction;
import burlap.oomdp.core.objects.ObjectInstance;
import burlap.oomdp.core.states.State;

public class AtLocation extends PropositionalFunction {

    public AtLocation(Domain domain) {
        super(BasicGridWorld.PFAT, domain, new String[] { BasicGridWorld.CLASSAGENT, BasicGridWorld.CLASSLOCATION });
    }

    @Override
    public boolean isTrue(State s, String... params) {
        ObjectInstance agent = s.getObject(params[0]);
        ObjectInstance location = s.getObject(params[1]);

        int ax = agent.getIntValForAttribute(BasicGridWorld.ATTX);
        int ay = agent.getIntValForAttribute(BasicGridWorld.ATTY);

        int lx = location.getIntValForAttribute(BasicGridWorld.ATTX);
        int ly = location.getIntValForAttribute(BasicGridWorld.ATTY);

        return ax == lx && ay == ly;
    }

}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/assignment4/util/BasicRewardFunction.java:
--------------------------------------------------------------------------------
package burlap.assignment4.util;

import burlap.assignment4.BasicGridWorld;
import burlap.oomdp.core.objects.ObjectInstance;
import burlap.oomdp.core.states.State;
import burlap.oomdp.singleagent.GroundedAction;
import burlap.oomdp.singleagent.RewardFunction;

public class BasicRewardFunction implements RewardFunction {

    int goalX;
    int goalY;
    int[][] map;

    public BasicRewardFunction(int goalX, int goalY, int[][] map) {
        this.goalX = goalX;
        this.goalY = goalY;
        this.map = map;
    }

    @Override
    public double reward(State s, GroundedAction a, State sprime) {

        // get location of agent in next state
        ObjectInstance agent = sprime.getFirstObjectOfClass(BasicGridWorld.CLASSAGENT);
        int ax = agent.getIntValForAttribute(BasicGridWorld.ATTX);
        int ay = agent.getIntValForAttribute(BasicGridWorld.ATTY);

        // are they at goal location?
        if (ax == this.goalX && ay == this.goalY) {
            return 100.;
        }
        // negative map cells give a scaled penalty
        if (map[ax][ay] < 0) {
            return map[ax][ay] * 10.;
        }

        return -1;
    }

}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/assignment4/util/BasicTerminalFunction.java:
--------------------------------------------------------------------------------
package burlap.assignment4.util;

import burlap.assignment4.BasicGridWorld;
import burlap.oomdp.core.TerminalFunction;
import burlap.oomdp.core.objects.ObjectInstance;
import burlap.oomdp.core.states.State;

public class BasicTerminalFunction implements TerminalFunction {

    int goalX;
    int goalY;

    public BasicTerminalFunction(int goalX, int goalY) {
        this.goalX = goalX;
        this.goalY = goalY;
    }

    @Override
    public boolean isTerminal(State s) {

        // get location of agent in next state
        ObjectInstance agent = s.getFirstObjectOfClass(BasicGridWorld.CLASSAGENT);
        int ax = agent.getIntValForAttribute(BasicGridWorld.ATTX);
        int ay = agent.getIntValForAttribute(BasicGridWorld.ATTY);

        // are they at goal location?
        if (ax == this.goalX && ay == this.goalY) {
            return true;
        }

        return false;
    }

}
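Note: taken together, the reward and terminal functions above define the task. A hedged Jython-style usage sketch follows; the goal coordinates and the map are stand-ins for the values the launcher supplies.

# Illustrative wiring of the two classes above (values are stand-ins).
from burlap.assignment4.util import BasicRewardFunction, BasicTerminalFunction

userMap = [[0, 0, 0], [0, 1, 0], [0, 0, 0]]   # 0 = open cell, 1 = wall
rf = BasicRewardFunction(2, 2, userMap)        # +100 on reaching cell (2, 2)
tf = BasicTerminalFunction(2, 2)               # episode ends at the goal
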
--------------------------------------------------------------------------------
/Assignment4/src/burlap/assignment4/util/LocationPainter.java:
--------------------------------------------------------------------------------
package burlap.assignment4.util;

import java.awt.Color;
import java.awt.Graphics2D;
import java.awt.geom.Rectangle2D;

import burlap.assignment4.BasicGridWorld;
import burlap.oomdp.core.objects.ObjectInstance;
import burlap.oomdp.core.states.State;
import burlap.oomdp.visualizer.ObjectPainter;


public class LocationPainter implements ObjectPainter {

    int[][] map;

    public LocationPainter(int[][] map){
        this.map = map;
    }

    @Override
    public void paintObject(Graphics2D g2, State s, ObjectInstance ob,
            float cWidth, float cHeight) {

        //locations will be filled in blue
        g2.setColor(Color.BLUE);

        //set up floats for the width and height of our domain
        float fWidth = this.map.length;
        float fHeight = this.map[0].length;

        //determine the width of a single cell on our canvas
        //such that the whole map can be painted
        float width = cWidth / fWidth;
        float height = cHeight / fHeight;

        int ax = ob.getIntValForAttribute(BasicGridWorld.ATTX);
        int ay = ob.getIntValForAttribute(BasicGridWorld.ATTY);

        //left coordinate of cell on our canvas
        float rx = ax*width;

        //top coordinate of cell on our canvas
        //coordinate system adjustment because the java canvas
        //origin is in the top left instead of the bottom left
        float ry = cHeight - height - ay*height;

        //paint the rectangle
        g2.fill(new Rectangle2D.Float(rx, ry, width, height));

    }

}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/assignment4/util/WallPainter.java:
--------------------------------------------------------------------------------
package burlap.assignment4.util;

import java.awt.Color;
import java.awt.Graphics2D;
import java.awt.geom.Rectangle2D;

import burlap.oomdp.core.states.State;
import burlap.oomdp.visualizer.StaticPainter;


public class WallPainter implements StaticPainter {

    private int[][] map;

    public WallPainter(int[][] map){
        this.map = map;
    }

    @Override
    public void paint(Graphics2D g2, State s, float cWidth, float cHeight) {

        //walls will be filled in black
        g2.setColor(Color.BLACK);

        //set up floats for the width and height of our domain
        float fWidth = this.map.length;
        float fHeight = this.map[0].length;

        //determine the width of a single cell
        //on our canvas such that the whole map can be painted
        float width = cWidth / fWidth;
        float height = cHeight / fHeight;

        //pass through each cell of our map and if it's a wall, paint a
        //black rectangle of dimension width x height on our canvas
        for(int i = 0; i < this.map.length; i++){
            for(int j = 0; j < this.map[0].length; j++){

                //is there a wall here?
                if(this.map[i][j] == 1){

                    //left coordinate of cell on our canvas
                    float rx = i*width;

                    //top coordinate of cell on our canvas
                    //coordinate system adjustment because the java canvas
                    //origin is in the top left instead of the bottom left
                    float ry = cHeight - height - j*height;

                    //paint the rectangle
                    g2.fill(new Rectangle2D.Float(rx, ry, width, height));

                }

            }
        }

    }

}
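Note: the painters above are meant to be stacked into a render layer. A hedged sketch of the usual BURLAP wiring; the CLASSAGENT / CLASSLOCATION constants come from BasicGridWorld (not reproduced here), and userMap is the grid from the earlier sketch.

# Illustrative assembly of the painters above into a BURLAP visualizer.
from burlap.oomdp.visualizer import StateRenderLayer, Visualizer
from burlap.assignment4.util import WallPainter, AgentPainter, LocationPainter
from burlap.assignment4 import BasicGridWorld

rl = StateRenderLayer()
rl.addStaticPainter(WallPainter(userMap))
rl.addObjectClassPainter(BasicGridWorld.CLASSAGENT, AgentPainter(userMap))
rl.addObjectClassPainter(BasicGridWorld.CLASSLOCATION, LocationPainter(userMap))
v = Visualizer(rl)
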
--------------------------------------------------------------------------------
/Assignment4/src/burlap/behavior/learningrate/ConstantLR.java:
--------------------------------------------------------------------------------
package burlap.behavior.learningrate;

import burlap.oomdp.core.AbstractGroundedAction;
import burlap.oomdp.core.states.State;


/**
 * A class for specifying a constant learning rate that never changes.
 * @author James MacGlashan
 *
 */
public class ConstantLR implements LearningRate {

    public double learningRate = 0.1;

    /**
     * Constructs a constant learning rate of 0.1
     */
    public ConstantLR(){
        //do nothing
    }

    /**
     * Constructs a constant learning rate for the given value
     * @param learningRate the constant learning rate to use
     */
    public ConstantLR(Double learningRate){
        this.learningRate = learningRate;
    }

    @Override
    public double peekAtLearningRate(State s, AbstractGroundedAction ga) {
        return this.learningRate;
    }

    @Override
    public double pollLearningRate(int agentTime, State s, AbstractGroundedAction ga) {
        return this.learningRate;
    }

    @Override
    public void resetDecay() {
        //no change needed
    }

    @Override
    public double peekAtLearningRate(int featureId) {
        return this.learningRate;
    }

    @Override
    public double pollLearningRate(int agentTime, int featureId) {
        return this.learningRate;
    }

}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/behavior/policy/GreedyDeterministicQPolicy.java:
--------------------------------------------------------------------------------
package burlap.behavior.policy;

import java.util.List;

import javax.management.RuntimeErrorException;

import burlap.behavior.singleagent.MDPSolverInterface;
import burlap.behavior.valuefunction.QValue;
import burlap.behavior.valuefunction.QFunction;
import burlap.oomdp.core.AbstractGroundedAction;
import burlap.oomdp.core.states.State;
import burlap.oomdp.singleagent.GroundedAction;


/**
 * A greedy policy that breaks ties by choosing the first action with the maximum value. This class requires a QComputablePlanner
 * @author James MacGlashan
 *
 */
public class GreedyDeterministicQPolicy extends Policy implements SolverDerivedPolicy {

    protected QFunction qplanner;

    public GreedyDeterministicQPolicy() {
        qplanner = null;
    }

    /**
     * Initializes with a QComputablePlanner
     * @param qplanner the QComputablePlanner to use
     */
    public GreedyDeterministicQPolicy(QFunction qplanner){
        this.qplanner = qplanner;
    }

    @Override
    public void setSolver(MDPSolverInterface solver){

        if(!(solver instanceof QFunction)){
            throw new RuntimeErrorException(new Error("Planner is not a QComputablePlanner"));
        }

        this.qplanner = (QFunction) solver;
    }


    @Override
    public AbstractGroundedAction getAction(State s) {

        List<QValue> qValues = this.qplanner.getQs(s);
        double maxQV = Double.NEGATIVE_INFINITY;
        QValue maxQ = null;
        for(QValue q : qValues){
            if(q.q > maxQV){
                maxQV = q.q;
                maxQ = q;
            }
        }

        return ((GroundedAction)maxQ.a).translateParameters(maxQ.s, s);
    }

    @Override
    public List<ActionProb> getActionDistributionForState(State s) {
        return this.getDeterministicPolicy(s);
    }

    @Override
    public boolean isStochastic() {
        return false;
    }

    @Override
    public boolean isDefinedFor(State s) {
        return true; //can always find q-values with default value
    }

}
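Note: a hedged usage sketch for the policy above, pairing it with any solver that implements QFunction; here vi and s are the value-iteration planner and start state from the earlier sketch.

# Illustrative use of GreedyDeterministicQPolicy with a QFunction planner.
from burlap.behavior.policy import GreedyDeterministicQPolicy

vi.planFromState(s)                       # compute Q-values first
policy = GreedyDeterministicQPolicy(vi)   # ties break to the first max action
a = policy.getAction(s)
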
--------------------------------------------------------------------------------
/Assignment4/src/burlap/behavior/policy/SolverDerivedPolicy.java:
--------------------------------------------------------------------------------
package burlap.behavior.policy;

import burlap.behavior.singleagent.MDPSolverInterface;


/**
 * An interface for defining policies that refer to {@link burlap.behavior.singleagent.MDPSolverInterface}
 * objects to define the policy. For example, selecting actions based on the maximum Q-value that a solver computed.
 * @author James MacGlashan
 *
 */
public interface SolverDerivedPolicy {
    /**
     * Sets the valueFunction whose results affect this policy.
     * @param solver the solver from which this policy is derived
     */
    public void setSolver(MDPSolverInterface solver);
}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/behavior/singleagent/auxiliary/performance/ExperimentalEnvironment.java:
--------------------------------------------------------------------------------
package burlap.behavior.singleagent.auxiliary.performance;

/**
 * An interface to be used in conjunction with {@link burlap.oomdp.singleagent.environment.Environment} implementations
 * that can accept a message informing the environment that a new experiment for a {@link burlap.behavior.singleagent.learning.LearningAgent} has started.
 * This is useful when comparing multiple agents and the same initial state sequence is desired.
 * @author James MacGlashan.
 */
public interface ExperimentalEnvironment {

    /**
     * Tells this {@link burlap.oomdp.singleagent.environment.Environment} that an experiment with a new {@link burlap.behavior.singleagent.learning.LearningAgent}
     * has begun.
     */
    void startNewExperiment();
}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/behavior/singleagent/auxiliary/performance/PerformanceMetric.java:
--------------------------------------------------------------------------------
package burlap.behavior.singleagent.auxiliary.performance;

/**
 * Enumerator for the types of statistics that can be plotted by {@link PerformancePlotter}.
 * @author James MacGlashan
 *
 */
public enum PerformanceMetric {
    CUMULATIVEREWARDPERSTEP,
    CUMULTAIVEREWARDPEREPISODE,
    AVERAGEEPISODEREWARD,
    MEDIANEPISODEREWARD,
    CUMULATIVESTEPSPEREPISODE,
    STEPSPEREPISODE;
}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/behavior/singleagent/auxiliary/performance/TrialMode.java:
--------------------------------------------------------------------------------
package burlap.behavior.singleagent.auxiliary.performance;


/**
 * Enumerator for specifying what kinds of plots for each {@link PerformanceMetric} will be plotted by {@link PerformancePlotter}.
 * The MOSTRECENTTTRIALONLY mode will result in only the most recent trial's performance being displayed. TRIALAVERAGESONLY will
 * result in only plots for the trial averages to be shown. MOSTRECENTANDAVERAGE will result in both the most recent trial and the trial
 * average plots to be shown.
 * @author James MacGlashan
 *
 */
public enum TrialMode {
    MOSTRECENTTTRIALONLY,
    TRIALAVERAGESONLY,
    MOSTRECENTANDAVERAGE;

    /**
     * Returns true if the most recent trial plots will be plotted by this mode.
     * @return true if the most recent trial plots will be plotted by this mode; false otherwise.
     */
    public boolean mostRecentTrialEnabled(){
        return this == MOSTRECENTTTRIALONLY || this == MOSTRECENTANDAVERAGE;
    }


    /**
     * Returns true if the trial average plots will be plotted by this mode.
     * @return true if the trial average plots will be plotted by this mode; false otherwise.
     */
    public boolean averagesEnabled(){
        return this == TRIALAVERAGESONLY || this == MOSTRECENTANDAVERAGE;
    }
}
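Note: PerformanceMetric and TrialMode are consumed by LearningAlgorithmExperimenter (listed in the tree above). A hedged sketch of the standard BURLAP wiring; env and qlFactory (an Environment and a LearningAgentFactory) are assumed to be set up elsewhere, and the plot dimensions are illustrative.

# Illustrative wiring of the two enums above into an experimenter run.
from burlap.behavior.singleagent.auxiliary.performance import (
    LearningAlgorithmExperimenter, PerformanceMetric, TrialMode)

exp = LearningAlgorithmExperimenter(env, 10, 100, [qlFactory])
exp.setUpPlottingConfiguration(500, 250, 2, 1000,
    TrialMode.MOSTRECENTANDAVERAGE,
    PerformanceMetric.CUMULATIVESTEPSPEREPISODE,
    PerformanceMetric.AVERAGEEPISODEREWARD)
exp.startExperiment()
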
--------------------------------------------------------------------------------
/Assignment4/src/burlap/behavior/singleagent/auxiliary/valuefunctionvis/StatePolicyPainter.java:
--------------------------------------------------------------------------------
package burlap.behavior.singleagent.auxiliary.valuefunctionvis;

import java.awt.Graphics2D;

import burlap.behavior.policy.Policy;
import burlap.oomdp.core.states.State;

/**
 * An interface for painting a representation of the policy for a specific state onto a 2D Graphics context.
 * @author James MacGlashan
 *
 */
public interface StatePolicyPainter {

    /**
     * Paints a representation of the given policy for a specific state to a 2D graphics context.
     * @param g2 graphics context to which the object should be painted
     * @param s the state of the object to be painted
     * @param policy the policy that can be used on state s
     * @param cWidth width of the canvas size
     * @param cHeight height of the canvas size
     */
    public void paintStatePolicy(Graphics2D g2, State s, Policy policy, float cWidth, float cHeight);

}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/behavior/singleagent/auxiliary/valuefunctionvis/StateValuePainter.java:
--------------------------------------------------------------------------------
package burlap.behavior.singleagent.auxiliary.valuefunctionvis;

import java.awt.Graphics2D;

import burlap.oomdp.core.states.State;


/**
 * An abstract class for defining the interface and common methods to paint the representation of the value function for a specific state onto
 * a 2D graphics context.
 * @author James MacGlashan
 *
 */
public abstract class StateValuePainter {

    /**
     * Indicates whether this painter should scale its rendering of values to whatever it is told the minimum and maximum values are.
     */
    protected boolean shouldRescaleValues = true;


    /**
     * Paints the representation of a value function for a specific state.
     * @param g2 graphics context to which the object should be painted
     * @param s the state of the object to be painted
     * @param value the value function evaluation of state s
     * @param cWidth width of the canvas size
     * @param cHeight height of the canvas size
     */
    public abstract void paintStateValue(Graphics2D g2, State s, double value, float cWidth, float cHeight);

    /**
     * Used to tell this painter that it should render state values so that the minimum possible value is lowerValue and the maximum is upperValue.
     * @param lowerValue the minimum value of state values
     * @param upperValue the maximum value of state values
     */
    public abstract void rescale(double lowerValue, double upperValue);


    /**
     * Enabling value rescaling allows the painter to adjust to the minimum and maximum values passed to it.
     * @param rescale whether this painter should rescale to the minimum and maximum value of the value function.
     */
    public void useValueRescaling(boolean rescale){
        this.shouldRescaleValues = rescale;
    }

}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/behavior/singleagent/auxiliary/valuefunctionvis/StaticDomainPainter.java:
--------------------------------------------------------------------------------
package burlap.behavior.singleagent.auxiliary.valuefunctionvis;

import java.awt.Graphics2D;


/**
 * An interface for painting general domain information to a 2D graphics context.
 * @author James MacGlashan
 *
 */
public interface StaticDomainPainter {

    /**
     * Use to paint general domain information to a 2D graphics context.
     * @param g2 graphics context to which the static data should be painted
     * @param cWidth the width of the canvas
     * @param cHeight the height of the canvas
     */
    public void paint(Graphics2D g2, float cWidth, float cHeight);

}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/behavior/singleagent/auxiliary/valuefunctionvis/common/ActionGlyphPainter.java:
--------------------------------------------------------------------------------
package burlap.behavior.singleagent.auxiliary.valuefunctionvis.common;

import java.awt.Graphics2D;


/**
 * An interface for painting glyphs that correspond to actions.
 * @author James MacGlashan
 *
 */
public interface ActionGlyphPainter {
    /**
     * Called to paint a glyph in the rectangle defined by the top left origin (x,y) with the given width and height.
     * @param g2 the graphics context to paint to
     * @param x the left of the rectangle origin
     * @param y the top of the rectangle origin
     * @param width the width of the rectangle
     * @param height the height of the rectangle.
     */
    public void paintGlyph(Graphics2D g2, float x, float y, float width, float height);
}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/behavior/singleagent/auxiliary/valuefunctionvis/common/ColorBlend.java:
--------------------------------------------------------------------------------
package burlap.behavior.singleagent.auxiliary.valuefunctionvis.common;

import java.awt.Color;

/**
 * An interface for defining methods that return a color for a given double value.
 * @author James MacGlashan
 *
 */
public interface ColorBlend {

    /**
     * Returns a {@link java.awt.Color} for a given double value
     * @param v the input double value
     * @return a {@link java.awt.Color} for a given double value
     */
    public Color color(double v);

    /**
     * Tells this object the minimum value and the maximum value it can receive.
     * @param minV the minimum value
     * @param maxV the maximum value
     */
    public void rescale(double minV, double maxV);
}
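Note: ColorBlend's main implementation is LandmarkColorBlendInterpolation (listed in the tree above). A hedged sketch of the usual value-function visualization wiring from the BURLAP tutorials; states and valueFunction come from a planner run and are not defined here.

# Illustrative value-function visualization built on the ColorBlend interface.
from java.awt import Color
from burlap.behavior.singleagent.auxiliary.valuefunctionvis import ValueFunctionVisualizerGUI
from burlap.behavior.singleagent.auxiliary.valuefunctionvis.common import (
    LandmarkColorBlendInterpolation, StateValuePainter2D)

blend = LandmarkColorBlendInterpolation()
blend.addNextLandMark(0.0, Color.RED)    # low values render red
blend.addNextLandMark(1.0, Color.BLUE)   # high values render blue

svp = StateValuePainter2D(blend)
gui = ValueFunctionVisualizerGUI(states, svp, valueFunction)
gui.initGUI()
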
14 | * 15 | * @author James MacGlashan. 16 | */ 17 | public class DiffVFRF extends DifferentiableRF { 18 | 19 | protected RewardFunction objectiveRF; 20 | protected DifferentiableVInit.ParamedDiffVInit diffVInit; 21 | 22 | 23 | public DiffVFRF(RewardFunction objectiveRF, DifferentiableVInit.ParamedDiffVInit diffVinit){ 24 | this.objectiveRF = objectiveRF; 25 | this.diffVInit = diffVinit; 26 | 27 | this.dim = diffVinit.getParameterDimension(); 28 | this.parameters = diffVinit.getParameters(); 29 | } 30 | 31 | @Override 32 | public double[] getGradient(State s, GroundedAction ga, State sp) { 33 | return new double[this.dim]; 34 | } 35 | 36 | @Override 37 | protected DifferentiableRF copyHelper() { 38 | return null; 39 | } 40 | 41 | @Override 42 | public double reward(State s, GroundedAction a, State sprime) { 43 | return this.objectiveRF.reward(s, a, sprime); 44 | } 45 | 46 | 47 | @Override 48 | public void setParameters(double[] parameters) { 49 | super.setParameters(parameters); 50 | this.diffVInit.setParameters(parameters); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learnfromdemo/mlirl/differentiableplanners/diffvinit/LinearStateDiffVF.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learnfromdemo.mlirl.differentiableplanners.diffvinit; 2 | 3 | import burlap.behavior.singleagent.vfa.StateToFeatureVectorGenerator; 4 | import burlap.oomdp.core.AbstractGroundedAction; 5 | import burlap.oomdp.core.states.State; 6 | 7 | /** 8 | * A class for defining a (differentiable) linear function over state features for value function initialization. This class is useful 9 | * for learning the value function initialization for leaf nodes of a finite horizon valueFunction with {@link burlap.behavior.singleagent.learnfromdemo.mlirl.MLIRL}. 10 | * @author James MacGlashan. 11 | */ 12 | public class LinearStateDiffVF extends DifferentiableVInit.ParamedDiffVInit { 13 | 14 | 15 | /** 16 | * The state feature vector generator over which the linear function operates 17 | */ 18 | protected StateToFeatureVectorGenerator fvgen; 19 | 20 | 21 | /** 22 | * Initializes with the state feature vector generator over which the linear function is defined and the dimensionality of it. 23 | * @param fvgen the state feature vector generator over which the linear function is defined. 
24 | * @param dim the dimensionality of the feature vector/parameters 25 | */ 26 | public LinearStateDiffVF(StateToFeatureVectorGenerator fvgen, int dim){ 27 | this.dim = dim; 28 | this.parameters = new double[dim]; 29 | this.fvgen = fvgen; 30 | } 31 | 32 | @Override 33 | public double[] getVGradient(State s) { 34 | return this.fvgen.generateFeatureVectorFrom(s); 35 | } 36 | 37 | @Override 38 | public double[] getQGradient(State s, AbstractGroundedAction ga) { 39 | return this.fvgen.generateFeatureVectorFrom(s); 40 | } 41 | 42 | @Override 43 | public double value(State s) { 44 | 45 | double [] features = this.fvgen.generateFeatureVectorFrom(s); 46 | 47 | double sum = 0.; 48 | for(int i = 0; i < features.length; i++){ 49 | sum += features[i] * this.parameters[i]; 50 | } 51 | return sum; 52 | } 53 | 54 | @Override 55 | public double qValue(State s, AbstractGroundedAction a) { 56 | return this.value(s); 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learnfromdemo/mlirl/differentiableplanners/diffvinit/VanillaDiffVinit.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learnfromdemo.mlirl.differentiableplanners.diffvinit; 2 | 3 | import burlap.behavior.valuefunction.ValueFunctionInitialization; 4 | import burlap.behavior.singleagent.learnfromdemo.mlirl.support.DifferentiableRF; 5 | import burlap.oomdp.core.AbstractGroundedAction; 6 | import burlap.oomdp.core.states.State; 7 | 8 | /** 9 | * A class for the default condition when a value function initialization returns an unparameterized value 10 | * for each state, but must be differentiable 11 | * with respect to the reward function parameters for use with a differentiable finite horizon valueFunction. 12 | * @author James MacGlashan. 13 | */ 14 | public class VanillaDiffVinit implements DifferentiableVInit { 15 | 16 | 17 | /** 18 | * The source value function initialization. 19 | */ 20 | protected ValueFunctionInitialization vinit; 21 | 22 | /** 23 | * The differentiable reward function that defines the parameter space over which this value function 24 | * initialization must differentiate. 25 | */ 26 | protected DifferentiableRF rf; 27 | 28 | 29 | /** 30 | * Initializes. 
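 * Because the wrapped initialization is unparameterized, the gradient methods of this class return zero vectors whose
 * length is the reward function's parameter dimension.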
31 | * @param vinit The vanilla unparameterized value function initialization 32 | * @param rf the differentiable reward function that defines the total parameter space 33 | */ 34 | public VanillaDiffVinit(ValueFunctionInitialization vinit, DifferentiableRF rf) { 35 | this.vinit = vinit; 36 | this.rf = rf; 37 | } 38 | 39 | @Override 40 | public double[] getVGradient(State s) { 41 | return new double[rf.getParameterDimension()]; 42 | } 43 | 44 | @Override 45 | public double[] getQGradient(State s, AbstractGroundedAction ga) { 46 | return new double[rf.getParameterDimension()]; 47 | } 48 | 49 | @Override 50 | public double value(State s) { 51 | return this.vinit.value(s); 52 | } 53 | 54 | @Override 55 | public double qValue(State s, AbstractGroundedAction a) { 56 | return this.vinit.qValue(s, a); 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learnfromdemo/mlirl/support/QGradientPlanner.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learnfromdemo.mlirl.support; 2 | 3 | import burlap.behavior.valuefunction.QFunction; 4 | import burlap.oomdp.core.states.State; 5 | import burlap.oomdp.singleagent.GroundedAction; 6 | 7 | import java.util.List; 8 | 9 | /** 10 | * An interface for a planner that can produce Q-value gradients. 11 | * @author James MacGlashan. 12 | */ 13 | public interface QGradientPlanner extends QFunction { 14 | 15 | 16 | /** 17 | * Returns the list of Q-value gradients (returned as {@link burlap.behavior.singleagent.learnfromdemo.mlirl.support.QGradientTuple} objects) for each action permissible in the given state. 18 | * @param s the state for which Q-value gradients are to be returned. 19 | * @return the list of Q-value gradients for each action permissible in the given state. 20 | */ 21 | public List<QGradientTuple> getAllQGradients(State s); 22 | 23 | 24 | /** 25 | * Returns the Q-value gradient ({@link burlap.behavior.singleagent.learnfromdemo.mlirl.support.QGradientTuple}) for the given state and action. 26 | * @param s the state for which the Q-value gradient is to be returned 27 | * @param a the action for which the Q-value gradient is to be returned. 28 | * @return the Q-value gradient for the given state and action. 29 | */ 30 | public QGradientTuple getQGradient(State s, GroundedAction a); 31 | 32 | 33 | /** 34 | * Sets this planner's Boltzmann beta parameter used to compute gradients. As beta gets larger, the policy becomes more deterministic. 35 | * @param beta the value to which this planner's Boltzmann beta parameter will be set 36 | */ 37 | public void setBoltzmannBetaParameter(double beta); 38 | 39 | } 40 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learnfromdemo/mlirl/support/QGradientTuple.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learnfromdemo.mlirl.support; 2 | 3 | import burlap.oomdp.core.AbstractGroundedAction; 4 | import burlap.oomdp.core.states.State; 5 | 6 | /** 7 | * A tuple (triple) for storing the Q-gradient associated with a state and action. The gradient is stored in a double array. 8 | * @author James MacGlashan.
9 | */ 10 | public class QGradientTuple { 11 | 12 | /** 13 | * The state 14 | */ 15 | public State s; 16 | 17 | /** 18 | * The action 19 | */ 20 | public AbstractGroundedAction a; 21 | 22 | /** 23 | * The gradient for the state and action. 24 | */ 25 | public double [] gradient; 26 | 27 | 28 | /** 29 | * Initializes. 30 | * @param s the state 31 | * @param a the action 32 | * @param gradient the gradient for the state and action 33 | */ 34 | public QGradientTuple(State s, AbstractGroundedAction a, double [] gradient){ 35 | this.s = s; 36 | this.a = a; 37 | this.gradient = gradient; 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learning/LearningAgent.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learning; 2 | 3 | 4 | import burlap.behavior.singleagent.EpisodeAnalysis; 5 | import burlap.oomdp.singleagent.environment.Environment; 6 | 7 | /** 8 | * This is the standard interface for defining an agent that learns how to behave in the world through experience. There 9 | * are two methods that need to be implemented. {@link #runLearningEpisode(burlap.oomdp.singleagent.environment.Environment)} 10 | * and {@link #runLearningEpisode(burlap.oomdp.singleagent.environment.Environment, int)}. Implementing the former method 11 | * should have the agent interact with the provided {@link burlap.oomdp.singleagent.environment.Environment} 12 | * until the {@link burlap.oomdp.singleagent.environment.Environment} transitions to a terminal state. The 13 | * {@link #runLearningEpisode(burlap.oomdp.singleagent.environment.Environment, int)} should have the agent interact 14 | * with the {@link burlap.oomdp.singleagent.environment.Environment} until either a terminal state is reached or 15 | * the agent has taken maxSteps in the environment. Both methods should return an {@link burlap.behavior.singleagent.EpisodeAnalysis} 16 | * object that records the interactions. 17 | * 18 | * 19 | * @author James MacGlashan 20 | * 21 | */ 22 | public interface LearningAgent { 23 | 24 | 25 | EpisodeAnalysis runLearningEpisode(Environment env); 26 | 27 | EpisodeAnalysis runLearningEpisode(Environment env, int maxSteps); 28 | 29 | 30 | } 31 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learning/LearningAgentFactory.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learning; 2 | 3 | 4 | /** 5 | * A factory interface for generating learning agents. 6 | * @author James MacGlashan 7 | * 8 | */ 9 | public interface LearningAgentFactory { 10 | 11 | /** 12 | * Will return a name to identify the kind of agent that will be generated by this factory. This is useful for enabling the {@link burlap.behavior.singleagent.auxiliary.performance.LearningAlgorithmExperimenter} class 13 | * to label the results for different kinds of agents that are tested. 14 | * @return a name to identify the kind of agent that will be generated 15 | */ 16 | public String getAgentName(); 17 | 18 | /** 19 | * Generates a new LearningAgent object and returns it. 20 | * @return a LearningAgent object.
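 * For example, factories are often written as anonymous classes so that fresh agents can be generated for each
 * experiment trial (a sketch; domain and hashingFactory are assumed to be defined elsewhere, and the constructor
 * shown assumes BURLAP's QLearning taking domain, discount, hashing factory, initial Q-value, and learning rate):
 * <pre>
 * LearningAgentFactory qFactory = new LearningAgentFactory() {
 *     public String getAgentName() { return "Q-learning"; }
 *     public LearningAgent generateAgent() {
 *         return new QLearning(domain, 0.99, hashingFactory, 0., 0.1);
 *     }
 * };
 * </pre>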
21 | */ 22 | public LearningAgent generateAgent(); 23 | } 24 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learning/actorcritic/Actor.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learning.actorcritic; 2 | 3 | 4 | import burlap.behavior.policy.Policy; 5 | import burlap.oomdp.singleagent.Action; 6 | 7 | 8 | /** 9 | * This class provides the interface necessary for the actor portion of an Actor-Critic learning algorithm. Actors are almost entirely 10 | * identical to policies since they effectively specify how the agent should act; in fact, this abstract class extends the Policy 11 | * class. However, the extra important functionality that an actor must incorporate is the ability to adjust its policy 12 | * in response to some critique of its behavior. In this class, this functionality should be implemented in the 13 | * {@link #updateFromCritqique(CritiqueResult)} method. 14 | * 15 | * 16 | * 17 | * @author James MacGlashan 18 | * 19 | */ 20 | public abstract class Actor extends Policy { 21 | 22 | /** 23 | * Causes this object to update its behavior in response to a critique of its behavior. 24 | * @param critqiue the critique of the agent's behavior represented by a {@link CritiqueResult} object 25 | */ 26 | public abstract void updateFromCritqique(CritiqueResult critqiue); 27 | 28 | /** 29 | * This method allows the actor to utilize actions that are not part of the domain definition. 30 | * @param a an action not part of the domain definition that this actor should be able to use. 31 | */ 32 | public abstract void addNonDomainReferencedAction(Action a); 33 | 34 | 35 | /** 36 | * Used to reset any data that was created/modified during learning so that learning can begin anew. 37 | */ 38 | public abstract void resetData(); 39 | 40 | } 41 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learning/actorcritic/Critic.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learning.actorcritic; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.singleagent.Action; 5 | import burlap.oomdp.singleagent.GroundedAction; 6 | 7 | 8 | /** 9 | * This interface provides the methods necessary for implementing the critic part of an actor-critic learning algorithm. The critic 10 | * is responsible for observing behavior (state, action, state tuples) and returning a critique of that behavior. Typically, 11 | * Critic objects will need to take as input a reward function to judge this behavior. 12 | * 13 | * 14 | * @author James MacGlashan 15 | * 16 | */ 17 | public interface Critic { 18 | 19 | /** 20 | * This method allows the critic to critique actions that are not part of the domain definition. 21 | * @param a an action not part of the domain definition that this critic should be able to critique.
22 | */ 23 | public void addNonDomainReferencedAction(Action a); 24 | 25 | 26 | /** 27 | * This method is called whenever a new learning episode begins 28 | * @param s the initial state of the new learning episode 29 | */ 30 | public void initializeEpisode(State s); 31 | 32 | /** 33 | * This method is called whenever a learning episode terminates 34 | */ 35 | public void endEpisode(); 36 | 37 | 38 | /** 39 | * This method's implementation provides the critique for some specific instance of the behavior. 40 | * @param s an input state 41 | * @param ga an action taken in s 42 | * @param sprime the state the agent transitioned to after taking action ga in state s 43 | * @return the critique of this behavior. 44 | */ 45 | public CritiqueResult critiqueAndUpdate(State s, GroundedAction ga, State sprime); 46 | 47 | /** 48 | * Used to reset any data that was created/modified during learning so that learning can begin anew. 49 | */ 50 | public abstract void resetData(); 51 | 52 | } 53 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learning/actorcritic/CritiqueResult.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learning.actorcritic; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.singleagent.GroundedAction; 5 | 6 | 7 | /** 8 | * The CritiqueResult class stores the relevant information regarding a critique of behavior. Specifically, it contains 9 | * the value of the critique and the state-action-state tuple that is being critiqued. 10 | * 11 | * @author James MacGlashan 12 | * 13 | */ 14 | public class CritiqueResult { 15 | 16 | 17 | /** 18 | * The source state 19 | */ 20 | protected State s; 21 | 22 | /** 23 | * The action taken in state s 24 | */ 25 | protected GroundedAction a; 26 | 27 | /** 28 | * The state to which the agent transitioned when it took action a in state s. 29 | */ 30 | protected State sprime; 31 | 32 | /** 33 | * The critique of this behavior. 34 | */ 35 | protected double critique; 36 | 37 | 38 | /** 39 | * Initializes with a state-action-state behavior tuple and the value of the critique for this behavior. 40 | * @param s a source state 41 | * @param a the action taken in state s 42 | * @param sprime the state to which the agent transitioned when it took action a in state s 43 | * @param critique the critique of this behavior. 44 | */ 45 | public CritiqueResult(State s, GroundedAction a, State sprime, double critique) { 46 | this.s = s; 47 | this.a = a; 48 | this.sprime = sprime; 49 | this.critique = critique; 50 | } 51 | 52 | /** 53 | * Returns the source state of this behavior. 54 | * @return the source state of this behavior. 55 | */ 56 | public State getS() { 57 | return s; 58 | } 59 | 60 | 61 | /** 62 | * Returns the action of this behavior. 63 | * @return the action of this behavior. 64 | */ 65 | public GroundedAction getA() { 66 | return a; 67 | } 68 | 69 | 70 | /** 71 | * Returns the resulting state of this behavior. 72 | * @return the resulting state of this behavior. 73 | */ 74 | public State getSprime() { 75 | return sprime; 76 | } 77 | 78 | 79 | /** 80 | * Returns the critique of this behavior. 81 | * @return the critique of this behavior.
82 | */ 83 | public double getCritique() { 84 | return critique; 85 | } 86 | 87 | 88 | 89 | } 90 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learning/modellearning/ModelLearningPlanner.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learning.modellearning; 2 | 3 | import burlap.behavior.policy.Policy; 4 | import burlap.behavior.singleagent.planning.Planner; 5 | import burlap.oomdp.core.Domain; 6 | import burlap.oomdp.core.states.State; 7 | import burlap.oomdp.core.TerminalFunction; 8 | import burlap.oomdp.singleagent.RewardFunction; 9 | 10 | 11 | /** 12 | * Interface for defining planning algorithms that operate on iteratively learned models. Planning algorithms that operate on iteratively learned models 13 | * must support features for replanning when the model changes and returning the policy of the plan under the current model. 14 | * @author James MacGlashan 15 | * 16 | */ 17 | public interface ModelLearningPlanner extends Planner{ 18 | 19 | /** 20 | * This method is expected to be called at the beginning of any new learning episode. This may be useful for planning algorithms 21 | * that do not solve the policy for every state, since new episodes may start in states the planning algorithm 22 | * had not previously considered. 23 | * @param s the input state 24 | */ 25 | public void initializePlannerIn(State s); 26 | 27 | /** 28 | * Tells the planner that the model has changed and that it will need to replan accordingly 29 | * @param changedState the source state that caused a change in the model. 30 | */ 31 | public void modelChanged(State changedState); 32 | 33 | /** 34 | * Returns a policy encoding the planner's results. 35 | * @return a policy object 36 | */ 37 | public Policy modelPlannedPolicy(); 38 | 39 | 40 | } 41 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learning/modellearning/rmax/UnmodeledFavoredPolicy.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learning.modellearning.rmax; 2 | 3 | import burlap.behavior.policy.Policy; 4 | import burlap.behavior.singleagent.learning.modellearning.Model; 5 | import burlap.debugtools.RandomFactory; 6 | import burlap.oomdp.core.AbstractGroundedAction; 7 | import burlap.oomdp.core.states.State; 8 | import burlap.oomdp.singleagent.Action; 9 | 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | 13 | /** 14 | * @author James MacGlashan.
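 * A policy that favors actions whose effects the learned model has not yet captured: if the model reports any
 * unmodeled actions for the queried state, one of them is selected uniformly at random; otherwise the action of the
 * wrapped source policy is returned. This is the R-max style exploration behavior implemented below.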
15 | */ 16 | public class UnmodeledFavoredPolicy extends Policy{ 17 | 18 | protected Policy sourcePolicy; 19 | protected Model model; 20 | protected List<Action> allActions; 21 | 22 | 23 | public UnmodeledFavoredPolicy(Policy sourcePolicy, Model model, List<Action> actions){ 24 | this.sourcePolicy = sourcePolicy; 25 | this.model = model; 26 | this.allActions = actions; 27 | } 28 | 29 | @Override 30 | public AbstractGroundedAction getAction(State s) { 31 | 32 | List<AbstractGroundedAction> unmodeled = this.model.getUnmodeledActionsForState(s); 33 | 34 | if(unmodeled.size() > 0){ 35 | return unmodeled.get(RandomFactory.getMapped(0).nextInt(unmodeled.size())); 36 | } 37 | 38 | return this.sourcePolicy.getAction(s); 39 | } 40 | 41 | @Override 42 | public List<ActionProb> getActionDistributionForState(State s) { 43 | 44 | List<AbstractGroundedAction> unmodeled = this.model.getUnmodeledActionsForState(s); 45 | 46 | if(unmodeled.size() > 0){ 47 | List<ActionProb> aps = new ArrayList<ActionProb>(unmodeled.size()); 48 | double p = 1./(double)unmodeled.size(); 49 | for(AbstractGroundedAction ga : unmodeled){ 50 | aps.add(new ActionProb(ga, p)); 51 | } 52 | return aps; 53 | } 54 | 55 | return this.sourcePolicy.getActionDistributionForState(s); 56 | } 57 | 58 | @Override 59 | public boolean isStochastic() { 60 | return true; 61 | } 62 | 63 | @Override 64 | public boolean isDefinedFor(State s) { 65 | return true; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learning/tdmethods/QLearningStateNode.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learning.tdmethods; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import burlap.behavior.valuefunction.QValue; 7 | import burlap.oomdp.statehashing.HashableState; 8 | import burlap.oomdp.singleagent.GroundedAction; 9 | 10 | 11 | /** 12 | * This class is used to store the associated {@link burlap.behavior.valuefunction.QValue} objects for a given hashed state. 13 | * @author James MacGlashan 14 | * 15 | */ 16 | public class QLearningStateNode { 17 | 18 | /** 19 | * A hashed state entry for which Q-values will be stored. 20 | */ 21 | public HashableState s; 22 | 23 | /** 24 | * The Q-values for this object's state. 25 | */ 26 | public List<QValue> qEntry; 27 | 28 | 29 | /** 30 | * Creates a new object for the given hashed state. The list of {@link burlap.behavior.valuefunction.QValue} objects is initialized to be empty. 31 | * @param s the hashed state for which to associate Q-values 32 | */ 33 | public QLearningStateNode(HashableState s) { 34 | this.s = s; 35 | qEntry = new ArrayList<QValue>(); 36 | } 37 | 38 | 39 | /** 40 | * Adds a Q-value to this state with the given numeric Q-value.
41 | * @param a the action this Q-value is for 42 | * @param q the numeric Q-value 43 | */ 44 | public void addQValue(GroundedAction a, double q){ 45 | QValue qv = new QValue(s.s, a, q); 46 | qEntry.add(qv); 47 | } 48 | 49 | 50 | } 51 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/options/support/LocalSubgoalTF.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.options.support; 2 | 3 | import burlap.oomdp.auxiliary.stateconditiontest.StateConditionTest; 4 | import burlap.oomdp.core.states.State; 5 | import burlap.oomdp.core.TerminalFunction; 6 | 7 | 8 | /** 9 | * It is typical for options to be defined for following policies to subgoals and it is often useful 10 | * to use a planning or learning algorithm to define these policies, in which case a terminal 11 | * function for the option would need to be specified in order to learn or plan for its policy. This terminal function 12 | * defines a set of states in which an option is applicable and the subgoal states of the option. 13 | * The subgoal state and applicable states are specified using {@link burlap.oomdp.auxiliary.stateconditiontest.StateConditionTest} 14 | * objects. The agent will terminate in any subgoal state or any state that is not an applicable state. 15 | * @author James MacGlashan 16 | * 17 | */ 18 | public class LocalSubgoalTF implements TerminalFunction { 19 | 20 | 21 | /** 22 | * Defines the set of states in which the option is applicable 23 | */ 24 | protected StateConditionTest applicableStateTest; 25 | 26 | /** 27 | * Defines the set of subgoal states for the option 28 | */ 29 | protected StateConditionTest subgoalStateTest; 30 | 31 | 32 | 33 | /** 34 | * Initializes with a set of subgoal states. The option is assumed to be applicable everywhere. 35 | * @param subgoalStateTest the subgoal states. 36 | */ 37 | public LocalSubgoalTF(StateConditionTest subgoalStateTest) { 38 | this.applicableStateTest = null; 39 | this.subgoalStateTest = subgoalStateTest; 40 | } 41 | 42 | 43 | /** 44 | * Initializes with a set of states in which the option is applicable and the option's subgoal states. 45 | * @param applicableStateTest the states in which the option is applicable.
46 | * @param subgoalStateTest the subgoal states 47 | */ 48 | public LocalSubgoalTF(StateConditionTest applicableStateTest, StateConditionTest subgoalStateTest) { 49 | this.applicableStateTest = applicableStateTest; 50 | this.subgoalStateTest = subgoalStateTest; 51 | } 52 | 53 | @Override 54 | public boolean isTerminal(State s) { 55 | 56 | if(this.applicableStateTest != null){ 57 | if(!this.applicableStateTest.satisfies(s)){ 58 | return true; //terminate when reaching a state that is not an initiation state 59 | } 60 | } 61 | 62 | return this.subgoalStateTest.satisfies(s); 63 | 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/options/support/OptionEvaluatingRF.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.options.support; 2 | 3 | import burlap.behavior.singleagent.options.Option; 4 | import burlap.oomdp.core.states.State; 5 | import burlap.oomdp.singleagent.GroundedAction; 6 | import burlap.oomdp.singleagent.RewardFunction; 7 | 8 | 9 | /** 10 | * This class is a reward function that accepts a reward function for primitive actions and returns 11 | * that when the query action is a primitive. If the query action is an option, it 12 | * returns the cumulative reward from the option's last execution, using the assumption that any options that need evaluating 13 | * have been set to internally keep track of their reward after each successive application. It is also 14 | * assumed that those options are using the same reward function as the input primitive RF. 15 | * 16 | * This is useful for planners that would want to execute the option and evaluate the reward afterwards. 17 | * 18 | * @author James MacGlashan 19 | * 20 | */ 21 | public class OptionEvaluatingRF implements RewardFunction { 22 | 23 | /** 24 | * The source primitive action reward function for the MDP 25 | */ 26 | RewardFunction primitiveRF; 27 | 28 | 29 | /** 30 | * Initializes. 31 | * @param rf the source primitive action reward function for the MDP 32 | */ 33 | public OptionEvaluatingRF(RewardFunction rf){ 34 | this.primitiveRF = rf; 35 | } 36 | 37 | 38 | @Override 39 | public double reward(State s, GroundedAction a, State sprime) { 40 | 41 | if(a.action.isPrimitive()){ 42 | return primitiveRF.reward(s, a, sprime); 43 | } 44 | 45 | //otherwise return the cumulative reward from the last option execution 46 | //with the assumption that the last call to the option produced this SAS tuple 47 | Option o = (Option)a.action; 48 | return o.getLastCumulativeReward(); 49 | 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/planning/Planner.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.planning; 2 | 3 | import burlap.behavior.policy.Policy; 4 | import burlap.behavior.singleagent.MDPSolverInterface; 5 | import burlap.oomdp.core.states.State; 6 | 7 | /** 8 | * @author James MacGlashan. 9 | */ 10 | public interface Planner extends MDPSolverInterface{ 11 | 12 | /** 13 | * This method will cause the {@link burlap.behavior.singleagent.planning.Planner} to begin planning from the specified initial {@link burlap.oomdp.core.states.State}. 14 | * It will then return an appropriate {@link burlap.behavior.policy.Policy} object that captures the planning results.
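 * For example (a sketch; assumes BURLAP's ValueIteration planner and previously defined domain, reward function rf,
 * terminal function tf, and hashing factory):
 * <pre>
 * Planner planner = new ValueIteration(domain, rf, tf, 0.99, hashingFactory, 0.001, 100);
 * Policy p = planner.planFromState(initialState);
 * </pre>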
15 | * Note that typically you can use a variety of different {@link burlap.behavior.policy.Policy} objects 16 | * in conjunction with this {@link burlap.behavior.singleagent.planning.Planner} to get varying behavior and 17 | * the returned {@link burlap.behavior.policy.Policy} is not required to be used. 18 | * @param initialState the initial state of the planning problem 19 | * @return a {@link burlap.behavior.policy.Policy} that captures the planning results from input {@link burlap.oomdp.core.states.State}. 20 | */ 21 | Policy planFromState(State initialState); 22 | 23 | } 24 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/planning/deterministic/MultiStatePrePlanner.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.planning.deterministic; 2 | 3 | import java.util.Collection; 4 | 5 | import burlap.behavior.singleagent.planning.Planner; 6 | import burlap.oomdp.auxiliary.stateconditiontest.StateConditionTestIterable; 7 | import burlap.oomdp.core.states.State; 8 | 9 | 10 | /** 11 | * This is a helper class that is used to run a planner from multiple initial states to ensure 12 | * that an adequate plan/policy exists for each of them. It makes use of an iterable state 13 | * condition test, or a collection of state objects, to define the states from which planning 14 | * should be performed. 15 | * @author James MacGlashan 16 | * 17 | */ 18 | public class MultiStatePrePlanner { 19 | 20 | /** 21 | * Runs a planning algorithm from multiple initial states to ensure that an adequate plan/policy exists for each of the states. 22 | * @param planner the planner to be used. 23 | * @param initialStates a {@link burlap.oomdp.auxiliary.stateconditiontest.StateConditionTestIterable} object that will iterate over the initial states from which to plan. 24 | */ 25 | public static void runPlannerForAllInitStates(Planner planner, StateConditionTestIterable initialStates){ 26 | for(State s : initialStates){ 27 | planner.planFromState(s); 28 | } 29 | } 30 | 31 | 32 | /** 33 | * Runs a planning algorithm from multiple initial states to ensure that an adequate plan/policy exists for each of the states. 34 | * @param planner the planner to be used. 35 | * @param initialStates a collection of states from which to plan. 36 | */ 37 | public static void runPlannerForAllInitStates(Planner planner, Collection<State> initialStates){ 38 | for(State s : initialStates){ 39 | planner.planFromState(s); 40 | } 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/planning/deterministic/SearchNode.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.planning.deterministic; 2 | 3 | import burlap.oomdp.statehashing.HashableState; 4 | import burlap.oomdp.singleagent.GroundedAction; 5 | 6 | 7 | /** 8 | * The SearchNode class is used for classic deterministic forward search planners. It represents a current state, a back pointer 9 | * to the search node from which this node's state was generated, and the action that was taken in the generating node's state to 10 | * produce this node's state. Once a goal state is found by the forward search planner, the back pointers can be traced to 11 | * find the plan that got to the goal.
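 * For example, once a goal node has been found, the plan can be recovered with a walk up the back pointers
 * (a sketch; goalNode is a hypothetical goal SearchNode):
 * <pre>
 * List<GroundedAction> plan = new LinkedList<GroundedAction>();
 * for(SearchNode n = goalNode; n.backPointer != null; n = n.backPointer){
 *     plan.add(0, n.generatingAction); // prepend to invert the backward walk
 * }
 * </pre>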
12 | * @author James MacGlashan 13 | * 14 | */ 15 | public class SearchNode { 16 | 17 | /** 18 | * The (hashed) state of this node 19 | */ 20 | public HashableState s; 21 | 22 | 23 | /** 24 | * The action that generated this state in the previous state. Null if this node is for the initial state. 25 | */ 26 | public GroundedAction generatingAction; 27 | 28 | /** 29 | * The search node for the previous state that generated this node's state. Null if this node is for the initial state. 30 | */ 31 | public SearchNode backPointer; 32 | 33 | 34 | 35 | /** 36 | * Constructs a SearchNode for the input state. The generating action and back pointer are set to null, which is valid if this 37 | * is the search node for an initial state. Otherwise, these fields should be filled in. 38 | * @param s the hashed input state this node will represent. 39 | */ 40 | public SearchNode(HashableState s){ 41 | this.s = s; 42 | this.generatingAction = null; 43 | this.backPointer = null; 44 | } 45 | 46 | 47 | /** 48 | * Constructs a SearchNode for the input state and sets the generating action and back pointer to the provided elements. 49 | * @param s the hashed input state this node will represent. 50 | * @param ga the action that was used to generate s 51 | * @param bp the search node that contains the previous state from which s was generated. 52 | */ 53 | public SearchNode(HashableState s, GroundedAction ga, SearchNode bp){ 54 | this.s = s; 55 | this.generatingAction = ga; 56 | this.backPointer = bp; 57 | } 58 | 59 | 60 | @Override 61 | public boolean equals(Object o){ 62 | SearchNode so = (SearchNode)o; 63 | return s.equals(so.s); 64 | } 65 | 66 | 67 | @Override 68 | public int hashCode(){ 69 | return s.hashCode(); 70 | } 71 | 72 | } 73 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/planning/deterministic/informed/Heuristic.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.planning.deterministic.informed; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | /** 6 | * An interface for defining heuristics. The heuristic function should return an estimate of the amount of *reward* that will be accumulated from that given 7 | * state. Since deterministic forward search planning algorithms typically expect costs, this is represented by simply using negative reward, where 8 | * values closer to zero are better. For instance, if it was known that a state was 3 steps away from the goal, an optimal heuristic (and the true cost 9 | * from the state) would return -3. 10 | * @author James MacGlashan 11 | * 12 | */ 13 | public interface Heuristic { 14 | 15 | /** 16 | * Returns the estimated amount of reward that will be received when following the optimal policy from the given state. 17 | * Since deterministic forward search planning algorithms typically expect costs, this is represented by simply using negative reward, where 18 | * values closer to zero are better. For instance, if it was known that state s was 3 steps away from the goal, an optimal heuristic (the true reward 19 | * from the state) would return -3. 20 | * @param s the state from which to estimate the future reward. 21 | * @return the estimated amount of reward that will be received when following the optimal policy from s. 
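 * For example, in a grid world with unit-cost actions, the negative Manhattan distance to the goal is an admissible
 * heuristic (a sketch; ax, ay, gx, gy are hypothetical agent and goal coordinates read from s):
 * <pre>
 * return -(Math.abs(ax - gx) + Math.abs(ay - gy));
 * </pre>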
22 | */ 23 | public double h(State s); 24 | 25 | } 26 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/planning/deterministic/informed/NullHeuristic.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.planning.deterministic.informed; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | /** 6 | * A {@link Heuristic} implementation that always returns 0. This is always admissible 7 | * and effectively causes planners like A* to perform Uniform Cost Search. 8 | * @author James MacGlashan 9 | * 10 | */ 11 | public class NullHeuristic implements Heuristic { 12 | 13 | @Override 14 | public double h(State s) { 15 | return 0; 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/planning/stochastic/HashedTransitionProbability.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.planning.stochastic; 2 | 3 | import burlap.oomdp.statehashing.HashableStateFactory; 4 | import burlap.oomdp.statehashing.HashableState; 5 | import burlap.oomdp.core.states.State; 6 | import burlap.oomdp.core.TransitionProbability; 7 | 8 | /** 9 | * An analog to the {@link burlap.oomdp.core.TransitionProbability}, except it stores {@link burlap.oomdp.statehashing.HashableState} objects 10 | * instead of {@link burlap.oomdp.core.states.State} objects. 11 | * @author James MacGlashan 12 | * 13 | */ 14 | public class HashedTransitionProbability { 15 | 16 | public HashableState sh; 17 | public double p; 18 | 19 | 20 | /** 21 | * Initializes with a {@link burlap.oomdp.statehashing.HashableState} and probability for the transition 22 | * @param sh the hashed state that the agent transitions to 23 | * @param p the probability of the transition 24 | */ 25 | public HashedTransitionProbability(HashableState sh, double p){ 26 | this.sh = sh; 27 | this.p = p; 28 | } 29 | 30 | 31 | /** 32 | * Takes a {@link burlap.oomdp.core.states.State} object, hashes it, and sets the transition probability to the hashed state to p 33 | * @param s the state that the agent transitions to 34 | * @param p the probability of the transition 35 | * @param hashingFactory the hashing factory to use to hash the input state 36 | */ 37 | public HashedTransitionProbability(State s, double p, HashableStateFactory hashingFactory){ 38 | this.sh = hashingFactory.hashState(s); 39 | this.p = p; 40 | } 41 | 42 | 43 | /** 44 | * Takes a {@link burlap.oomdp.core.TransitionProbability} and hashes its state using the hashingFactory object 45 | * @param tp the {@link burlap.oomdp.core.TransitionProbability} to hash 46 | * @param hashingFactory the hashing factory to use. 
47 | */ 48 | public HashedTransitionProbability(TransitionProbability tp, HashableStateFactory hashingFactory){ 49 | this.sh = hashingFactory.hashState(tp.s); 50 | this.p = tp.p; 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/planning/vfa/fittedvi/SupervisedVFA.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.planning.vfa.fittedvi; 2 | 3 | import burlap.behavior.valuefunction.ValueFunction; 4 | import burlap.oomdp.core.states.State; 5 | 6 | import java.util.List; 7 | 8 | /** 9 | * An interface for learning value function approximation via a supervised learning algorithm. This interface 10 | * defines the method {@link #train} which takes as input a list of {@link burlap.behavior.singleagent.planning.vfa.fittedvi.SupervisedVFA.SupervisedVFAInstance} 11 | * objects, runs a regression algorithm, and returns the learned function, which is an interface of {@link burlap.behavior.valuefunction.ValueFunction}. 12 | *
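 * For example, a fitted value iteration style algorithm might regress onto Bellman backup targets at each iteration
 * (a sketch; sampleStates, bellmanBackup, and mySupervisedVFA are hypothetical names):
 * <pre>
 * List<SupervisedVFAInstance> data = new ArrayList<SupervisedVFAInstance>();
 * for(State s : sampleStates){
 *     data.add(new SupervisedVFAInstance(s, bellmanBackup(s)));
 * }
 * ValueFunction newV = mySupervisedVFA.train(data);
 * </pre>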

13 | * A {@link burlap.behavior.singleagent.planning.vfa.fittedvi.SupervisedVFA.SupervisedVFAInstance} is a pair consisting 14 | * of a {@link burlap.oomdp.core.states.State} and the target state value that is to be learned. 15 | * @author James MacGlashan. 16 | */ 17 | public interface SupervisedVFA { 18 | 19 | /** 20 | * Uses supervised learning (regression) to learn a value function approximation of the input training data. 21 | * @param trainingData the training data to fit. 22 | * @return a {@link burlap.behavior.valuefunction.ValueFunction} that fits the training data. 23 | */ 24 | public ValueFunction train(List<SupervisedVFAInstance> trainingData); 25 | 26 | 27 | /** 28 | * A pair for a state and its target value function value. 29 | */ 30 | public static class SupervisedVFAInstance{ 31 | 32 | /** 33 | * The state 34 | */ 35 | public State s; 36 | 37 | /** 38 | * The state's associated value 39 | */ 40 | public double v; 41 | 42 | 43 | /** 44 | * Initializes 45 | * @param s the state 46 | * @param v the state's associated value 47 | */ 48 | public SupervisedVFAInstance(State s, double v){ 49 | this.s = s; 50 | this.v = v; 51 | } 52 | 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/pomdp/BeliefPolicyAgent.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.pomdp; 2 | 3 | import burlap.behavior.policy.Policy; 4 | import burlap.oomdp.singleagent.GroundedAction; 5 | import burlap.oomdp.singleagent.environment.Environment; 6 | import burlap.oomdp.singleagent.pomdp.BeliefAgent; 7 | import burlap.oomdp.singleagent.pomdp.beliefstate.BeliefState; 8 | import burlap.oomdp.singleagent.pomdp.PODomain; 9 | 10 | 11 | /** 12 | * A Belief agent that follows a specified policy. 13 | */ 14 | public class BeliefPolicyAgent extends BeliefAgent { 15 | 16 | /** 17 | * The policy that the agent will follow. 18 | */ 19 | protected Policy policy; 20 | 21 | 22 | /** 23 | * Initializes. 24 | * @param domain the POMDP domain 25 | * @param environment the environment with which the agent will interact 26 | * @param policy the policy the agent will follow. 27 | */ 28 | public BeliefPolicyAgent(PODomain domain, Environment environment, Policy policy){ 29 | super(domain, environment); 30 | this.policy = policy; 31 | } 32 | 33 | 34 | @Override 35 | public GroundedAction getAction(BeliefState curBelief) { 36 | 37 | GroundedAction ga = (GroundedAction)this.policy.getAction(curBelief); 38 | return ga; 39 | } 40 | 41 | 42 | 43 | } 44 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/shaping/ShapedRewardFunction.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.shaping; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.singleagent.GroundedAction; 5 | import burlap.oomdp.singleagent.RewardFunction; 6 | 7 | 8 | /** 9 | * This abstract class is used to define shaped reward functions. Shaped reward functions take the base 10 | * true objective reward function of a task and add some additional reward value to it that helps suggest 11 | * useful states. Subclasses of the ShapedRewardFunction must implement a method that specifies the 12 | * additive reward to the base reward.
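 * That is, the returned reward is R'(s,a,s') = R(s,a,s') + F(s,a,s'), where R is the base objective reward function
 * and F is the additive shaping reward implemented by the additiveReward method.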
13 | * 14 | * 15 | * @author James MacGlashan 16 | * 17 | */ 18 | public abstract class ShapedRewardFunction implements RewardFunction { 19 | 20 | 21 | /** 22 | * The base objective reward function for the task. 23 | */ 24 | protected RewardFunction baseRF; 25 | 26 | 27 | /** 28 | * Returns the reward value to add to the base objective reward function. 29 | * @param s the previous state 30 | * @param a the action taken in the previous state 31 | * @param sprime the successor state 32 | * @return the reward value to add to the base objective reward function. 33 | */ 34 | public abstract double additiveReward(State s, GroundedAction a, State sprime); 35 | 36 | 37 | /** 38 | * Initializes with the base objective task reward function. 39 | * @param baseRF the objective task reward function. 40 | */ 41 | public ShapedRewardFunction(RewardFunction baseRF) { 42 | this.baseRF = baseRF; 43 | } 44 | 45 | @Override 46 | public double reward(State s, GroundedAction a, State sprime) { 47 | return this.baseRF.reward(s, a, sprime) + this.additiveReward(s, a, sprime); 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/shaping/potential/PotentialFunction.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.shaping.potential; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | 6 | /** 7 | * Defines an interface for reward potential functions. This interface will be used by potential-based reward shaping. Note: potential functions 8 | * should always be defined to return 0 for terminal states. 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public interface PotentialFunction { 13 | 14 | /** 15 | * Returns the reward potential from the given state. 16 | * Note: the potential function should always return 0 for terminal states. 17 | * @param s the input state for which to get the reward potential. 18 | * @return the reward potential from the given state. 19 | */ 20 | public double potentialValue(State s); 21 | } 22 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/shaping/potential/PotentialShapedRF.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.shaping.potential; 2 | 3 | import burlap.behavior.singleagent.shaping.ShapedRewardFunction; 4 | import burlap.oomdp.core.states.State; 5 | import burlap.oomdp.singleagent.GroundedAction; 6 | import burlap.oomdp.singleagent.RewardFunction; 7 | 8 | 9 | /** 10 | * This class is used to implement Potential-based reward shaping [1] which is guaranteed to preserve the optimal policy. This class 11 | * requires a {@link PotentialFunction} and the discount being used by the MDP. The additive reward is defined as: 12 | * d * p(s') - p(s) 13 | * where d is the discount factor, s' is the most recent state, s is the previous state, and p(s) is the potential of state s. 14 | * 15 | * 16 | * 1. Ng, Andrew Y., Daishi Harada, and Stuart Russell. "Policy invariance under reward transformations: Theory and application to reward shaping." ICML. 1999. 17 | * 18 | * @author James MacGlashan 19 | * 20 | */ 21 | public class PotentialShapedRF extends ShapedRewardFunction { 22 | 23 | 24 | /** 25 | * The potential function that can be used to return the potential reward from input states.
26 | */ 27 | protected PotentialFunction potentialFunction; 28 | 29 | /** 30 | * The discount factor of the MDP (required for this shaping to preserve policy optimality) 31 | */ 32 | protected double discount; 33 | 34 | 35 | /** 36 | * Initializes the shaping with the objective reward function, the potential function, and the discount of the MDP. 37 | * @param baseRF the objective task reward function. 38 | * @param potentialFunction the potential function to use. 39 | * @param discount the discount factor of the MDP. 40 | */ 41 | public PotentialShapedRF(RewardFunction baseRF, PotentialFunction potentialFunction, double discount) { 42 | super(baseRF); 43 | 44 | this.potentialFunction = potentialFunction; 45 | this.discount = discount; 46 | 47 | } 48 | 49 | @Override 50 | public double additiveReward(State s, GroundedAction a, State sprime) { 51 | return (this.discount * this.potentialFunction.potentialValue(sprime)) - this.potentialFunction.potentialValue(s); 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/ActionApproximationResult.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa; 2 | 3 | import java.util.List; 4 | 5 | import burlap.oomdp.singleagent.GroundedAction; 6 | 7 | 8 | /** 9 | * A class that ties function approximation results to actions. This is useful for approximating Q-values. 10 | * @author James MacGlashan 11 | * 12 | */ 13 | public class ActionApproximationResult { 14 | 15 | /** 16 | * The grounded action this approximation was for 17 | */ 18 | public GroundedAction ga; 19 | 20 | /** 21 | * The actual approximation result 22 | */ 23 | public ApproximationResult approximationResult; 24 | 25 | 26 | /** 27 | * Initializes with a given action and approximation result 28 | * @param ga the grounded action that this approximation is for 29 | * @param approximationResult the approximation result 30 | */ 31 | public ActionApproximationResult(GroundedAction ga, ApproximationResult approximationResult) { 32 | this.ga = ga; 33 | this.approximationResult = approximationResult; 34 | } 35 | 36 | 37 | /** 38 | * Given a list of {@link ActionApproximationResult} objects, this method will return the corresponding {@link ActionApproximationResult} 39 | * for the given action. 40 | * @param approximations list of approximations 41 | * @param ga the grounded action for which the corresponding approximation result should be returned. 42 | * @return the corresponding {@link ActionApproximationResult} for the given action. Null if there is no corresponding approximation result. 43 | */ 44 | public static ActionApproximationResult extractApproximationForAction(List<ActionApproximationResult> approximations, GroundedAction ga){ 45 | for(ActionApproximationResult aar : approximations){ 46 | if(aar.ga.equals(ga)){ 47 | return aar; 48 | } 49 | } 50 | 51 | return null; 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/ApproximationResult.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa; 2 | 3 | import java.util.List; 4 | 5 | 6 | /** 7 | * A class associating a predicted value that was generated from a list of state features and the weights for those features.
Note that 8 | * the predicted value does *not* have to be a linear combination of the state features and the weights, so it may not be possible 9 | * to reconstruct the predicted value from the features and weights alone. 10 | * @author James MacGlashan 11 | * 12 | */ 13 | public class ApproximationResult { 14 | 15 | /** 16 | * The predicted value 17 | */ 18 | public double predictedValue; 19 | 20 | /** 21 | * The state features used to produce the predicted value. 22 | */ 23 | public List<StateFeature> stateFeatures; 24 | 25 | /** 26 | * The function weights used to produce the predicted value. 27 | */ 28 | public List<FunctionWeight> functionWeights; 29 | 30 | 31 | 32 | /** 33 | * Initializes 34 | * @param predictedValue the predicted value 35 | * @param stateFeatures the state features used to produce the predicted value. 36 | * @param functionWeights the function weights used to produce the predicted value. 37 | */ 38 | public ApproximationResult(double predictedValue, List<StateFeature> stateFeatures, List<FunctionWeight> functionWeights) { 39 | this.predictedValue = predictedValue; 40 | this.stateFeatures = stateFeatures; 41 | this.functionWeights = functionWeights; 42 | 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/FunctionWeight.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa; 2 | 3 | /** 4 | * This class holds the weight value for weights defined by a ValueFunctionApproximation class. It is expected that when a weight value is changed 5 | * on this object that the corresponding weight value in the ValueFunctionApproximation object is changed as well, which means the 6 | * ValueFunctionApproximation should store its weights with this data structure. Alternatively, the ValueFunctionApproximation class 7 | * can use a different data structure and subclass this FunctionWeight class so that when the setWeight method is called on it, it also 8 | * updates the corresponding data structure in the ValueFunctionApproximation object. 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public class FunctionWeight { 13 | 14 | /** 15 | * The int value that uniquely identifies this weight 16 | */ 17 | protected int weightId; 18 | 19 | /** 20 | * The value of this weight. 21 | */ 22 | protected double weightValue; 23 | 24 | 25 | /** 26 | * Initializes.
27 | * @param weightId the weight identifier 28 | * @param weightValue the value of the weight 29 | */ 30 | public FunctionWeight(int weightId, double weightValue) { 31 | this.weightId = weightId; 32 | this.weightValue = weightValue; 33 | } 34 | 35 | 36 | /** 37 | * Returns the weight identifier 38 | * @return the weight identifier 39 | */ 40 | public int weightId(){ 41 | return this.weightId; 42 | } 43 | 44 | 45 | /** 46 | * Returns the weight value 47 | * @return the weight value 48 | */ 49 | public double weightValue(){ 50 | return weightValue; 51 | } 52 | 53 | 54 | /** 55 | * Sets the weight 56 | * @param w the value to set the weight to 57 | */ 58 | public void setWeight(double w){ 59 | this.weightValue = w; 60 | } 61 | 62 | 63 | } 64 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/StateFeature.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa; 2 | 3 | 4 | /** 5 | * A class for associating a state feature identifier with a value of that state feature 6 | * @author James MacGlashan 7 | * 8 | */ 9 | public class StateFeature { 10 | 11 | /** 12 | * The state feature identifier 13 | */ 14 | public int id; 15 | 16 | /** 17 | * The value of the state feature 18 | */ 19 | public double value; 20 | 21 | 22 | /** 23 | * Initializes. 24 | * @param id the state feature identifier 25 | * @param value the value of the state feature 26 | */ 27 | public StateFeature(int id, double value) { 28 | this.id = id; 29 | this.value = value; 30 | } 31 | 32 | 33 | 34 | } 35 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/StateToFeatureVectorGenerator.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | /** 6 | * Many function approximation techniques require a fixed feature vector to work and in many cases, using abstract features from 7 | * the state attributes is useful. This interface provides a means to take a BURLAP OO-MDP state and transform it into 8 | * a feature vector represented as a double array so that these function approximation techniques may be used. 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public interface StateToFeatureVectorGenerator { 13 | 14 | /** 15 | * Returns a feature vector represented as a double array for a given input state. 16 | * @param s the input state to turn into a feature vector. 17 | * @return the feature vector represented as a double array. 18 | */ 19 | public double [] generateFeatureVectorFrom(State s); 20 | 21 | } 22 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/WeightGradient.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | 7 | /** 8 | * A data structure for defining the gradient of the weights for a vector. If the weight gradient is not stored for a given 9 | * feature, then zero will be returned.
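 * For example, for a linear approximator V(s) = sum_i w_i * phi_i(s), the partial derivative with respect to weight
 * w_i is simply the feature value phi_i(s), so a gradient can be filled directly from the state features
 * (a sketch; features is a hypothetical List<StateFeature>):
 * <pre>
 * WeightGradient grad = new WeightGradient(features.size());
 * for(StateFeature sf : features){
 *     grad.put(sf.id, sf.value);
 * }
 * </pre>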
10 | * @author James MacGlashan 11 | * 12 | */ 13 | public class WeightGradient { 14 | 15 | /** 16 | * A map from weight identifiers to their partial derivative 17 | */ 18 | Map<Integer, Double> gradient; 19 | 20 | 21 | /** 22 | * Initializes with the gradient unspecified for any weights. 23 | */ 24 | public WeightGradient() { 25 | gradient = new HashMap<Integer, Double>(); 26 | } 27 | 28 | 29 | /** 30 | * Initializes with the gradient unspecified, but reserves space for the given capacity 31 | * @param capacity how much space to reserve for storing the gradient; i.e., the number of weights over which the gradient will be defined 32 | */ 33 | public WeightGradient(int capacity) { 34 | gradient = new HashMap<Integer, Double>(capacity); 35 | } 36 | 37 | /** 38 | * Adds the partial derivative for a given weight 39 | * @param weightId the weight identifier for which the partial derivative is to be stored 40 | * @param partialDerivative the partial derivative value for the weight 41 | */ 42 | public void put(int weightId, double partialDerivative){ 43 | this.gradient.put(weightId, partialDerivative); 44 | } 45 | 46 | 47 | /** 48 | * Returns the partial derivative for the given weight 49 | * @param weightId the weight identifier 50 | * @return the partial derivative for the given weight; 0 if it is not stored. 51 | */ 52 | public double getPartialDerivative(int weightId){ 53 | Double stored = gradient.get(weightId); 54 | if(stored == null){ 55 | return 0.; 56 | } 57 | return stored; 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/cmac/AttributeTileSpecification.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa.cmac; 2 | 3 | import burlap.oomdp.core.Attribute; 4 | 5 | 6 | /** 7 | * Specifies how a single attribute of a specific object class is to be tiled.
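 * For example, with windowSize = 2.0 and bucketBoundary = 0.5, tile boundaries along this dimension fall at
 * ..., 0.5, 2.5, 4.5, ...; attribute values 0.7 and 2.1 then share the tile [0.5, 2.5), while 2.7 falls in the next
 * tile [2.5, 4.5). (An illustrative reading of the two fields below; the tiling computation itself lives elsewhere.)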
8 | * @author James MacGlashan 9 | * 10 | */ 11 | public class AttributeTileSpecification { 12 | 13 | 14 | /** 15 | * The object class name this tiling specification concerns 16 | */ 17 | public String className; 18 | 19 | /** 20 | * The attribute this tiling specification concerns 21 | */ 22 | public Attribute attribute; 23 | 24 | /** 25 | * How large of a window to use; i.e., the width of a tile along this attribute dimension 26 | */ 27 | public double windowSize; 28 | 29 | /** 30 | * The offset of this tile alignment; that is, where the first tiling boundary starts 31 | */ 32 | public double bucketBoundary; 33 | 34 | 35 | 36 | /** 37 | * Initializes 38 | * @param className The object class name this tiling specification concerns 39 | * @param attribute The attribute this tiling specification concerns 40 | * @param windowSize How large of a window to use; i.e., the width of a tile along this attribute dimension 41 | * @param bucketBoundary The offset of this tile alignment; that is, where the first tiling boundary starts 42 | */ 43 | public AttributeTileSpecification(String className, Attribute attribute, double windowSize, double bucketBoundary) { 44 | this.className = className; 45 | this.attribute = attribute; 46 | this.windowSize = windowSize; 47 | this.bucketBoundary = bucketBoundary; 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/common/FDFeatureVectorGenerator.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa.common; 2 | 3 | import burlap.behavior.singleagent.vfa.FeatureDatabase; 4 | import burlap.behavior.singleagent.vfa.StateFeature; 5 | import burlap.behavior.singleagent.vfa.StateToFeatureVectorGenerator; 6 | import burlap.oomdp.core.states.State; 7 | 8 | import java.util.List; 9 | 10 | /** 11 | * A wrapper for turning the features from a {@link burlap.behavior.singleagent.vfa.FeatureDatabase} into a double array. 12 | * Note that this wrapper is not advised for feature databases like CMACs/Tile coding, since those have very large numbers 13 | * of sparse features and this wrapper will create entries for all features, including the zero-valued ones. 14 | * @author James MacGlashan. 15 | */ 16 | public class FDFeatureVectorGenerator implements StateToFeatureVectorGenerator{ 17 | 18 | protected FeatureDatabase fd; 19 | 20 | 21 | /** 22 | * Initializes. 23 | * @param fd the feature database used for generating state features.
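* <p>A minimal usage sketch (assuming {@code fd} and {@code s} already exist): <pre>{@code StateToFeatureVectorGenerator gen = new FDFeatureVectorGenerator(fd); double[] fv = gen.generateFeatureVectorFrom(s); }</pre>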
24 | */ 25 | public FDFeatureVectorGenerator(FeatureDatabase fd){ 26 | this.fd = fd; 27 | } 28 | 29 | public FeatureDatabase getFd() { 30 | return fd; 31 | } 32 | 33 | public void setFd(FeatureDatabase fd) { 34 | this.fd = fd; 35 | } 36 | 37 | @Override 38 | public double[] generateFeatureVectorFrom(State s) { 39 | 40 | List<StateFeature> sfs = this.fd.getStateFeatures(s); 41 | double [] fv = new double[this.fd.numberOfFeatures()]; 42 | for(StateFeature sf : sfs){ 43 | fv[sf.id] = sf.value; 44 | } 45 | return fv; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/common/PFFeatureVectorGenerator.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa.common; 2 | 3 | import java.util.LinkedList; 4 | import java.util.List; 5 | 6 | import burlap.behavior.singleagent.vfa.StateToFeatureVectorGenerator; 7 | import burlap.oomdp.core.Domain; 8 | import burlap.oomdp.core.GroundedProp; 9 | import burlap.oomdp.core.PropositionalFunction; 10 | import burlap.oomdp.core.states.State; 11 | 12 | public class PFFeatureVectorGenerator implements StateToFeatureVectorGenerator { 13 | 14 | protected PropositionalFunction [] pfsToUse; 15 | 16 | 17 | /** 18 | * Initializes using all propositional functions that belong to the domain 19 | * @param domain the domain containing all the propositional functions to use 20 | */ 21 | public PFFeatureVectorGenerator(Domain domain){ 22 | 23 | this.pfsToUse = new PropositionalFunction[domain.getPropFunctions().size()]; 24 | int i = 0; 25 | for(PropositionalFunction pf : domain.getPropFunctions()){ 26 | this.pfsToUse[i] = pf; 27 | i++; 28 | } 29 | 30 | } 31 | 32 | /** 33 | * Initializes using the list of given propositional functions. 34 | * @param pfs the propositional functions to use. 35 | */ 36 | public PFFeatureVectorGenerator(List<PropositionalFunction> pfs){ 37 | this.pfsToUse = new PropositionalFunction[pfs.size()]; 38 | this.pfsToUse = pfs.toArray(this.pfsToUse); 39 | } 40 | 41 | 42 | /** 43 | * Initializes using the array of given propositional functions. 44 | * @param pfs the propositional functions to use. 45 | */ 46 | public PFFeatureVectorGenerator(PropositionalFunction [] pfs){ 47 | this.pfsToUse = pfs.clone(); 48 | } 49 | 50 | 51 | @Override 52 | public double[] generateFeatureVectorFrom(State s) { 53 | 54 | List<Double> featureValueList = new LinkedList<Double>(); 55 | for(PropositionalFunction pf : this.pfsToUse){ 56 | //List<GroundedProp> gps = s.getAllGroundedPropsFor(pf); 57 | List<GroundedProp> gps = pf.getAllGroundedPropsForState(s); 58 | for(GroundedProp gp : gps){ 59 | if(gp.isTrue(s)){ 60 | featureValueList.add(1.); 61 | } 62 | else{ 63 | featureValueList.add(0.); 64 | } 65 | } 66 | } 67 | 68 | double [] fv = new double[featureValueList.size()]; 69 | int i = 0; 70 | for(double f : featureValueList){ 71 | fv[i] = f; 72 | i++; 73 | } 74 | 75 | return fv; 76 | } 77 | 78 | } 79 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/rbf/DistanceMetric.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa.rbf; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | /** 6 | * An interface for defining distance metrics between OO-MDP {@link burlap.oomdp.core.states.State} objects.
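* <p>Implementations only need to supply the {@code distance} method; for example, a trivial placeholder metric could be written as follows (a sketch; see the EuclideanDistance class in the metrics subpackage for a real implementation): <pre>{@code DistanceMetric zero = new DistanceMetric() { public double distance(State s0, State s1) { return 0.; } }; }</pre>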
7 | * @author Anubhav Malhotra and Daniel Fernandez and Spandan Dutta 8 | * 9 | */ 10 | public interface DistanceMetric { 11 | public double distance(State s0, State s1); 12 | } 13 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/rbf/FVDistanceMetric.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa.rbf; 2 | 3 | /** 4 | * An interface for defining the distance between two states that are represented with double arrays. 5 | * @author James MacGlashan. 6 | */ 7 | public interface FVDistanceMetric { 8 | 9 | /** 10 | * Returns the distance between state s0 and state s1. 11 | * @param s0 a state represented with a double array 12 | * @param s1 a state represented with a double array 13 | * @return the distance between s0 and s1. 14 | */ 15 | public double distance(double [] s0, double [] s1); 16 | } 17 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/rbf/FVRBF.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa.rbf; 2 | 3 | /** 4 | * A class for defining radial basis functions for states represented with a double array. 5 | * @author James MacGlashan. 6 | */ 7 | public abstract class FVRBF { 8 | 9 | /** 10 | * The center state of the RBF unit. 11 | */ 12 | protected double [] centeredState; 13 | 14 | /** 15 | * The distance metric to compare query input states to the centeredState 16 | */ 17 | protected FVDistanceMetric metric; 18 | 19 | 20 | /** 21 | * Initializes. 22 | * @param centeredState the center state of the RBF unit. 23 | * @param metric the distance metric to compare query input states to the centeredState 24 | */ 25 | public FVRBF(double [] centeredState, FVDistanceMetric metric){ 26 | this.centeredState = centeredState; 27 | this.metric = metric; 28 | } 29 | 30 | /** 31 | * Returns the RBF response from its center state to the query input state. 32 | * @param input the query input state represented with a double array. 33 | * @return the double response value of this RBF unit to the query input state. 34 | */ 35 | public abstract double responseFor(double [] input); 36 | 37 | } 38 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/rbf/RBF.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa.rbf; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | /** 6 | * An abstract class for defining RBF units. An RBF unit is defined by a centered state and a distance metric that can be used 7 | * to measure the distance between the unit's center and an input state. RBF units return a response value to an input state that is a 8 | * function of the distance from the unit's centered state to the input state. Different RBF subclasses implement the response function differently. 9 | * The less distant an input state is from a unit's center state, the greater the response value. 10 | * @author Anubhav Malhotra and Daniel Fernandez and Spandan Dutta 11 | * 12 | */ 13 | public abstract class RBF { 14 | 15 | /** 16 | * The center state of this unit 17 | */ 18 | protected State centeredState; 19 | 20 | /** 21 | * The distance metric used to compare input states to this unit's center state. 
22 | */ 23 | protected DistanceMetric metric; 24 | 25 | 26 | /** 27 | * Initializes with a center state for this unit and a distance metric to compare input states to it. 28 | * @param centeredState the center state to use for this unit. 29 | * @param metric the distance metric to use to compare this unit's center state to input states. 30 | */ 31 | public RBF(State centeredState, DistanceMetric metric){ 32 | this.centeredState = centeredState; 33 | this.metric = metric; 34 | } 35 | 36 | 37 | /** 38 | * Returns a response value to an input state that is a function of the distance between the input and this unit's center state. 39 | * The less distant a query state is from this unit's center state, the greater the response value. 40 | * @param input the input state for which a response value is returned. 41 | * @return a response value to the given input state 42 | */ 43 | public abstract double responseFor(State input); 44 | } 45 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/rbf/metrics/EuclideanDistance.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa.rbf.metrics; 2 | 3 | import burlap.behavior.singleagent.vfa.StateToFeatureVectorGenerator; 4 | import burlap.behavior.singleagent.vfa.rbf.DistanceMetric; 5 | import burlap.oomdp.core.states.State; 6 | 7 | public class EuclideanDistance implements DistanceMetric { 8 | 9 | protected StateToFeatureVectorGenerator vectorGenerator; 10 | 11 | 12 | public EuclideanDistance(StateToFeatureVectorGenerator vectorGenerator){ 13 | this.vectorGenerator = vectorGenerator; 14 | } 15 | 16 | 17 | @Override 18 | public double distance(State s0, State s1) { 19 | 20 | double [] f0 = this.vectorGenerator.generateFeatureVectorFrom(s0); 21 | double [] f1 = this.vectorGenerator.generateFeatureVectorFrom(s1); 22 | 23 | if(f0.length != f1.length){ 24 | throw new RuntimeException("Cannot compute Euclidean distance; feature vectors for the two input states are not equal in size."); 25 | } 26 | 27 | double sum = 0.; 28 | for(int i = 0; i < f0.length; i++){ 29 | double diff = f0[i] - f1[i]; 30 | sum += diff*diff; 31 | } 32 | 33 | double dist = Math.sqrt(sum); 34 | 35 | return dist; 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/rbf/metrics/FVEuclideanDistance.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa.rbf.metrics; 2 | 3 | import burlap.behavior.singleagent.vfa.rbf.FVDistanceMetric; 4 | 5 | /** 6 | * A distance metric; returns sqrt( sum_i (x_i - y_i)^2 ) 7 | * @author James MacGlashan.
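* <p>For example, the distance between {0, 0} and {3, 4} is sqrt(3*3 + 4*4) = 5: <pre>{@code double d = new FVEuclideanDistance().distance(new double[]{0, 0}, new double[]{3, 4}); /* 5.0 */ }</pre>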
8 | */ 9 | public class FVEuclideanDistance implements FVDistanceMetric{ 10 | 11 | @Override 12 | public double distance(double[] s0, double[] s1) { 13 | 14 | if(s0.length != s1.length){ 15 | throw new RuntimeException("Cannot compute Euclidean distance; feature vectors for the two input states are not equal in size."); 16 | } 17 | 18 | double sum = 0.; 19 | for(int i = 0; i < s0.length; i++){ 20 | double diff = s0[i] - s1[i]; 21 | sum += diff*diff; 22 | } 23 | 24 | double dist = Math.sqrt(sum); 25 | 26 | return dist; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/agents/RandomSGAgent.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.agents; 2 | 3 | import java.util.List; 4 | import java.util.Map; 5 | 6 | import burlap.debugtools.RandomFactory; 7 | import burlap.oomdp.core.states.State; 8 | import burlap.oomdp.stochasticgames.SGAgent; 9 | import burlap.oomdp.stochasticgames.agentactions.GroundedSGAgentAction; 10 | import burlap.oomdp.stochasticgames.JointAction; 11 | import burlap.oomdp.stochasticgames.agentactions.SGAgentAction; 12 | 13 | 14 | /** 15 | * Stochastic games agent that chooses actions uniformly at random. 16 | * @author James MacGlashan 17 | * 18 | */ 19 | public class RandomSGAgent extends SGAgent { 20 | 21 | @Override 22 | public void gameStarting() { 23 | //do nothing 24 | 25 | } 26 | 27 | @Override 28 | public GroundedSGAgentAction getAction(State s) { 29 | 30 | List<GroundedSGAgentAction> gsas = SGAgentAction.getAllApplicableGroundedActionsFromActionList(s, this.worldAgentName, this.agentType.actions); 31 | 32 | int r = RandomFactory.getMapped(0).nextInt(gsas.size()); 33 | GroundedSGAgentAction gsa = gsas.get(r); 34 | 35 | return gsa; 36 | } 37 | 38 | @Override 39 | public void observeOutcome(State s, JointAction jointAction, 40 | Map<String, Double> jointReward, State sprime, boolean isTerminal) { 41 | //do nothing 42 | 43 | } 44 | 45 | @Override 46 | public void gameTerminated() { 47 | //do nothing 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/agents/madp/MADPPlanAgentFactory.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.agents.madp; 2 | 3 | import burlap.behavior.stochasticgames.PolicyFromJointPolicy; 4 | import burlap.behavior.stochasticgames.madynamicprogramming.MADynamicProgramming; 5 | import burlap.oomdp.stochasticgames.SGAgent; 6 | import burlap.oomdp.stochasticgames.AgentFactory; 7 | import burlap.oomdp.stochasticgames.SGDomain; 8 | 9 | 10 | /** 11 | * An agent factory for the {@link MultiAgentDPPlanningAgent} agent. Generated agents are always provided a copy of the provided 12 | * policy object to ensure that multiple agents from the same factory use a policy specific to them. 13 | * @author James MacGlashan 14 | * 15 | */ 16 | public class MADPPlanAgentFactory implements AgentFactory { 17 | 18 | protected SGDomain domain; 19 | protected MADPPlannerFactory plannerFactory; 20 | protected PolicyFromJointPolicy policy; 21 | 22 | 23 | /** 24 | * Initializes.
25 | * @param domain the domain for the agents 26 | * @param planner the valueFunction object that will be used by all generated agents 27 | * @param policy the policy that will be copied and supplied to all generated objects 28 | */ 29 | public MADPPlanAgentFactory(SGDomain domain, MADynamicProgramming planner, PolicyFromJointPolicy policy){ 30 | this.domain = domain; 31 | this.plannerFactory = new MADPPlannerFactory.ConstantMADPPlannerFactory(planner); 32 | this.policy = policy; 33 | } 34 | 35 | 36 | /** 37 | * Initializes 38 | * @param domain the domain for the agents 39 | * @param plannerFactory the valueFunction factory that will be used to generate a valueFunction for the agents 40 | * @param policy the policy that will be copied and supplied to all generated objects 41 | */ 42 | public MADPPlanAgentFactory(SGDomain domain, MADPPlannerFactory plannerFactory, PolicyFromJointPolicy policy){ 43 | this.domain = domain; 44 | this.plannerFactory = plannerFactory; 45 | this.policy = policy; 46 | } 47 | 48 | @Override 49 | public SGAgent generateAgent() { 50 | return new MultiAgentDPPlanningAgent(domain, this.plannerFactory.getPlannerInstance(), this.policy.copy()); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/agents/naiveq/history/ActionIdMap.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.agents.naiveq.history; 2 | 3 | import burlap.oomdp.stochasticgames.agentactions.GroundedSGAgentAction; 4 | 5 | /** 6 | * An interface that can turn a grounded action into an integer value 7 | * @author James MacGlashan 8 | * 9 | */ 10 | public interface ActionIdMap { 11 | 12 | /** 13 | * Returns an int value corresponding to the input action 14 | * @param gsa the input action 15 | * @return an int value corresponding to the input action 16 | */ 17 | public int getActionId(GroundedSGAgentAction gsa); 18 | 19 | /** 20 | * Returns an int value corresponding to the input action name and parameters 21 | * @param actionName the input action name 22 | * @param params the input action parameters 23 | * @return an int value corresponding to the input action name and parameters 24 | */ 25 | public int getActionId(String actionName, String [] params); 26 | 27 | /** 28 | * The maximum number of int values for actions 29 | * @return maximum number of int values for actions 30 | */ 31 | public int maxValue(); 32 | 33 | /** 34 | * Returns a corresponding GroundedSingleAction for a given int value 35 | * @param id the int value indicating which GroundedSingleAction to return. 
36 | * @return a corresponding GroundedSingleAction for a given int value 37 | */ 38 | public GroundedSGAgentAction getActionForId(int id); 39 | } 40 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/agents/naiveq/history/ParameterNaiveActionIdMap.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.agents.naiveq.history; 2 | 3 | import java.util.HashMap; 4 | import java.util.List; 5 | import java.util.Map; 6 | 7 | import burlap.oomdp.core.Domain; 8 | import burlap.oomdp.stochasticgames.agentactions.GroundedSGAgentAction; 9 | import burlap.oomdp.stochasticgames.agentactions.SGAgentAction; 10 | import burlap.oomdp.stochasticgames.agentactions.SimpleGroundedSGAgentAction; 11 | 12 | 13 | /** 14 | * An action to int map that takes the list of possible action names in a domain and assigns an int value to each. 15 | * This class does not manage object identifier independence. 16 | * @author James MacGlashan 17 | * 18 | */ 19 | public class ParameterNaiveActionIdMap implements ActionIdMap { 20 | 21 | /** 22 | * The map from action names to their corresponding int value 23 | */ 24 | protected Map<String, Integer> map; 25 | 26 | /** 27 | * The domain for which the action values should be created. 28 | */ 29 | protected Domain domain; 30 | 31 | 32 | /** 33 | * Initializes a mapping from the names of all actions in a given domain to an int value. 34 | * @param d the domain containing the actions. 35 | */ 36 | public ParameterNaiveActionIdMap(Domain d){ 37 | 38 | this.domain = d; 39 | List<SGAgentAction> actions = d.getAgentActions(); 40 | map = new HashMap<String, Integer>(actions.size()); 41 | for(int i = 0; i < actions.size(); i++){ 42 | map.put(actions.get(i).actionName, i); 43 | } 44 | } 45 | 46 | 47 | @Override 48 | public int getActionId(GroundedSGAgentAction gsa) { 49 | return map.get(gsa.action.actionName); 50 | } 51 | 52 | 53 | @Override 54 | public int getActionId(String actionName, String[] params) { 55 | return map.get(actionName); 56 | } 57 | 58 | @Override 59 | public int maxValue() { 60 | return map.size(); 61 | } 62 | 63 | 64 | @Override 65 | public GroundedSGAgentAction getActionForId(int id) { 66 | 67 | for(String key : map.keySet()){ 68 | int sid = map.get(key); 69 | if(sid == id){ 70 | //found it 71 | GroundedSGAgentAction gsa = new SimpleGroundedSGAgentAction("", domain.getSingleAction(key)); 72 | return gsa; 73 | } 74 | } 75 | 76 | return null; 77 | } 78 | 79 | } 80 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/agents/twoplayer/singlestage/equilibriumplayer/equilibriumsolvers/MinMax.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.agents.twoplayer.singlestage.equilibriumplayer.equilibriumsolvers; 2 | 3 | import burlap.behavior.stochasticgames.agents.twoplayer.singlestage.equilibriumplayer.BimatrixEquilibriumSolver; 4 | import burlap.behavior.stochasticgames.solvers.MinMaxSolver; 5 | 6 | 7 | /** 8 | * Finds the MinMax equilibrium using linear programming and returns the appropriate strategy. Note that 9 | * if the game is not zero sum, the resulting strategy will be computed as if it were, by setting the opponent's payoff 10 | * matrix to the negation of the querying player's.
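* <p>Concretely, only the querying player's matrix is consulted: {@code computeRowStrategy} passes {@code rowPayoff} to {@code MinMaxSolver.getRowPlayersStrategy} and ignores {@code colPayoff}, which is what yields the zero-sum treatment described above.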
11 | * @author James MacGlashan 12 | * 13 | */ 14 | public class MinMax extends BimatrixEquilibriumSolver { 15 | 16 | @Override 17 | public double[] computeRowStrategy(double[][] rowPayoff, 18 | double[][] colPayoff) { 19 | return MinMaxSolver.getRowPlayersStrategy(rowPayoff); 20 | } 21 | 22 | @Override 23 | public double[] computeColStrategy(double[][] rowPayoff, 24 | double[][] colPayoff) { 25 | return MinMaxSolver.getColPlayersStrategy(colPayoff); 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/agents/twoplayer/singlestage/equilibriumplayer/equilibriumsolvers/Utilitarian.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.agents.twoplayer.singlestage.equilibriumplayer.equilibriumsolvers; 2 | 3 | import burlap.behavior.stochasticgames.agents.twoplayer.singlestage.equilibriumplayer.BimatrixEquilibriumSolver; 4 | import burlap.behavior.stochasticgames.solvers.GeneralBimatrixSolverTools; 5 | 6 | 7 | /** 8 | * Finds the maximum utilitarian value joint action and returns a deterministic strategy respecting it. The utilitarian value 9 | * is the sum of the two players' payoffs for a cell. If there are multiple maximums, the first is always used. 10 | * @author James MacGlashan 11 | * 12 | */ 13 | public class Utilitarian extends BimatrixEquilibriumSolver { 14 | 15 | @Override 16 | public double[] computeRowStrategy(double[][] rowPayoff, 17 | double[][] colPayoff) { 18 | 19 | double max = Double.NEGATIVE_INFINITY; 20 | int maxInd = -1; 21 | for(int i = 0; i < rowPayoff.length; i++){ 22 | for(int j = 0; j < rowPayoff[i].length; j++){ 23 | double sumPay = rowPayoff[i][j] + colPayoff[i][j]; //utilitarian value: sum of both players' payoffs 24 | if(sumPay > max){ 25 | max = sumPay; 26 | maxInd = i; 27 | } 28 | } 29 | } 30 | 31 | double [] strat = GeneralBimatrixSolverTools.zero1Array(maxInd, rowPayoff.length); 32 | 33 | return strat; 34 | } 35 | 36 | @Override 37 | public double[] computeColStrategy(double[][] rowPayoff, 38 | double[][] colPayoff) { 39 | 40 | double max = Double.NEGATIVE_INFINITY; 41 | int maxInd = -1; 42 | for(int i = 0; i < rowPayoff.length; i++){ 43 | for(int j = 0; j < rowPayoff[i].length; j++){ 44 | double sumPay = rowPayoff[i][j] + colPayoff[i][j]; //utilitarian value: sum of both players' payoffs 45 | if(sumPay > max){ 46 | max = sumPay; 47 | maxInd = j; 48 | } 49 | } 50 | } 51 | 52 | double [] strat = GeneralBimatrixSolverTools.zero1Array(maxInd, rowPayoff[0].length); 53 | 54 | return strat; 55 | 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/auxiliary/performance/AgentFactoryAndType.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.auxiliary.performance; 2 | 3 | import burlap.oomdp.stochasticgames.AgentFactory; 4 | import burlap.oomdp.stochasticgames.SGAgentType; 5 | 6 | /** 7 | * A pair storing an agent factory and the agent type that the generated agent will join the world as.
8 | * @author James MacGlashan 9 | * 10 | */ 11 | public class AgentFactoryAndType { 12 | public AgentFactory agentFactory; 13 | public SGAgentType at; 14 | 15 | /** 16 | * Initializes. 17 | * @param agentFactory the agent factory 18 | * @param at the agent type the agent will join a world as 19 | */ 20 | public AgentFactoryAndType(AgentFactory agentFactory, SGAgentType at){ 21 | this.agentFactory = agentFactory; 22 | this.at = at; 23 | } 24 | 25 | } 26 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/madynamicprogramming/JAQValue.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.madynamicprogramming; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.stochasticgames.JointAction; 5 | 6 | 7 | /** 8 | * Class for storing Q-value information for a joint action. It is effectively a triple consisting of a state, joint action, and a double for the corresponding q-value. 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public class JAQValue { 13 | public State s; 14 | public JointAction ja; 15 | public double q; 16 | 17 | public JAQValue(State s, JointAction ja, double q){ 18 | this.s = s; 19 | this.ja = ja; 20 | this.q = q; 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/madynamicprogramming/MAQSourcePolicy.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.madynamicprogramming; 2 | 3 | import burlap.behavior.stochasticgames.JointPolicy; 4 | 5 | 6 | /** 7 | * An abstract extension of the JointPolicy class that adds a required interface of being able to set a {@link MultiAgentQSourceProvider}. This extension is useful 8 | * if the joint policy is derived from a set of multi-agent Q-values. 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public abstract class MAQSourcePolicy extends JointPolicy{ 13 | 14 | /** 15 | * Sets the {@link MultiAgentQSourceProvider} that will be used to define this object's joint policy. 16 | * @param provider the {@link MultiAgentQSourceProvider} that will be used to define this object's joint policy. 17 | */ 18 | public abstract void setQSourceProvider(MultiAgentQSourceProvider provider); 19 | } 20 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/madynamicprogramming/MultiAgentQSourceProvider.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.madynamicprogramming; 2 | 3 | 4 | /** 5 | * An interface for an object that can provide the Q-values stored for each agent in a problem. 6 | * @author James MacGlashan 7 | * 8 | */ 9 | public interface MultiAgentQSourceProvider { 10 | 11 | /** 12 | * Returns an object that can provide Q-value sources for each agent. 13 | * @return a {@link AgentQSourceMap} object.
14 | */ 15 | public AgentQSourceMap getQSources(); 16 | } 17 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/madynamicprogramming/SGBackupOperator.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.madynamicprogramming; 2 | 3 | import java.util.Map; 4 | 5 | import burlap.oomdp.core.states.State; 6 | import burlap.oomdp.stochasticgames.SGAgentType; 7 | 8 | 9 | /** 10 | * A stochastic games backup operator to be used in multi-agent Q-learning or value function planning. This operator 11 | * is meant to be applied to a next state; it takes the set of Q-values for that state for all agents and returns 12 | * the backed up value. The classic Bellman MDP approach would be to use a max operator, but in stochastic games, 13 | * different solution concepts require different operators. 14 | * @author James MacGlashan; adapted from code written by Esha Gosh John Meehan and Michalis Michaelidis 15 | * 16 | */ 17 | public interface SGBackupOperator { 18 | 19 | public double performBackup(State s, String forAgent, Map<String, SGAgentType> agentDefinitions, AgentQSourceMap qSourceMap); 20 | 21 | } 22 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/madynamicprogramming/backupOperators/MaxQ.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.madynamicprogramming.backupOperators; 2 | 3 | import java.util.List; 4 | import java.util.Map; 5 | 6 | import burlap.behavior.stochasticgames.madynamicprogramming.AgentQSourceMap; 7 | import burlap.behavior.stochasticgames.madynamicprogramming.QSourceForSingleAgent; 8 | import burlap.behavior.stochasticgames.madynamicprogramming.SGBackupOperator; 9 | import burlap.oomdp.core.states.State; 10 | import burlap.oomdp.stochasticgames.SGAgentType; 11 | import burlap.oomdp.stochasticgames.JointAction; 12 | 13 | 14 | /** 15 | * A classic MDP-style max backup operator in which an agent backs up its max Q-value in the state. 16 | * @author James MacGlashan 17 | * 18 | */ 19 | public class MaxQ implements SGBackupOperator { 20 | 21 | @Override 22 | public double performBackup(State s, String forAgent, Map<String, SGAgentType> agentDefinitions, AgentQSourceMap qSourceMap) { 23 | 24 | List<JointAction> allJAs = JointAction.getAllJointActions(s, agentDefinitions); 25 | 26 | double maxQ = Double.NEGATIVE_INFINITY; 27 | 28 | QSourceForSingleAgent myQs = qSourceMap.agentQSource(forAgent); 29 | 30 | for(JointAction ja : allJAs){ 31 | double q = myQs.getQValueFor(s, ja).q; 32 | maxQ = Math.max(q, maxQ); 33 | } 34 | 35 | 36 | return maxQ; 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/solvers/MinMaxSolver.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.solvers; 2 | 3 | import scpsolver.constraints.LinearBiggerThanEqualsConstraint; 4 | import scpsolver.lpsolver.LinearProgramSolver; 5 | import scpsolver.lpsolver.SolverFactory; 6 | import scpsolver.problems.LinearProgram; 7 | 8 | public class MinMaxSolver { 9 | 10 | 11 | 12 | 13 | /** 14 | * Computes the minmax strategy for the row player of the given payoff matrix. 15 | * The entries of the payoff matrix are assumed to be the payouts for the *row* player.
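* <p>For example, for matching pennies, where the row player's payoff matrix is {{1, -1}, {-1, 1}}, the minmax strategy is the uniform mixture (a standard game-theory result, stated here for illustration): <pre>{@code double[] strat = MinMaxSolver.getRowPlayersStrategy(new double[][]{{1, -1}, {-1, 1}}); /* approximately {0.5, 0.5} */ }</pre>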
16 | * @param payoffMatrix payoffs for the row player. 17 | * @return the strategy of the row player. 18 | */ 19 | public static double [] getRowPlayersStrategy(double [][] payoffMatrix){ 20 | double [][] t = GeneralBimatrixSolverTools.transposeMatrix(payoffMatrix); 21 | return getColPlayersStrategy(t); 22 | } 23 | 24 | 25 | 26 | /** 27 | * Computes the minmax strategy for the column player of the given payoff matrix. 28 | * The entries of the payoff matrix are assumed to be the payouts for the *column* player. 29 | * @param payoffMatrix payoffs for column player. 30 | * @return strategy of the column player. 31 | */ 32 | public static double [] getColPlayersStrategy(double [][] payoffMatrix){ 33 | 34 | //get positive matrix (finds the minimum value and adds -min + 1 to all elements) 35 | double [][] G = GeneralBimatrixSolverTools.getPositiveMatrix(payoffMatrix); 36 | 37 | LinearProgram lp = new LinearProgram(GeneralBimatrixSolverTools.constantDoubleArray(1., G[0].length)); 38 | 39 | int cCount = 0; 40 | 41 | //add payoff matrix constraints 42 | for(int i = 0; i < G.length; i++){ 43 | lp.addConstraint(new LinearBiggerThanEqualsConstraint(G[i], 1., "c" + cCount)); 44 | cCount++; 45 | } 46 | 47 | //add lower bound constraints 48 | for(int i = 0; i < G[0].length; i++){ 49 | lp.addConstraint(new LinearBiggerThanEqualsConstraint(GeneralBimatrixSolverTools.zero1Array(i, G[0].length), 0., "c" + cCount)); 50 | cCount++; 51 | } 52 | 53 | //solve it 54 | lp.setMinProblem(true); 55 | LinearProgramSolver solver = SolverFactory.newDefault(); 56 | double[] sol = solver.solve(lp); 57 | 58 | //convert LP solution into probability vector. 59 | double z = 0.; 60 | for(double d : sol){ 61 | z += d; 62 | } 63 | 64 | double v = 1/z; 65 | 66 | for(int i = 0; i < sol.length; i++){ 67 | sol[i] *= v; 68 | } 69 | 70 | 71 | 72 | return sol; 73 | } 74 | 75 | 76 | 77 | 78 | } 79 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/valuefunction/QValue.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.valuefunction; 2 | 3 | import burlap.oomdp.core.AbstractGroundedAction; 4 | import burlap.oomdp.core.states.State; 5 | 6 | 7 | /** 8 | * This class is used to store Q-values. 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public class QValue { 13 | 14 | /** 15 | * The state with which this Q-value is associated. 16 | */ 17 | public State s; 18 | 19 | /** 20 | * The action with which this Q-value is associated 21 | */ 22 | public AbstractGroundedAction a; 23 | 24 | /** 25 | * The numeric Q-value 26 | */ 27 | public double q; 28 | 29 | 30 | 31 | /** 32 | * Creates a Q-value for the given state and action pair with the specified q-value 33 | * @param s the state 34 | * @param a the action 35 | * @param q the initial Q-value 36 | */ 37 | public QValue(State s, AbstractGroundedAction a, double q){ 38 | this.s = s; 39 | this.a = a; 40 | this.q = q; 41 | } 42 | 43 | 44 | /** 45 | * Initializes this Q-value by copying the information from another Q-value. 46 | * @param src the source Q-value from which to copy.
47 | */ 48 | public QValue(QValue src){ 49 | this.s = src.s.copy(); 50 | this.a = src.a.copy(); 51 | this.q = src.q; 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/valuefunction/ValueFunction.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.valuefunction; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | /** 6 | * An interface for algorithms that can return the value for states. 7 | * @author James MacGlashan. 8 | */ 9 | public interface ValueFunction { 10 | 11 | /** 12 | * Returns the value function evaluation of the given state. If the value is not stored, then the default value 13 | * specified by the ValueFunctionInitialization object of this class is returned. 14 | * @param s the state to evaluate. 15 | * @return the value function evaluation of the given state. 16 | */ 17 | public double value(State s); 18 | } 19 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/debugtools/DebugFlags.java: -------------------------------------------------------------------------------- 1 | package burlap.debugtools; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | 7 | /** 8 | * A data structure for specifying debug flags that can be accessed and modified from any class 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public class DebugFlags { 13 | 14 | /** 15 | * The flags and their values that are set 16 | */ 17 | private static Map<Integer, Integer> flags; 18 | 19 | /** 20 | * Creates/sets a debug flag 21 | * @param id the flag identifier 22 | * @param v the value of the flag 23 | */ 24 | public static void setFlag(int id, int v){ 25 | if(flags == null){ 26 | flags = new HashMap<Integer, Integer>(); 27 | } 28 | flags.put(id, v); 29 | } 30 | 31 | 32 | /** 33 | * Returns the value for a given flag; 0 if the flag has never been created/set 34 | * @param id the flag identifier 35 | * @return the value of the flag; 0 if the flag has never been created/set 36 | */ 37 | public static int getFlag(int id){ 38 | if(flags == null){ 39 | flags = new HashMap<Integer, Integer>(); 40 | } 41 | Integer v = flags.get(id); 42 | if(v == null){ 43 | flags.put(id, 0); 44 | return 0; 45 | } 46 | return v; 47 | } 48 | 49 | 50 | } 51 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/domain/singleagent/blockdude/BlockDudeTF.java: -------------------------------------------------------------------------------- 1 | package burlap.domain.singleagent.blockdude; 2 | 3 | import burlap.oomdp.core.objects.ObjectInstance; 4 | import burlap.oomdp.core.states.State; 5 | import burlap.oomdp.core.TerminalFunction; 6 | 7 | import java.util.List; 8 | 9 | /** 10 | * A {@link burlap.oomdp.core.TerminalFunction} for {@link burlap.domain.singleagent.blockdude.BlockDude}. Returns true 11 | * when the agent is at an exit. If there are multiple exits, then returns true when the agent is at any exit. 12 | * @author James MacGlashan.
13 | */ 14 | public class BlockDudeTF implements TerminalFunction { 15 | 16 | @Override 17 | public boolean isTerminal(State s) { 18 | 19 | ObjectInstance agent = s.getFirstObjectOfClass(BlockDude.CLASSAGENT); 20 | List<ObjectInstance> exits = s.getObjectsOfClass(BlockDude.CLASSEXIT); 21 | 22 | int ax = agent.getIntValForAttribute(BlockDude.ATTX); 23 | int ay = agent.getIntValForAttribute(BlockDude.ATTY); 24 | 25 | for(ObjectInstance e : exits){ 26 | int ex = e.getIntValForAttribute(BlockDude.ATTX); 27 | if(ex == ax){ 28 | int ey = e.getIntValForAttribute(BlockDude.ATTY); 29 | if(ey == ay){ 30 | return true; 31 | } 32 | } 33 | } 34 | 35 | return false; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/domain/singleagent/frostbite/FrostbiteRF.java: -------------------------------------------------------------------------------- 1 | package burlap.domain.singleagent.frostbite; 2 | 3 | import burlap.oomdp.core.Domain; 4 | import burlap.oomdp.core.objects.ObjectInstance; 5 | import burlap.oomdp.core.PropositionalFunction; 6 | import burlap.oomdp.core.states.State; 7 | import burlap.oomdp.singleagent.GroundedAction; 8 | import burlap.oomdp.singleagent.RewardFunction; 9 | 10 | import java.util.List; 11 | 12 | /** 13 | * @author Phillipe Morere 14 | */ 15 | public class FrostbiteRF implements RewardFunction{ 16 | 17 | public double goalReward = 1000.0; 18 | public double lostReward = -1000.0; 19 | public double activatedPlatformReward = 10.0; 20 | public double defaultReward = -1.0; 21 | private PropositionalFunction onIce; 22 | private PropositionalFunction inWater; 23 | private PropositionalFunction iglooBuilt; 24 | 25 | public FrostbiteRF(Domain domain) { 26 | this.inWater = domain.getPropFunction(FrostbiteDomain.PFINWATER); 27 | this.onIce = domain.getPropFunction(FrostbiteDomain.PFONICE); 28 | this.iglooBuilt = domain.getPropFunction(FrostbiteDomain.PFIGLOOBUILT); 29 | } 30 | 31 | @Override 32 | public double reward(State s, GroundedAction a, State sprime) { 33 | if (inWater.somePFGroundingIsTrue(sprime)) 34 | return lostReward; 35 | if (iglooBuilt.somePFGroundingIsTrue(sprime) && onIce.somePFGroundingIsTrue(s)) 36 | return goalReward; 37 | if (numberPlatformsActive(s) != numberPlatformsActive(sprime)) 38 | return activatedPlatformReward; 39 | return defaultReward; 40 | } 41 | 42 | private int numberPlatformsActive(State s) { 43 | List<ObjectInstance> platforms = s.getObjectsOfClass(FrostbiteDomain.PLATFORMCLASS); 44 | int nb = 0; 45 | for (ObjectInstance platform : platforms) 46 | if (platform.getBooleanValForAttribute(FrostbiteDomain.ACTIVATEDATTNAME)) 47 | nb++; 48 | return nb; 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/domain/singleagent/frostbite/FrostbiteTF.java: -------------------------------------------------------------------------------- 1 | package burlap.domain.singleagent.frostbite; 2 | 3 | import burlap.oomdp.core.Domain; 4 | import burlap.oomdp.core.PropositionalFunction; 5 | import burlap.oomdp.core.states.State; 6 | import burlap.oomdp.core.TerminalFunction; 7 | 8 | /** 9 | * @author Phillipe Morere 10 | */ 11 | public class FrostbiteTF implements TerminalFunction{ 12 | 13 | private PropositionalFunction onIce; 14 | private PropositionalFunction inWater; 15 | private PropositionalFunction iglooBuilt; 16 | 17 | public FrostbiteTF(Domain domain) { 18 | this.inWater = domain.getPropFunction(FrostbiteDomain.PFINWATER); 19 | this.onIce =
domain.getPropFunction(FrostbiteDomain.PFONICE); 20 | this.iglooBuilt = domain.getPropFunction(FrostbiteDomain.PFIGLOOBUILT); 21 | } 22 | 23 | @Override 24 | public boolean isTerminal(State s) { 25 | if (inWater.somePFGroundingIsTrue(s)) 26 | return true; 27 | return iglooBuilt.somePFGroundingIsTrue(s) && onIce.somePFGroundingIsTrue(s); 28 | } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/domain/singleagent/graphdefined/GraphRF.java: -------------------------------------------------------------------------------- 1 | package burlap.domain.singleagent.graphdefined; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.singleagent.GroundedAction; 5 | import burlap.oomdp.singleagent.RewardFunction; 6 | 7 | /** 8 | * An abstract class for more easily defining {@link burlap.oomdp.singleagent.RewardFunction}s for {@link burlap.domain.singleagent.graphdefined.GraphDefinedDomain} 9 | * {@link burlap.oomdp.core.Domain}s. This class implements the standard {@link #reward(burlap.oomdp.core.states.State, burlap.oomdp.singleagent.GroundedAction, burlap.oomdp.core.states.State)} 10 | * method by converting the {@link burlap.oomdp.core.states.State} objects to their graph node integer representation and the {@link burlap.oomdp.singleagent.GroundedAction} to its 11 | * integer representation and then returning the value of {@link #reward(int, int, int)}, which is an abstract method 12 | * that the client must implement. 13 | * @author James MacGlashan. 14 | */ 15 | public abstract class GraphRF implements RewardFunction{ 16 | 17 | @Override 18 | public double reward(State s, GroundedAction a, State sprime) { 19 | int actionId = Integer.parseInt(a.toString().replaceAll(GraphDefinedDomain.BASEACTIONNAME, "")); 20 | return this.reward(GraphDefinedDomain.getNodeId(s), actionId, GraphDefinedDomain.getNodeId(sprime)); 21 | } 22 | 23 | /** 24 | * Returns the reward for taking action a in state node s and transition to state node sprime. 25 | * @param s the previous state node id 26 | * @param a the action id 27 | * @param sprime the next state node id 28 | * @return the received reward for the transition in the graph 29 | */ 30 | public abstract double reward(int s, int a, int sprime); 31 | } 32 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/domain/singleagent/graphdefined/GraphTF.java: -------------------------------------------------------------------------------- 1 | package burlap.domain.singleagent.graphdefined; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.core.TerminalFunction; 5 | 6 | import java.util.HashSet; 7 | import java.util.Set; 8 | 9 | /** 10 | * A {@link burlap.oomdp.core.TerminalFunction} for instances of {@link burlap.domain.singleagent.graphdefined.GraphDefinedDomain}. 11 | * Lets the user specify the integer node ids of the terminal states in the graph. 12 | * @author James MacGlashan. 
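* <p>For example, {@code new GraphTF(3, 5)} marks graph nodes 3 and 5 as terminal; further nodes can be added later with {@code addTerminals}.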
13 | */ 14 | public class GraphTF implements TerminalFunction { 15 | 16 | /** 17 | * The set of node ids in the graph that are terminal states 18 | */ 19 | protected Set<Integer> terminalStates; 20 | 21 | 22 | /** 23 | * Initializes setting all states with the provided integer node ids to be terminal states 24 | * @param nodes the state node ids that are terminal states 25 | */ 26 | public GraphTF(int...nodes){ 27 | this.terminalStates = new HashSet<Integer>(nodes.length); 28 | for(int n : nodes){ 29 | this.terminalStates.add(n); 30 | } 31 | } 32 | 33 | @Override 34 | public boolean isTerminal(State s) { 35 | 36 | int sid = GraphDefinedDomain.getNodeId(s); 37 | return this.terminalStates.contains(sid); 38 | } 39 | 40 | public Set<Integer> getTerminalStates() { 41 | return terminalStates; 42 | } 43 | 44 | public void setTerminalStates(Set<Integer> terminalStates) { 45 | this.terminalStates = terminalStates; 46 | } 47 | 48 | /** 49 | * Adds additional terminal states 50 | * @param nodes the additional state node ids that are to be marked as terminal states 51 | */ 52 | public void addTerminals(int...nodes){ 53 | for(int n : nodes){ 54 | this.terminalStates.add(n); 55 | } 56 | } 57 | 58 | /** 59 | * Removes nodes as being marked as terminal states 60 | * @param nodes the nodes to remove as terminal states 61 | */ 62 | public void removeTerminals(int...nodes){ 63 | for(int n : nodes){ 64 | this.terminalStates.remove(n); 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/domain/singleagent/lunarlander/LunarLanderTF.java: -------------------------------------------------------------------------------- 1 | package burlap.domain.singleagent.lunarlander; 2 | 3 | import burlap.oomdp.core.Domain; 4 | import burlap.oomdp.core.PropositionalFunction; 5 | import burlap.oomdp.core.states.State; 6 | import burlap.oomdp.core.TerminalFunction; 7 | 8 | /** 9 | * A {@link burlap.oomdp.core.TerminalFunction} for the {@link burlap.domain.singleagent.lunarlander.LunarLanderDomain}. 10 | * This class sets all states in which the lunar lander is on a landing pad to be terminal states. 11 | * @author James MacGlashan. 12 | */ 13 | public class LunarLanderTF implements TerminalFunction{ 14 | 15 | private PropositionalFunction onPad; 16 | 17 | /** 18 | * Initializes. 19 | * @param domain a {@link burlap.domain.singleagent.lunarlander.LunarLanderDomain} generated {@link burlap.oomdp.core.Domain} object. 20 | */ 21 | public LunarLanderTF(Domain domain){ 22 | this.onPad = domain.getPropFunction(LunarLanderDomain.PFONPAD); 23 | } 24 | 25 | 26 | @Override 27 | public boolean isTerminal(State s) { 28 | return this.onPad.somePFGroundingIsTrue(s); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/DomainGenerator.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary; 2 | 3 | import burlap.oomdp.core.Domain; 4 | 5 | /** 6 | * This interface provides a simple means for constructing domains, but it is not required to create domains. All domains that 7 | * exist in BURLAP adhere to this interface for constructing domains. 8 | * @author James MacGlashan 9 | */ 10 | public interface DomainGenerator { 11 | 12 | /** 13 | * Returns a newly instantiated Domain object 14 | * @return the newly instantiated Domain object.
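* <p>A typical call site looks like the following sketch, where {@code MyDomainGenerator} is a hypothetical implementation: <pre>{@code DomainGenerator gen = new MyDomainGenerator(); Domain domain = gen.generateDomain(); }</pre>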
15 | */ 16 | public Domain generateDomain(); 17 | 18 | 19 | } 20 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/StateAbstraction.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | 6 | /** 7 | * An interface for taking an input state and returning a simpler abstracted state representation. 8 | * @author James MacGlashan 9 | * 10 | */ 11 | public interface StateAbstraction { 12 | /** 13 | * Returns an abstracted version of state s. State s is not modified in this process. 14 | * @param s the input state to abstract 15 | * @return an abstracted version of state s 16 | */ 17 | public State abstraction(State s); 18 | } 19 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/StateGenerator.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | 6 | /** 7 | * An interface for generating State objects. This may be useful to define for learning in episodic tasks in which 8 | * the initial state is drawn from some distribution. 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public interface StateGenerator { 13 | /** 14 | * Returns a new state object. 15 | * @return a new state object. 16 | */ 17 | public State generateState(); 18 | } 19 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/StateMapping.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | /** 6 | * A state mapping interface that maps one state into another state. Can be useful if mapping one state from one domain into a different domain. 7 | * @author James MacGlashan 8 | * 9 | */ 10 | public interface StateMapping { 11 | State mapState(State s); 12 | } 13 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/common/ConstantStateGenerator.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary.common; 2 | 3 | import burlap.oomdp.auxiliary.StateGenerator; 4 | import burlap.oomdp.core.states.State; 5 | 6 | /** 7 | * This class takes a source state as input and returns copies of it for every call of generateState(). 8 | * @author James MacGlashan 9 | * 10 | */ 11 | public class ConstantStateGenerator implements StateGenerator { 12 | 13 | protected State src; 14 | 15 | /** 16 | * This constructor takes a source state as input and returns copies of it for every call of generateState().
17 | * @param src the source state of which to return copies 18 | */ 19 | public ConstantStateGenerator(State src){ 20 | this.src = src; 21 | } 22 | 23 | @Override 24 | public State generateState() { 25 | return src.copy(); 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/common/GoalConditionTF.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary.common; 2 | 3 | import burlap.oomdp.auxiliary.stateconditiontest.StateConditionTest; 4 | import burlap.oomdp.core.states.State; 5 | import burlap.oomdp.core.TerminalFunction; 6 | 7 | 8 | /** 9 | * Creates a terminal function that indicates terminal states are any states that satisfy a goal condition 10 | * where the goal condition is specified by a {@link burlap.oomdp.auxiliary.stateconditiontest.StateConditionTest} object. 11 | * No other states are set as terminal states. 12 | * @author James MacGlashan 13 | * 14 | */ 15 | public class GoalConditionTF implements TerminalFunction { 16 | 17 | /** 18 | * The state condition test that is used to indicate terminal goal states 19 | */ 20 | StateConditionTest goalCondition; 21 | 22 | public GoalConditionTF(StateConditionTest goalCondition) { 23 | this.goalCondition = goalCondition; 24 | } 25 | 26 | @Override 27 | public boolean isTerminal(State s) { 28 | return this.goalCondition.satisfies(s); 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/common/NullAbstraction.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary.common; 2 | 3 | import burlap.oomdp.auxiliary.StateAbstraction; 4 | import burlap.oomdp.core.states.State; 5 | 6 | 7 | /** 8 | * A StateAbstraction class that does nothing but return a copy of the input state. 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public class NullAbstraction implements StateAbstraction { 13 | 14 | @Override 15 | public State abstraction(State s) { 16 | return s.copy(); 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/common/NullAbstractionNoCopy.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary.common; 2 | 3 | import burlap.oomdp.auxiliary.StateAbstraction; 4 | import burlap.oomdp.core.states.State; 5 | 6 | /** 7 | * A StateAbstraction class that returns the input state without copying it. 8 | * @author James MacGlashan 9 | * 10 | */ 11 | public class NullAbstractionNoCopy implements StateAbstraction{ 12 | 13 | @Override 14 | public State abstraction(State s) { 15 | return s; 16 | } 17 | 18 | 19 | 20 | } 21 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/common/NullTermination.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary.common; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.core.TerminalFunction; 5 | 6 | 7 | /** 8 | * A terminal state function in which no state is considered a terminal state.
9 | * @author James MacGlashan 10 | * 11 | */ 12 | public class NullTermination implements TerminalFunction { 13 | 14 | 15 | @Override 16 | public boolean isTerminal(State s) { 17 | return false; 18 | } 19 | 20 | 21 | } 22 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/common/RandomStartStateGenerator.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary.common; 2 | 3 | import burlap.behavior.singleagent.auxiliary.StateReachability; 4 | import burlap.oomdp.auxiliary.StateGenerator; 5 | import burlap.oomdp.core.states.State; 6 | import burlap.oomdp.singleagent.SADomain; 7 | import burlap.oomdp.statehashing.HashableStateFactory; 8 | import burlap.oomdp.statehashing.SimpleHashableStateFactory; 9 | 10 | import java.util.List; 11 | import java.util.Random; 12 | 13 | 14 | /** 15 | * This class will return a random state from a set of states that are reachable from a source seed state. 16 | * 17 | * @author Stephen Brawner and Mark Ho. Documented by James MacGlashan 18 | * 19 | */ 20 | public class RandomStartStateGenerator implements StateGenerator { 21 | 22 | private List<State> reachableStates; 23 | private Random random; 24 | 25 | /** 26 | * Will discover the reachable states from which to randomly select. Reachable states are found using a {@link burlap.oomdp.statehashing.SimpleHashableStateFactory} with identifier dependence. 27 | * @param domain the domain from which states will be drawn. 28 | * @param seedState the seed state from which the reachable states will be found. 29 | */ 30 | public RandomStartStateGenerator(SADomain domain, State seedState) { 31 | HashableStateFactory hashFactory = new SimpleHashableStateFactory(false); 32 | this.reachableStates = StateReachability.getReachableStates(seedState, domain, hashFactory); 33 | this.random = new Random(); 34 | } 35 | 36 | 37 | /** 38 | * Will discover reachable states from which to randomly select. 39 | * @param domain the domain from which states will be drawn. 40 | * @param seedState the seed state from which the reachable states will be found. 41 | * @param hashFactory the hash factory to use for the reachability analysis. 42 | */ 43 | public RandomStartStateGenerator(SADomain domain, State seedState, HashableStateFactory hashFactory) { 44 | this.reachableStates = StateReachability.getReachableStates(seedState, domain, hashFactory); 45 | this.random = new Random(); 46 | } 47 | 48 | @Override 49 | public State generateState() { 50 | return this.reachableStates.get(this.random.nextInt(this.reachableStates.size())); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/stateconditiontest/SinglePFSCT.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary.stateconditiontest; 2 | 3 | import java.util.List; 4 | 5 | import burlap.oomdp.core.GroundedProp; 6 | import burlap.oomdp.core.PropositionalFunction; 7 | import burlap.oomdp.core.states.State; 8 | 9 | /** 10 | * A state condition class that returns true whenever any grounded version of a specified 11 | * propositional function is true in a state. Useful for specifying goal conditions.
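* <p>Usage sketch (assuming {@code pf} is a goal PropositionalFunction and {@code s} a state): <pre>{@code StateConditionTest goal = new SinglePFSCT(pf); boolean reached = goal.satisfies(s); }</pre>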
12 | * @author James MacGlashan 13 | * 14 | */ 15 | public class SinglePFSCT implements StateConditionTest { 16 | 17 | PropositionalFunction pf; 18 | 19 | /** 20 | * Initializes with the propositional function that is checked for state satisfaction 21 | * @param pf the propositional function to use for satisfaction tests 22 | */ 23 | public SinglePFSCT(PropositionalFunction pf) { 24 | this.pf = pf; 25 | } 26 | 27 | @Override 28 | public boolean satisfies(State s) { 29 | 30 | //List gps = s.getAllGroundedPropsFor(pf); 31 | List<GroundedProp> gps = this.pf.getAllGroundedPropsForState(s); 32 | 33 | for(GroundedProp gp : gps){ 34 | if(gp.isTrue(s)){ 35 | return true; 36 | } 37 | } 38 | 39 | return false; 40 | 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/stateconditiontest/StateConditionTest.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary.stateconditiontest; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | 6 | /** 7 | * An interface for defining classes that check for certain conditions in states. These are useful 8 | * for specifying binary goal conditions for classic search-based planners like A* 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public interface StateConditionTest { 13 | 14 | public boolean satisfies(State s); 15 | 16 | 17 | } 18 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/stateconditiontest/StateConditionTestIterable.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary.stateconditiontest; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | 6 | /** 7 | * An extension of the StateConditionTest that is iterable. 8 | * @author James MacGlashan 9 | * 10 | */ 11 | public interface StateConditionTestIterable extends StateConditionTest, Iterable<State> { 12 | /* 13 | * This method is used to set the state context to enumerate over states. 14 | * This is useful because typically a state test is independent of other state objects 15 | * and calling this method can be used to set the context of those variables over which to enumerate 16 | */ 17 | public void setStateContext(State s); 18 | } 19 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/stateconditiontest/TFGoalCondition.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary.stateconditiontest; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.core.TerminalFunction; 5 | 6 | 7 | /** 8 | * A simple StateConditionTest wrapper of TerminalFunction. Deterministic forward search planners search for goal states that are indicated 9 | * by StateConditionTest objects. If a TerminalFunction only terminates in goal states, this class can be used to wrap the terminal function 10 | * to indicate that goal states are any previously defined terminal states. 11 | * @author James MacGlashan 12 | * 13 | */ 14 | public class TFGoalCondition implements StateConditionTest { 15 | 16 | protected TerminalFunction tf; 17 | 18 | /** 19 | * Sets this class to return true on any states that are terminal states as indicated by the TerminalFunction. 20 | * @param tf the TerminalFunction that indicates goal states.
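 * <p>
 * For example (a sketch; {@code tf} here stands for any terminal function that only terminates in goal states):
 * <pre>{@code
 * StateConditionTest goal = new TFGoalCondition(tf);
 * // deterministic planners such as A* can now use goal as their goal condition
 * }</pre>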
21 | */ 22 | public TFGoalCondition(TerminalFunction tf){ 23 | this.tf = tf; 24 | } 25 | 26 | /** 27 | * Returns the {@link burlap.oomdp.core.TerminalFunction} used to specify the goal condition. 28 | * @return the {@link burlap.oomdp.core.TerminalFunction} used to specify the goal condition. 29 | */ 30 | public TerminalFunction getTf() { 31 | return tf; 32 | } 33 | 34 | /** 35 | * Sets the {@link burlap.oomdp.core.TerminalFunction} used to specify the goal condition. 36 | * @param tf the {@link burlap.oomdp.core.TerminalFunction} used to specify the goal condition. 37 | */ 38 | public void setTf(TerminalFunction tf) { 39 | this.tf = tf; 40 | } 41 | 42 | @Override 43 | public boolean satisfies(State s) { 44 | return tf.isTerminal(s); 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/core/AbstractGroundedAction.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.core; 2 | 3 | import burlap.oomdp.singleagent.GroundedAction; 4 | import burlap.oomdp.stochasticgames.JointAction; 5 | 6 | /** 7 | * This is an interface for grounded actions. A grounded action is a reference to an action definition along with the specific parameters with which the action 8 | * is to be applied. Implementations of this interface include the single-agent action grounding ({@link GroundedAction}), an action grounding for a specific agent 9 | * in a stochastic game ({@link burlap.oomdp.stochasticgames.agentactions.GroundedSGAgentAction}), and a joint action in a stochastic game ({@link JointAction}). 10 | * @author James MacGlashan 11 | * 12 | */ 13 | public interface AbstractGroundedAction { 14 | 15 | 16 | /** 17 | * Returns the action name for this grounded action. 18 | * @return the action name for this grounded action. 19 | */ 20 | String actionName(); 21 | 22 | 23 | /** 24 | * Returns a copy of this grounded action. 25 | * @return a copy of this grounded action. 26 | */ 27 | AbstractGroundedAction copy(); 28 | 29 | 30 | /** 31 | * Returns true if this action uses parameters 32 | * @return true if this action uses parameters; false otherwise 33 | */ 34 | boolean isParameterized(); 35 | 36 | 37 | /** 38 | * Initializes the parameter values of this {@link burlap.oomdp.core.AbstractGroundedAction} according 39 | * to the provided string representation of their values.
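 * <p>
 * A hypothetical sketch (the grounded action {@code ga} and the parameter values {@code "block0"} and {@code "block1"} are made up for illustration):
 * <pre>{@code
 * if(ga.isParameterized()){
 *     ga.initParamsWithStringRep(new String[]{"block0", "block1"});
 * }
 * }</pre>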
40 | * @param params an array in which each element is the string representation of one of this {@link burlap.oomdp.core.AbstractGroundedAction}'s values 41 | */ 42 | void initParamsWithStringRep(String [] params); 43 | 44 | 45 | /** 46 | * Returns an array of string representations of this {@link burlap.oomdp.core.AbstractGroundedAction}'s parameters 47 | * @return an array of string representations of this {@link burlap.oomdp.core.AbstractGroundedAction}'s parameters 48 | */ 49 | String [] getParametersAsString(); 50 | 51 | 52 | 53 | } 54 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/core/TerminalFunction.java: -------------------------------------------------------------------------------- 1 | /* Author: James MacGlashan 2 | * Description: 3 | * Abstract class for determining if a state in an OO-MDP domain is a terminal state 4 | * This kind of information is important for episode and goal-oriented MDPs 5 | */ 6 | 7 | 8 | package burlap.oomdp.core; 9 | 10 | 11 | import burlap.oomdp.core.states.State; 12 | 13 | /** 14 | * An interface for defining terminal states of an MDP. 15 | * @author James MacGlashan 16 | * 17 | */ 18 | public interface TerminalFunction { 19 | 20 | public boolean isTerminal(State s); 21 | 22 | } 23 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/core/TransitionProbability.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.core; 2 | 3 | 4 | import burlap.oomdp.core.states.State; 5 | 6 | /** 7 | * Represents the probability of transition to a given state. 8 | * @author James MacGlashan 9 | * 10 | */ 11 | public class TransitionProbability { 12 | 13 | /** 14 | * The state to which the agent may transition. 15 | */ 16 | public State s; 17 | 18 | /** 19 | * the probability of transitioning to state s 20 | */ 21 | public double p; 22 | 23 | public TransitionProbability(State s, double p){ 24 | this.s = s; 25 | this.p = p; 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/core/objects/OOMDPObjectInstance.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.core.objects; 2 | 3 | public abstract class OOMDPObjectInstance implements ObjectInstance{ 4 | 5 | public OOMDPObjectInstance() { 6 | } 7 | 8 | /** 9 | * Returns a string representation of this object including its name and attribute value assignments. 10 | * @return a string representation of this object including its name and attribute value assignments. 11 | */ 12 | public String getObjectDescription(){ 13 | return this.buildObjectDescription(new StringBuilder()).toString(); 14 | } 15 | 16 | /** 17 | * Sets an object's value based on its java.lang type.
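 * <p>
 * A sketch of the type-based dispatch (the attribute names are illustrative assumptions):
 * <pre>{@code
 * obj.setValue("x", 3);        // boxed to java.lang.Integer, routed to the int overload
 * obj.setValue("speed", 0.5);  // boxed to java.lang.Double, routed to the double overload
 * obj.setValue("name", "a0");  // routed to the String overload
 * }</pre>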
18 | */ 19 | public <T> ObjectInstance setValue(String attName, T value) { 20 | String valueClass = value.getClass().getName(); 21 | if(valueClass.equals("boolean") || valueClass.equals("java.lang.Boolean")){ 22 | Boolean b = (Boolean)value; 23 | return this.setValue(attName, (boolean)b); 24 | } 25 | else if(valueClass.equals("double") || valueClass.equals("java.lang.Double")){ 26 | Double d = (Double)value; 27 | return this.setValue(attName, (double)d); 28 | } 29 | else if(valueClass.equals("double[]") || valueClass.equals("[D")){ 30 | return this.setValue(attName, (double[])value); 31 | } 32 | else if(valueClass.equals("int") || valueClass.equals("java.lang.Integer")){ 33 | Integer i = (Integer)value; 34 | return this.setValue(attName, (int)i); 35 | } 36 | else if(valueClass.equals("int[]") || valueClass.equals("[I")){ 37 | return this.setValue(attName, (int[])value); 38 | } 39 | else if(valueClass.equals("java.lang.String")){ 40 | return this.setValue(attName, (String)value); 41 | } 42 | throw new RuntimeException("Unsupported value type " + valueClass); 43 | 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/core/states/ImmutableStateInterface.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.core.states; 2 | 3 | import java.util.List; 4 | 5 | import burlap.oomdp.core.objects.ImmutableObjectInstance; 6 | import burlap.oomdp.core.objects.ObjectInstance; 7 | 8 | import com.google.common.collect.ImmutableList; 9 | 10 | /** 11 | * A state that implements this interface implies that it itself is immutable and the ObjectInstances it uses are immutable. 12 | * An immutable state is one that doesn't allow you to make modifications to its underlying data 13 | * structure, by using the getObject, setValue paradigm. Any changes to a state will result in a copy that reflects 14 | * those changes. The original state will not be modified. 15 | * @author Stephen Brawner 16 | * 17 | */ 18 | public interface ImmutableStateInterface extends State, Iterable { 19 | ImmutableStateInterface replaceAndHash(ImmutableList objects, int code); 20 | ImmutableStateInterface replaceObject(ObjectInstance objectToReplace, ObjectInstance newObject); 21 | ImmutableStateInterface replaceAllObjects(List objectsToRemove, List objectsToAdd); 22 | ImmutableList getImmutableObjects(); 23 | boolean isHashed(); 24 | } 25 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/core/states/OOMDPState.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.core.states; 2 | 3 | import burlap.oomdp.core.objects.ObjectInstance; 4 | 5 | public abstract class OOMDPState implements State { 6 | 7 | 8 | @Override 9 | public String toString(){ 10 | return this.getCompleteStateDescription(); 11 | } 12 | 13 | /** 14 | * Renames the identifier for the object instance currently named originalName with the name newName. 15 | * @param originalName the original name of the object instance to be renamed in this state 16 | * @param newName the new name of the object instance 17 | */ 18 | public State renameObject(String originalName, String newName){ 19 | ObjectInstance o = this.getObject(originalName); 20 | return this.renameObject(o, newName); 21 | } 22 | 23 | /** 24 | * Sets an object's value.
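 * <p>
 * For example (a sketch; the object name {@code "agent0"} and attribute {@code "x"} are assumed):
 * <pre>{@code
 * state.setObjectsValue("agent0", "x", 5);
 * }</pre>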
25 | * @throws RuntimeException if the object doesn't exist, or the attribute name doesn't exist for the object. 26 | */ 27 | public <T> State setObjectsValue(String objectName, String attName, T value) { 28 | ObjectInstance obj = this.getObject(objectName); 29 | if (obj == null) { 30 | throw new RuntimeException("Object " + objectName + " does not exist in this state"); 31 | } 32 | obj.setValue(attName, value); 33 | return this; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/core/values/StringValue.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.core.values; 2 | 3 | import burlap.oomdp.core.Attribute; 4 | 5 | 6 | /** 7 | * This class provides a value for a string. 8 | * @author Greg Yauney (gyauney) 9 | * 10 | */ 11 | public class StringValue extends OOMDPValue implements Value { 12 | private static final String UNSET = ""; 13 | /** 14 | * The string value 15 | */ 16 | protected final String stringVal; 17 | 18 | 19 | /** 20 | * Initializes for a given attribute. The default value will be set to the empty string. 21 | * @param attribute the attribute for which this value is defined 22 | */ 23 | public StringValue(Attribute attribute) { 24 | super(attribute); 25 | this.stringVal = UNSET; 26 | } 27 | 28 | 29 | /** 30 | * Initializes from an existing value. 31 | * @param v the value to copy 32 | */ 33 | public StringValue(StringValue v) { 34 | super(v); 35 | this.stringVal = v.stringVal; 36 | } 37 | 38 | public StringValue(Attribute attribute, String stringVal) { 39 | super(attribute); 40 | this.stringVal = stringVal; 41 | } 42 | 43 | @Override 44 | public Value copy() { 45 | return new StringValue(this); 46 | } 47 | 48 | @Override 49 | public boolean valueHasBeenSet() { 50 | return true; 51 | } 52 | 53 | @Override 54 | public Value setValue(int v) { 55 | return new StringValue(this.attribute, Integer.toString(v)); 56 | } 57 | 58 | @Override 59 | public Value setValue(double v) { 60 | return new StringValue(this.attribute, Double.toString(v)); 61 | } 62 | 63 | @Override 64 | public Value setValue(String v) { 65 | return new StringValue(this.attribute, v); 66 | } 67 | 68 | @Override 69 | public StringBuilder buildStringVal(StringBuilder builder) { 70 | return builder.append(this.stringVal); 71 | } 72 | 73 | @Override 74 | public boolean equals(Object obj){ 75 | if (this == obj) { 76 | return true; 77 | } 78 | if(!(obj instanceof StringValue)){ 79 | return false; 80 | } 81 | 82 | StringValue o = (StringValue)obj; 83 | 84 | if(!o.attribute.equals(attribute)){ 85 | return false; 86 | } 87 | 88 | return this.stringVal.equals(o.stringVal); 89 | 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/core/values/UnsetValueException.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.core.values; 2 | 3 | /** 4 | * A class for indicating that an OO-MDP object instance value is unset.
5 | * @author James MacGlashan 6 | * 7 | */ 8 | public class UnsetValueException extends RuntimeException { 9 | 10 | private static final long serialVersionUID = 1L; 11 | 12 | public UnsetValueException(){ 13 | super("OO-MDP Object Instance Value is Unset"); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/legacy/StateParser.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.legacy; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | /** 6 | * This interface is used for converting states to parsable string representations and parsing those string representations back into states. 7 | * Although there is a domain-universal string parser implementation of this interface, it is very verbose, and file size and readability 8 | * may be improved by creating a domain-specific state parser. 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public interface StateParser { 13 | 14 | /** 15 | * Converts state s into a parsable string representation. 16 | * @param s the state to convert 17 | * @return a parsable string representation of state s. 18 | */ 19 | public String stateToString(State s); 20 | 21 | /** 22 | * Converts a string into a State object assuming the string representation was produced using this state parser. 23 | * @param str a string representation of a state 24 | * @return the state object that corresponds to the string representation. 25 | */ 26 | public State stringToState(String str); 27 | 28 | 29 | } 30 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/ActionObserver.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | public interface ActionObserver { 6 | public void actionEvent(State s, GroundedAction ga, State sp); 7 | } 8 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/RewardFunction.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | 6 | /** 7 | * Defines the reward function for a task. 8 | * @author James MacGlashan 9 | * 10 | */ 11 | public interface RewardFunction { 12 | 13 | /** 14 | * Returns the reward received when action a is executed in state s and the agent transitions to state sprime. 15 | * @param s the state in which the action was executed 16 | * @param a the action executed 17 | * @param sprime the state to which the agent transitioned 18 | * @return the reward received when action a is executed in state s and the agent transitions to state sprime.
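 * <p>
 * A minimal implementing sketch (a constant step cost, comparable to {@link burlap.oomdp.singleagent.common.UniformCostRF}):
 * <pre>{@code
 * RewardFunction rf = new RewardFunction() {
 *     @Override
 *     public double reward(State s, GroundedAction a, State sprime) {
 *         return -1.; // every transition costs 1
 *     }
 * };
 * }</pre>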
19 | */ 20 | public abstract double reward(State s, GroundedAction a, State sprime); 21 | 22 | } 23 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/common/NullAction.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.common; 2 | 3 | import burlap.oomdp.core.Domain; 4 | import burlap.oomdp.core.states.State; 5 | import burlap.oomdp.core.TransitionProbability; 6 | import burlap.oomdp.singleagent.Action; 7 | import burlap.oomdp.singleagent.FullActionModel; 8 | import burlap.oomdp.singleagent.GroundedAction; 9 | 10 | import java.util.List; 11 | 12 | 13 | /** 14 | * @author James 15 | * This action is an action that does nothing. 16 | * It may be useful for making references to actions that do not have domain associations 17 | * or if a domain needs a no-op action 18 | * 19 | */ 20 | public class NullAction extends SimpleAction.SimpleDeterministicAction implements FullActionModel { 21 | 22 | 23 | public NullAction(String name){ 24 | this.name = name; 25 | this.domain = null; 26 | } 27 | 28 | public NullAction(String name, Domain domain){ 29 | super(name, domain); 30 | } 31 | 32 | 33 | @Override 34 | protected State performActionHelper(State st, GroundedAction groundedAction) { 35 | return st; 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/common/NullRewardFunction.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.common; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.singleagent.GroundedAction; 5 | import burlap.oomdp.singleagent.RewardFunction; 6 | 7 | /** 8 | * This class defines a reward function that always returns 0 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public class NullRewardFunction implements RewardFunction { 13 | 14 | @Override 15 | public double reward(State s, GroundedAction a, State sprime) { 16 | return 0; 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/common/SimpleGroundedAction.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.common; 2 | 3 | import burlap.oomdp.singleagent.Action; 4 | import burlap.oomdp.singleagent.GroundedAction; 5 | 6 | /** 7 | * A {@link burlap.oomdp.singleagent.GroundedAction} implementation for actions that have no parameters. 8 | * @author James MacGlashan. 
9 | */ 10 | public class SimpleGroundedAction extends GroundedAction{ 11 | 12 | public SimpleGroundedAction(Action action) { 13 | super(action); 14 | } 15 | 16 | @Override 17 | public void initParamsWithStringRep(String[] params) { 18 | //do nothing 19 | } 20 | 21 | @Override 22 | public String[] getParametersAsString() { 23 | return new String[0]; 24 | } 25 | 26 | @Override 27 | public GroundedAction copy() { 28 | return new SimpleGroundedAction(this.action); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/common/SingleGoalPFRF.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.common; 2 | 3 | import burlap.oomdp.core.PropositionalFunction; 4 | import burlap.oomdp.core.states.State; 5 | import burlap.oomdp.singleagent.GroundedAction; 6 | import burlap.oomdp.singleagent.RewardFunction; 7 | 8 | 9 | /** 10 | * This class defines a reward function that returns a goal reward when any grounded form of a propositional 11 | * function is true in the resulting state and a default non-goal reward otherwise. 12 | * @author James MacGlashan 13 | * 14 | */ 15 | public class SingleGoalPFRF implements RewardFunction { 16 | 17 | PropositionalFunction pf; 18 | double goalReward; 19 | double nonGoalReward; 20 | 21 | 22 | 23 | /** 24 | * Initializes the reward function to return 1 when any grounded form of pf is true in the resulting 25 | * state. 26 | * @param pf the propositional function that must have a true grounded version for the goal reward to be returned. 27 | */ 28 | public SingleGoalPFRF(PropositionalFunction pf){ 29 | this.pf = pf; 30 | this.goalReward = 1.; 31 | this.nonGoalReward = 0.; 32 | } 33 | 34 | 35 | /** 36 | * Initializes the reward function to return the specified goal reward when any grounded form of pf is true in the resulting 37 | * state and the specified non-goal reward otherwise. 38 | * @param pf the propositional function that must have a true grounded version for the goal reward to be returned. 39 | * @param goalReward the goal reward value to be returned 40 | * @param nonGoalReward the non goal reward value to be returned. 41 | */ 42 | public SingleGoalPFRF(PropositionalFunction pf, double goalReward, double nonGoalReward){ 43 | this.pf = pf; 44 | this.goalReward = goalReward; 45 | this.nonGoalReward = nonGoalReward; 46 | } 47 | 48 | 49 | @Override 50 | public double reward(State s, GroundedAction a, State sprime) { 51 | 52 | if(this.pf.somePFGroundingIsTrue(sprime)){ 53 | return goalReward; 54 | } 55 | return nonGoalReward; 56 | 57 | 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/common/UniformCostRF.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.common; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.singleagent.GroundedAction; 5 | import burlap.oomdp.singleagent.RewardFunction; 6 | 7 | 8 | /** 9 | * Defines a reward function that always returns -1.
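 * <p>
 * Pairing this with a goal-indicating terminal function makes maximizing return equivalent to minimizing the number of steps, e.g. (a sketch; {@code goalCondition} is an assumed {@link burlap.oomdp.auxiliary.stateconditiontest.StateConditionTest}):
 * <pre>{@code
 * RewardFunction rf = new UniformCostRF();
 * TerminalFunction tf = new GoalConditionTF(goalCondition);
 * }</pre>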
10 | * @author James MacGlashan 11 | * 12 | */ 13 | public class UniformCostRF implements RewardFunction { 14 | 15 | 16 | public UniformCostRF(){ 17 | 18 | } 19 | 20 | @Override 21 | public double reward(State s, GroundedAction a, State sprime) { 22 | return -1; 23 | } 24 | 25 | } 26 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/environment/EnvironmentObserver.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.environment; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.singleagent.GroundedAction; 5 | 6 | /** 7 | * A class that is told of interactions in an environment. This is typically called from an {@link burlap.oomdp.singleagent.environment.EnvironmentServer} 8 | * which intercepts the environment interactions. 9 | * @author James MacGlashan. 10 | */ 11 | public interface EnvironmentObserver { 12 | 13 | /** 14 | * This method is called when an {@link burlap.oomdp.singleagent.environment.Environment} receives an action to execute, but before the 15 | * {@link burlap.oomdp.singleagent.environment.Environment} has completed execution. 16 | * @param o the current {@link burlap.oomdp.singleagent.environment.Environment} observation in which the action begins execution. 17 | * @param action the {@link burlap.oomdp.singleagent.GroundedAction} which will be executed in the {@link burlap.oomdp.singleagent.environment.Environment}. 18 | */ 19 | void observeEnvironmentActionInitiation(State o, GroundedAction action); 20 | 21 | /** 22 | * This method is called every time an {@link burlap.oomdp.singleagent.environment.Environment} is interacted with. 23 | * @param eo the resulting {@link burlap.oomdp.singleagent.environment.EnvironmentOutcome} 24 | */ 25 | void observeEnvironmentInteraction(EnvironmentOutcome eo); 26 | 27 | /** 28 | * This method is called every time an {@link burlap.oomdp.singleagent.environment.Environment} is reset (has the {@link Environment#resetEnvironment()} method called). 29 | * @param resetEnvironment the {@link burlap.oomdp.singleagent.environment.Environment} that was reset. 30 | */ 31 | void observeEnvironmentReset(Environment resetEnvironment); 32 | } 33 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/environment/EnvironmentOutcome.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.environment; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.singleagent.GroundedAction; 5 | 6 | /** 7 | * A class for specifying the outcome of executing an action in an {@link burlap.oomdp.singleagent.environment.Environment}. 8 | * The class consists of the previous environment observation (as a {@link burlap.oomdp.core.states.State}) in which the action was taken; 9 | * the action taken (as a {@link burlap.oomdp.singleagent.GroundedAction}); the next environment observation (also a {@link burlap.oomdp.core.states.State}) 10 | * following the action; the reward received from the environment; and whether the new state of the environment is a 11 | * terminal state. 12 | * @author James MacGlashan. 13 | */ 14 | public class EnvironmentOutcome { 15 | 16 | /** 17 | * The previous environment observation (as a {@link burlap.oomdp.core.states.State}) when the action was taken.
18 | */ 19 | public State o; 20 | 21 | /** 22 | * The action taken in the environment 23 | */ 24 | public GroundedAction a; 25 | 26 | /** 27 | * The next environment observation (as a {@link burlap.oomdp.core.states.State}) following the action's execution. 28 | */ 29 | public State op; 30 | 31 | /** 32 | * The reward received 33 | */ 34 | public double r; 35 | 36 | /** 37 | * Whether the next state to which the environment transitioned is a terminal state (true if so, false otherwise) 38 | */ 39 | public boolean terminated; 40 | 41 | 42 | /** 43 | * Initializes. 44 | * @param o The previous state of the environment when the action was taken. 45 | * @param a The action taken in the environment 46 | * @param op The next state to which the environment transitioned 47 | * @param r The reward received 48 | * @param terminated Whether the next state to which the environment transitioned is a terminal state (true if so, false otherwise) 49 | */ 50 | public EnvironmentOutcome(State o, GroundedAction a, State op, double r, boolean terminated) { 51 | this.o = o; 52 | this.a = a; 53 | this.op = op; 54 | this.r = r; 55 | this.terminated = terminated; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/environment/StateSettableEnvironment.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.environment; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | /** 6 | * An interface to be used with {@link burlap.oomdp.singleagent.environment.Environment} instances that allows 7 | * the environment to have its state set to a client-specified state. 8 | * @author James MacGlashan. 9 | */ 10 | public interface StateSettableEnvironment extends Environment{ 11 | 12 | /** 13 | * Sets the current state of the environment to the specified state. 14 | * @param s the state to which this {@link burlap.oomdp.singleagent.environment.Environment} will be set. 15 | */ 16 | void setCurStateTo(State s); 17 | } 18 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/environment/TaskSettableEnvironment.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.environment; 2 | 3 | import burlap.oomdp.core.TerminalFunction; 4 | import burlap.oomdp.singleagent.RewardFunction; 5 | 6 | /** 7 | * An {@link burlap.oomdp.singleagent.environment.Environment} interface extension that allows the {@link burlap.oomdp.singleagent.RewardFunction} 8 | * and {@link burlap.oomdp.core.TerminalFunction} to be set and accessed. 9 | * @author James MacGlashan. 10 | */ 11 | public interface TaskSettableEnvironment extends Environment{ 12 | 13 | /** 14 | * Sets the {@link burlap.oomdp.singleagent.RewardFunction} of this {@link burlap.oomdp.singleagent.environment.Environment} to 15 | * the specified reward function. 16 | * @param rf the new {@link burlap.oomdp.singleagent.RewardFunction} of the {@link burlap.oomdp.singleagent.environment.Environment}. 17 | */ 18 | void setRf(RewardFunction rf); 19 | 20 | /** 21 | * Sets the {@link burlap.oomdp.core.TerminalFunction} of this {@link burlap.oomdp.singleagent.environment.Environment} to 22 | * the specified terminal function. 23 | * @param tf the new {@link burlap.oomdp.core.TerminalFunction} of the {@link burlap.oomdp.singleagent.environment.Environment}.
24 | */ 25 | void setTf(TerminalFunction tf); 26 | 27 | /** 28 | * Returns the {@link burlap.oomdp.singleagent.RewardFunction} this {@link burlap.oomdp.singleagent.environment.Environment} uses 29 | * to determine rewards. 30 | * @return a {@link burlap.oomdp.singleagent.RewardFunction} 31 | */ 32 | RewardFunction getRf(); 33 | 34 | /** 35 | * Returns the {@link burlap.oomdp.core.TerminalFunction} this {@link burlap.oomdp.singleagent.environment.Environment} uses 36 | * to determine terminal states 37 | * @return a {@link burlap.oomdp.core.TerminalFunction} 38 | */ 39 | TerminalFunction getTf(); 40 | 41 | } 42 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/explorer/SpecialExplorerAction.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.explorer; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | 6 | /** 7 | * An interface for defining special non-domain actions to take in a visual explorer. 8 | * @author James MacGlashan 9 | * 10 | */ 11 | public interface SpecialExplorerAction { 12 | public State applySpecialAction(State curState); 13 | } 14 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/explorer/StateResetSpecialAction.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.explorer; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.singleagent.environment.Environment; 5 | 6 | 7 | /** 8 | * A special non-domain action that causes a {@link burlap.oomdp.singleagent.explorer.VisualExplorer}'s environment to be reset with the {@link burlap.oomdp.singleagent.environment.Environment#resetEnvironment()} 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public class StateResetSpecialAction implements SpecialExplorerAction { 13 | 14 | Environment env; 15 | 16 | /** 17 | * Initializes. 18 | * @param env the {@link burlap.oomdp.singleagent.environment.Environment} which will be reset by the {@link #applySpecialAction(burlap.oomdp.core.states.State)} method. 19 | */ 20 | public StateResetSpecialAction(Environment env){ 21 | this.env = env; 22 | } 23 | 24 | 25 | @Override 26 | public State applySpecialAction(State curState) { 27 | this.env.resetEnvironment(); 28 | return this.env.getCurrentObservation(); 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/pomdp/beliefstate/BeliefState.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.pomdp.beliefstate; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.singleagent.GroundedAction; 5 | 6 | /** 7 | * An interface for defining a belief state, which is a probability distribution over MDP states. This interface 8 | * does not require enumerating all states, because it is possible to have a belief state over an infinite number of MDP 9 | * states. 
However, it does require that the probability density function be returnable ({@link #belief(burlap.oomdp.core.states.State)}), 10 | * to be able to sample an MDP state from the belief distribution {@link #sampleStateFromBelief()}, 11 | * and a mechanism to update the belief state with respect to some observation and action {@link #getUpdatedBeliefState(burlap.oomdp.core.states.State, burlap.oomdp.singleagent.GroundedAction)}. 12 | * 13 | * @author James MacGlashan and Nakul Gopalan 14 | */ 15 | public interface BeliefState extends State { 16 | 17 | 18 | /** 19 | * Returns the probability density/mass for the input MDP state. 20 | * @param s the input MDP state defined by a {@link burlap.oomdp.core.states.State} instance. 21 | * @return the probability density/mass of the input MDP state in this belief distribution. 22 | */ 23 | double belief(State s); 24 | 25 | /** 26 | * Samples an MDP state from this belief distribution. 27 | * @return an MDP state defined by a {@link burlap.oomdp.core.states.State} instance. 28 | */ 29 | State sampleStateFromBelief(); 30 | 31 | /** 32 | * Computes a new belief distribution using this BeliefState as the prior and conditioned on the given POMDP observation 33 | * and action taken. 34 | * @param observation the conditioned POMDP observation defined by a {@link burlap.oomdp.core.states.State} instance. 35 | * @param ga the conditioned action selection in the previous time step. 36 | * @return the new belief state distribution represented by a new {@link BeliefState} instance. 37 | */ 38 | BeliefState getUpdatedBeliefState(State observation, GroundedAction ga); 39 | 40 | 41 | } 42 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/pomdp/beliefstate/DenseBeliefVector.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.pomdp.beliefstate; 2 | 3 | /** 4 | * An interface to be used in conjunction with {@link burlap.oomdp.singleagent.pomdp.beliefstate.BeliefState} instances 5 | * for belief states that can generate a dense belief vector representation. 6 | * @author James MacGlashan. 7 | */ 8 | public interface DenseBeliefVector extends EnumerableBeliefState{ 9 | 10 | /** 11 | * Returns a dense belief vector representation of this belief state. 12 | * @return a double array specifying the dense belief vector representation. 13 | */ 14 | double [] getBeliefVector(); 15 | 16 | /** 17 | * Sets this belief state to the provided dense belief vector. If the belief vector dimensionality does not match 18 | * this object's dimensionality then a runtime exception will be thrown. 19 | * @param b the belief vector to set this belief state to. 20 | */ 21 | public void setBeliefVector(double [] b); 22 | } 23 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/pomdp/beliefstate/EnumerableBeliefState.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.pomdp.beliefstate; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | import java.util.List; 6 | 7 | /** 8 | * An interface to be used by {@link BeliefState} implementations that also can enumerate 9 | * the set of states that have probability mass.
The probability mass of a state is specified by the 10 | * {@link burlap.oomdp.singleagent.pomdp.beliefstate.EnumerableBeliefState.StateBelief} class which is a pair 11 | * consisting of an MDP state defined by a {@link burlap.oomdp.core.states.State} instance, and its probability mass, defined by 12 | * a double. 13 | * @author James MacGlashan. 14 | */ 15 | public interface EnumerableBeliefState { 16 | 17 | /** 18 | * Returns the states, and their probability mass, that have non-zero probability mass. States that are not 19 | * included in the returned list are assumed to have probability mass zero. 20 | * @return a {@link java.util.List} of {@link burlap.oomdp.singleagent.pomdp.beliefstate.EnumerableBeliefState.StateBelief} objects specifying the enumerated probability mass function. 21 | */ 22 | List<StateBelief> getStatesAndBeliefsWithNonZeroProbability(); 23 | 24 | 25 | /** 26 | * A class for specifying the probability mass of an MDP state in a {@link BeliefState}. 27 | */ 28 | public static class StateBelief{ 29 | 30 | /** 31 | * The MDP state defined by a {@link burlap.oomdp.core.states.State} instance. 32 | */ 33 | public State s; 34 | 35 | /** 36 | * The probability mass of the MDP state. 37 | */ 38 | public double belief; 39 | 40 | 41 | /** 42 | * Initializes 43 | * @param s the MDP state defined by a {@link burlap.oomdp.core.states.State} instance. 44 | * @param belief the probability mass of the state. 45 | */ 46 | public StateBelief(State s, double belief){ 47 | this.s = s; 48 | this.belief = belief; 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/pomdp/beliefstate/tabular/HashableTabularBeliefStateFactory.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.pomdp.beliefstate.tabular; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.statehashing.HashableState; 5 | import burlap.oomdp.statehashing.HashableStateFactory; 6 | import org.apache.commons.lang3.builder.HashCodeBuilder; 7 | 8 | import java.util.Map; 9 | 10 | /** 11 | * A {@link burlap.oomdp.statehashing.HashableStateFactory} for {@link burlap.oomdp.singleagent.pomdp.beliefstate.tabular.TabularBeliefState} instances. 12 | * @author James MacGlashan.
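 * <p>
 * Usage sketch ({@code beliefState} is an assumed {@link TabularBeliefState} instance):
 * <pre>{@code
 * HashableStateFactory hf = new HashableTabularBeliefStateFactory();
 * HashableState hs = hf.hashState(beliefState); // throws a RuntimeException for non-tabular belief states
 * }</pre>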
13 | */ 14 | public class HashableTabularBeliefStateFactory implements HashableStateFactory{ 15 | 16 | @Override 17 | public HashableState hashState(State s) { 18 | 19 | if(!(s instanceof TabularBeliefState)){ 20 | throw new RuntimeException("Cannot generate HashableState for input state, because it is a " + s.getClass().getName() + " instance and HashableTabularBeliefStateFactory only hashes TabularBeliefState instances."); 21 | } 22 | 23 | return new HashableTabularBeliefState(s); 24 | } 25 | 26 | @Override 27 | public boolean objectIdentifierIndependent() { 28 | return true; 29 | } 30 | 31 | 32 | public static class HashableTabularBeliefState extends HashableState{ 33 | 34 | public HashableTabularBeliefState(State s) { 35 | super(s); 36 | } 37 | 38 | @Override 39 | public int hashCode() { 40 | 41 | HashCodeBuilder builder = new HashCodeBuilder(17, 31); 42 | for(Map.Entry e : ((TabularBeliefState)this.s).beliefValues.entrySet()){ 43 | int entryHash = 31 * e.getKey().hashCode() + e.getValue().hashCode(); 44 | builder.append(entryHash); 45 | } 46 | 47 | return builder.toHashCode(); 48 | } 49 | 50 | @Override 51 | public boolean equals(Object obj) { 52 | 53 | if(!(obj instanceof HashableTabularBeliefState)){ 54 | return false; 55 | } 56 | 57 | return this.s.equals(((HashableTabularBeliefState) obj).s); 58 | } 59 | 60 | @Override 61 | public State copy() { 62 | return new HashableTabularBeliefState(s); 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/statehashing/HashableObjectFactory.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.statehashing; 2 | 3 | import burlap.oomdp.core.objects.ObjectInstance; 4 | 5 | public interface HashableObjectFactory { 6 | 7 | HashableObject hashObject(ObjectInstance object); 8 | HashableValueFactory getValueHashFactory(); 9 | } 10 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/statehashing/HashableStateFactory.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.statehashing; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | 6 | /** 7 | * This interface is to be used by classes that can produce {@link HashableState} objects 8 | * that provide a hash values for {@link burlap.oomdp.core.states.State} objects. This is useful for tabular 9 | * methods that make use of {@link java.util.HashSet}s or {@link java.util.HashMap}s for fast retrieval. 10 | * @author James MacGlashan 11 | * 12 | */ 13 | public interface HashableStateFactory { 14 | 15 | /** 16 | * Turns {@link burlap.oomdp.core.states.State} s into a {@link burlap.oomdp.statehashing.HashableState} 17 | * @param s the input {@link burlap.oomdp.core.states.State} to transform. 18 | * @return a {@link burlap.oomdp.statehashing.HashableState}. 19 | */ 20 | HashableState hashState(State s); 21 | 22 | /** 23 | * Returns true if the {@link burlap.oomdp.statehashing.HashableState} objects returned are object identifier independent; false if they are dependent. 24 | * @return true if the {@link burlap.oomdp.statehashing.HashableState} objects returned are object identifier independent; false if they are dependent. 
25 | */ 26 | boolean objectIdentifierIndependent(); 27 | 28 | } 29 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/statehashing/HashableValue.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.statehashing; 2 | 3 | import burlap.oomdp.core.values.Value; 4 | 5 | public class HashableValue { 6 | private final HashableValueFactory hashingFactory; 7 | private final Value value; 8 | private final int hashCode; 9 | 10 | public HashableValue(Value value, HashableValueFactory hashingFactory, int hashCode) { 11 | this.value = value; 12 | this.hashingFactory = hashingFactory; 13 | this.hashCode = hashCode; 14 | } 15 | 16 | public HashableValueFactory getHashFactory() { 17 | return this.hashingFactory; 18 | } 19 | 20 | public Value getValue() { 21 | return this.value; 22 | } 23 | 24 | @Override 25 | public int hashCode() { 26 | return this.hashCode; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/statehashing/HashableValueFactory.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.statehashing; 2 | 3 | import burlap.oomdp.core.values.Value; 4 | 5 | public interface HashableValueFactory { 6 | 7 | HashableValue hashValue(Value value); 8 | } 9 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/stateserialization/SerializableStateFactory.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.stateserialization; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | /** 6 | * A factory interface for generating {@link burlap.oomdp.stateserialization.SerializableState} instances from an input 7 | * {@link burlap.oomdp.core.states.State} by using the {@link #serialize(burlap.oomdp.core.states.State)} method. This 8 | * interface requires a {@link #getGeneratedClass()} method that reports the specific {@link burlap.oomdp.stateserialization.SerializableState} 9 | * implementation that is returned so that complex classes that have {@link burlap.oomdp.core.states.State} instances 10 | * to be turned into a {@link burlap.oomdp.stateserialization.SerializableState} can map them to the appropriate class type. 11 | * @author James MacGlashan. 12 | */ 13 | public interface SerializableStateFactory { 14 | 15 | /** 16 | * Takes a {@link burlap.oomdp.core.states.State} and turns it into a {@link burlap.oomdp.stateserialization.SerializableState}. 17 | * @param s the input {@link burlap.oomdp.core.states.State} to convert. 18 | * @return a {@link burlap.oomdp.stateserialization.SerializableState} 19 | */ 20 | SerializableState serialize(State s); 21 | 22 | /** 23 | * Returns the {@link burlap.oomdp.stateserialization.SerializableState} implementation that is generated by this factory. 24 | * @return the {@link java.lang.Class} of the {@link burlap.oomdp.stateserialization.SerializableState} implementation that is generated by this factory. 
25 | */ 26 | Class getGeneratedClass(); 27 | } 28 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/stateserialization/simple/SimpleSerializableState.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.stateserialization.simple; 2 | 3 | import burlap.oomdp.core.Domain; 4 | import burlap.oomdp.core.objects.ObjectInstance; 5 | import burlap.oomdp.core.states.MutableState; 6 | import burlap.oomdp.core.states.State; 7 | import burlap.oomdp.stateserialization.SerializableState; 8 | 9 | import java.util.ArrayList; 10 | import java.util.List; 11 | 12 | /** 13 | * A {@link burlap.oomdp.stateserialization.SerializableState} representation that reads all {@link burlap.oomdp.core.objects.ObjectInstance} and 14 | * {@link burlap.oomdp.core.values.Value} objects stored in a {@link burlap.oomdp.core.states.State} and represents their information 15 | * with {@link burlap.oomdp.stateserialization.simple.SimpleSerializedObjectInstance} and {@link burlap.oomdp.stateserialization.simple.SimpleSerializedValue} 16 | * instances. Deserialized {@link burlap.oomdp.core.states.State} objects are {@link burlap.oomdp.core.states.MutableState} instances. 17 | * @author James MacGlashan. 18 | */ 19 | public class SimpleSerializableState extends SerializableState { 20 | 21 | public List objects; 22 | 23 | public SimpleSerializableState(){ 24 | 25 | } 26 | 27 | public SimpleSerializableState(State s) { 28 | super(s); 29 | } 30 | 31 | @Override 32 | public void serialize(State s) { 33 | List objects = s.getAllObjects(); 34 | this.objects = new ArrayList(objects.size()); 35 | for(ObjectInstance o : objects){ 36 | this.objects.add(new SimpleSerializedObjectInstance(o)); 37 | } 38 | } 39 | 40 | @Override 41 | public State deserialize(Domain domain) { 42 | State s = new MutableState(); 43 | for(SimpleSerializedObjectInstance o : this.objects){ 44 | s.addObject(o.deserialize(domain)); 45 | } 46 | return s; 47 | } 48 | 49 | 50 | } 51 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/stateserialization/simple/SimpleSerializableStateFactory.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.stateserialization.simple; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.stateserialization.SerializableState; 5 | import burlap.oomdp.stateserialization.SerializableStateFactory; 6 | 7 | /** 8 | * A {@link burlap.oomdp.stateserialization.SerializableStateFactory} for {@link burlap.oomdp.stateserialization.simple.SimpleSerializableState} instances. 9 | * @author James MacGlashan. 
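 * <p>
 * Round-trip sketch ({@code s} and {@code domain} are assumed to be an existing state and its domain):
 * <pre>{@code
 * SerializableStateFactory f = new SimpleSerializableStateFactory();
 * SerializableState ss = f.serialize(s);
 * State restored = ss.deserialize(domain); // a MutableState rebuilt from the serialized data
 * }</pre>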
10 | */ 11 | public class SimpleSerializableStateFactory implements SerializableStateFactory { 12 | @Override 13 | public SerializableState serialize(State s) { 14 | return new SimpleSerializableState(s); 15 | } 16 | 17 | @Override 18 | public Class getGeneratedClass() { 19 | return SimpleSerializableState.class; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/stateserialization/simple/SimpleSerializedObjectInstance.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.stateserialization.simple; 2 | 3 | import burlap.oomdp.core.Domain; 4 | import burlap.oomdp.core.objects.MutableObjectInstance; 5 | import burlap.oomdp.core.objects.ObjectInstance; 6 | import burlap.oomdp.core.values.Value; 7 | 8 | import java.io.Serializable; 9 | import java.util.ArrayList; 10 | import java.util.List; 11 | 12 | /** 13 | * A serializable representation of {@link burlap.oomdp.core.objects.ObjectInstance} objects. 14 | * Deserialization produces {@link burlap.oomdp.core.objects.MutableObjectInstance} objects. 15 | * @author James MacGlashan. 16 | */ 17 | public class SimpleSerializedObjectInstance implements Serializable{ 18 | 19 | public String name; 20 | public String object_class; 21 | public List values; 22 | 23 | public SimpleSerializedObjectInstance() { 24 | } 25 | 26 | /** 27 | * Initializes by representing the input {@link burlap.oomdp.core.objects.ObjectInstance}. 28 | * @param o the {@link burlap.oomdp.core.objects.ObjectInstance} to represent. 29 | */ 30 | public SimpleSerializedObjectInstance(ObjectInstance o){ 31 | this.object_class = o.getClassName(); 32 | this.name = o.getName(); 33 | List values = o.getValues(); 34 | this.values = new ArrayList(values.size()); 35 | for(Value v : values){ 36 | this.values.add(new SimpleSerializedValue(v)); 37 | } 38 | } 39 | 40 | /** 41 | * Turns this representation into an actual {@link burlap.oomdp.core.objects.ObjectInstance} whose class and attributes 42 | * are associated with the input {@link burlap.oomdp.core.Domain} 43 | * @param domain the {@link burlap.oomdp.core.Domain} to which the returned {@link burlap.oomdp.core.objects.ObjectInstance} {@link burlap.oomdp.core.ObjectClass} and {@link burlap.oomdp.core.Attribute} refers. 44 | * @return a {@link burlap.oomdp.core.objects.MutableObjectInstance} 45 | */ 46 | public ObjectInstance deserialize(Domain domain){ 47 | MutableObjectInstance o = new MutableObjectInstance(domain.getObjectClass(this.object_class), this.name); 48 | for(SimpleSerializedValue v : this.values){ 49 | o.setValue(v.attribute, v.value); 50 | } 51 | return o; 52 | } 53 | 54 | 55 | } 56 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/stateserialization/simple/SimpleSerializedValue.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.stateserialization.simple; 2 | 3 | import burlap.oomdp.core.values.Value; 4 | 5 | import java.io.Serializable; 6 | 7 | /** 8 | * A serializable representation of {@link burlap.oomdp.core.values.Value} objects. 9 | * @author James MacGlashan. 
10 | */ 11 | public class SimpleSerializedValue implements Serializable{ 12 | public String attribute; 13 | public String value; 14 | 15 | public SimpleSerializedValue(){ 16 | 17 | } 18 | 19 | /** 20 | * Creates a serializable representation for the given {@link burlap.oomdp.core.values.Value} 21 | * @param oomdpValue the {@link burlap.oomdp.core.values.Value} this object will represent. 22 | */ 23 | public SimpleSerializedValue(Value oomdpValue){ 24 | this.attribute = oomdpValue.attName(); 25 | this.value = oomdpValue.getStringVal(); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/stochasticgames/AgentFactory.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.stochasticgames; 2 | 3 | /** 4 | * An interface for generating agents 5 | * @author James MacGlashan 6 | * 7 | */ 8 | public interface AgentFactory { 9 | /** 10 | * Returns a new agent instance. 11 | * @return a new agent instance. 12 | */ 13 | public SGAgent generateAgent(); 14 | } 15 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/stochasticgames/InvalidActionException.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.stochasticgames; 2 | 3 | /** 4 | * An exception for indicating that an invalid action was requested in a stochastic game. Created by cayle on 5/22/15. 5 | */ 6 | public class InvalidActionException extends RuntimeException { 7 | } 8 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/stochasticgames/JointReward.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.stochasticgames; 2 | 3 | import java.util.Map; 4 | 5 | import burlap.oomdp.core.states.State; 6 | 7 | /** 8 | * This interface defines the method needed to return the reward received by each agent. 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public interface JointReward { 13 | 14 | /** 15 | * Returns the reward received by each agent specified in the joint action. The returned 16 | * result is a Map from agent names to the reward that they received. 17 | * @param s the state in which the joint action was taken. 18 | * @param ja the joint action taken. 19 | * @param sp the resulting state from taking the joint action 20 | * @return a Map from agent names to the reward that they received. 21 | */ 22 | public Map<String, Double> reward(State s, JointAction ja, State sp); 23 | } 24 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/stochasticgames/SGAgentType.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.stochasticgames; 2 | 3 | import java.util.List; 4 | 5 | import burlap.oomdp.core.ObjectClass; 6 | import burlap.oomdp.stochasticgames.agentactions.SGAgentAction; 7 | 8 | 9 | /** 10 | * This class specifies the type of agent a stochastic games agent can be. Different agent types may have different actions they can execute 11 | * and may also have different observable properties to other agents, which is indicated by the ObjectClass that represents their world state.
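 * <p>
 * Construction sketch (the object class name {@code "agent"} and the {@code domain.getAgentActions()} accessor are illustrative assumptions):
 * <pre>{@code
 * SGAgentType type = new SGAgentType("agent", domain.getObjectClass("agent"), domain.getAgentActions());
 * }</pre>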
12 | * @author James MacGlashan 13 | * 14 | */ 15 | public class SGAgentType { 16 | 17 | public String typeName; 18 | public ObjectClass oclass; 19 | public List<SGAgentAction> actions; 20 | 21 | 22 | /** 23 | * Creates a new agent type with a given name, object class describing the agent's world state, and actions available to the agent. 24 | * @param typeName the type name 25 | * @param oclass the object class that represents the agent's world state information 26 | * @param actionsAvailableToType the available actions that this agent can take in the world. 27 | */ 28 | public SGAgentType(String typeName, ObjectClass oclass, List<SGAgentAction> actionsAvailableToType){ 29 | this.typeName = typeName; 30 | this.oclass = oclass; 31 | this.actions = actionsAvailableToType; 32 | } 33 | 34 | 35 | @Override 36 | public int hashCode(){ 37 | return typeName.hashCode(); 38 | } 39 | 40 | @Override 41 | public boolean equals(Object o){ 42 | if(!(o instanceof SGAgentType)){ 43 | return false; 44 | } 45 | 46 | return ((SGAgentType)o).typeName.equals(typeName); 47 | 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/stochasticgames/SGStateGenerator.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.stochasticgames; 2 | 3 | import java.util.List; 4 | 5 | import burlap.oomdp.core.objects.ObjectInstance; 6 | import burlap.oomdp.core.states.State; 7 | import burlap.oomdp.core.objects.MutableObjectInstance; 8 | 9 | 10 | /** 11 | * An abstract class defining the interface and common mechanism for generating State objects specifically for stochastic games domains. 12 | * Unlike the similar {@link burlap.oomdp.auxiliary.StateGenerator} class, this class requires a list of agents that will be in the world 13 | * and will create an ObjectInstance for each agent that belongs to the OO-MDP object class specified by each agent's {@link SGAgentType}. 14 | * @author James MacGlashan 15 | * 16 | */ 17 | public abstract class SGStateGenerator { 18 | 19 | /** 20 | * Generates a new state with the given agents in it. 21 | * @param agents the agents that should be in the state. 22 | * @return a new state instance. 23 | */ 24 | public abstract State generateState(List<SGAgent> agents); 25 | 26 | /** 27 | * Creates an object instance belonging to the object class specified in the agent's {@link SGAgentType} data member. 28 | * The returned object instance will have the name of the agent. 29 | * @param a the agent for which to create an OO-MDP state object instance 30 | * @return an object instance for this agent. 31 | */ 32 | protected ObjectInstance getAgentObjectInstance(SGAgent a){ 33 | return new MutableObjectInstance(a.agentType.oclass, a.worldAgentName); 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/stochasticgames/WorldGenerator.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.stochasticgames; 2 | 3 | 4 | /** 5 | * An interface for generating {@link World} instances. 6 | * @author James MacGlashan 7 | * 8 | */ 9 | public interface WorldGenerator { 10 | /** 11 | * Generates a new {@link World} instance. 12 | * @return a new {@link World} instance.
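 * <p>
 * An anonymous-class sketch ({@code makeConfiguredWorld()} is an assumed helper, not part of this interface):
 * <pre>{@code
 * WorldGenerator wg = new WorldGenerator() {
 *     @Override
 *     public World generateWorld() {
 *         return makeConfiguredWorld(); // build a fresh World for each experiment run
 *     }
 * };
 * }</pre>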
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/SGAgentType.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames;

import java.util.List;

import burlap.oomdp.core.ObjectClass;
import burlap.oomdp.stochasticgames.agentactions.SGAgentAction;


/**
 * This class specifies the type of agent a stochastic games agent can be. Different agent types may have different actions they can execute
 * and may also have different observable properties to other agents, which is indicated by the ObjectClass that represents their world state.
 * @author James MacGlashan
 *
 */
public class SGAgentType {

	public String typeName;
	public ObjectClass oclass;
	public List<SGAgentAction> actions;


	/**
	 * Creates a new agent type with a given name, object class describing the agent's world state, and actions available to the agent.
	 * @param typeName the type name
	 * @param oclass the object class that represents the agent's world state information
	 * @param actionsAvailableToType the available actions that this agent can take in the world
	 */
	public SGAgentType(String typeName, ObjectClass oclass, List<SGAgentAction> actionsAvailableToType){
		this.typeName = typeName;
		this.oclass = oclass;
		this.actions = actionsAvailableToType;
	}


	@Override
	public int hashCode(){
		return typeName.hashCode();
	}

	@Override
	public boolean equals(Object o){
		if(!(o instanceof SGAgentType)){
			return false;
		}

		return ((SGAgentType)o).typeName.equals(typeName);
	}

}
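A construction sketch (an illustration, not a repository file; it assumes an SGDomain named domain whose agents are represented by an OO-MDP class called "agent", and that SGDomain exposes its registered agent actions via getAgentActions() — both names should be checked against this BURLAP version):

	// Sketch: one symmetric agent type shared by all players.
	SGAgentType playerType = new SGAgentType(
			"player",
			domain.getObjectClass("agent"),
			domain.getAgentActions());

Note that equals() and hashCode() compare only typeName, so two SGAgentType instances with the same name are treated as interchangeable even if their action lists differ.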
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/SGStateGenerator.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames;

import java.util.List;

import burlap.oomdp.core.objects.ObjectInstance;
import burlap.oomdp.core.states.State;
import burlap.oomdp.core.objects.MutableObjectInstance;


/**
 * An abstract class defining the interface and common mechanism for generating State objects specifically for stochastic games domains.
 * Unlike the similar {@link burlap.oomdp.auxiliary.StateGenerator} class, this class requires a list of agents that will be in the world
 * and will create an ObjectInstance for each agent that belongs to the OO-MDP object class specified by each agent's {@link SGAgentType}.
 * @author James MacGlashan
 *
 */
public abstract class SGStateGenerator {

	/**
	 * Generates a new state with the given agents in it.
	 * @param agents the agents that should be in the state.
	 * @return a new state instance.
	 */
	public abstract State generateState(List<SGAgent> agents);

	/**
	 * Creates an object instance belonging to the object class specified in the agent's {@link SGAgentType} data member.
	 * The returned object instance will have the name of the agent.
	 * @param a the agent for which to create an OO-MDP state object instance
	 * @return an object instance for this agent.
	 */
	protected ObjectInstance getAgentObjectInstance(SGAgent a){
		return new MutableObjectInstance(a.agentType.oclass, a.worldAgentName);
	}

}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/WorldGenerator.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames;


/**
 * An interface for generating {@link World} instances.
 * @author James MacGlashan
 *
 */
public interface WorldGenerator {
	/**
	 * Generates a new {@link World} instance.
	 * @return a new {@link World} instance.
	 */
	public World generateWorld();
}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/WorldObserver.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames;

import java.util.Map;

import burlap.oomdp.core.states.State;

/**
 * An interface for defining {@link burlap.oomdp.stochasticgames.World} observers. Observers
 * are told when a game starts and in what state, what each interaction in the world was as it happens, and
 * what the final state of the world is when a game ends.
 */
public interface WorldObserver {

	/**
	 * This method is called whenever a new game in a world is starting.
	 * @param s the state in which the world is starting.
	 */
	public void gameStarting(State s);

	/**
	 * This method is called whenever an interaction in the world occurs.
	 * @param s the previous state of the world
	 * @param ja the joint action taken in the world
	 * @param reward the joint reward received by the agents
	 * @param sp the next state of the world
	 */
	public void observe(State s, JointAction ja, Map<String, Double> reward, State sp);

	/**
	 * This method is called whenever a game in a world ends.
	 * @param s the final state of the world when it ends.
	 */
	public void gameEnding(State s);
}
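Because every callback receives complete states, an observer can log or score games without touching agent internals. A minimal console logger (an illustration, not a repository file; registering it on a World is assumed to go through an addWorldObserver-style method, which should be verified against this BURLAP version):

	import java.util.Map;

	import burlap.oomdp.core.states.State;
	import burlap.oomdp.stochasticgames.JointAction;
	import burlap.oomdp.stochasticgames.WorldObserver;

	// Sketch: print each transition of a game to standard out.
	public class ConsoleWorldObserver implements WorldObserver {

		@Override
		public void gameStarting(State s) {
			System.out.println("Game starting.");
		}

		@Override
		public void observe(State s, JointAction ja, Map<String, Double> reward, State sp) {
			System.out.println("Joint action: " + ja.toString() + "; rewards: " + reward);
		}

		@Override
		public void gameEnding(State s) {
			System.out.println("Game over.");
		}
	}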
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/agentactions/SimpleGroundedSGAgentAction.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames.agentactions;

/**
 * A {@link burlap.oomdp.stochasticgames.agentactions.GroundedSGAgentAction} implementation for actions that
 * are parameter-less.
 * @author James MacGlashan.
 */
public class SimpleGroundedSGAgentAction extends GroundedSGAgentAction{

	public SimpleGroundedSGAgentAction(String actingAgent, SGAgentAction a) {
		super(actingAgent, a);
	}

	@Override
	public GroundedSGAgentAction copy() {
		return new SimpleGroundedSGAgentAction(this.actingAgent, this.action);
	}

	@Override
	public void initParamsWithStringRep(String[] params) {
		//do nothing
	}

	@Override
	public String[] getParametersAsString() {
		return new String[0];
	}
}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/agentactions/SimpleSGAgentAction.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames.agentactions;

import burlap.oomdp.core.states.State;
import burlap.oomdp.stochasticgames.SGDomain;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;


/**
 * This {@link burlap.oomdp.stochasticgames.agentactions.SGAgentAction} definition defines a parameter-less agent action
 * that can be executed in every state. This is a useful action definition for symmetric games.
 * @author James MacGlashan
 *
 */
public class SimpleSGAgentAction extends SGAgentAction {

	/**
	 * Initializes this single action to be for the given domain and with the given name. This action
	 * is automatically added to the given domain.
	 * @param d the domain to which this action belongs
	 * @param name the name of this action
	 */
	public SimpleSGAgentAction(SGDomain d, String name) {
		super(d, name);
	}


	@Override
	public boolean applicableInState(State s, GroundedSGAgentAction gsa) {
		return true;
	}

	@Override
	public boolean isParameterized() {
		return false;
	}

	@Override
	public GroundedSGAgentAction getAssociatedGroundedAction(String actingAgent) {
		return new SimpleGroundedSGAgentAction(actingAgent, this);
	}

	@Override
	public List<GroundedSGAgentAction> getAllApplicableGroundedActions(State s, String actingAgent) {
		GroundedSGAgentAction gaa = this.getAssociatedGroundedAction(actingAgent);
		return this.applicableInState(s, gaa) ? Arrays.asList(gaa) : new ArrayList<GroundedSGAgentAction>(0);
	}
}
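For a one-shot or repeated matrix game, each strategy becomes one parameter-less action shared by all players. A construction sketch (an illustration, not a repository file; the action names are hypothetical and an existing SGDomain named domain is assumed):

	// Sketch: prisoner's-dilemma-style strategies as parameter-less symmetric actions.
	// Constructing them is enough: the constructor registers each action with the domain.
	SGAgentAction cooperate = new SimpleSGAgentAction(domain, "cooperate");
	SGAgentAction defect = new SimpleSGAgentAction(domain, "defect");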
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/common/AgentFactoryWithSubjectiveReward.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames.common;

import burlap.oomdp.stochasticgames.SGAgent;
import burlap.oomdp.stochasticgames.AgentFactory;
import burlap.oomdp.stochasticgames.JointReward;

/**
 * An agent generating factory that will produce an agent that uses an internal subjective reward function.
 * This can be useful for agents that use reward shaping. The base agent is first generated using
 * a different {@link burlap.oomdp.stochasticgames.AgentFactory}, and the agent returned by
 * that provided factory has its internal reward function set to the one specified for use
 * in this factory. The agent is then returned by this factory.
 * @author James MacGlashan
 *
 */
public class AgentFactoryWithSubjectiveReward implements AgentFactory {

	protected AgentFactory baseFactory;
	protected JointReward internalReward;


	/**
	 * Initializes the factory.
	 * @param baseFactory the base factory for generating an agent.
	 * @param internalReward the internal reward function to set the agent to use.
	 */
	public AgentFactoryWithSubjectiveReward(AgentFactory baseFactory, JointReward internalReward) {
		this.baseFactory = baseFactory;
		this.internalReward = internalReward;
	}

	@Override
	public SGAgent generateAgent() {
		SGAgent a = baseFactory.generateAgent();
		a.setInternalRewardFunction(internalReward);
		return a;
	}

}
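Usage is a plain decorator wrap: hand it an existing factory plus the shaped reward, then use the wrapped factory wherever a factory is expected. A sketch (an illustration, not a repository file; baseFactory and shapedReward are hypothetical names for any AgentFactory and JointReward already in hand):

	// Sketch: decorate an existing factory so every generated agent
	// learns from a shaped, subjective reward instead of the world's objective one.
	AgentFactory shapedFactory =
			new AgentFactoryWithSubjectiveReward(baseFactory, shapedReward);
	SGAgent agent = shapedFactory.generateAgent(); // already wired to shapedReward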
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/common/ConstantSGStateGenerator.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames.common;

import java.util.List;

import burlap.datastructures.HashedAggregator;
import burlap.oomdp.core.objects.ObjectInstance;
import burlap.oomdp.core.states.State;
import burlap.oomdp.stochasticgames.SGAgent;
import burlap.oomdp.stochasticgames.SGStateGenerator;


/**
 * A stochastic games state generator that always returns the same base state, which is specified via the constructor. The
 * provided source state does *not* need to worry about the object name of OO-MDP objects corresponding to agent states.
 * This generator will automatically reassign the relevant OO-MDP object names to the names of each agent by querying the agent type
 * and agent name in the list of agents provided to the {@link #generateState(List)} method. This reassignment is done
 * each time the {@link #generateState(List)} method is called on a copy of the originally provided state.
 * @author James MacGlashan
 *
 */
public class ConstantSGStateGenerator extends SGStateGenerator {

	/**
	 * The source state that will be copied and returned by the {@link #generateState(List)} method.
	 */
	protected State srcState;


	/**
	 * Initializes.
	 * @param srcState The source state that will be copied and returned by the {@link #generateState(List)} method.
	 */
	public ConstantSGStateGenerator(State srcState){
		this.srcState = srcState;
	}

	@Override
	public State generateState(List<SGAgent> agents) {

		State s = this.srcState.copy();
		HashedAggregator<String> counts = new HashedAggregator<String>();

		for(SGAgent a : agents){
			String agentClassName = a.getAgentType().oclass.name;
			int index = (int) counts.v(agentClassName);
			List<ObjectInstance> possibleAgentObjects = s.getObjectsOfClass(agentClassName);
			if(possibleAgentObjects.size() <= index){
				throw new RuntimeException("Error: Constant state used by ConstantSGStateGenerator does not have enough OO-MDP objects for agents defined by class: " + agentClassName);
			}
			ObjectInstance agentObject = possibleAgentObjects.get(index);
			s.renameObject(agentObject, a.getAgentName());

			counts.add(agentClassName, 1.);
		}

		return s;
	}

}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/common/StaticRepeatedGameActionModel.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames.common;

import java.util.ArrayList;
import java.util.List;

import burlap.oomdp.core.states.State;
import burlap.oomdp.core.TransitionProbability;
import burlap.oomdp.stochasticgames.JointAction;
import burlap.oomdp.stochasticgames.JointActionModel;


/**
 * This action model can be used to take a single stage game, and cause it to repeat itself.
 * This is achieved by simply having the same state returned after each joint action.
 * @author James MacGlashan
 *
 */
public class StaticRepeatedGameActionModel extends JointActionModel {

	public StaticRepeatedGameActionModel() {
		//nothing to do
	}

	@Override
	public List<TransitionProbability> transitionProbsFor(State s, JointAction ja) {
		List<TransitionProbability> res = new ArrayList<TransitionProbability>();
		TransitionProbability tp = new TransitionProbability(s.copy(), 1.);
		res.add(tp);

		return res;
	}

	@Override
	protected State actionHelper(State s, JointAction ja) {
		//do nothing, the state simply repeats itself
		return s;
	}

}
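Together with ConstantSGStateGenerator above, this model supplies all the dynamics a repeated matrix game needs: the generator pins the start state and the model loops every joint action back to it, so only the joint reward distinguishes the strategies. A wiring sketch (an illustration, not a repository file; variable names are hypothetical, and it assumes this BURLAP version's World constructor takes the domain, joint action model, joint reward, terminal function, and state generator in that order, and that NullTermination is its no-terminal-state TerminalFunction — verify both before use):

	// Sketch: a repeated game world that always restarts from `startState`
	// and never changes state between rounds.
	World world = new World(
			domain,                                    // an existing SGDomain
			new StaticRepeatedGameActionModel(),       // state repeats after every joint action
			payoffFunction,                            // a JointReward encoding the game matrix
			new NullTermination(),                     // repeated games have no terminal states
			new ConstantSGStateGenerator(startState)); // same start state every game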
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/explorers/HardStateResetSpecialAction.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames.explorers;

import burlap.oomdp.auxiliary.StateGenerator;
import burlap.oomdp.auxiliary.common.ConstantStateGenerator;
import burlap.oomdp.core.states.State;
import burlap.oomdp.singleagent.explorer.SpecialExplorerAction;

/**
 * @author James MacGlashan.
 */
public class HardStateResetSpecialAction implements SpecialExplorerAction {

	StateGenerator stateGenerator;

	/**
	 * Initializes which base state to reset to
	 * @param s the state to reset to when this action is executed
	 */
	public HardStateResetSpecialAction(State s){
		this.stateGenerator = new ConstantStateGenerator(s);
	}

	/**
	 * Initializes with a state generator to draw from on reset
	 * @param stateGenerator the state generator to draw from.
	 */
	public HardStateResetSpecialAction(StateGenerator stateGenerator){
		this.stateGenerator = stateGenerator;
	}

	/**
	 * Sets the base state to reset to
	 * @param s the state to reset to when this action is executed
	 */
	public void setBase(State s){
		this.stateGenerator = new ConstantStateGenerator(s);
	}

	/**
	 * Sets the state generator to draw from on reset
	 * @param stateGenerator the state generator to draw from on reset
	 */
	public void setBaseStateGenerator(StateGenerator stateGenerator) {
		this.stateGenerator = stateGenerator;
	}

	@Override
	public State applySpecialAction(State curState) {
		return this.stateGenerator.generateState();
	}
}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/tournament/MatchEntry.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames.tournament;

import burlap.oomdp.stochasticgames.SGAgentType;

/**
 * This class indicates which player in a tournament is to play in a match and what {@link burlap.oomdp.stochasticgames.SGAgentType} role they will play.
 * @author James MacGlashan
 *
 */
public class MatchEntry {

	public SGAgentType agentType;
	public int agentId;

	/**
	 * Initializes the MatchEntry
	 * @param at the {@link burlap.oomdp.stochasticgames.SGAgentType} the agent will play as
	 * @param ai the index of this agent in the tournament
	 */
	public MatchEntry(SGAgentType at, int ai){
		this.agentType = at;
		this.agentId = ai;
	}

}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/tournament/MatchSelector.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames.tournament;

import java.util.List;

/**
 * An interface for defining how matches in a tournament will be determined
 * @author James MacGlashan
 *
 */
public interface MatchSelector {
	/**
	 * Returns the next match information, which is a list of {@link MatchEntry} objects
	 * @return the next match information, which is a list of {@link MatchEntry} objects
	 */
	public List<MatchEntry> getNextMatch();

	/**
	 * Resets the match selections and causes the {@link #getNextMatch()} method to start from the beginning of matches
	 */
	public void resetMatchSelections();
}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/tournament/common/AllPairWiseSameTypeMS.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames.tournament.common;

import java.util.ArrayList;
import java.util.List;

import burlap.oomdp.stochasticgames.SGAgentType;
import burlap.oomdp.stochasticgames.tournament.MatchEntry;
import burlap.oomdp.stochasticgames.tournament.MatchSelector;


/**
 * This class defines a MatchSelector that plays all pairwise matches of agents in a round robin. It sets
 * all agents to play as the same {@link burlap.oomdp.stochasticgames.SGAgentType} and therefore is only valid in symmetric games.
 * @author James MacGlashan
 *
 */
public class AllPairWiseSameTypeMS implements MatchSelector {

	protected int n;
	protected SGAgentType at;

	protected int p0;
	protected int p1;


	/**
	 * Initializes the selector
	 * @param at the {@link burlap.oomdp.stochasticgames.SGAgentType} that all agents will play as
	 * @param n the number of agents in the tournament
	 */
	public AllPairWiseSameTypeMS(SGAgentType at, int n){
		this.n = n;
		this.at = at;

		p0 = 0;
		p1 = 1;
	}

	@Override
	public List<MatchEntry> getNextMatch() {

		if(p0 >= n-1){
			return null; //no more matches
		}

		MatchEntry me0 = new MatchEntry(at, p0);
		MatchEntry me1 = new MatchEntry(at, p1);

		List<MatchEntry> match = new ArrayList<MatchEntry>();
		match.add(me0);
		match.add(me1);

		p1++;
		if(p1 >= n){
			p0++;
			p1 = p0+1;
		}

		return match;
	}

	@Override
	public void resetMatchSelections() {
		p0 = 0;
		p1 = 1;
	}

}
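The selector walks the index pairs (0,1), (0,2), ..., (n-2,n-1), so a tournament of n agents yields n(n-1)/2 matches. A driver sketch showing the usual consume-until-null loop (an illustration, not a repository file; playerType is an SGAgentType as constructed earlier):

	// Sketch: enumerate every round-robin pairing; 4 agents -> 6 matches.
	MatchSelector selector = new AllPairWiseSameTypeMS(playerType, 4);
	List<MatchEntry> match;
	while((match = selector.getNextMatch()) != null){
		System.out.println("Match: agent " + match.get(0).agentId
				+ " vs agent " + match.get(1).agentId);
	}
	selector.resetMatchSelections(); // rewind to reuse the same schedule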
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/visualizer/ObjectPainter.java:
--------------------------------------------------------------------------------
package burlap.oomdp.visualizer;

import java.awt.Graphics2D;

import burlap.oomdp.core.objects.ObjectInstance;
import burlap.oomdp.core.states.State;


/**
 * An interface for defining painters that can render object instances to a graphics context.
 * @author James MacGlashan
 *
 */
public interface ObjectPainter {

	/**
	 * Paints object instance ob to graphics context g2
	 * @param g2 graphics context to which the object should be painted
	 * @param s the state of the object to be painted
	 * @param ob the instantiated object to be painted
	 * @param cWidth width of the canvas
	 * @param cHeight height of the canvas
	 */
	public void paintObject(Graphics2D g2, State s, ObjectInstance ob, float cWidth, float cHeight);

}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/visualizer/RenderLayer.java:
--------------------------------------------------------------------------------
package burlap.oomdp.visualizer;

import java.awt.Graphics2D;

/**
 * A RenderLayer is a 2 dimensional layer that paints to a provided 2D graphics context. The {@link MultiLayerRenderer} can take
 * a list of these objects and will paint them sequentially to the same 2D graphics context. This allows different kinds
 * of renderers that display different kinds of information to be layered on top of each other.
 * @author James MacGlashan
 *
 */
public interface RenderLayer {
	public void render(Graphics2D g2, float width, float height);
}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/visualizer/StaticPainter.java:
--------------------------------------------------------------------------------
package burlap.oomdp.visualizer;

import java.awt.Graphics2D;

import burlap.oomdp.core.states.State;



/**
 * This class paints general properties of a state/domain that may not be represented
 * by any specific object instance data. For instance, the GridWorld class
 * may have walls that need to be painted, but the walls are part of the transition
 * dynamics of the domain and not captured in the object instance value assignments.
 * @author James MacGlashan
 *
 */
public interface StaticPainter {

	/**
	 * Paints general state information not represented by object instances to graphics context g2
	 * @param g2 graphics context to which the static data should be painted
	 * @param s the state to be painted
	 * @param cWidth the width of the canvas
	 * @param cHeight the height of the canvas
	 */
	public void paint(Graphics2D g2, State s, float cWidth, float cHeight);

}
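Painters receive the canvas size in pixels, so implementations scale domain coordinates by cWidth and cHeight themselves. A minimal StaticPainter sketch that just clears the canvas to a background color (an illustration, not a repository file):

	import java.awt.Color;
	import java.awt.Graphics2D;
	import java.awt.geom.Rectangle2D;

	import burlap.oomdp.core.states.State;
	import burlap.oomdp.visualizer.StaticPainter;

	// Sketch: fill the whole canvas with a background color, independent of the
	// state; a real painter would draw walls, grid lines, etc. here instead.
	public class BackgroundPainter implements StaticPainter {

		@Override
		public void paint(Graphics2D g2, State s, float cWidth, float cHeight) {
			g2.setColor(Color.WHITE);
			g2.fill(new Rectangle2D.Float(0f, 0f, cWidth, cHeight));
		}
	}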
--------------------------------------------------------------------------------
/Assignment4/src/burlap/tutorials/hgw/HelloGridWorld.java:
--------------------------------------------------------------------------------
package burlap.tutorials.hgw;


import burlap.domain.singleagent.gridworld.GridWorldDomain;
import burlap.domain.singleagent.gridworld.GridWorldVisualizer;
import burlap.oomdp.core.Domain;
import burlap.oomdp.core.states.State;
import burlap.oomdp.singleagent.explorer.VisualExplorer;
import burlap.oomdp.visualizer.Visualizer;

public class HelloGridWorld {

	public static void main(String[] args) {

		GridWorldDomain gw = new GridWorldDomain(11,11); //11x11 grid world
		gw.setMapToFourRooms(); //four rooms layout
		gw.setProbSucceedTransitionDynamics(0.8); //stochastic transitions with 0.8 success rate
		Domain domain = gw.generateDomain(); //generate the grid world domain

		//setup initial state
		State s = GridWorldDomain.getOneAgentOneLocationState(domain);
		GridWorldDomain.setAgent(s, 0, 0);
		GridWorldDomain.setLocation(s, 0, 10, 10);

		//create visualizer and explorer
		Visualizer v = GridWorldVisualizer.getVisualizer(gw.getMap());
		VisualExplorer exp = new VisualExplorer(domain, v, s);

		//set control keys to use w-s-a-d
		exp.addKeyAction("w", GridWorldDomain.ACTIONNORTH);
		exp.addKeyAction("s", GridWorldDomain.ACTIONSOUTH);
		exp.addKeyAction("a", GridWorldDomain.ACTIONWEST);
		exp.addKeyAction("d", GridWorldDomain.ACTIONEAST);

		exp.initGUI();

	}
}
--------------------------------------------------------------------------------
/Assignment4/ycai87-analysis.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment4/ycai87-analysis.pdf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# CS7641-Machine-Learning

This is the assignment repository for Georgia Tech CS7641 Machine Learning.


Assignment 1 Phishing Website and Letter Recognition using Supervised Learning

Assignment 2 Study on Randomized Optimization

Assignment 3 Study on Unsupervised Learning

Assignment 4 Study of Markov Decision Process Problems using Reinforcement Learning


Disclaimer: Directly copying and using the code for any of the course projects is forbidden. This is a violation of the GA Tech Honor Code.
You are welcome to use it as a reference, and I would appreciate any comments.

Non-GATech students can access the lectures on Udacity.
--------------------------------------------------------------------------------