├── Assignment1
├── README.txt
├── letter.arff
├── phishing.arff
├── result.xlsx
├── table.docx
└── ycai87-analysis.pdf
├── Assignment2
├── ContinuousPeaks.java
├── Knapsack.java
├── Knapsack_mimic.java
├── LetterRecognition.java
├── README.txt
├── TravelingSalesman.java
├── phishing.csv
├── phishing_ga.java
├── phishing_rhc.java
├── phishing_sa.java
├── phishingwebsite.java
├── travellingsales_ga.java
└── ycai87-analysis.pdf
├── Assignment3
├── BRInformationGain.java
├── BR_ICAWithFilter.java
├── BR_PCAWithFilter.java
├── LRInformationGain.java
├── LR_DR.java
├── LR_ICAWithFilter.java
├── Lr_PCAWithFilter.java
├── README.txt
├── Randomprojection.java
├── bc_kmeans.py
├── br_dr_em.py
├── br_dr_kmeans.py
├── clustertesters
│ ├── ExpectationMaximizationTestCluster.py
│ ├── ExpectationMaximizationTestCluster.pyc
│ ├── KMeansTestCluster.py
│ ├── KMeansTestCluster.pyc
│ ├── __init__.py
│ ├── __init__.pyc
│ ├── lr_ExpectationMaximizationTestCluster.py
│ ├── lr_ExpectationMaximizationTestCluster.pyc
│ ├── lr_KMeansTestCluster.py
│ └── lr_KMeansTestCluster.pyc
├── letter.arff
├── lr_dr_em.py
├── lr_dr_kmeans.py
├── lr_em.py
├── lr_kmeans.py
├── wisconsin.arff
└── ycai87-analysis.pdf
├── Assignment4
├── README.txt
├── easyGW.py
├── hardGW.py
├── src
│ └── burlap
│ │ ├── assignment4
│ │ ├── BasicGridWorld.java
│ │ ├── EasyGridWorldLauncher.java
│ │ ├── HardGridWorldLauncher.java
│ │ └── util
│ │ │ ├── AgentPainter.java
│ │ │ ├── AnalysisAggregator.java
│ │ │ ├── AnalysisRunner.java
│ │ │ ├── AtLocation.java
│ │ │ ├── BasicRewardFunction.java
│ │ │ ├── BasicTerminalFunction.java
│ │ │ ├── LocationPainter.java
│ │ │ ├── MapPrinter.java
│ │ │ ├── Movement.java
│ │ │ └── WallPainter.java
│ │ ├── behavior
│ │ ├── learningrate
│ │ │ ├── ConstantLR.java
│ │ │ ├── ExponentialDecayLR.java
│ │ │ ├── LearningRate.java
│ │ │ └── SoftTimeInverseDecayLR.java
│ │ ├── policy
│ │ │ ├── BeliefPolicyToPOMDPPolicy.java
│ │ │ ├── BoltzmannQPolicy.java
│ │ │ ├── CachedPolicy.java
│ │ │ ├── DomainMappedPolicy.java
│ │ │ ├── EpsilonGreedy.java
│ │ │ ├── GreedyDeterministicQPolicy.java
│ │ │ ├── GreedyQPolicy.java
│ │ │ ├── Policy.java
│ │ │ ├── RandomPolicy.java
│ │ │ └── SolverDerivedPolicy.java
│ │ ├── singleagent
│ │ │ ├── EpisodeAnalysis.java
│ │ │ ├── MDPSolver.java
│ │ │ ├── MDPSolverInterface.java
│ │ │ ├── auxiliary
│ │ │ │ ├── EpisodeSequenceVisualizer.java
│ │ │ │ ├── StateEnumerator.java
│ │ │ │ ├── StateGridder.java
│ │ │ │ ├── StateReachability.java
│ │ │ │ ├── performance
│ │ │ │ │ ├── ExperimentalEnvironment.java
│ │ │ │ │ ├── LearningAlgorithmExperimenter.java
│ │ │ │ │ ├── PerformanceMetric.java
│ │ │ │ │ ├── PerformancePlotter.java
│ │ │ │ │ └── TrialMode.java
│ │ │ │ └── valuefunctionvis
│ │ │ │ │ ├── PolicyRenderLayer.java
│ │ │ │ │ ├── StatePolicyPainter.java
│ │ │ │ │ ├── StateValuePainter.java
│ │ │ │ │ ├── StaticDomainPainter.java
│ │ │ │ │ ├── ValueFunctionRenderLayer.java
│ │ │ │ │ ├── ValueFunctionVisualizerGUI.java
│ │ │ │ │ └── common
│ │ │ │ │ ├── ActionGlyphPainter.java
│ │ │ │ │ ├── ArrowActionGlyph.java
│ │ │ │ │ ├── ColorBlend.java
│ │ │ │ │ ├── LandmarkColorBlendInterpolation.java
│ │ │ │ │ ├── PolicyGlyphPainter2D.java
│ │ │ │ │ └── StateValuePainter2D.java
│ │ │ ├── interfaces
│ │ │ │ └── rlglue
│ │ │ │ │ ├── RLGlueDomain.java
│ │ │ │ │ └── RLGlueEnvironmentInterface.java
│ │ │ ├── learnfromdemo
│ │ │ │ ├── IRLRequest.java
│ │ │ │ ├── RewardValueProjection.java
│ │ │ │ ├── apprenticeship
│ │ │ │ │ ├── ApprenticeshipLearning.java
│ │ │ │ │ └── ApprenticeshipLearningRequest.java
│ │ │ │ └── mlirl
│ │ │ │ │ ├── MLIRL.java
│ │ │ │ │ ├── MLIRLRequest.java
│ │ │ │ │ ├── MultipleIntentionsMLIRL.java
│ │ │ │ │ ├── MultipleIntentionsMLIRLRequest.java
│ │ │ │ │ ├── commonrfs
│ │ │ │ │ ├── LinearStateActionDifferentiableRF.java
│ │ │ │ │ └── LinearStateDifferentiableRF.java
│ │ │ │ │ ├── differentiableplanners
│ │ │ │ │ ├── DifferentiableDP.java
│ │ │ │ │ ├── DifferentiableSparseSampling.java
│ │ │ │ │ ├── DifferentiableVI.java
│ │ │ │ │ └── diffvinit
│ │ │ │ │ │ ├── DiffVFRF.java
│ │ │ │ │ │ ├── DifferentiableVInit.java
│ │ │ │ │ │ ├── LinearDiffRFVInit.java
│ │ │ │ │ │ ├── LinearStateDiffVF.java
│ │ │ │ │ │ └── VanillaDiffVinit.java
│ │ │ │ │ └── support
│ │ │ │ │ ├── BoltzmannPolicyGradient.java
│ │ │ │ │ ├── DifferentiableRF.java
│ │ │ │ │ ├── QGradientPlanner.java
│ │ │ │ │ ├── QGradientPlannerFactory.java
│ │ │ │ │ └── QGradientTuple.java
│ │ │ ├── learning
│ │ │ │ ├── LearningAgent.java
│ │ │ │ ├── LearningAgentFactory.java
│ │ │ │ ├── actorcritic
│ │ │ │ │ ├── Actor.java
│ │ │ │ │ ├── ActorCritic.java
│ │ │ │ │ ├── Critic.java
│ │ │ │ │ ├── CritiqueResult.java
│ │ │ │ │ ├── actor
│ │ │ │ │ │ └── BoltzmannActor.java
│ │ │ │ │ └── critics
│ │ │ │ │ │ ├── TDLambda.java
│ │ │ │ │ │ └── TimeIndexedTDLambda.java
│ │ │ │ ├── lspi
│ │ │ │ │ ├── LSPI.java
│ │ │ │ │ ├── SARSCollector.java
│ │ │ │ │ └── SARSData.java
│ │ │ │ ├── modellearning
│ │ │ │ │ ├── Model.java
│ │ │ │ │ ├── ModelLearningPlanner.java
│ │ │ │ │ ├── ModeledDomainGenerator.java
│ │ │ │ │ ├── artdp
│ │ │ │ │ │ └── ARTDP.java
│ │ │ │ │ ├── modelplanners
│ │ │ │ │ │ └── VIModelLearningPlanner.java
│ │ │ │ │ ├── models
│ │ │ │ │ │ └── TabularModel.java
│ │ │ │ │ └── rmax
│ │ │ │ │ │ ├── PotentialShapedRMax.java
│ │ │ │ │ │ └── UnmodeledFavoredPolicy.java
│ │ │ │ └── tdmethods
│ │ │ │ │ ├── QLearning.java
│ │ │ │ │ ├── QLearningStateNode.java
│ │ │ │ │ ├── SarsaLam.java
│ │ │ │ │ └── vfa
│ │ │ │ │ └── GradientDescentSarsaLam.java
│ │ │ ├── options
│ │ │ │ ├── DeterministicTerminationOption.java
│ │ │ │ ├── MacroAction.java
│ │ │ │ ├── Option.java
│ │ │ │ ├── PolicyDefinedSubgoalOption.java
│ │ │ │ └── support
│ │ │ │ │ ├── DirectOptionTerminateMapper.java
│ │ │ │ │ ├── EnvironmentOptionOutcome.java
│ │ │ │ │ ├── LocalSubgoalRF.java
│ │ │ │ │ ├── LocalSubgoalTF.java
│ │ │ │ │ └── OptionEvaluatingRF.java
│ │ │ ├── planning
│ │ │ │ ├── Planner.java
│ │ │ │ ├── deterministic
│ │ │ │ │ ├── DDPlannerPolicy.java
│ │ │ │ │ ├── DeterministicPlanner.java
│ │ │ │ │ ├── MultiStatePrePlanner.java
│ │ │ │ │ ├── SDPlannerPolicy.java
│ │ │ │ │ ├── SearchNode.java
│ │ │ │ │ ├── informed
│ │ │ │ │ │ ├── BestFirst.java
│ │ │ │ │ │ ├── Heuristic.java
│ │ │ │ │ │ ├── NullHeuristic.java
│ │ │ │ │ │ ├── PrioritizedSearchNode.java
│ │ │ │ │ │ └── astar
│ │ │ │ │ │ │ ├── AStar.java
│ │ │ │ │ │ │ ├── DynamicWeightedAStar.java
│ │ │ │ │ │ │ ├── IDAStar.java
│ │ │ │ │ │ │ ├── StaticWeightedAStar.java
│ │ │ │ │ │ │ └── WeightedGreedy.java
│ │ │ │ │ └── uninformed
│ │ │ │ │ │ ├── bfs
│ │ │ │ │ │ └── BFS.java
│ │ │ │ │ │ └── dfs
│ │ │ │ │ │ ├── DFS.java
│ │ │ │ │ │ └── LimitedMemoryDFS.java
│ │ │ │ ├── stochastic
│ │ │ │ │ ├── ActionTransitions.java
│ │ │ │ │ ├── DynamicProgramming.java
│ │ │ │ │ ├── HashedTransitionProbability.java
│ │ │ │ │ ├── montecarlo
│ │ │ │ │ │ └── uct
│ │ │ │ │ │ │ ├── UCT.java
│ │ │ │ │ │ │ ├── UCTActionNode.java
│ │ │ │ │ │ │ ├── UCTStateNode.java
│ │ │ │ │ │ │ └── UCTTreeWalkPolicy.java
│ │ │ │ │ ├── policyiteration
│ │ │ │ │ │ └── PolicyIteration.java
│ │ │ │ │ ├── rtdp
│ │ │ │ │ │ ├── BFSRTDP.java
│ │ │ │ │ │ ├── BoundedRTDP.java
│ │ │ │ │ │ └── RTDP.java
│ │ │ │ │ ├── sparsesampling
│ │ │ │ │ │ └── SparseSampling.java
│ │ │ │ │ └── valueiteration
│ │ │ │ │ │ ├── PrioritizedSweeping.java
│ │ │ │ │ │ └── ValueIteration.java
│ │ │ │ └── vfa
│ │ │ │ │ └── fittedvi
│ │ │ │ │ ├── FittedVI.java
│ │ │ │ │ ├── SupervisedVFA.java
│ │ │ │ │ └── WekaVFATrainer.java
│ │ │ ├── pomdp
│ │ │ │ ├── BeliefPolicyAgent.java
│ │ │ │ ├── qmdp
│ │ │ │ │ └── QMDP.java
│ │ │ │ └── wrappedmdpalgs
│ │ │ │ │ └── BeliefSparseSampling.java
│ │ │ ├── shaping
│ │ │ │ ├── ShapedRewardFunction.java
│ │ │ │ └── potential
│ │ │ │ │ ├── PotentialFunction.java
│ │ │ │ │ └── PotentialShapedRF.java
│ │ │ └── vfa
│ │ │ │ ├── ActionApproximationResult.java
│ │ │ │ ├── ActionFeaturesQuery.java
│ │ │ │ ├── ApproximationResult.java
│ │ │ │ ├── FeatureDatabase.java
│ │ │ │ ├── FunctionWeight.java
│ │ │ │ ├── StateFeature.java
│ │ │ │ ├── StateToFeatureVectorGenerator.java
│ │ │ │ ├── ValueFunctionApproximation.java
│ │ │ │ ├── WeightGradient.java
│ │ │ │ ├── cmac
│ │ │ │ ├── AttributeTileSpecification.java
│ │ │ │ ├── CMACFeatureDatabase.java
│ │ │ │ ├── FVCMACFeatureDatabase.java
│ │ │ │ ├── FVTiling.java
│ │ │ │ └── Tiling.java
│ │ │ │ ├── common
│ │ │ │ ├── ConcatenatedObjectFeatureVectorGenerator.java
│ │ │ │ ├── FDFeatureVectorGenerator.java
│ │ │ │ ├── FVToFeatureDatabase.java
│ │ │ │ ├── LinearFVVFA.java
│ │ │ │ ├── LinearVFA.java
│ │ │ │ └── PFFeatureVectorGenerator.java
│ │ │ │ ├── fourier
│ │ │ │ ├── FourierBasis.java
│ │ │ │ └── FourierBasisLearningRateWrapper.java
│ │ │ │ └── rbf
│ │ │ │ ├── DistanceMetric.java
│ │ │ │ ├── FVDistanceMetric.java
│ │ │ │ ├── FVRBF.java
│ │ │ │ ├── FVRBFFeatureDatabase.java
│ │ │ │ ├── RBF.java
│ │ │ │ ├── RBFFeatureDatabase.java
│ │ │ │ ├── functions
│ │ │ │ ├── FVGaussianRBF.java
│ │ │ │ └── GaussianRBF.java
│ │ │ │ └── metrics
│ │ │ │ ├── EuclideanDistance.java
│ │ │ │ └── FVEuclideanDistance.java
│ │ ├── stochasticgames
│ │ │ ├── GameAnalysis.java
│ │ │ ├── JointPolicy.java
│ │ │ ├── PolicyFromJointPolicy.java
│ │ │ ├── agents
│ │ │ │ ├── RandomSGAgent.java
│ │ │ │ ├── SetStrategySGAgent.java
│ │ │ │ ├── interfacing
│ │ │ │ │ └── singleagent
│ │ │ │ │ │ ├── LearningAgentToSGAgentInterface.java
│ │ │ │ │ │ └── SGToSADomain.java
│ │ │ │ ├── madp
│ │ │ │ │ ├── MADPPlanAgentFactory.java
│ │ │ │ │ ├── MADPPlannerFactory.java
│ │ │ │ │ └── MultiAgentDPPlanningAgent.java
│ │ │ │ ├── maql
│ │ │ │ │ ├── MAQLFactory.java
│ │ │ │ │ └── MultiAgentQLearning.java
│ │ │ │ ├── naiveq
│ │ │ │ │ ├── SGNaiveQFactory.java
│ │ │ │ │ ├── SGNaiveQLAgent.java
│ │ │ │ │ └── history
│ │ │ │ │ │ ├── ActionIdMap.java
│ │ │ │ │ │ ├── ParameterNaiveActionIdMap.java
│ │ │ │ │ │ ├── SGQWActionHistory.java
│ │ │ │ │ │ └── SGQWActionHistoryFactory.java
│ │ │ │ └── twoplayer
│ │ │ │ │ ├── repeatedsinglestage
│ │ │ │ │ ├── GrimTrigger.java
│ │ │ │ │ └── TitForTat.java
│ │ │ │ │ └── singlestage
│ │ │ │ │ └── equilibriumplayer
│ │ │ │ │ ├── BimatrixEquilibriumSolver.java
│ │ │ │ │ ├── EquilibriumPlayingSGAgent.java
│ │ │ │ │ └── equilibriumsolvers
│ │ │ │ │ ├── CorrelatedEquilibrium.java
│ │ │ │ │ ├── MaxMax.java
│ │ │ │ │ ├── MinMax.java
│ │ │ │ │ └── Utilitarian.java
│ │ │ ├── auxiliary
│ │ │ │ ├── GameSequenceVisualizer.java
│ │ │ │ └── performance
│ │ │ │ │ ├── AgentFactoryAndType.java
│ │ │ │ │ ├── MultiAgentExperimenter.java
│ │ │ │ │ └── MultiAgentPerformancePlotter.java
│ │ │ ├── madynamicprogramming
│ │ │ │ ├── AgentQSourceMap.java
│ │ │ │ ├── JAQValue.java
│ │ │ │ ├── MADynamicProgramming.java
│ │ │ │ ├── MAQSourcePolicy.java
│ │ │ │ ├── MultiAgentQSourceProvider.java
│ │ │ │ ├── QSourceForSingleAgent.java
│ │ │ │ ├── SGBackupOperator.java
│ │ │ │ ├── backupOperators
│ │ │ │ │ ├── CoCoQ.java
│ │ │ │ │ ├── CorrelatedQ.java
│ │ │ │ │ ├── MaxQ.java
│ │ │ │ │ └── MinMaxQ.java
│ │ │ │ ├── dpplanners
│ │ │ │ │ └── MAValueIteration.java
│ │ │ │ └── policies
│ │ │ │ │ ├── ECorrelatedQJointPolicy.java
│ │ │ │ │ ├── EGreedyJointPolicy.java
│ │ │ │ │ ├── EGreedyMaxWellfare.java
│ │ │ │ │ └── EMinMaxPolicy.java
│ │ │ └── solvers
│ │ │ │ ├── CorrelatedEquilibriumSolver.java
│ │ │ │ ├── GeneralBimatrixSolverTools.java
│ │ │ │ └── MinMaxSolver.java
│ │ └── valuefunction
│ │ │ ├── QFunction.java
│ │ │ ├── QValue.java
│ │ │ ├── ValueFunction.java
│ │ │ └── ValueFunctionInitialization.java
│ │ ├── datastructures
│ │ ├── AlphanumericSorting.java
│ │ ├── BoltzmannDistribution.java
│ │ ├── CommandLineOptions.java
│ │ ├── HashIndexedHeap.java
│ │ ├── HashedAggregator.java
│ │ ├── StochasticTree.java
│ │ └── WekaInterfaces.java
│ │ ├── debugtools
│ │ ├── DPrint.java
│ │ ├── DebugFlags.java
│ │ ├── MyTimer.java
│ │ └── RandomFactory.java
│ │ ├── domain
│ │ ├── singleagent
│ │ │ ├── blockdude
│ │ │ │ ├── BlockDude.java
│ │ │ │ ├── BlockDudeLevelConstructor.java
│ │ │ │ ├── BlockDudeTF.java
│ │ │ │ └── BlockDudeVisualizer.java
│ │ │ ├── blocksworld
│ │ │ │ ├── BlocksWorld.java
│ │ │ │ └── BlocksWorldVisualizer.java
│ │ │ ├── cartpole
│ │ │ │ ├── CartPoleDomain.java
│ │ │ │ ├── CartPoleVisualizer.java
│ │ │ │ ├── InvertedPendulum.java
│ │ │ │ ├── InvertedPendulumVisualizer.java
│ │ │ │ ├── SerializableCartPoleStateFactory.java
│ │ │ │ └── SerializableInvertedPendulumStateFactory.java
│ │ │ ├── frostbite
│ │ │ │ ├── FrostbiteDomain.java
│ │ │ │ ├── FrostbiteRF.java
│ │ │ │ ├── FrostbiteTF.java
│ │ │ │ ├── FrostbiteVisualizer.java
│ │ │ │ └── SerializableFrostbiteStateFactory.java
│ │ │ ├── graphdefined
│ │ │ │ ├── GraphDefinedDomain.java
│ │ │ │ ├── GraphRF.java
│ │ │ │ └── GraphTF.java
│ │ │ ├── gridworld
│ │ │ │ ├── GridWorldDomain.java
│ │ │ │ ├── GridWorldRewardFunction.java
│ │ │ │ ├── GridWorldTerminalFunction.java
│ │ │ │ ├── GridWorldVisualizer.java
│ │ │ │ ├── SerializableGridWorldStateFactory.java
│ │ │ │ └── macro
│ │ │ │ │ ├── MacroCellGridWorld.java
│ │ │ │ │ └── MacroCellVisualizer.java
│ │ │ ├── lunarlander
│ │ │ │ ├── LLVisualizer.java
│ │ │ │ ├── LunarLanderDomain.java
│ │ │ │ ├── LunarLanderRF.java
│ │ │ │ ├── LunarLanderTF.java
│ │ │ │ └── SerializableLunarLanderStateFactory.java
│ │ │ ├── mountaincar
│ │ │ │ ├── MCRandomStateGenerator.java
│ │ │ │ ├── MountainCar.java
│ │ │ │ ├── MountainCarVisualizer.java
│ │ │ │ └── SerializableMountainCarStateFactory.java
│ │ │ ├── pomdp
│ │ │ │ └── tiger
│ │ │ │ │ └── TigerDomain.java
│ │ │ └── tabularized
│ │ │ │ └── TabulatedDomainWrapper.java
│ │ └── stochasticgames
│ │ │ ├── gridgame
│ │ │ ├── GGVisualizer.java
│ │ │ ├── GridGame.java
│ │ │ └── GridGameStandardMechanics.java
│ │ │ └── normalform
│ │ │ └── SingleStageNormalFormGame.java
│ │ ├── oomdp
│ │ ├── auxiliary
│ │ │ ├── DomainGenerator.java
│ │ │ ├── StateAbstraction.java
│ │ │ ├── StateGenerator.java
│ │ │ ├── StateMapping.java
│ │ │ ├── common
│ │ │ │ ├── ConstantStateGenerator.java
│ │ │ │ ├── GoalConditionTF.java
│ │ │ │ ├── NullAbstraction.java
│ │ │ │ ├── NullAbstractionNoCopy.java
│ │ │ │ ├── NullTermination.java
│ │ │ │ ├── RandomStartStateGenerator.java
│ │ │ │ └── SinglePFTF.java
│ │ │ └── stateconditiontest
│ │ │ │ ├── SinglePFSCT.java
│ │ │ │ ├── StateConditionTest.java
│ │ │ │ ├── StateConditionTestIterable.java
│ │ │ │ └── TFGoalCondition.java
│ │ ├── core
│ │ │ ├── AbstractGroundedAction.java
│ │ │ ├── AbstractObjectParameterizedGroundedAction.java
│ │ │ ├── Attribute.java
│ │ │ ├── Domain.java
│ │ │ ├── GroundedProp.java
│ │ │ ├── ObjectClass.java
│ │ │ ├── PropositionalFunction.java
│ │ │ ├── TerminalFunction.java
│ │ │ ├── TransitionProbability.java
│ │ │ ├── objects
│ │ │ │ ├── ImmutableObjectInstance.java
│ │ │ │ ├── MutableObjectInstance.java
│ │ │ │ ├── OOMDPObjectInstance.java
│ │ │ │ └── ObjectInstance.java
│ │ │ ├── states
│ │ │ │ ├── FixedSizeImmutableState.java
│ │ │ │ ├── ImmutableState.java
│ │ │ │ ├── ImmutableStateInterface.java
│ │ │ │ ├── MutableState.java
│ │ │ │ ├── OOMDPState.java
│ │ │ │ └── State.java
│ │ │ └── values
│ │ │ │ ├── DiscreteValue.java
│ │ │ │ ├── DoubleArrayValue.java
│ │ │ │ ├── IntArrayValue.java
│ │ │ │ ├── IntValue.java
│ │ │ │ ├── MultiTargetRelationalValue.java
│ │ │ │ ├── OOMDPValue.java
│ │ │ │ ├── RealValue.java
│ │ │ │ ├── RelationalValue.java
│ │ │ │ ├── StringValue.java
│ │ │ │ ├── UnsetValueException.java
│ │ │ │ └── Value.java
│ │ ├── legacy
│ │ │ ├── StateJSONParser.java
│ │ │ ├── StateParser.java
│ │ │ └── StateYAMLParser.java
│ │ ├── singleagent
│ │ │ ├── Action.java
│ │ │ ├── ActionObserver.java
│ │ │ ├── FullActionModel.java
│ │ │ ├── GroundedAction.java
│ │ │ ├── ObjectParameterizedAction.java
│ │ │ ├── RewardFunction.java
│ │ │ ├── SADomain.java
│ │ │ ├── common
│ │ │ │ ├── GoalBasedRF.java
│ │ │ │ ├── NullAction.java
│ │ │ │ ├── NullRewardFunction.java
│ │ │ │ ├── SimpleAction.java
│ │ │ │ ├── SimpleGroundedAction.java
│ │ │ │ ├── SingleGoalPFRF.java
│ │ │ │ ├── UniformCostRF.java
│ │ │ │ └── VisualActionObserver.java
│ │ │ ├── environment
│ │ │ │ ├── Environment.java
│ │ │ │ ├── EnvironmentObserver.java
│ │ │ │ ├── EnvironmentOutcome.java
│ │ │ │ ├── EnvironmentServer.java
│ │ │ │ ├── SimulatedEnvironment.java
│ │ │ │ ├── StateSettableEnvironment.java
│ │ │ │ └── TaskSettableEnvironment.java
│ │ │ ├── explorer
│ │ │ │ ├── SpecialExplorerAction.java
│ │ │ │ ├── StateResetSpecialAction.java
│ │ │ │ ├── TerminalExplorer.java
│ │ │ │ └── VisualExplorer.java
│ │ │ ├── interfaces
│ │ │ │ └── rlglue
│ │ │ │ │ └── RLGlueEnvironment.java
│ │ │ └── pomdp
│ │ │ │ ├── BeliefAgent.java
│ │ │ │ ├── BeliefMDPGenerator.java
│ │ │ │ ├── ObservationFunction.java
│ │ │ │ ├── PODomain.java
│ │ │ │ ├── SimulatedPOEnvironment.java
│ │ │ │ └── beliefstate
│ │ │ │ ├── BeliefState.java
│ │ │ │ ├── DenseBeliefVector.java
│ │ │ │ ├── EnumerableBeliefState.java
│ │ │ │ └── tabular
│ │ │ │ ├── HashableTabularBeliefStateFactory.java
│ │ │ │ └── TabularBeliefState.java
│ │ ├── statehashing
│ │ │ ├── DiscretizingHashableStateFactory.java
│ │ │ ├── DiscretizingMaskedHashableStateFactory.java
│ │ │ ├── FixedSizeStateHashableStateFactory.java
│ │ │ ├── HashableObject.java
│ │ │ ├── HashableObjectFactory.java
│ │ │ ├── HashableState.java
│ │ │ ├── HashableStateFactory.java
│ │ │ ├── HashableValue.java
│ │ │ ├── HashableValueFactory.java
│ │ │ ├── ImmutableHashableObjectFactory.java
│ │ │ ├── ImmutableStateHashableStateFactory.java
│ │ │ ├── MaskedHashableStateFactory.java
│ │ │ └── SimpleHashableStateFactory.java
│ │ ├── stateserialization
│ │ │ ├── SerializableState.java
│ │ │ ├── SerializableStateFactory.java
│ │ │ └── simple
│ │ │ │ ├── SimpleSerializableState.java
│ │ │ │ ├── SimpleSerializableStateFactory.java
│ │ │ │ ├── SimpleSerializedObjectInstance.java
│ │ │ │ └── SimpleSerializedValue.java
│ │ ├── stochasticgames
│ │ │ ├── AgentFactory.java
│ │ │ ├── InvalidActionException.java
│ │ │ ├── JointAction.java
│ │ │ ├── JointActionModel.java
│ │ │ ├── JointReward.java
│ │ │ ├── SGAgent.java
│ │ │ ├── SGAgentType.java
│ │ │ ├── SGDomain.java
│ │ │ ├── SGStateGenerator.java
│ │ │ ├── World.java
│ │ │ ├── WorldGenerator.java
│ │ │ ├── WorldObserver.java
│ │ │ ├── agentactions
│ │ │ │ ├── GroundedSGAgentAction.java
│ │ │ │ ├── ObParamSGAgentAction.java
│ │ │ │ ├── SGAgentAction.java
│ │ │ │ ├── SimpleGroundedSGAgentAction.java
│ │ │ │ └── SimpleSGAgentAction.java
│ │ │ ├── common
│ │ │ │ ├── AgentFactoryWithSubjectiveReward.java
│ │ │ │ ├── ConstantSGStateGenerator.java
│ │ │ │ ├── StaticRepeatedGameActionModel.java
│ │ │ │ └── VisualWorldObserver.java
│ │ │ ├── explorers
│ │ │ │ ├── HardStateResetSpecialAction.java
│ │ │ │ ├── SGTerminalExplorer.java
│ │ │ │ └── SGVisualExplorer.java
│ │ │ └── tournament
│ │ │ │ ├── MatchEntry.java
│ │ │ │ ├── MatchSelector.java
│ │ │ │ ├── Tournament.java
│ │ │ │ └── common
│ │ │ │ ├── AllPairWiseSameTypeMS.java
│ │ │ │ └── ConstantWorldGenerator.java
│ │ └── visualizer
│ │ │ ├── MultiLayerRenderer.java
│ │ │ ├── ObjectPainter.java
│ │ │ ├── RenderLayer.java
│ │ │ ├── StateActionRenderLayer.java
│ │ │ ├── StateRenderLayer.java
│ │ │ ├── StaticPainter.java
│ │ │ └── Visualizer.java
│ │ └── tutorials
│ │ ├── bd
│ │ └── ExampleGridWorld.java
│ │ ├── bpl
│ │ └── BasicBehavior.java
│ │ ├── cpl
│ │ ├── QLTutorial.java
│ │ └── VITutorial.java
│ │ ├── hgw
│ │ ├── HelloGridWorld.java
│ │ └── PlotTest.java
│ │ ├── scd
│ │ └── ContinuousDomainTutorial.java
│ │ └── video
│ │ └── mc
│ │ └── MCVideo.java
└── ycai87-analysis.pdf
└── README.md
--------------------------------------------------------------------------------
/Assignment1/README.txt:
--------------------------------------------------------------------------------
All of the analysis is done in the Weka GUI. For data preprocessing, the Resample and RemovePercentage filters
are used with the no-replacement option to split the original dataset 70/30 into training and test data.
Training subsets of 10, 20, 30, ..., 90% of the training data are generated with the RemovePercentage filter.
Training and test results can be accessed through the Weka Explorer and Experimenter. J48 is used for the decision
tree, IBk for k-nearest neighbors, AdaBoostM1 for boosting, LibSVM for SVM, and MultilayerPerceptron for the ANN.
Learning-curve results are generated by the Weka Experimenter and plotted in Microsoft Excel.

The results spreadsheet and the accuracy table for the different classifiers are in this folder. The two datasets are also included.
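Note: the split described above can also be scripted instead of done in the GUI. The following is a minimal Jython sketch of the same idea, not part of the original submission; it assumes weka.jar is on the classpath, omits the Resample step, and uses illustrative file names.

# Jython sketch of the 70/30 split and a J48 evaluation, as described above.
# Assumes weka.jar on the classpath; file names are illustrative.
from weka.core.converters import ConverterUtils
from weka.filters import Filter
from weka.filters.unsupervised.instance import RemovePercentage
from weka.classifiers.trees import J48
from weka.classifiers import Evaluation

data = ConverterUtils.DataSource.read("phishing.arff")
data.setClassIndex(data.numAttributes() - 1)

# Remove 30% of the instances, keeping 70% for training.
rp = RemovePercentage()
rp.setPercentage(30.0)
rp.setInputFormat(data)
train = Filter.useFilter(data, rp)

# Invert the selection to keep the removed 30% as the test set.
rp2 = RemovePercentage()
rp2.setPercentage(30.0)
rp2.setInvertSelection(True)
rp2.setInputFormat(data)
test = Filter.useFilter(data, rp2)

tree = J48()
tree.buildClassifier(train)
ev = Evaluation(train)
ev.evaluateModel(tree, test)
print ev.pctCorrect()
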
--------------------------------------------------------------------------------
/Assignment1/result.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment1/result.xlsx
--------------------------------------------------------------------------------
/Assignment1/table.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment1/table.docx
--------------------------------------------------------------------------------
/Assignment1/ycai87-analysis.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment1/ycai87-analysis.pdf
--------------------------------------------------------------------------------
/Assignment2/README.txt:
--------------------------------------------------------------------------------
The data file is phishing.csv.
Download the ABAGAIL package from https://github.com/pushkar/ABAGAIL.
Copy the source code to src/opt/ycai87/.
Compile the ABAGAIL jar together with the code and run the tests.
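Note: once the ABAGAIL jar is built, its optimizers can be driven from Jython as well as from the Java test classes above. This is a hedged sketch modeled on ABAGAIL's own Continuous Peaks example, not the repository's code; it assumes ABAGAIL.jar on the classpath and the constants (N, T, iteration count) are illustrative.

# Jython sketch of randomized hill climbing on Continuous Peaks with ABAGAIL.
from array import array
import opt.example.ContinuousPeaksEvaluationFunction as ContinuousPeaksEvaluationFunction
import dist.DiscreteUniformDistribution as DiscreteUniformDistribution
import opt.DiscreteChangeOneNeighbor as DiscreteChangeOneNeighbor
import opt.GenericHillClimbingProblem as GenericHillClimbingProblem
import opt.RandomizedHillClimbing as RandomizedHillClimbing
import shared.FixedIterationTrainer as FixedIterationTrainer

N, T = 60, 6
ranges = array('i', [2] * N)            # each position is a 0/1 bit
ef = ContinuousPeaksEvaluationFunction(T)
odd = DiscreteUniformDistribution(ranges)
nf = DiscreteChangeOneNeighbor(ranges)
hcp = GenericHillClimbingProblem(ef, odd, nf)

rhc = RandomizedHillClimbing(hcp)
fit = FixedIterationTrainer(rhc, 200000)
fit.train()
print ef.value(rhc.getOptimal())
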
--------------------------------------------------------------------------------
/Assignment2/ycai87-analysis.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment2/ycai87-analysis.pdf
--------------------------------------------------------------------------------
/Assignment3/README.txt:
--------------------------------------------------------------------------------
The experiments are run in Weka and scikit-learn.
Use the Python files to run the clustering analysis.
Use the Java code to run the dimensionality-reduction analysis, which runs J48 with forward search, adding lower-ranked components one by one.
The datasets are attached.
Use the Weka GUI to do PCA, ICA, Random Projection, and Information Gain, and use the corresponding filter to transform the data.
Save the transformed data to ARFF and CSV files to feed the Java or Python code for the dimensionality-reduction and clustering analysis.
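Note: the Weka filtering step above can also be scripted. A hedged sketch for the PCA case only, not part of the original submission; it assumes weka.jar on the classpath, and the file names and variance threshold are illustrative.

# Jython sketch of the Weka PCA transformation described in the README.
from weka.core.converters import ConverterUtils
from weka.filters import Filter
from weka.filters.unsupervised.attribute import PrincipalComponents

data = ConverterUtils.DataSource.read("wisconsin.arff")
data.setClassIndex(data.numAttributes() - 1)

pca = PrincipalComponents()
pca.setVarianceCovered(0.95)   # keep components covering 95% of the variance
pca.setInputFormat(data)
transformed = Filter.useFilter(data, pca)

ConverterUtils.DataSink.write("wisconsin_pca.arff", transformed)
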
--------------------------------------------------------------------------------
/Assignment3/bc_kmeans.py:
--------------------------------------------------------------------------------
from sklearn import datasets

from clustertesters import KMeansTestCluster as kmtc

if __name__ == "__main__":
    breast_cancer = datasets.load_breast_cancer()
    #print breast_cancer
    X, y = breast_cancer.data, breast_cancer.target
    #print X

    tester = kmtc.KMeansTestCluster(X, y, clusters=range(1, 10), plot=True, targetcluster=3, stats=True)
    tester.run()
--------------------------------------------------------------------------------
/Assignment3/br_dr_em.py:
--------------------------------------------------------------------------------
from sklearn import datasets, metrics
from clustertesters import ExpectationMaximizationTestCluster as emtc
import pandas as pd

def encode_target(df, target_column):
    """Add column to df with integers for the target.

    Args
    ----
    df -- pandas DataFrame.
    target_column -- column to map to int, producing
                     new Target column.

    Returns
    -------
    df_mod -- modified DataFrame.
    map_to_int -- mapping of target names to integers.
    """
    df_mod = df.copy()
    targets = df_mod[target_column].unique()
    map_to_int = {name: n for n, name in enumerate(targets)}
    df_mod[target_column].replace(map_to_int, inplace=True)
    return (df_mod, map_to_int)

if __name__ == "__main__":
    letter_recognition = pd.read_csv("wisconsin_ig_reduced.csv")
    dft, mapping = encode_target(letter_recognition, "class")

    X = dft.iloc[:, :-1]
    y = dft.iloc[:, -1]

    tester = emtc.ExpectationMaximizationTestCluster(X, y, clusters=range(1, 11), plot=True, targetcluster=3, stats=True)
    tester.run()
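Note: the KMeansTestCluster / ExpectationMaximizationTestCluster sources themselves are not reproduced in this section of the dump. The following is a minimal sketch of the kind of cluster-count sweep such a tester runs, using the modern scikit-learn API (the original code targets an older Python 2-era sklearn); all names and the random data are illustrative.

# Sketch of a cluster-count sweep like the clustertesters classes perform.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.metrics import silhouette_score

def sweep(X, ks):
    for k in ks:
        km = KMeans(n_clusters=k, n_init=10, random_state=0).fit(X)
        gm = GaussianMixture(n_components=k, random_state=0).fit(X)
        sil = silhouette_score(X, km.labels_) if k > 1 else float("nan")
        # inertia tracks the k-means elbow; BIC scores the mixture model
        print(k, km.inertia_, gm.bic(X), sil)

if __name__ == "__main__":
    X = np.random.rand(200, 5)   # stand-in for the real feature matrix
    sweep(X, range(2, 11))
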
--------------------------------------------------------------------------------
/Assignment3/br_dr_kmeans.py:
--------------------------------------------------------------------------------
import pandas as pd

from clustertesters import KMeansTestCluster as kmtc


def encode_target(df, target_column):
    """Add column to df with integers for the target.

    Args
    ----
    df -- pandas DataFrame.
    target_column -- column to map to int, producing
                     new Target column.

    Returns
    -------
    df_mod -- modified DataFrame.
    map_to_int -- mapping of target names to integers.
    """
    df_mod = df.copy()
    targets = df_mod[target_column].unique()
    map_to_int = {name: n for n, name in enumerate(targets)}
    df_mod[target_column].replace(map_to_int, inplace=True)
    return (df_mod, map_to_int)

if __name__ == "__main__":
    letter_recognition = pd.read_csv("wisconsin_ig_reduced.csv")
    dft, mapping = encode_target(letter_recognition, "class")
    #dft.to_csv('letternew.csv')
    #print dft
    #dft2 = pd.read_csv("phishing.csv")
    X = dft.iloc[:, :-1]
    y = dft.iloc[:, -1]
    #print X
    #print y
    tester = kmtc.KMeansTestCluster(X, y, clusters=range(1, 11), plot=True, targetcluster=2, stats=True)
    tester.run()
--------------------------------------------------------------------------------
/Assignment3/clustertesters/ExpectationMaximizationTestCluster.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment3/clustertesters/ExpectationMaximizationTestCluster.pyc
--------------------------------------------------------------------------------
/Assignment3/clustertesters/KMeansTestCluster.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment3/clustertesters/KMeansTestCluster.pyc
--------------------------------------------------------------------------------
/Assignment3/clustertesters/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment3/clustertesters/__init__.py
--------------------------------------------------------------------------------
/Assignment3/clustertesters/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment3/clustertesters/__init__.pyc
--------------------------------------------------------------------------------
/Assignment3/clustertesters/lr_ExpectationMaximizationTestCluster.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment3/clustertesters/lr_ExpectationMaximizationTestCluster.pyc
--------------------------------------------------------------------------------
/Assignment3/clustertesters/lr_KMeansTestCluster.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment3/clustertesters/lr_KMeansTestCluster.pyc
--------------------------------------------------------------------------------
/Assignment3/lr_dr_em.py:
--------------------------------------------------------------------------------
from sklearn import datasets, metrics
from clustertesters import lr_ExpectationMaximizationTestCluster as emtc
import pandas as pd

def encode_target(df, target_column):
    """Add column to df with integers for the target.

    Args
    ----
    df -- pandas DataFrame.
    target_column -- column to map to int, producing
                     new Target column.

    Returns
    -------
    df_mod -- modified DataFrame.
    map_to_int -- mapping of target names to integers.
    """
    df_mod = df.copy()
    targets = df_mod[target_column].unique()
    map_to_int = {name: n for n, name in enumerate(targets)}
    df_mod[target_column].replace(map_to_int, inplace=True)
    return (df_mod, map_to_int)

if __name__ == "__main__":
    letter_recognition = pd.read_csv("letter_ig_reduced.csv")
    dft, mapping = encode_target(letter_recognition, "class")

    X = dft.iloc[:, :-1]
    y = dft.iloc[:, -1]

    tester = emtc.ExpectationMaximizationTestCluster(X, y, clusters=range(1, 31), plot=True, targetcluster=3, stats=True)
    tester.run()
--------------------------------------------------------------------------------
/Assignment3/lr_dr_kmeans.py:
--------------------------------------------------------------------------------
import pandas as pd

from clustertesters import lr_KMeansTestCluster as kmtc


def encode_target(df, target_column):
    """Add column to df with integers for the target.

    Args
    ----
    df -- pandas DataFrame.
    target_column -- column to map to int, producing
                     new Target column.

    Returns
    -------
    df_mod -- modified DataFrame.
    map_to_int -- mapping of target names to integers.
    """
    df_mod = df.copy()
    targets = df_mod[target_column].unique()
    map_to_int = {name: n for n, name in enumerate(targets)}
    df_mod[target_column].replace(map_to_int, inplace=True)
    return (df_mod, map_to_int)

if __name__ == "__main__":
    letter_recognition = pd.read_csv("letter_ig_reduced.csv")
    dft, mapping = encode_target(letter_recognition, "class")
    #dft.to_csv('letternew.csv')
    #print dft
    #dft2 = pd.read_csv("phishing.csv")
    X = dft.iloc[:, :-1]
    y = dft.iloc[:, -1]
    #print X
    #print y
    tester = kmtc.KMeansTestCluster(X, y, clusters=range(1, 31), plot=True, targetcluster=2, stats=True)
    tester.run()
--------------------------------------------------------------------------------
/Assignment3/lr_em.py:
--------------------------------------------------------------------------------
from sklearn import datasets, metrics
from clustertesters import lr_ExpectationMaximizationTestCluster as emtc
import pandas as pd

def encode_target(df, target_column):
    """Add column to df with integers for the target.

    Args
    ----
    df -- pandas DataFrame.
    target_column -- column to map to int, producing
                     new Target column.

    Returns
    -------
    df_mod -- modified DataFrame.
    map_to_int -- mapping of target names to integers.
    """
    df_mod = df.copy()
    targets = df_mod[target_column].unique()
    map_to_int = {name: n for n, name in enumerate(targets)}
    df_mod[target_column].replace(map_to_int, inplace=True)
    return (df_mod, map_to_int)

if __name__ == "__main__":
    letter_recognition = pd.read_csv("letter.csv")
    dft, mapping = encode_target(letter_recognition, "class")

    X = dft.iloc[:, :-1]
    y = dft.iloc[:, -1]

    tester = emtc.ExpectationMaximizationTestCluster(X, y, clusters=range(1, 31), plot=True, targetcluster=3, stats=True)
    tester.run()
--------------------------------------------------------------------------------
/Assignment3/lr_kmeans.py:
--------------------------------------------------------------------------------
import pandas as pd

from clustertesters import lr_KMeansTestCluster as kmtc


def encode_target(df, target_column):
    """Add column to df with integers for the target.

    Args
    ----
    df -- pandas DataFrame.
    target_column -- column to map to int, producing
                     new Target column.

    Returns
    -------
    df_mod -- modified DataFrame.
    map_to_int -- mapping of target names to integers.
    """
    df_mod = df.copy()
    targets = df_mod[target_column].unique()
    map_to_int = {name: n for n, name in enumerate(targets)}
    df_mod[target_column].replace(map_to_int, inplace=True)
    return (df_mod, map_to_int)

if __name__ == "__main__":
    letter_recognition = pd.read_csv("letter.csv")
    dft, mapping = encode_target(letter_recognition, "class")
    dft.to_csv('letternew.csv')
    #print dft
    #dft2 = pd.read_csv("phishing.csv")
    X = dft.iloc[:, :-1]
    y = dft.iloc[:, -1]
    #print X
    #print y
    tester = kmtc.KMeansTestCluster(X, y, clusters=range(1, 31), plot=True, targetcluster=2, stats=True)
    tester.run()
--------------------------------------------------------------------------------
/Assignment3/ycai87-analysis.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment3/ycai87-analysis.pdf
--------------------------------------------------------------------------------
/Assignment4/README.txt:
--------------------------------------------------------------------------------
The Python files are from Jon Tay, shared on the OMSCS CS 7641 Slack channel.

1. Install Jython.
2. Compile the BURLAP source to a jar file.
3. Use Jython to run the easyGW and hardGW Python files:
   C:\jython2.7.0\bin\jython easyGW.py
   C:\jython2.7.0\bin\jython hardGW.py
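Note: easyGW.py and hardGW.py themselves are not reproduced in this dump. The sketch below shows the general shape of such a Jython driver; it is hedged, assumes the compiled BURLAP jar is on the classpath, and uses stock BURLAP grid-world classes rather than the assignment's BasicGridWorld launcher.

# Jython sketch of a BURLAP value-iteration run, roughly what the GW drivers do.
from burlap.domain.singleagent.gridworld import GridWorldDomain
from burlap.behavior.singleagent.planning.stochastic.valueiteration import ValueIteration
from burlap.oomdp.statehashing import SimpleHashableStateFactory
from burlap.oomdp.singleagent.common import UniformCostRF
from burlap.oomdp.auxiliary.common import NullTermination

gwd = GridWorldDomain(11, 11)
gwd.setMapToFourRooms()
domain = gwd.generateDomain()
s = GridWorldDomain.getOneAgentNoLocationState(domain)
GridWorldDomain.setAgent(s, 0, 0)

# gamma, hashing factory, convergence delta, and max iterations are illustrative
vi = ValueIteration(domain, UniformCostRF(), NullTermination(),
                    0.99, SimpleHashableStateFactory(), 0.001, 100)
policy = vi.planFromState(s)
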
--------------------------------------------------------------------------------
/Assignment4/src/burlap/assignment4/util/AgentPainter.java:
--------------------------------------------------------------------------------
package burlap.assignment4.util;

import java.awt.Color;
import java.awt.Graphics2D;
import java.awt.geom.Ellipse2D;

import burlap.assignment4.BasicGridWorld;
import burlap.oomdp.core.objects.ObjectInstance;
import burlap.oomdp.core.states.State;
import burlap.oomdp.visualizer.ObjectPainter;

public class AgentPainter implements ObjectPainter {

    protected int[][] map;

    public AgentPainter(int[][] map){
        this.map = map;
    }

    @Override
    public void paintObject(Graphics2D g2, State s, ObjectInstance ob,
            float cWidth, float cHeight) {

        //agent will be filled in gray
        g2.setColor(Color.GRAY);

        //set up floats for the width and height of our domain
        float fWidth = this.map.length;
        float fHeight = this.map[0].length;

        //determine the width of a single cell on our canvas
        //such that the whole map can be painted
        float width = cWidth / fWidth;
        float height = cHeight / fHeight;

        int ax = ob.getIntValForAttribute(BasicGridWorld.ATTX);
        int ay = ob.getIntValForAttribute(BasicGridWorld.ATTY);

        //left coordinate of cell on our canvas
        float rx = ax*width;

        //top coordinate of cell on our canvas
        //coordinate system adjustment because the java canvas
        //origin is in the top left instead of the bottom left
        float ry = cHeight - height - ay*height;

        //paint the agent as a filled ellipse
        g2.fill(new Ellipse2D.Float(rx, ry, width, height));

    }
}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/assignment4/util/AtLocation.java:
--------------------------------------------------------------------------------
package burlap.assignment4.util;

import burlap.assignment4.BasicGridWorld;
import burlap.oomdp.core.Domain;
import burlap.oomdp.core.PropositionalFunction;
import burlap.oomdp.core.objects.ObjectInstance;
import burlap.oomdp.core.states.State;

public class AtLocation extends PropositionalFunction {

    public AtLocation(Domain domain) {
        super(BasicGridWorld.PFAT, domain, new String[] { BasicGridWorld.CLASSAGENT, BasicGridWorld.CLASSLOCATION });
    }

    @Override
    public boolean isTrue(State s, String... params) {
        ObjectInstance agent = s.getObject(params[0]);
        ObjectInstance location = s.getObject(params[1]);

        int ax = agent.getIntValForAttribute(BasicGridWorld.ATTX);
        int ay = agent.getIntValForAttribute(BasicGridWorld.ATTY);

        int lx = location.getIntValForAttribute(BasicGridWorld.ATTX);
        int ly = location.getIntValForAttribute(BasicGridWorld.ATTY);

        return ax == lx && ay == ly;
    }

}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/assignment4/util/BasicRewardFunction.java:
--------------------------------------------------------------------------------
package burlap.assignment4.util;

import burlap.assignment4.BasicGridWorld;
import burlap.oomdp.core.objects.ObjectInstance;
import burlap.oomdp.core.states.State;
import burlap.oomdp.singleagent.GroundedAction;
import burlap.oomdp.singleagent.RewardFunction;

public class BasicRewardFunction implements RewardFunction {

    int goalX;
    int goalY;
    int[][] map;

    public BasicRewardFunction(int goalX, int goalY, int[][] map) {
        this.goalX = goalX;
        this.goalY = goalY;
        this.map = map;
    }

    @Override
    public double reward(State s, GroundedAction a, State sprime) {

        // get location of agent in next state
        ObjectInstance agent = sprime.getFirstObjectOfClass(BasicGridWorld.CLASSAGENT);
        int ax = agent.getIntValForAttribute(BasicGridWorld.ATTX);
        int ay = agent.getIntValForAttribute(BasicGridWorld.ATTY);

        // are they at goal location?
        if (ax == this.goalX && ay == this.goalY) {
            return 100.;
        }
        // negative map cells give a scaled penalty
        if (map[ax][ay] < 0) {
            return map[ax][ay] * 10.;
        }

        return -1;
    }

}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/assignment4/util/BasicTerminalFunction.java:
--------------------------------------------------------------------------------
package burlap.assignment4.util;

import burlap.assignment4.BasicGridWorld;
import burlap.oomdp.core.TerminalFunction;
import burlap.oomdp.core.objects.ObjectInstance;
import burlap.oomdp.core.states.State;

public class BasicTerminalFunction implements TerminalFunction {

    int goalX;
    int goalY;

    public BasicTerminalFunction(int goalX, int goalY) {
        this.goalX = goalX;
        this.goalY = goalY;
    }

    @Override
    public boolean isTerminal(State s) {

        // get location of agent in next state
        ObjectInstance agent = s.getFirstObjectOfClass(BasicGridWorld.CLASSAGENT);
        int ax = agent.getIntValForAttribute(BasicGridWorld.ATTX);
        int ay = agent.getIntValForAttribute(BasicGridWorld.ATTY);

        // are they at goal location?
        if (ax == this.goalX && ay == this.goalY) {
            return true;
        }

        return false;
    }

}
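Note: taken together, the reward and terminal functions above define the task. A hedged Jython-style usage sketch follows; the goal coordinates and the map are stand-ins for the values the launcher supplies.

# Illustrative wiring of the two classes above (values are stand-ins).
from burlap.assignment4.util import BasicRewardFunction, BasicTerminalFunction

userMap = [[0, 0, 0], [0, 1, 0], [0, 0, 0]]   # 0 = open cell, 1 = wall
rf = BasicRewardFunction(2, 2, userMap)        # +100 on reaching cell (2, 2)
tf = BasicTerminalFunction(2, 2)               # episode ends at the goal
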
--------------------------------------------------------------------------------
/Assignment4/src/burlap/assignment4/util/LocationPainter.java:
--------------------------------------------------------------------------------
package burlap.assignment4.util;

import java.awt.Color;
import java.awt.Graphics2D;
import java.awt.geom.Rectangle2D;

import burlap.assignment4.BasicGridWorld;
import burlap.oomdp.core.objects.ObjectInstance;
import burlap.oomdp.core.states.State;
import burlap.oomdp.visualizer.ObjectPainter;


public class LocationPainter implements ObjectPainter {

    int[][] map;

    public LocationPainter(int[][] map){
        this.map = map;
    }

    @Override
    public void paintObject(Graphics2D g2, State s, ObjectInstance ob,
            float cWidth, float cHeight) {

        //locations will be filled in blue
        g2.setColor(Color.BLUE);

        //set up floats for the width and height of our domain
        float fWidth = this.map.length;
        float fHeight = this.map[0].length;

        //determine the width of a single cell on our canvas
        //such that the whole map can be painted
        float width = cWidth / fWidth;
        float height = cHeight / fHeight;

        int ax = ob.getIntValForAttribute(BasicGridWorld.ATTX);
        int ay = ob.getIntValForAttribute(BasicGridWorld.ATTY);

        //left coordinate of cell on our canvas
        float rx = ax*width;

        //top coordinate of cell on our canvas
        //coordinate system adjustment because the java canvas
        //origin is in the top left instead of the bottom left
        float ry = cHeight - height - ay*height;

        //paint the rectangle
        g2.fill(new Rectangle2D.Float(rx, ry, width, height));

    }

}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/assignment4/util/WallPainter.java:
--------------------------------------------------------------------------------
package burlap.assignment4.util;

import java.awt.Color;
import java.awt.Graphics2D;
import java.awt.geom.Rectangle2D;

import burlap.oomdp.core.states.State;
import burlap.oomdp.visualizer.StaticPainter;


public class WallPainter implements StaticPainter {

    private int[][] map;

    public WallPainter(int[][] map){
        this.map = map;
    }

    @Override
    public void paint(Graphics2D g2, State s, float cWidth, float cHeight) {

        //walls will be filled in black
        g2.setColor(Color.BLACK);

        //set up floats for the width and height of our domain
        float fWidth = this.map.length;
        float fHeight = this.map[0].length;

        //determine the width of a single cell
        //on our canvas such that the whole map can be painted
        float width = cWidth / fWidth;
        float height = cHeight / fHeight;

        //pass through each cell of our map and if it's a wall, paint a
        //black rectangle of dimension width x height on our canvas
        for(int i = 0; i < this.map.length; i++){
            for(int j = 0; j < this.map[0].length; j++){

                //is there a wall here?
                if(this.map[i][j] == 1){

                    //left coordinate of cell on our canvas
                    float rx = i*width;

                    //top coordinate of cell on our canvas
                    //coordinate system adjustment because the java canvas
                    //origin is in the top left instead of the bottom left
                    float ry = cHeight - height - j*height;

                    //paint the rectangle
                    g2.fill(new Rectangle2D.Float(rx, ry, width, height));

                }

            }
        }

    }

}
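Note: the painters above are meant to be stacked into a render layer. A hedged sketch of the usual BURLAP wiring; the CLASSAGENT / CLASSLOCATION constants come from BasicGridWorld (not reproduced here), and userMap is the grid from the earlier sketch.

# Illustrative assembly of the painters above into a BURLAP visualizer.
from burlap.oomdp.visualizer import StateRenderLayer, Visualizer
from burlap.assignment4.util import WallPainter, AgentPainter, LocationPainter
from burlap.assignment4 import BasicGridWorld

rl = StateRenderLayer()
rl.addStaticPainter(WallPainter(userMap))
rl.addObjectClassPainter(BasicGridWorld.CLASSAGENT, AgentPainter(userMap))
rl.addObjectClassPainter(BasicGridWorld.CLASSLOCATION, LocationPainter(userMap))
v = Visualizer(rl)
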
--------------------------------------------------------------------------------
/Assignment4/src/burlap/behavior/learningrate/ConstantLR.java:
--------------------------------------------------------------------------------
package burlap.behavior.learningrate;

import burlap.oomdp.core.AbstractGroundedAction;
import burlap.oomdp.core.states.State;


/**
 * A class for specifying a constant learning rate that never changes.
 * @author James MacGlashan
 *
 */
public class ConstantLR implements LearningRate {

    public double learningRate = 0.1;

    /**
     * Constructs a constant learning rate of 0.1
     */
    public ConstantLR(){
        //do nothing
    }

    /**
     * Constructs a constant learning rate for the given value
     * @param learningRate the constant learning rate to use
     */
    public ConstantLR(Double learningRate){
        this.learningRate = learningRate;
    }

    @Override
    public double peekAtLearningRate(State s, AbstractGroundedAction ga) {
        return this.learningRate;
    }

    @Override
    public double pollLearningRate(int agentTime, State s, AbstractGroundedAction ga) {
        return this.learningRate;
    }

    @Override
    public void resetDecay() {
        //no change needed
    }

    @Override
    public double peekAtLearningRate(int featureId) {
        return this.learningRate;
    }

    @Override
    public double pollLearningRate(int agentTime, int featureId) {
        return this.learningRate;
    }

}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/behavior/policy/GreedyDeterministicQPolicy.java:
--------------------------------------------------------------------------------
package burlap.behavior.policy;

import java.util.List;

import javax.management.RuntimeErrorException;

import burlap.behavior.singleagent.MDPSolverInterface;
import burlap.behavior.valuefunction.QValue;
import burlap.behavior.valuefunction.QFunction;
import burlap.oomdp.core.AbstractGroundedAction;
import burlap.oomdp.core.states.State;
import burlap.oomdp.singleagent.GroundedAction;


/**
 * A greedy policy that breaks ties by choosing the first action with the maximum value. This class requires a QComputablePlanner
 * @author James MacGlashan
 *
 */
public class GreedyDeterministicQPolicy extends Policy implements SolverDerivedPolicy {

    protected QFunction qplanner;

    public GreedyDeterministicQPolicy() {
        qplanner = null;
    }

    /**
     * Initializes with a QComputablePlanner
     * @param qplanner the QComputablePlanner to use
     */
    public GreedyDeterministicQPolicy(QFunction qplanner){
        this.qplanner = qplanner;
    }

    @Override
    public void setSolver(MDPSolverInterface solver){

        if(!(solver instanceof QFunction)){
            throw new RuntimeErrorException(new Error("Planner is not a QComputablePlanner"));
        }

        this.qplanner = (QFunction) solver;
    }


    @Override
    public AbstractGroundedAction getAction(State s) {

        List<QValue> qValues = this.qplanner.getQs(s);
        double maxQV = Double.NEGATIVE_INFINITY;
        QValue maxQ = null;
        for(QValue q : qValues){
            if(q.q > maxQV){
                maxQV = q.q;
                maxQ = q;
            }
        }

        return ((GroundedAction)maxQ.a).translateParameters(maxQ.s, s);
    }

    @Override
    public List<ActionProb> getActionDistributionForState(State s) {
        return this.getDeterministicPolicy(s);
    }

    @Override
    public boolean isStochastic() {
        return false;
    }

    @Override
    public boolean isDefinedFor(State s) {
        return true; //can always find q-values with default value
    }

}
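Note: a hedged usage sketch for the policy above, pairing it with any solver that implements QFunction; here vi and s are the value-iteration planner and start state from the earlier sketch.

# Illustrative use of GreedyDeterministicQPolicy with a QFunction planner.
from burlap.behavior.policy import GreedyDeterministicQPolicy

vi.planFromState(s)                       # compute Q-values first
policy = GreedyDeterministicQPolicy(vi)   # ties break to the first max action
a = policy.getAction(s)
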
--------------------------------------------------------------------------------
/Assignment4/src/burlap/behavior/policy/SolverDerivedPolicy.java:
--------------------------------------------------------------------------------
package burlap.behavior.policy;

import burlap.behavior.singleagent.MDPSolverInterface;


/**
 * An interface for defining policies that refer to {@link burlap.behavior.singleagent.MDPSolverInterface}
 * objects to define the policy. For example, selecting actions based on the maximum Q-value that a solver computed.
 * @author James MacGlashan
 *
 */
public interface SolverDerivedPolicy {
    /**
     * Sets the valueFunction whose results affect this policy.
     * @param solver the solver from which this policy is derived
     */
    public void setSolver(MDPSolverInterface solver);
}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/behavior/singleagent/auxiliary/performance/ExperimentalEnvironment.java:
--------------------------------------------------------------------------------
package burlap.behavior.singleagent.auxiliary.performance;

/**
 * An interface to be used in conjunction with {@link burlap.oomdp.singleagent.environment.Environment} implementations
 * that can accept a message informing the environment that a new experiment for a {@link burlap.behavior.singleagent.learning.LearningAgent} has started.
 * This is useful when comparing multiple agents and the same initial state sequence is desired.
 * @author James MacGlashan.
 */
public interface ExperimentalEnvironment {

    /**
     * Tells this {@link burlap.oomdp.singleagent.environment.Environment} that an experiment with a new {@link burlap.behavior.singleagent.learning.LearningAgent}
     * has begun.
     */
    void startNewExperiment();
}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/behavior/singleagent/auxiliary/performance/PerformanceMetric.java:
--------------------------------------------------------------------------------
package burlap.behavior.singleagent.auxiliary.performance;

/**
 * Enumerator for the types of statistics that can be plotted by {@link PerformancePlotter}.
 * @author James MacGlashan
 *
 */
public enum PerformanceMetric {
    CUMULATIVEREWARDPERSTEP,
    CUMULTAIVEREWARDPEREPISODE,
    AVERAGEEPISODEREWARD,
    MEDIANEPISODEREWARD,
    CUMULATIVESTEPSPEREPISODE,
    STEPSPEREPISODE;
}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/behavior/singleagent/auxiliary/performance/TrialMode.java:
--------------------------------------------------------------------------------
package burlap.behavior.singleagent.auxiliary.performance;


/**
 * Enumerator for specifying what kinds of plots for each {@link PerformanceMetric} will be plotted by {@link PerformancePlotter}.
 * The MOSTRECENTTTRIALONLY mode will result in only the most recent trial's performance being displayed. TRIALAVERAGESONLY will
 * result in only plots for the trial averages to be shown. MOSTRECENTANDAVERAGE will result in both the most recent trial and the trial
 * average plots to be shown.
 * @author James MacGlashan
 *
 */
public enum TrialMode {
    MOSTRECENTTTRIALONLY,
    TRIALAVERAGESONLY,
    MOSTRECENTANDAVERAGE;

    /**
     * Returns true if the most recent trial plots will be plotted by this mode.
     * @return true if the most recent trial plots will be plotted by this mode; false otherwise.
     */
    public boolean mostRecentTrialEnabled(){
        return this == MOSTRECENTTTRIALONLY || this == MOSTRECENTANDAVERAGE;
    }


    /**
     * Returns true if the trial average plots will be plotted by this mode.
     * @return true if the trial average plots will be plotted by this mode; false otherwise.
     */
    public boolean averagesEnabled(){
        return this == TRIALAVERAGESONLY || this == MOSTRECENTANDAVERAGE;
    }
}
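Note: PerformanceMetric and TrialMode are consumed by LearningAlgorithmExperimenter (listed in the tree above). A hedged sketch of the standard BURLAP wiring; env and qlFactory (an Environment and a LearningAgentFactory) are assumed to be set up elsewhere, and the plot dimensions are illustrative.

# Illustrative wiring of the two enums above into an experimenter run.
from burlap.behavior.singleagent.auxiliary.performance import (
    LearningAlgorithmExperimenter, PerformanceMetric, TrialMode)

exp = LearningAlgorithmExperimenter(env, 10, 100, [qlFactory])
exp.setUpPlottingConfiguration(500, 250, 2, 1000,
    TrialMode.MOSTRECENTANDAVERAGE,
    PerformanceMetric.CUMULATIVESTEPSPEREPISODE,
    PerformanceMetric.AVERAGEEPISODEREWARD)
exp.startExperiment()
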
--------------------------------------------------------------------------------
/Assignment4/src/burlap/behavior/singleagent/auxiliary/valuefunctionvis/StatePolicyPainter.java:
--------------------------------------------------------------------------------
package burlap.behavior.singleagent.auxiliary.valuefunctionvis;

import java.awt.Graphics2D;

import burlap.behavior.policy.Policy;
import burlap.oomdp.core.states.State;

/**
 * An interface for painting a representation of the policy for a specific state onto a 2D Graphics context.
 * @author James MacGlashan
 *
 */
public interface StatePolicyPainter {

    /**
     * Paints a representation of the given policy for a specific state to a 2D graphics context.
     * @param g2 graphics context to which the object should be painted
     * @param s the state of the object to be painted
     * @param policy the policy that can be used on state s
     * @param cWidth width of the canvas size
     * @param cHeight height of the canvas size
     */
    public void paintStatePolicy(Graphics2D g2, State s, Policy policy, float cWidth, float cHeight);

}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/behavior/singleagent/auxiliary/valuefunctionvis/StateValuePainter.java:
--------------------------------------------------------------------------------
package burlap.behavior.singleagent.auxiliary.valuefunctionvis;

import java.awt.Graphics2D;

import burlap.oomdp.core.states.State;


/**
 * An abstract class for defining the interface and common methods to paint the representation of the value function for a specific state onto
 * a 2D graphics context.
 * @author James MacGlashan
 *
 */
public abstract class StateValuePainter {

    /**
     * Indicates whether this painter should scale its rendering of values to whatever it is told the minimum and maximum values are.
     */
    protected boolean shouldRescaleValues = true;


    /**
     * Paints the representation of a value function for a specific state.
     * @param g2 graphics context to which the object should be painted
     * @param s the state of the object to be painted
     * @param value the value function evaluation of state s
     * @param cWidth width of the canvas size
     * @param cHeight height of the canvas size
     */
    public abstract void paintStateValue(Graphics2D g2, State s, double value, float cWidth, float cHeight);

    /**
     * Used to tell this painter that it should render state values so that the minimum possible value is lowerValue and the maximum is upperValue.
     * @param lowerValue the minimum value of state values
     * @param upperValue the maximum value of state values
     */
    public abstract void rescale(double lowerValue, double upperValue);


    /**
     * Enabling value rescaling allows the painter to adjust to the minimum and maximum values passed to it.
     * @param rescale whether this painter should rescale to the minimum and maximum value of the value function.
     */
    public void useValueRescaling(boolean rescale){
        this.shouldRescaleValues = rescale;
    }

}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/behavior/singleagent/auxiliary/valuefunctionvis/StaticDomainPainter.java:
--------------------------------------------------------------------------------
package burlap.behavior.singleagent.auxiliary.valuefunctionvis;

import java.awt.Graphics2D;


/**
 * An interface for painting general domain information to a 2D graphics context.
 * @author James MacGlashan
 *
 */
public interface StaticDomainPainter {

    /**
     * Use to paint general domain information to a 2D graphics context.
     * @param g2 graphics context to which the static data should be painted
     * @param cWidth the width of the canvas
     * @param cHeight the height of the canvas
     */
    public void paint(Graphics2D g2, float cWidth, float cHeight);

}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/behavior/singleagent/auxiliary/valuefunctionvis/common/ActionGlyphPainter.java:
--------------------------------------------------------------------------------
package burlap.behavior.singleagent.auxiliary.valuefunctionvis.common;

import java.awt.Graphics2D;


/**
 * An interface for painting glyphs that correspond to actions.
 * @author James MacGlashan
 *
 */
public interface ActionGlyphPainter {
    /**
     * Called to paint a glyph in the rectangle defined by the top left origin (x,y) with the given width and height.
     * @param g2 the graphics context to paint to
     * @param x the left of the rectangle origin
     * @param y the top of the rectangle origin
     * @param width the width of the rectangle
     * @param height the height of the rectangle.
     */
    public void paintGlyph(Graphics2D g2, float x, float y, float width, float height);
}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/behavior/singleagent/auxiliary/valuefunctionvis/common/ColorBlend.java:
--------------------------------------------------------------------------------
package burlap.behavior.singleagent.auxiliary.valuefunctionvis.common;

import java.awt.Color;

/**
 * An interface for defining methods that return a color for a given double value.
 * @author James MacGlashan
 *
 */
public interface ColorBlend {

    /**
     * Returns a {@link java.awt.Color} for a given double value
     * @param v the input double value
     * @return a {@link java.awt.Color} for a given double value
     */
    public Color color(double v);

    /**
     * Tells this object the minimum value and the maximum value it can receive.
     * @param minV the minimum value
     * @param maxV the maximum value
     */
    public void rescale(double minV, double maxV);
}
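Note: ColorBlend's main implementation is LandmarkColorBlendInterpolation (listed in the tree above). A hedged sketch of the usual value-function visualization wiring from the BURLAP tutorials; states and valueFunction come from a planner run and are not defined here.

# Illustrative value-function visualization built on the ColorBlend interface.
from java.awt import Color
from burlap.behavior.singleagent.auxiliary.valuefunctionvis import ValueFunctionVisualizerGUI
from burlap.behavior.singleagent.auxiliary.valuefunctionvis.common import (
    LandmarkColorBlendInterpolation, StateValuePainter2D)

blend = LandmarkColorBlendInterpolation()
blend.addNextLandMark(0.0, Color.RED)    # low values render red
blend.addNextLandMark(1.0, Color.BLUE)   # high values render blue

svp = StateValuePainter2D(blend)
gui = ValueFunctionVisualizerGUI(states, svp, valueFunction)
gui.initGUI()
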
14 | * 15 | * @author James MacGlashan. 16 | */ 17 | public class DiffVFRF extends DifferentiableRF { 18 | 19 | protected RewardFunction objectiveRF; 20 | protected DifferentiableVInit.ParamedDiffVInit diffVInit; 21 | 22 | 23 | public DiffVFRF(RewardFunction objectiveRF, DifferentiableVInit.ParamedDiffVInit diffVinit){ 24 | this.objectiveRF = objectiveRF; 25 | this.diffVInit = diffVinit; 26 | 27 | this.dim = diffVinit.getParameterDimension(); 28 | this.parameters = diffVinit.getParameters(); 29 | } 30 | 31 | @Override 32 | public double[] getGradient(State s, GroundedAction ga, State sp) { 33 | return new double[this.dim]; 34 | } 35 | 36 | @Override 37 | protected DifferentiableRF copyHelper() { 38 | return null; 39 | } 40 | 41 | @Override 42 | public double reward(State s, GroundedAction a, State sprime) { 43 | return this.objectiveRF.reward(s, a, sprime); 44 | } 45 | 46 | 47 | @Override 48 | public void setParameters(double[] parameters) { 49 | super.setParameters(parameters); 50 | this.diffVInit.setParameters(parameters); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learnfromdemo/mlirl/differentiableplanners/diffvinit/LinearStateDiffVF.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learnfromdemo.mlirl.differentiableplanners.diffvinit; 2 | 3 | import burlap.behavior.singleagent.vfa.StateToFeatureVectorGenerator; 4 | import burlap.oomdp.core.AbstractGroundedAction; 5 | import burlap.oomdp.core.states.State; 6 | 7 | /** 8 | * A class for defining a (differentiable) linear function over state features for value function initialization. This class is useful 9 | * for learning the value function initialization for leaf nodes of a finite horizon valueFunction with {@link burlap.behavior.singleagent.learnfromdemo.mlirl.MLIRL}. 10 | * @author James MacGlashan. 11 | */ 12 | public class LinearStateDiffVF extends DifferentiableVInit.ParamedDiffVInit { 13 | 14 | 15 | /** 16 | * The state feature vector generator over which the linear function operates 17 | */ 18 | protected StateToFeatureVectorGenerator fvgen; 19 | 20 | 21 | /** 22 | * Initializes with the state feature vector generator over which the linear function is defined and the dimensionality of it. 23 | * @param fvgen the state feature vector generator over which the linear function is defined. 
24 | * @param dim the dimensionality of the feature vector/parameters 25 | */ 26 | public LinearStateDiffVF(StateToFeatureVectorGenerator fvgen, int dim){ 27 | this.dim = dim; 28 | this.parameters = new double[dim]; 29 | this.fvgen = fvgen; 30 | } 31 | 32 | @Override 33 | public double[] getVGradient(State s) { 34 | return this.fvgen.generateFeatureVectorFrom(s); 35 | } 36 | 37 | @Override 38 | public double[] getQGradient(State s, AbstractGroundedAction ga) { 39 | return this.fvgen.generateFeatureVectorFrom(s); 40 | } 41 | 42 | @Override 43 | public double value(State s) { 44 | 45 | double [] features = this.fvgen.generateFeatureVectorFrom(s); 46 | 47 | double sum = 0.; 48 | for(int i = 0; i < features.length; i++){ 49 | sum += features[i] * this.parameters[i]; 50 | } 51 | return sum; 52 | } 53 | 54 | @Override 55 | public double qValue(State s, AbstractGroundedAction a) { 56 | return this.value(s); 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learnfromdemo/mlirl/differentiableplanners/diffvinit/VanillaDiffVinit.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learnfromdemo.mlirl.differentiableplanners.diffvinit; 2 | 3 | import burlap.behavior.valuefunction.ValueFunctionInitialization; 4 | import burlap.behavior.singleagent.learnfromdemo.mlirl.support.DifferentiableRF; 5 | import burlap.oomdp.core.AbstractGroundedAction; 6 | import burlap.oomdp.core.states.State; 7 | 8 | /** 9 | * A class for the default condition when a value function initialization returns an unparameterized value 10 | * for each state, but must be differentiable 11 | * with respect to the reward function parameters for use with a differentiable finite horizon valueFunction. 12 | * @author James MacGlashan. 13 | */ 14 | public class VanillaDiffVinit implements DifferentiableVInit { 15 | 16 | 17 | /** 18 | * The source value function initialization. 19 | */ 20 | protected ValueFunctionInitialization vinit; 21 | 22 | /** 23 | * The differentiable reward function that defines the parameter space over which this value function 24 | * initialization must differentiate. 25 | */ 26 | protected DifferentiableRF rf; 27 | 28 | 29 | /** 30 | * Initializes. 
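 * Because the wrapped initialization is unparameterized, the gradient methods of this class return zero vectors whose
 * length is the reward function's parameter dimension.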
31 | * @param vinit The vanilla unparameterized value function initialization 32 | * @param rf the differentiable reward function that defines the total parameter space 33 | */ 34 | public VanillaDiffVinit(ValueFunctionInitialization vinit, DifferentiableRF rf) { 35 | this.vinit = vinit; 36 | this.rf = rf; 37 | } 38 | 39 | @Override 40 | public double[] getVGradient(State s) { 41 | return new double[rf.getParameterDimension()]; 42 | } 43 | 44 | @Override 45 | public double[] getQGradient(State s, AbstractGroundedAction ga) { 46 | return new double[rf.getParameterDimension()]; 47 | } 48 | 49 | @Override 50 | public double value(State s) { 51 | return this.vinit.value(s); 52 | } 53 | 54 | @Override 55 | public double qValue(State s, AbstractGroundedAction a) { 56 | return this.vinit.qValue(s, a); 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learnfromdemo/mlirl/support/QGradientPlanner.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learnfromdemo.mlirl.support; 2 | 3 | import burlap.behavior.valuefunction.QFunction; 4 | import burlap.oomdp.core.states.State; 5 | import burlap.oomdp.singleagent.GroundedAction; 6 | 7 | import java.util.List; 8 | 9 | /** 10 | * An interface for a planner that can produce Q-value gradients. 11 | * @author James MacGlashan. 12 | */ 13 | public interface QGradientPlanner extends QFunction { 14 | 15 | 16 | /** 17 | * Returns the list of Q-value gradients (returned as {@link burlap.behavior.singleagent.learnfromdemo.mlirl.support.QGradientTuple} objects) for each action permissible in the given state. 18 | * @param s the state for which Q-value gradients are to be returned. 19 | * @return the list of Q-value gradients for each action permissible in the given state. 20 | */ 21 | public List<QGradientTuple> getAllQGradients(State s); 22 | 23 | 24 | /** 25 | * Returns the Q-value gradient ({@link burlap.behavior.singleagent.learnfromdemo.mlirl.support.QGradientTuple}) for the given state and action. 26 | * @param s the state for which the Q-value gradient is to be returned 27 | * @param a the action for which the Q-value gradient is to be returned. 28 | * @return the Q-value gradient for the given state and action. 29 | */ 30 | public QGradientTuple getQGradient(State s, GroundedAction a); 31 | 32 | 33 | /** 34 | * Sets this planner's Boltzmann beta parameter used to compute gradients. As beta gets larger, the policy becomes more deterministic. 35 | * @param beta the value to which this planner's Boltzmann beta parameter will be set 36 | */ 37 | public void setBoltzmannBetaParameter(double beta); 38 | 39 | } 40 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learnfromdemo/mlirl/support/QGradientTuple.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learnfromdemo.mlirl.support; 2 | 3 | import burlap.oomdp.core.AbstractGroundedAction; 4 | import burlap.oomdp.core.states.State; 5 | 6 | /** 7 | * A tuple (triple) for storing the Q-gradient associated with a state and action. The gradient is stored in a double array. 8 | * @author James MacGlashan.
9 | */ 10 | public class QGradientTuple { 11 | 12 | /** 13 | * The state 14 | */ 15 | public State s; 16 | 17 | /** 18 | * The action 19 | */ 20 | public AbstractGroundedAction a; 21 | 22 | /** 23 | * The gradient for the state and action. 24 | */ 25 | public double [] gradient; 26 | 27 | 28 | /** 29 | * Initializes. 30 | * @param s the state 31 | * @param a the action 32 | * @param gradient the gradient for the state and action 33 | */ 34 | public QGradientTuple(State s, AbstractGroundedAction a, double [] gradient){ 35 | this.s = s; 36 | this.a = a; 37 | this.gradient = gradient; 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learning/LearningAgent.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learning; 2 | 3 | 4 | import burlap.behavior.singleagent.EpisodeAnalysis; 5 | import burlap.oomdp.singleagent.environment.Environment; 6 | 7 | /** 8 | * This is the standard interface for defining an agent that learns how to behave in the world through experience. There 9 | * are two methods that need to be implemented. {@link #runLearningEpisode(burlap.oomdp.singleagent.environment.Environment)} 10 | * and {@link #runLearningEpisode(burlap.oomdp.singleagent.environment.Environment, int)}. Implementing the former method 11 | * should have the agent interact with the provided {@link burlap.oomdp.singleagent.environment.Environment} 12 | * until the {@link burlap.oomdp.singleagent.environment.Environment} transitions to a terminal state. The 13 | * {@link #runLearningEpisode(burlap.oomdp.singleagent.environment.Environment, int)} should have the agent interact 14 | * with the {@link burlap.oomdp.singleagent.environment.Environment} until either a terminal state is reached or 15 | * the agent has taken maxSteps in the environment. Both methods should return an {@link burlap.behavior.singleagent.EpisodeAnalysis} 16 | * object that records the interactions. 17 | * 18 | * 19 | * @author James MacGlashan 20 | * 21 | */ 22 | public interface LearningAgent { 23 | 24 | 25 | EpisodeAnalysis runLearningEpisode(Environment env); 26 | 27 | EpisodeAnalysis runLearningEpisode(Environment env, int maxSteps); 28 | 29 | 30 | } 31 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learning/LearningAgentFactory.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learning; 2 | 3 | 4 | /** 5 | * A factory interface for generating learning agents. 6 | * @author James MacGlashan 7 | * 8 | */ 9 | public interface LearningAgentFactory { 10 | 11 | /** 12 | * Will return a name to identify the kind of agent that will be generated by this factory. This is useful for enabling the {@link burlap.behavior.singleagent.auxiliary.performance.LearningAlgorithmExperimenter} class 13 | * to label the results for different kinds of agents that are tested. 14 | * @return a name to identify the kind of agent that will be generated 15 | */ 16 | public String getAgentName(); 17 | 18 | /** 19 | * Generates a new LearningAgent object and returns it. 20 | * @return a LearningAgent object.
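 * For example, factories are often written as anonymous classes so that fresh agents can be generated for each
 * experiment trial (a sketch; domain and hashingFactory are assumed to be defined elsewhere, and the constructor
 * shown assumes BURLAP's QLearning taking domain, discount, hashing factory, initial Q-value, and learning rate):
 * <pre>
 * LearningAgentFactory qFactory = new LearningAgentFactory() {
 *     public String getAgentName() { return "Q-learning"; }
 *     public LearningAgent generateAgent() {
 *         return new QLearning(domain, 0.99, hashingFactory, 0., 0.1);
 *     }
 * };
 * </pre>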
21 | */ 22 | public LearningAgent generateAgent(); 23 | } 24 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learning/actorcritic/Actor.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learning.actorcritic; 2 | 3 | 4 | import burlap.behavior.policy.Policy; 5 | import burlap.oomdp.singleagent.Action; 6 | 7 | 8 | /** 9 | * This class provides the interface necessary for the actor portion of an Actor-Critic learning algorithm. Actors are almost entirely 10 | * identical to policies since they effectively specify how the agent should act; in fact, this abstract class extends the Policy 11 | * class. However, the extra important functionality that an actor must incorporate is the ability to adjust its policy 12 | * in response to some critique of its behavior. In this class, this functionality should be implemented in the 13 | * {@link #updateFromCritqique(CritiqueResult)} method. 14 | * 15 | * 16 | * 17 | * @author James MacGlashan 18 | * 19 | */ 20 | public abstract class Actor extends Policy { 21 | 22 | /** 23 | * Causes this object to update its behavior in response to a critique of its behavior. 24 | * @param critqiue the critique of the agent's behavior represented by a {@link CritiqueResult} object 25 | */ 26 | public abstract void updateFromCritqique(CritiqueResult critqiue); 27 | 28 | /** 29 | * This method allows the actor to utilize actions that are not part of the domain definition. 30 | * @param a an action not part of the domain definition that this actor should be able to use. 31 | */ 32 | public abstract void addNonDomainReferencedAction(Action a); 33 | 34 | 35 | /** 36 | * Used to reset any data that was created/modified during learning so that learning can begin anew. 37 | */ 38 | public abstract void resetData(); 39 | 40 | } 41 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learning/actorcritic/Critic.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learning.actorcritic; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.singleagent.Action; 5 | import burlap.oomdp.singleagent.GroundedAction; 6 | 7 | 8 | /** 9 | * This interface provides the methods necessary for implementing the critic part of an actor-critic learning algorithm. The critic 10 | * is responsible for observing behavior (state, action, state tuples) and returning a critique of that behavior. Typically, 11 | * Critic objects will need to take as input a reward function to judge this behavior. 12 | * 13 | * 14 | * @author James MacGlashan 15 | * 16 | */ 17 | public interface Critic { 18 | 19 | /** 20 | * This method allows the critic to critique actions that are not part of the domain definition. 21 | * @param a an action not part of the domain definition that this critic should be able to critique.
22 | */ 23 | public void addNonDomainReferencedAction(Action a); 24 | 25 | 26 | /** 27 | * This method is called whenever a new learning episode begins 28 | * @param s the initial state of the new learning episode 29 | */ 30 | public void initializeEpisode(State s); 31 | 32 | /** 33 | * This method is called whenever a learning episode terminates 34 | */ 35 | public void endEpisode(); 36 | 37 | 38 | /** 39 | * This method's implementation provides the critique for some specific instance of the behavior. 40 | * @param s an input state 41 | * @param ga an action taken in s 42 | * @param sprime the state the agent transitioned to after taking action ga in state s 43 | * @return the critique of this behavior. 44 | */ 45 | public CritiqueResult critiqueAndUpdate(State s, GroundedAction ga, State sprime); 46 | 47 | /** 48 | * Used to reset any data that was created/modified during learning so that learning can begin anew. 49 | */ 50 | public abstract void resetData(); 51 | 52 | } 53 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learning/actorcritic/CritiqueResult.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learning.actorcritic; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.singleagent.GroundedAction; 5 | 6 | 7 | /** 8 | * The CritiqueResult class stores the relevant information regarding a critique of behavior. Specifically, it contains 9 | * the value of the critique and the state-action-state tuple that is being critiqued. 10 | * 11 | * @author James MacGlashan 12 | * 13 | */ 14 | public class CritiqueResult { 15 | 16 | 17 | /** 18 | * The source state 19 | */ 20 | protected State s; 21 | 22 | /** 23 | * The action taken in state s 24 | */ 25 | protected GroundedAction a; 26 | 27 | /** 28 | * The state to which the agent transitioned when it took action a in state s. 29 | */ 30 | protected State sprime; 31 | 32 | /** 33 | * The critique of this behavior. 34 | */ 35 | protected double critique; 36 | 37 | 38 | /** 39 | * Initializes with a state-action-state behavior tuple and the value of the critique for this behavior. 40 | * @param s a source state 41 | * @param a the action taken in state s 42 | * @param sprime the state to which the agent transitioned when it took action a in state s 43 | * @param critique the critique of this behavior. 44 | */ 45 | public CritiqueResult(State s, GroundedAction a, State sprime, double critique) { 46 | this.s = s; 47 | this.a = a; 48 | this.sprime = sprime; 49 | this.critique = critique; 50 | } 51 | 52 | /** 53 | * Returns the source state of this behavior. 54 | * @return the source state of this behavior. 55 | */ 56 | public State getS() { 57 | return s; 58 | } 59 | 60 | 61 | /** 62 | * Returns the action of this behavior. 63 | * @return the action of this behavior. 64 | */ 65 | public GroundedAction getA() { 66 | return a; 67 | } 68 | 69 | 70 | /** 71 | * Returns the resulting state of this behavior. 72 | * @return the resulting state of this behavior. 73 | */ 74 | public State getSprime() { 75 | return sprime; 76 | } 77 | 78 | 79 | /** 80 | * Returns the critique of this behavior. 81 | * @return the critique of this behavior.
82 | */ 83 | public double getCritique() { 84 | return critique; 85 | } 86 | 87 | 88 | 89 | } 90 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learning/modellearning/ModelLearningPlanner.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learning.modellearning; 2 | 3 | import burlap.behavior.policy.Policy; 4 | import burlap.behavior.singleagent.planning.Planner; 5 | import burlap.oomdp.core.Domain; 6 | import burlap.oomdp.core.states.State; 7 | import burlap.oomdp.core.TerminalFunction; 8 | import burlap.oomdp.singleagent.RewardFunction; 9 | 10 | 11 | /** 12 | * Interface for defining planning algorithms that operate on iteratively learned models. Planning algorithms that operate on iteratively learned models 13 | * must support features for replanning when the model changes and returning the policy of the plan under the current model. 14 | * @author James MacGlashan 15 | * 16 | */ 17 | public interface ModelLearningPlanner extends Planner{ 18 | 19 | /** 20 | * This method is expected to be called at the beginning of any new learning episode. This may be useful for planning algorithms 21 | * that do not solve the policy for every state, since new episodes may start in states the planning algorithm 22 | * had not previously considered. 23 | * @param s the input state 24 | */ 25 | public void initializePlannerIn(State s); 26 | 27 | /** 28 | * Tells the planner that the model has changed and that it will need to replan accordingly 29 | * @param changedState the source state that caused a change in the model. 30 | */ 31 | public void modelChanged(State changedState); 32 | 33 | /** 34 | * Returns a policy encoding the planner's results. 35 | * @return a policy object 36 | */ 37 | public Policy modelPlannedPolicy(); 38 | 39 | 40 | } 41 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learning/modellearning/rmax/UnmodeledFavoredPolicy.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learning.modellearning.rmax; 2 | 3 | import burlap.behavior.policy.Policy; 4 | import burlap.behavior.singleagent.learning.modellearning.Model; 5 | import burlap.debugtools.RandomFactory; 6 | import burlap.oomdp.core.AbstractGroundedAction; 7 | import burlap.oomdp.core.states.State; 8 | import burlap.oomdp.singleagent.Action; 9 | 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | 13 | /** 14 | * @author James MacGlashan.
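 * A policy that favors actions whose effects the learned model has not yet captured: if the model reports any
 * unmodeled actions for the queried state, one of them is selected uniformly at random; otherwise the action of the
 * wrapped source policy is returned. This is the R-max style exploration behavior implemented below.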
15 | */ 16 | public class UnmodeledFavoredPolicy extends Policy{ 17 | 18 | protected Policy sourcePolicy; 19 | protected Model model; 20 | protected List<Action> allActions; 21 | 22 | 23 | public UnmodeledFavoredPolicy(Policy sourcePolicy, Model model, List<Action> actions){ 24 | this.sourcePolicy = sourcePolicy; 25 | this.model = model; 26 | this.allActions = actions; 27 | } 28 | 29 | @Override 30 | public AbstractGroundedAction getAction(State s) { 31 | 32 | List<AbstractGroundedAction> unmodeled = this.model.getUnmodeledActionsForState(s); 33 | 34 | if(unmodeled.size() > 0){ 35 | return unmodeled.get(RandomFactory.getMapped(0).nextInt(unmodeled.size())); 36 | } 37 | 38 | return this.sourcePolicy.getAction(s); 39 | } 40 | 41 | @Override 42 | public List<ActionProb> getActionDistributionForState(State s) { 43 | 44 | List<AbstractGroundedAction> unmodeled = this.model.getUnmodeledActionsForState(s); 45 | 46 | if(unmodeled.size() > 0){ 47 | List<ActionProb> aps = new ArrayList<ActionProb>(unmodeled.size()); 48 | double p = 1./(double)unmodeled.size(); 49 | for(AbstractGroundedAction ga : unmodeled){ 50 | aps.add(new ActionProb(ga, p)); 51 | } 52 | return aps; 53 | } 54 | 55 | return this.sourcePolicy.getActionDistributionForState(s); 56 | } 57 | 58 | @Override 59 | public boolean isStochastic() { 60 | return true; 61 | } 62 | 63 | @Override 64 | public boolean isDefinedFor(State s) { 65 | return true; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/learning/tdmethods/QLearningStateNode.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.learning.tdmethods; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import burlap.behavior.valuefunction.QValue; 7 | import burlap.oomdp.statehashing.HashableState; 8 | import burlap.oomdp.singleagent.GroundedAction; 9 | 10 | 11 | /** 12 | * This class is used to store the associated {@link burlap.behavior.valuefunction.QValue} objects for a given hashed state. 13 | * @author James MacGlashan 14 | * 15 | */ 16 | public class QLearningStateNode { 17 | 18 | /** 19 | * A hashed state entry for which Q-values will be stored. 20 | */ 21 | public HashableState s; 22 | 23 | /** 24 | * The Q-values for this object's state. 25 | */ 26 | public List<QValue> qEntry; 27 | 28 | 29 | /** 30 | * Creates a new object for the given hashed state. The list of {@link burlap.behavior.valuefunction.QValue} objects is initialized to be empty. 31 | * @param s the hashed state for which to associate Q-values 32 | */ 33 | public QLearningStateNode(HashableState s) { 34 | this.s = s; 35 | qEntry = new ArrayList<QValue>(); 36 | } 37 | 38 | 39 | /** 40 | * Adds a Q-value to this state with the given numeric Q-value.
41 | * @param a the action this Q-value is for 42 | * @param q the numeric Q-value 43 | */ 44 | public void addQValue(GroundedAction a, double q){ 45 | QValue qv = new QValue(s.s, a, q); 46 | qEntry.add(qv); 47 | } 48 | 49 | 50 | } 51 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/options/support/LocalSubgoalTF.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.options.support; 2 | 3 | import burlap.oomdp.auxiliary.stateconditiontest.StateConditionTest; 4 | import burlap.oomdp.core.states.State; 5 | import burlap.oomdp.core.TerminalFunction; 6 | 7 | 8 | /** 9 | * It is typical for options to be defined for following policies to subgoals and it is often useful 10 | * to use a planning or learning algorithm to define these policies, in which case a terminal 11 | * function for the option would need to be specified in order to learn or plan for its policy. This terminal function 12 | * defines a set of states in which an option is applicable and the subgoal states of the option. 13 | * The subgoal state and applicable states are specified using {@link burlap.oomdp.auxiliary.stateconditiontest.StateConditionTest} 14 | * objects. The agent will terminate in any subgoal state or any state that is not an applicable state. 15 | * @author James MacGlashan 16 | * 17 | */ 18 | public class LocalSubgoalTF implements TerminalFunction { 19 | 20 | 21 | /** 22 | * Defines the set of states in which the option is applicable 23 | */ 24 | protected StateConditionTest applicableStateTest; 25 | 26 | /** 27 | * Defines the set of subgoal states for the option 28 | */ 29 | protected StateConditionTest subgoalStateTest; 30 | 31 | 32 | 33 | /** 34 | * Initializes with a set of subgoal states. The option is assumed to be applicable everywhere. 35 | * @param subgoalStateTest the subgoal states. 36 | */ 37 | public LocalSubgoalTF(StateConditionTest subgoalStateTest) { 38 | this.applicableStateTest = null; 39 | this.subgoalStateTest = subgoalStateTest; 40 | } 41 | 42 | 43 | /** 44 | * Initializes with a set of states in which the option is applicable and the option's subgoal states. 45 | * @param applicableStateTest the states in which the option is applicable.
46 | * @param subgoalStateTest the subgoal states 47 | */ 48 | public LocalSubgoalTF(StateConditionTest applicableStateTest, StateConditionTest subgoalStateTest) { 49 | this.applicableStateTest = applicableStateTest; 50 | this.subgoalStateTest = subgoalStateTest; 51 | } 52 | 53 | @Override 54 | public boolean isTerminal(State s) { 55 | 56 | if(this.applicableStateTest != null){ 57 | if(!this.applicableStateTest.satisfies(s)){ 58 | return true; //terminate when reaching a state that is not an initiation state 59 | } 60 | } 61 | 62 | return this.subgoalStateTest.satisfies(s); 63 | 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/options/support/OptionEvaluatingRF.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.options.support; 2 | 3 | import burlap.behavior.singleagent.options.Option; 4 | import burlap.oomdp.core.states.State; 5 | import burlap.oomdp.singleagent.GroundedAction; 6 | import burlap.oomdp.singleagent.RewardFunction; 7 | 8 | 9 | /** 10 | * This class is a reward function that accepts a reward function for primitive actions and returns 11 | * that when the query action is a primitive. If the query action is an option, it 12 | * returns the cumulative reward from the option's last execution, using the assumption that any options that need evaluating 13 | * have been set to internally keep track of their reward after each successive application. It is also 14 | * assumed that those options are using the same reward function as the input primitive RF. 15 | * 16 | * This is useful for planners that would want to execute the option and evaluate the reward afterwards. 17 | * 18 | * @author James MacGlashan 19 | * 20 | */ 21 | public class OptionEvaluatingRF implements RewardFunction { 22 | 23 | /** 24 | * The source primitive action reward function for the MDP 25 | */ 26 | RewardFunction primitiveRF; 27 | 28 | 29 | /** 30 | * Initializes. 31 | * @param rf the source primitive action reward function for the MDP 32 | */ 33 | public OptionEvaluatingRF(RewardFunction rf){ 34 | this.primitiveRF = rf; 35 | } 36 | 37 | 38 | @Override 39 | public double reward(State s, GroundedAction a, State sprime) { 40 | 41 | if(a.action.isPrimitive()){ 42 | return primitiveRF.reward(s, a, sprime); 43 | } 44 | 45 | //otherwise return the cumulative reward from the last option execution 46 | //with the assumption that the last call to the option produced this SAS tuple 47 | Option o = (Option)a.action; 48 | return o.getLastCumulativeReward(); 49 | 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/planning/Planner.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.planning; 2 | 3 | import burlap.behavior.policy.Policy; 4 | import burlap.behavior.singleagent.MDPSolverInterface; 5 | import burlap.oomdp.core.states.State; 6 | 7 | /** 8 | * @author James MacGlashan. 9 | */ 10 | public interface Planner extends MDPSolverInterface{ 11 | 12 | /** 13 | * This method will cause the {@link burlap.behavior.singleagent.planning.Planner} to begin planning from the specified initial {@link burlap.oomdp.core.states.State}. 14 | * It will then return an appropriate {@link burlap.behavior.policy.Policy} object that captures the planning results.
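 * For example (a sketch; assumes BURLAP's ValueIteration planner and previously defined domain, reward function rf,
 * terminal function tf, and hashing factory):
 * <pre>
 * Planner planner = new ValueIteration(domain, rf, tf, 0.99, hashingFactory, 0.001, 100);
 * Policy p = planner.planFromState(initialState);
 * </pre>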
15 | * Note that typically you can use a variety of different {@link burlap.behavior.policy.Policy} objects 16 | * in conjunction with this {@link burlap.behavior.singleagent.planning.Planner} to get varying behavior and 17 | * the returned {@link burlap.behavior.policy.Policy} is not required to be used. 18 | * @param initialState the initial state of the planning problem 19 | * @return a {@link burlap.behavior.policy.Policy} that captures the planning results from input {@link burlap.oomdp.core.states.State}. 20 | */ 21 | Policy planFromState(State initialState); 22 | 23 | } 24 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/planning/deterministic/MultiStatePrePlanner.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.planning.deterministic; 2 | 3 | import java.util.Collection; 4 | 5 | import burlap.behavior.singleagent.planning.Planner; 6 | import burlap.oomdp.auxiliary.stateconditiontest.StateConditionTestIterable; 7 | import burlap.oomdp.core.states.State; 8 | 9 | 10 | /** 11 | * This is a helper class that is used to run a planner from multiple initial states to ensure 12 | * that an adequate plan/policy exists for each of them. It makes use of an iterable state 13 | * condition test, or a collection of state objects, to define the states from which planning 14 | * should be performed. 15 | * @author James MacGlashan 16 | * 17 | */ 18 | public class MultiStatePrePlanner { 19 | 20 | /** 21 | * Runs a planning algorithm from multiple initial states to ensure that an adequate plan/policy exists for each of the states. 22 | * @param planner the planner to be used. 23 | * @param initialStates a {@link burlap.oomdp.auxiliary.stateconditiontest.StateConditionTestIterable} object that will iterate over the initial states from which to plan. 24 | */ 25 | public static void runPlannerForAllInitStates(Planner planner, StateConditionTestIterable initialStates){ 26 | for(State s : initialStates){ 27 | planner.planFromState(s); 28 | } 29 | } 30 | 31 | 32 | /** 33 | * Runs a planning algorithm from multiple initial states to ensure that an adequate plan/policy exists for each of the states. 34 | * @param planner the planner to be used. 35 | * @param initialStates a collection of states from which to plan. 36 | */ 37 | public static void runPlannerForAllInitStates(Planner planner, Collection<State> initialStates){ 38 | for(State s : initialStates){ 39 | planner.planFromState(s); 40 | } 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/planning/deterministic/SearchNode.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.planning.deterministic; 2 | 3 | import burlap.oomdp.statehashing.HashableState; 4 | import burlap.oomdp.singleagent.GroundedAction; 5 | 6 | 7 | /** 8 | * The SearchNode class is used for classic deterministic forward search planners. It represents a current state, a back pointer 9 | * to the search node from which this node's state was generated, and the action that was taken in the generating node's state to 10 | * produce this node's state. Once a goal state is found by the forward search planner, the back pointers can be traced to 11 | * find the plan that got to the goal.
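 * For example, once a goal node has been found, the plan can be recovered with a walk up the back pointers
 * (a sketch; goalNode is a hypothetical goal SearchNode):
 * <pre>
 * List<GroundedAction> plan = new LinkedList<GroundedAction>();
 * for(SearchNode n = goalNode; n.backPointer != null; n = n.backPointer){
 *     plan.add(0, n.generatingAction); // prepend to invert the backward walk
 * }
 * </pre>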
12 | * @author James MacGlashan 13 | * 14 | */ 15 | public class SearchNode { 16 | 17 | /** 18 | * The (hashed) state of this node 19 | */ 20 | public HashableState s; 21 | 22 | 23 | /** 24 | * The action that generated this state in the previous state. Null if this node is for the initial state. 25 | */ 26 | public GroundedAction generatingAction; 27 | 28 | /** 29 | * The search node for the previous state that generated this node's state. Null if this node is for the initial state. 30 | */ 31 | public SearchNode backPointer; 32 | 33 | 34 | 35 | /** 36 | * Constructs a SearchNode for the input state. The generating action and back pointer are set to null, which is valid if this 37 | * is the search node for an initial state. Otherwise, these fields should be filled in. 38 | * @param s the hashed input state this node will represent. 39 | */ 40 | public SearchNode(HashableState s){ 41 | this.s = s; 42 | this.generatingAction = null; 43 | this.backPointer = null; 44 | } 45 | 46 | 47 | /** 48 | * Constructs a SearchNode for the input state and sets the generating action and back pointer to the provided elements. 49 | * @param s the hashed input state this node will represent. 50 | * @param ga the action that was used to generate s 51 | * @param bp the search node that contains the previous state from which s was generated. 52 | */ 53 | public SearchNode(HashableState s, GroundedAction ga, SearchNode bp){ 54 | this.s = s; 55 | this.generatingAction = ga; 56 | this.backPointer = bp; 57 | } 58 | 59 | 60 | @Override 61 | public boolean equals(Object o){ 62 | SearchNode so = (SearchNode)o; 63 | return s.equals(so.s); 64 | } 65 | 66 | 67 | @Override 68 | public int hashCode(){ 69 | return s.hashCode(); 70 | } 71 | 72 | } 73 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/planning/deterministic/informed/Heuristic.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.planning.deterministic.informed; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | /** 6 | * An interface for defining heuristics. The heuristic function should return an estimate of the amount of *reward* that will be accumulated from that given 7 | * state. Since deterministic forward search planning algorithms typically expect costs, this is represented by simply using negative reward, where 8 | * values closer to zero are better. For instance, if it was known that a state was 3 steps away from the goal, an optimal heuristic (and the true cost 9 | * from the state) would return -3. 10 | * @author James MacGlashan 11 | * 12 | */ 13 | public interface Heuristic { 14 | 15 | /** 16 | * Returns the estimated amount of reward that will be received when following the optimal policy from the given state. 17 | * Since deterministic forward search planning algorithms typically expect costs, this is represented by simply using negative reward, where 18 | * values closer to zero are better. For instance, if it was known that state s was 3 steps away from the goal, an optimal heuristic (the true reward 19 | * from the state) would return -3. 20 | * @param s the state from which to estimate the future reward. 21 | * @return the estimated amount of reward that will be received when following the optimal policy from s. 
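 * For example, in a grid world with unit-cost actions, the negative Manhattan distance to the goal is an admissible
 * heuristic (a sketch; ax, ay, gx, gy are hypothetical agent and goal coordinates read from s):
 * <pre>
 * return -(Math.abs(ax - gx) + Math.abs(ay - gy));
 * </pre>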
22 | */ 23 | public double h(State s); 24 | 25 | } 26 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/planning/deterministic/informed/NullHeuristic.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.planning.deterministic.informed; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | /** 6 | * A {@link Heuristic} implementation that always returns 0. This is always admissible 7 | * and effectively causes planners like A* to perform Uniform Cost Search. 8 | * @author James MacGlashan 9 | * 10 | */ 11 | public class NullHeuristic implements Heuristic { 12 | 13 | @Override 14 | public double h(State s) { 15 | return 0; 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/planning/stochastic/HashedTransitionProbability.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.planning.stochastic; 2 | 3 | import burlap.oomdp.statehashing.HashableStateFactory; 4 | import burlap.oomdp.statehashing.HashableState; 5 | import burlap.oomdp.core.states.State; 6 | import burlap.oomdp.core.TransitionProbability; 7 | 8 | /** 9 | * An analog to the {@link burlap.oomdp.core.TransitionProbability}, except it stores {@link burlap.oomdp.statehashing.HashableState} objects 10 | * instead of {@link burlap.oomdp.core.states.State} objects. 11 | * @author James MacGlashan 12 | * 13 | */ 14 | public class HashedTransitionProbability { 15 | 16 | public HashableState sh; 17 | public double p; 18 | 19 | 20 | /** 21 | * Initializes with a {@link burlap.oomdp.statehashing.HashableState} and probability for the transition 22 | * @param sh the hashed state that the agent transitions to 23 | * @param p the probability of the transition 24 | */ 25 | public HashedTransitionProbability(HashableState sh, double p){ 26 | this.sh = sh; 27 | this.p = p; 28 | } 29 | 30 | 31 | /** 32 | * Takes a {@link burlap.oomdp.core.states.State} object, hashes it, and sets the transition probability to the hashed state to p 33 | * @param s the state that the agent transitions to 34 | * @param p the probability of the transition 35 | * @param hashingFactory the hashing factory to use to hash the input state 36 | */ 37 | public HashedTransitionProbability(State s, double p, HashableStateFactory hashingFactory){ 38 | this.sh = hashingFactory.hashState(s); 39 | this.p = p; 40 | } 41 | 42 | 43 | /** 44 | * Takes a {@link burlap.oomdp.core.TransitionProbability} and hashes its state using the hashingFactory object 45 | * @param tp the {@link burlap.oomdp.core.TransitionProbability} to hash 46 | * @param hashingFactory the hashing factory to use. 
47 | */ 48 | public HashedTransitionProbability(TransitionProbability tp, HashableStateFactory hashingFactory){ 49 | this.sh = hashingFactory.hashState(tp.s); 50 | this.p = tp.p; 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/planning/vfa/fittedvi/SupervisedVFA.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.planning.vfa.fittedvi; 2 | 3 | import burlap.behavior.valuefunction.ValueFunction; 4 | import burlap.oomdp.core.states.State; 5 | 6 | import java.util.List; 7 | 8 | /** 9 | * An interface for learning value function approximation via a supervised learning algorithm. This interface 10 | * defines the method {@link #train} which takes as input a list of {@link burlap.behavior.singleagent.planning.vfa.fittedvi.SupervisedVFA.SupervisedVFAInstance} 11 | * objects, runs a regression algorithm, and returns the learned function, which is an interface of {@link burlap.behavior.valuefunction.ValueFunction}. 12 | *
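 * For example, a fitted value iteration style algorithm might regress onto Bellman backup targets at each iteration
 * (a sketch; sampleStates, bellmanBackup, and mySupervisedVFA are hypothetical names):
 * <pre>
 * List<SupervisedVFAInstance> data = new ArrayList<SupervisedVFAInstance>();
 * for(State s : sampleStates){
 *     data.add(new SupervisedVFAInstance(s, bellmanBackup(s)));
 * }
 * ValueFunction newV = mySupervisedVFA.train(data);
 * </pre>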

13 | * A {@link burlap.behavior.singleagent.planning.vfa.fittedvi.SupervisedVFA.SupervisedVFAInstance} is a pair consisting 14 | * of a {@link burlap.oomdp.core.states.State} and the target state value that is to be learned. 15 | * @author James MacGlashan. 16 | */ 17 | public interface SupervisedVFA { 18 | 19 | /** 20 | * Uses supervised learning (regression) to learn a value function approximation of the input training data. 21 | * @param trainingData the training data to fit. 22 | * @return a {@link burlap.behavior.valuefunction.ValueFunction} that fits the training data. 23 | */ 24 | public ValueFunction train(List<SupervisedVFAInstance> trainingData); 25 | 26 | 27 | /** 28 | * A pair for a state and its target value function value. 29 | */ 30 | public static class SupervisedVFAInstance{ 31 | 32 | /** 33 | * The state 34 | */ 35 | public State s; 36 | 37 | /** 38 | * The state's associated value 39 | */ 40 | public double v; 41 | 42 | 43 | /** 44 | * Initializes 45 | * @param s the state 46 | * @param v the state's associated value 47 | */ 48 | public SupervisedVFAInstance(State s, double v){ 49 | this.s = s; 50 | this.v = v; 51 | } 52 | 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/pomdp/BeliefPolicyAgent.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.pomdp; 2 | 3 | import burlap.behavior.policy.Policy; 4 | import burlap.oomdp.singleagent.GroundedAction; 5 | import burlap.oomdp.singleagent.environment.Environment; 6 | import burlap.oomdp.singleagent.pomdp.BeliefAgent; 7 | import burlap.oomdp.singleagent.pomdp.beliefstate.BeliefState; 8 | import burlap.oomdp.singleagent.pomdp.PODomain; 9 | 10 | 11 | /** 12 | * A Belief agent that follows a specified policy. 13 | */ 14 | public class BeliefPolicyAgent extends BeliefAgent { 15 | 16 | /** 17 | * The policy that the agent will follow. 18 | */ 19 | protected Policy policy; 20 | 21 | 22 | /** 23 | * Initializes. 24 | * @param domain the POMDP domain 25 | * @param environment the environment with which the agent will interact 26 | * @param policy the policy the agent will follow. 27 | */ 28 | public BeliefPolicyAgent(PODomain domain, Environment environment, Policy policy){ 29 | super(domain, environment); 30 | this.policy = policy; 31 | } 32 | 33 | 34 | @Override 35 | public GroundedAction getAction(BeliefState curBelief) { 36 | 37 | GroundedAction ga = (GroundedAction)this.policy.getAction(curBelief); 38 | return ga; 39 | } 40 | 41 | 42 | 43 | } 44 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/shaping/ShapedRewardFunction.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.shaping; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.singleagent.GroundedAction; 5 | import burlap.oomdp.singleagent.RewardFunction; 6 | 7 | 8 | /** 9 | * This abstract class is used to define shaped reward functions. Shaped reward functions take the base 10 | * true objective reward function of a task and add some additional reward value to it that helps suggest 11 | * useful states. Subclasses of the ShapedRewardFunction must implement a method that specifies the 12 | * additive reward to the base reward.
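 * That is, the returned reward is R'(s,a,s') = R(s,a,s') + F(s,a,s'), where R is the base objective reward function
 * and F is the additive shaping reward implemented by the additiveReward method.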
13 | * 14 | * 15 | * @author James MacGlashan 16 | * 17 | */ 18 | public abstract class ShapedRewardFunction implements RewardFunction { 19 | 20 | 21 | /** 22 | * The base objective reward function for the task. 23 | */ 24 | protected RewardFunction baseRF; 25 | 26 | 27 | /** 28 | * Returns the reward value to add to the base objective reward function. 29 | * @param s the previous state 30 | * @param a the action taken in the previous state 31 | * @param sprime the successor state 32 | * @return the reward value to add to the base objective reward function. 33 | */ 34 | public abstract double additiveReward(State s, GroundedAction a, State sprime); 35 | 36 | 37 | /** 38 | * Initializes with the base objective task reward function. 39 | * @param baseRF the objective task reward function. 40 | */ 41 | public ShapedRewardFunction(RewardFunction baseRF) { 42 | this.baseRF = baseRF; 43 | } 44 | 45 | @Override 46 | public double reward(State s, GroundedAction a, State sprime) { 47 | return this.baseRF.reward(s, a, sprime) + this.additiveReward(s, a, sprime); 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/shaping/potential/PotentialFunction.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.shaping.potential; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | 6 | /** 7 | * Defines an interface for reward potential functions. This interface will be used by potential-based reward shaping. Note: potential functions 8 | * should always be defined to return 0 for terminal states. 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public interface PotentialFunction { 13 | 14 | /** 15 | * Returns the reward potential from the given state. 16 | * Note: the potential function should always return 0 for terminal states. 17 | * @param s the input state for which to get the reward potential. 18 | * @return the reward potential from the given state. 19 | */ 20 | public double potentialValue(State s); 21 | } 22 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/shaping/potential/PotentialShapedRF.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.shaping.potential; 2 | 3 | import burlap.behavior.singleagent.shaping.ShapedRewardFunction; 4 | import burlap.oomdp.core.states.State; 5 | import burlap.oomdp.singleagent.GroundedAction; 6 | import burlap.oomdp.singleagent.RewardFunction; 7 | 8 | 9 | /** 10 | * This class is used to implement Potential-based reward shaping [1] which is guaranteed to preserve the optimal policy. This class 11 | * requires a {@link PotentialFunction} and the discount being used by the MDP. The additive reward is defined as: 12 | * d * p(s') - p(s) 13 | * where d is the discount factor, s' is the most recent state, s is the previous state, and p(s) is the potential of state s. 14 | * 15 | * 16 | * 1. Ng, Andrew Y., Daishi Harada, and Stuart Russell. "Policy invariance under reward transformations: Theory and application to reward shaping." ICML. 1999. 17 | * 18 | * @author James MacGlashan 19 | * 20 | */ 21 | public class PotentialShapedRF extends ShapedRewardFunction { 22 | 23 | 24 | /** 25 | * The potential function that can be used to return the potential reward from input states.
26 | */ 27 | protected PotentialFunction potentialFunction; 28 | 29 | /** 30 | * The discount factor of the MDP (required for this shaping to preserve policy optimality) 31 | */ 32 | protected double discount; 33 | 34 | 35 | /** 36 | * Initializes the shaping with the objective reward function, the potential function, and the discount of the MDP. 37 | * @param baseRF the objective task reward function. 38 | * @param potentialFunction the potential function to use. 39 | * @param discount the discount factor of the MDP. 40 | */ 41 | public PotentialShapedRF(RewardFunction baseRF, PotentialFunction potentialFunction, double discount) { 42 | super(baseRF); 43 | 44 | this.potentialFunction = potentialFunction; 45 | this.discount = discount; 46 | 47 | } 48 | 49 | @Override 50 | public double additiveReward(State s, GroundedAction a, State sprime) { 51 | return (this.discount * this.potentialFunction.potentialValue(sprime)) - this.potentialFunction.potentialValue(s); 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/ActionApproximationResult.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa; 2 | 3 | import java.util.List; 4 | 5 | import burlap.oomdp.singleagent.GroundedAction; 6 | 7 | 8 | /** 9 | * A class that ties function approximation results to actions. This is useful for approximating Q-values. 10 | * @author James MacGlashan 11 | * 12 | */ 13 | public class ActionApproximationResult { 14 | 15 | /** 16 | * The grounded action this approximation was for 17 | */ 18 | public GroundedAction ga; 19 | 20 | /** 21 | * The actual approximation result 22 | */ 23 | public ApproximationResult approximationResult; 24 | 25 | 26 | /** 27 | * Initializes with a given action and approximation result 28 | * @param ga the grounded action that this approximation is for 29 | * @param approximationResult the approximation result 30 | */ 31 | public ActionApproximationResult(GroundedAction ga, ApproximationResult approximationResult) { 32 | this.ga = ga; 33 | this.approximationResult = approximationResult; 34 | } 35 | 36 | 37 | /** 38 | * Given a list of {@link ActionApproximationResult} objects, this method will return the corresponding {@link ActionApproximationResult} 39 | * for the given action. 40 | * @param approximations list of approximations 41 | * @param ga the grounded action for which the corresponding approximation result should be returned. 42 | * @return the corresponding {@link ActionApproximationResult} for the given action. Null if there is no corresponding approximation result. 43 | */ 44 | public static ActionApproximationResult extractApproximationForAction(List<ActionApproximationResult> approximations, GroundedAction ga){ 45 | for(ActionApproximationResult aar : approximations){ 46 | if(aar.ga.equals(ga)){ 47 | return aar; 48 | } 49 | } 50 | 51 | return null; 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/ApproximationResult.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa; 2 | 3 | import java.util.List; 4 | 5 | 6 | /** 7 | * A class associating a predicted value that was generated from a list of state features and the weights for those features.
Note that 8 | * the predicted value does *not* have to be a linear combination of the state features and the weights, so it may not be possible 9 | * to reconstruct the predicted value from the features and weights alone. 10 | * @author James MacGlashan 11 | * 12 | */ 13 | public class ApproximationResult { 14 | 15 | /** 16 | * The predicted value 17 | */ 18 | public double predictedValue; 19 | 20 | /** 21 | * The state features used to produce the predicted value. 22 | */ 23 | public List<StateFeature> stateFeatures; 24 | 25 | /** 26 | * The function weights used to produce the predicted value. 27 | */ 28 | public List<FunctionWeight> functionWeights; 29 | 30 | 31 | 32 | /** 33 | * Initializes 34 | * @param predictedValue the predicted value 35 | * @param stateFeatures the state features used to produce the predicted value. 36 | * @param functionWeights the function weights used to produce the predicted value. 37 | */ 38 | public ApproximationResult(double predictedValue, List<StateFeature> stateFeatures, List<FunctionWeight> functionWeights) { 39 | this.predictedValue = predictedValue; 40 | this.stateFeatures = stateFeatures; 41 | this.functionWeights = functionWeights; 42 | 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/FunctionWeight.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa; 2 | 3 | /** 4 | * This class holds the weight value for weights defined by a ValueFunctionApproximation class. It is expected that when a weight value is changed 5 | * on this object that the corresponding weight value in the ValueFunctionApproximation object is changed as well, which means the 6 | * ValueFunctionApproximation should store its weights with this data structure. Alternatively, the ValueFunctionApproximation class 7 | * can use a different data structure and subclass this FunctionWeight class so that when the setWeight method is called on it, it also 8 | * updates the corresponding data structure in the ValueFunctionApproximation object. 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public class FunctionWeight { 13 | 14 | /** 15 | * The int value that uniquely identifies this weight 16 | */ 17 | protected int weightId; 18 | 19 | /** 20 | * The value of this weight. 21 | */ 22 | protected double weightValue; 23 | 24 | 25 | /** 26 | * Initializes.
27 | * @param weightId the weight identifier 28 | * @param weightValue the value of the weight 29 | */ 30 | public FunctionWeight(int weightId, double weightValue) { 31 | this.weightId = weightId; 32 | this.weightValue = weightValue; 33 | } 34 | 35 | 36 | /** 37 | * Returns the weight identifier 38 | * @return the weight identifier 39 | */ 40 | public int weightId(){ 41 | return this.weightId; 42 | } 43 | 44 | 45 | /** 46 | * Returns the weight value 47 | * @return the weight value 48 | */ 49 | public double weightValue(){ 50 | return weightValue; 51 | } 52 | 53 | 54 | /** 55 | * Sets the weight 56 | * @param w the value to set the weight to 57 | */ 58 | public void setWeight(double w){ 59 | this.weightValue = w; 60 | } 61 | 62 | 63 | } 64 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/StateFeature.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa; 2 | 3 | 4 | /** 5 | * A class for associating a state feature identifier with a value of that state feature 6 | * @author James MacGlashan 7 | * 8 | */ 9 | public class StateFeature { 10 | 11 | /** 12 | * The state feature identifier 13 | */ 14 | public int id; 15 | 16 | /** 17 | * The value of the state feature 18 | */ 19 | public double value; 20 | 21 | 22 | /** 23 | * Initializes. 24 | * @param id the state feature identifier 25 | * @param value the value of the state feature 26 | */ 27 | public StateFeature(int id, double value) { 28 | this.id = id; 29 | this.value = value; 30 | } 31 | 32 | 33 | 34 | } 35 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/StateToFeatureVectorGenerator.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | /** 6 | * Many function approximation techniques require a fixed feature vector to work and in many cases, using abstract features from 7 | * the state attributes is useful. This interface provides a means to take a BURLAP OO-MDP state and transform it into 8 | * a feature vector represented as a double array so that these function approximation techniques may be used. 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public interface StateToFeatureVectorGenerator { 13 | 14 | /** 15 | * Returns a feature vector represented as a double array for a given input state. 16 | * @param s the input state to turn into a feature vector. 17 | * @return the feature vector represented as a double array. 18 | */ 19 | public double [] generateFeatureVectorFrom(State s); 20 | 21 | } 22 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/WeightGradient.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | 7 | /** 8 | * A data structure for defining the gradient of the weights for a vector. If the weight gradient is not stored for a given 9 | * feature, then zero will be returned.
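 * For example, for a linear approximator V(s) = sum_i w_i * phi_i(s), the partial derivative with respect to weight
 * w_i is simply the feature value phi_i(s), so a gradient can be filled directly from the state features
 * (a sketch; features is a hypothetical List<StateFeature>):
 * <pre>
 * WeightGradient grad = new WeightGradient(features.size());
 * for(StateFeature sf : features){
 *     grad.put(sf.id, sf.value);
 * }
 * </pre>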
10 | * @author James MacGlashan 11 | * 12 | */ 13 | public class WeightGradient { 14 | 15 | /** 16 | * A map from weight identifiers to their partial derivative 17 | */ 18 | Map<Integer, Double> gradient; 19 | 20 | 21 | /** 22 | * Initializes with the gradient unspecified for any weights. 23 | */ 24 | public WeightGradient() { 25 | gradient = new HashMap<Integer, Double>(); 26 | } 27 | 28 | 29 | /** 30 | * Initializes with the gradient unspecified, but reserves space for the given capacity 31 | * @param capacity how much space to reserve for storing the gradient; i.e., the number of weights over which the gradient will be defined 32 | */ 33 | public WeightGradient(int capacity) { 34 | gradient = new HashMap<Integer, Double>(capacity); 35 | } 36 | 37 | /** 38 | * Adds the partial derivative for a given weight 39 | * @param weightId the weight identifier for which the partial derivative is to be stored 40 | * @param partialDerivative the partial derivative value for the weight 41 | */ 42 | public void put(int weightId, double partialDerivative){ 43 | this.gradient.put(weightId, partialDerivative); 44 | } 45 | 46 | 47 | /** 48 | * Returns the partial derivative for the given weight 49 | * @param weightId the weight identifier 50 | * @return the partial derivative for the given weight; 0 if it is not stored. 51 | */ 52 | public double getPartialDerivative(int weightId){ 53 | Double stored = gradient.get(weightId); 54 | if(stored == null){ 55 | return 0.; 56 | } 57 | return stored; 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/cmac/AttributeTileSpecification.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa.cmac; 2 | 3 | import burlap.oomdp.core.Attribute; 4 | 5 | 6 | /** 7 | * Specifies how a single attribute of a specific object class is to be tiled.
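 * For example, with windowSize = 2.0 and bucketBoundary = 0.5, tile boundaries along this dimension fall at
 * ..., 0.5, 2.5, 4.5, ...; attribute values 0.7 and 2.1 then share the tile [0.5, 2.5), while 2.7 falls in the next
 * tile [2.5, 4.5). (An illustrative reading of the two fields below; the tiling computation itself lives elsewhere.)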
8 | * @author James MacGlashan 9 | * 10 | */ 11 | public class AttributeTileSpecification { 12 | 13 | 14 | /** 15 | * The object class name this tiling specification concerns 16 | */ 17 | public String className; 18 | 19 | /** 20 | * The attribute this tiling specification concerns 21 | */ 22 | public Attribute attribute; 23 | 24 | /** 25 | * How large of a window to use; i.e., the width of a tile along this attribute dimension 26 | */ 27 | public double windowSize; 28 | 29 | /** 30 | * The offset of this tile alignment; that is, where the first tiling boundary starts 31 | */ 32 | public double bucketBoundary; 33 | 34 | 35 | 36 | /** 37 | * Initializes 38 | * @param className The object class name this tiling specification concerns 39 | * @param attribute The attribute this tiling specification concerns 40 | * @param windowSize How large of a window to use; i.e., the width of a tile along this attribute dimension 41 | * @param bucketBoundary The offset of this tile alignment; that is, where the first tiling boundary starts 42 | */ 43 | public AttributeTileSpecification(String className, Attribute attribute, double windowSize, double bucketBoundary) { 44 | this.className = className; 45 | this.attribute = attribute; 46 | this.windowSize = windowSize; 47 | this.bucketBoundary = bucketBoundary; 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/common/FDFeatureVectorGenerator.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa.common; 2 | 3 | import burlap.behavior.singleagent.vfa.FeatureDatabase; 4 | import burlap.behavior.singleagent.vfa.StateFeature; 5 | import burlap.behavior.singleagent.vfa.StateToFeatureVectorGenerator; 6 | import burlap.oomdp.core.states.State; 7 | 8 | import java.util.List; 9 | 10 | /** 11 | * A wrapper for turning the features from a {@link burlap.behavior.singleagent.vfa.FeatureDatabase} into a double array. 12 | * Note that this wrapper is not advised for feature databases like CMACs/Tile coding, since those have very large numbers 13 | * of sparse features and this wrapper will create entries for all features, including the zero-valued ones. 14 | * @author James MacGlashan. 15 | */ 16 | public class FDFeatureVectorGenerator implements StateToFeatureVectorGenerator{ 17 | 18 | protected FeatureDatabase fd; 19 | 20 | 21 | /** 22 | * Initializes. 23 | * @param fd the feature database used for generating state features.
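* <p>A minimal usage sketch (assuming {@code fd} and {@code s} already exist): <pre>{@code StateToFeatureVectorGenerator gen = new FDFeatureVectorGenerator(fd); double[] fv = gen.generateFeatureVectorFrom(s); }</pre>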
24 | */ 25 | public FDFeatureVectorGenerator(FeatureDatabase fd){ 26 | this.fd = fd; 27 | } 28 | 29 | public FeatureDatabase getFd() { 30 | return fd; 31 | } 32 | 33 | public void setFd(FeatureDatabase fd) { 34 | this.fd = fd; 35 | } 36 | 37 | @Override 38 | public double[] generateFeatureVectorFrom(State s) { 39 | 40 | List<StateFeature> sfs = this.fd.getStateFeatures(s); 41 | double [] fv = new double[this.fd.numberOfFeatures()]; 42 | for(StateFeature sf : sfs){ 43 | fv[sf.id] = sf.value; 44 | } 45 | return fv; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/common/PFFeatureVectorGenerator.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa.common; 2 | 3 | import java.util.LinkedList; 4 | import java.util.List; 5 | 6 | import burlap.behavior.singleagent.vfa.StateToFeatureVectorGenerator; 7 | import burlap.oomdp.core.Domain; 8 | import burlap.oomdp.core.GroundedProp; 9 | import burlap.oomdp.core.PropositionalFunction; 10 | import burlap.oomdp.core.states.State; 11 | 12 | public class PFFeatureVectorGenerator implements StateToFeatureVectorGenerator { 13 | 14 | protected PropositionalFunction [] pfsToUse; 15 | 16 | 17 | /** 18 | * Initializes using all propositional functions that belong to the domain 19 | * @param domain the domain containing all the propositional functions to use 20 | */ 21 | public PFFeatureVectorGenerator(Domain domain){ 22 | 23 | this.pfsToUse = new PropositionalFunction[domain.getPropFunctions().size()]; 24 | int i = 0; 25 | for(PropositionalFunction pf : domain.getPropFunctions()){ 26 | this.pfsToUse[i] = pf; 27 | i++; 28 | } 29 | 30 | } 31 | 32 | /** 33 | * Initializes using the list of given propositional functions. 34 | * @param pfs the propositional functions to use. 35 | */ 36 | public PFFeatureVectorGenerator(List<PropositionalFunction> pfs){ 37 | this.pfsToUse = new PropositionalFunction[pfs.size()]; 38 | this.pfsToUse = pfs.toArray(this.pfsToUse); 39 | } 40 | 41 | 42 | /** 43 | * Initializes using the array of given propositional functions. 44 | * @param pfs the propositional functions to use. 45 | */ 46 | public PFFeatureVectorGenerator(PropositionalFunction [] pfs){ 47 | this.pfsToUse = pfs.clone(); 48 | } 49 | 50 | 51 | @Override 52 | public double[] generateFeatureVectorFrom(State s) { 53 | 54 | List<Double> featureValueList = new LinkedList<Double>(); 55 | for(PropositionalFunction pf : this.pfsToUse){ 56 | //List<GroundedProp> gps = s.getAllGroundedPropsFor(pf); 57 | List<GroundedProp> gps = pf.getAllGroundedPropsForState(s); 58 | for(GroundedProp gp : gps){ 59 | if(gp.isTrue(s)){ 60 | featureValueList.add(1.); 61 | } 62 | else{ 63 | featureValueList.add(0.); 64 | } 65 | } 66 | } 67 | 68 | double [] fv = new double[featureValueList.size()]; 69 | int i = 0; 70 | for(double f : featureValueList){ 71 | fv[i] = f; 72 | i++; 73 | } 74 | 75 | return fv; 76 | } 77 | 78 | } 79 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/rbf/DistanceMetric.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa.rbf; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | /** 6 | * An interface for defining distance metrics between OO-MDP {@link burlap.oomdp.core.states.State} objects.
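* <p>Implementations only need to supply the {@code distance} method; for example, a trivial placeholder metric could be written as follows (a sketch; see the EuclideanDistance class in the metrics subpackage for a real implementation): <pre>{@code DistanceMetric zero = new DistanceMetric() { public double distance(State s0, State s1) { return 0.; } }; }</pre>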
7 | * @author Anubhav Malhotra and Daniel Fernandez and Spandan Dutta 8 | * 9 | */ 10 | public interface DistanceMetric { 11 | public double distance(State s0, State s1); 12 | } 13 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/rbf/FVDistanceMetric.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa.rbf; 2 | 3 | /** 4 | * An interface for defining the distance between two states that are represented with double arrays. 5 | * @author James MacGlashan. 6 | */ 7 | public interface FVDistanceMetric { 8 | 9 | /** 10 | * Returns the distance between state s0 and state s1. 11 | * @param s0 a state represented with a double array 12 | * @param s1 a state represented with a double array 13 | * @return the distance between s0 and s1. 14 | */ 15 | public double distance(double [] s0, double [] s1); 16 | } 17 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/rbf/FVRBF.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa.rbf; 2 | 3 | /** 4 | * A class for defining radial basis functions for states represented with a double array. 5 | * @author James MacGlashan. 6 | */ 7 | public abstract class FVRBF { 8 | 9 | /** 10 | * The center state of the RBF unit. 11 | */ 12 | protected double [] centeredState; 13 | 14 | /** 15 | * The distance metric to compare query input states to the centeredState 16 | */ 17 | protected FVDistanceMetric metric; 18 | 19 | 20 | /** 21 | * Initializes. 22 | * @param centeredState the center state of the RBF unit. 23 | * @param metric the distance metric to compare query input states to the centeredState 24 | */ 25 | public FVRBF(double [] centeredState, FVDistanceMetric metric){ 26 | this.centeredState = centeredState; 27 | this.metric = metric; 28 | } 29 | 30 | /** 31 | * Returns the RBF response from its center state to the query input state. 32 | * @param input the query input state represented with a double array. 33 | * @return the double response value of this RBF unit to the query input state. 34 | */ 35 | public abstract double responseFor(double [] input); 36 | 37 | } 38 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/rbf/RBF.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa.rbf; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | /** 6 | * An abstract class for defining RBF units. An RBF unit is defined by a centered state and a distance metric that can be used 7 | * to measure the distance between the unit's center and an input state. RBF units return a response value to an input state that is a 8 | * function of the distance from the unit's centered state to the input state. Different RBF subclasses implement the response function differently. 9 | * The less distant an input state is from a unit's center state, the greater the response value. 10 | * @author Anubhav Malhotra and Daniel Fernandez and Spandan Dutta 11 | * 12 | */ 13 | public abstract class RBF { 14 | 15 | /** 16 | * The center state of this unit 17 | */ 18 | protected State centeredState; 19 | 20 | /** 21 | * The distance metric used to compare input states to this unit's center state. 
22 | */ 23 | protected DistanceMetric metric; 24 | 25 | 26 | /** 27 | * Initializes with a center state for this unit and a distance metric to compare input states to it. 28 | * @param centeredState the center state to use for this unit. 29 | * @param metric the distance metric to use to compare this unit's center state to input states. 30 | */ 31 | public RBF(State centeredState, DistanceMetric metric){ 32 | this.centeredState = centeredState; 33 | this.metric = metric; 34 | } 35 | 36 | 37 | /** 38 | * Returns a response value to an input state that is a function of the distance between the input and this unit's center state. 39 | * The less distant a query state is from this unit's center state, the greater the response value. 40 | * @param input the input state for which a response value is returned. 41 | * @return a response value to the given input state 42 | */ 43 | public abstract double responseFor(State input); 44 | } 45 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/rbf/metrics/EuclideanDistance.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa.rbf.metrics; 2 | 3 | import burlap.behavior.singleagent.vfa.StateToFeatureVectorGenerator; 4 | import burlap.behavior.singleagent.vfa.rbf.DistanceMetric; 5 | import burlap.oomdp.core.states.State; 6 | 7 | public class EuclideanDistance implements DistanceMetric { 8 | 9 | protected StateToFeatureVectorGenerator vectorGenerator; 10 | 11 | 12 | public EuclideanDistance(StateToFeatureVectorGenerator vectorGenerator){ 13 | this.vectorGenerator = vectorGenerator; 14 | } 15 | 16 | 17 | @Override 18 | public double distance(State s0, State s1) { 19 | 20 | double [] f0 = this.vectorGenerator.generateFeatureVectorFrom(s0); 21 | double [] f1 = this.vectorGenerator.generateFeatureVectorFrom(s1); 22 | 23 | if(f0.length != f1.length){ 24 | throw new RuntimeException("Cannot compute Euclidean distance; feature vectors for the two input states are not equal in size."); 25 | } 26 | 27 | double sum = 0.; 28 | for(int i = 0; i < f0.length; i++){ 29 | double diff = f0[i] - f1[i]; 30 | sum += diff*diff; 31 | } 32 | 33 | double dist = Math.sqrt(sum); 34 | 35 | return dist; 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/singleagent/vfa/rbf/metrics/FVEuclideanDistance.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.singleagent.vfa.rbf.metrics; 2 | 3 | import burlap.behavior.singleagent.vfa.rbf.FVDistanceMetric; 4 | 5 | /** 6 | * A distance metric; returns sqrt( sum_i (x_i - y_i)^2 ) 7 | * @author James MacGlashan.
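* <p>For example, the distance between {0, 0} and {3, 4} is sqrt(3*3 + 4*4) = 5: <pre>{@code double d = new FVEuclideanDistance().distance(new double[]{0, 0}, new double[]{3, 4}); /* 5.0 */ }</pre>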
8 | */ 9 | public class FVEuclideanDistance implements FVDistanceMetric{ 10 | 11 | @Override 12 | public double distance(double[] s0, double[] s1) { 13 | 14 | if(s0.length != s1.length){ 15 | throw new RuntimeException("Cannot compute Euclidean distance; feature vectors for the two input states are not equal in size."); 16 | } 17 | 18 | double sum = 0.; 19 | for(int i = 0; i < s0.length; i++){ 20 | double diff = s0[i] - s1[i]; 21 | sum += diff*diff; 22 | } 23 | 24 | double dist = Math.sqrt(sum); 25 | 26 | return dist; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/agents/RandomSGAgent.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.agents; 2 | 3 | import java.util.List; 4 | import java.util.Map; 5 | 6 | import burlap.debugtools.RandomFactory; 7 | import burlap.oomdp.core.states.State; 8 | import burlap.oomdp.stochasticgames.SGAgent; 9 | import burlap.oomdp.stochasticgames.agentactions.GroundedSGAgentAction; 10 | import burlap.oomdp.stochasticgames.JointAction; 11 | import burlap.oomdp.stochasticgames.agentactions.SGAgentAction; 12 | 13 | 14 | /** 15 | * Stochastic games agent that chooses actions uniformly at random. 16 | * @author James MacGlashan 17 | * 18 | */ 19 | public class RandomSGAgent extends SGAgent { 20 | 21 | @Override 22 | public void gameStarting() { 23 | //do nothing 24 | 25 | } 26 | 27 | @Override 28 | public GroundedSGAgentAction getAction(State s) { 29 | 30 | List<GroundedSGAgentAction> gsas = SGAgentAction.getAllApplicableGroundedActionsFromActionList(s, this.worldAgentName, this.agentType.actions); 31 | 32 | int r = RandomFactory.getMapped(0).nextInt(gsas.size()); 33 | GroundedSGAgentAction gsa = gsas.get(r); 34 | 35 | return gsa; 36 | } 37 | 38 | @Override 39 | public void observeOutcome(State s, JointAction jointAction, 40 | Map<String, Double> jointReward, State sprime, boolean isTerminal) { 41 | //do nothing 42 | 43 | } 44 | 45 | @Override 46 | public void gameTerminated() { 47 | //do nothing 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/agents/madp/MADPPlanAgentFactory.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.agents.madp; 2 | 3 | import burlap.behavior.stochasticgames.PolicyFromJointPolicy; 4 | import burlap.behavior.stochasticgames.madynamicprogramming.MADynamicProgramming; 5 | import burlap.oomdp.stochasticgames.SGAgent; 6 | import burlap.oomdp.stochasticgames.AgentFactory; 7 | import burlap.oomdp.stochasticgames.SGDomain; 8 | 9 | 10 | /** 11 | * An agent factory for the {@link MultiAgentDPPlanningAgent} agent. Generated agents are always provided a copy of the provided 12 | * policy object to ensure that multiple agents from the same factory use a policy specific to them. 13 | * @author James MacGlashan 14 | * 15 | */ 16 | public class MADPPlanAgentFactory implements AgentFactory { 17 | 18 | protected SGDomain domain; 19 | protected MADPPlannerFactory plannerFactory; 20 | protected PolicyFromJointPolicy policy; 21 | 22 | 23 | /** 24 | * Initializes.
25 | * @param domain the domain for the agents 26 | * @param planner the valueFunction object that will be used by all generated agents 27 | * @param policy the policy that will be copied and supplied to all generated objects 28 | */ 29 | public MADPPlanAgentFactory(SGDomain domain, MADynamicProgramming planner, PolicyFromJointPolicy policy){ 30 | this.domain = domain; 31 | this.plannerFactory = new MADPPlannerFactory.ConstantMADPPlannerFactory(planner); 32 | this.policy = policy; 33 | } 34 | 35 | 36 | /** 37 | * Initializes 38 | * @param domain the domain for the agents 39 | * @param plannerFactory the valueFunction factory that will be used to generate a valueFunction for the agents 40 | * @param policy the policy that will be copied and supplied to all generated objects 41 | */ 42 | public MADPPlanAgentFactory(SGDomain domain, MADPPlannerFactory plannerFactory, PolicyFromJointPolicy policy){ 43 | this.domain = domain; 44 | this.plannerFactory = plannerFactory; 45 | this.policy = policy; 46 | } 47 | 48 | @Override 49 | public SGAgent generateAgent() { 50 | return new MultiAgentDPPlanningAgent(domain, this.plannerFactory.getPlannerInstance(), this.policy.copy()); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/agents/naiveq/history/ActionIdMap.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.agents.naiveq.history; 2 | 3 | import burlap.oomdp.stochasticgames.agentactions.GroundedSGAgentAction; 4 | 5 | /** 6 | * An interface that can turn a grounded action into an integer value 7 | * @author James MacGlashan 8 | * 9 | */ 10 | public interface ActionIdMap { 11 | 12 | /** 13 | * Returns an int value corresponding to the input action 14 | * @param gsa the input action 15 | * @return an int value corresponding to the input action 16 | */ 17 | public int getActionId(GroundedSGAgentAction gsa); 18 | 19 | /** 20 | * Returns an int value corresponding to the input action name and parameters 21 | * @param actionName the input action name 22 | * @param params the input action parameters 23 | * @return an int value corresponding to the input action name and parameters 24 | */ 25 | public int getActionId(String actionName, String [] params); 26 | 27 | /** 28 | * The maximum number of int values for actions 29 | * @return maximum number of int values for actions 30 | */ 31 | public int maxValue(); 32 | 33 | /** 34 | * Returns a corresponding GroundedSingleAction for a given int value 35 | * @param id the int value indicating which GroundedSingleAction to return. 
36 | * @return a corresponding GroundedSingleAction for a given int value 37 | */ 38 | public GroundedSGAgentAction getActionForId(int id); 39 | } 40 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/agents/naiveq/history/ParameterNaiveActionIdMap.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.agents.naiveq.history; 2 | 3 | import java.util.HashMap; 4 | import java.util.List; 5 | import java.util.Map; 6 | 7 | import burlap.oomdp.core.Domain; 8 | import burlap.oomdp.stochasticgames.agentactions.GroundedSGAgentAction; 9 | import burlap.oomdp.stochasticgames.agentactions.SGAgentAction; 10 | import burlap.oomdp.stochasticgames.agentactions.SimpleGroundedSGAgentAction; 11 | 12 | 13 | /** 14 | * An action to int map that takes the list of possible action names in a domain and assigns an int value to each. 15 | * This class does not manage object identifier independence. 16 | * @author James MacGlashan 17 | * 18 | */ 19 | public class ParameterNaiveActionIdMap implements ActionIdMap { 20 | 21 | /** 22 | * The map from action names to their corresponding int value 23 | */ 24 | protected Map<String, Integer> map; 25 | 26 | /** 27 | * The domain for which the action values should be created. 28 | */ 29 | protected Domain domain; 30 | 31 | 32 | /** 33 | * Initializes a mapping from the names of all actions in a given domain to an int value. 34 | * @param d the domain containing the actions. 35 | */ 36 | public ParameterNaiveActionIdMap(Domain d){ 37 | 38 | this.domain = d; 39 | List<SGAgentAction> actions = d.getAgentActions(); 40 | map = new HashMap<String, Integer>(actions.size()); 41 | for(int i = 0; i < actions.size(); i++){ 42 | map.put(actions.get(i).actionName, i); 43 | } 44 | } 45 | 46 | 47 | @Override 48 | public int getActionId(GroundedSGAgentAction gsa) { 49 | return map.get(gsa.action.actionName); 50 | } 51 | 52 | 53 | @Override 54 | public int getActionId(String actionName, String[] params) { 55 | return map.get(actionName); 56 | } 57 | 58 | @Override 59 | public int maxValue() { 60 | return map.size(); 61 | } 62 | 63 | 64 | @Override 65 | public GroundedSGAgentAction getActionForId(int id) { 66 | 67 | for(String key : map.keySet()){ 68 | int sid = map.get(key); 69 | if(sid == id){ 70 | //found it 71 | GroundedSGAgentAction gsa = new SimpleGroundedSGAgentAction("", domain.getSingleAction(key)); 72 | return gsa; 73 | } 74 | } 75 | 76 | return null; 77 | } 78 | 79 | } 80 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/agents/twoplayer/singlestage/equilibriumplayer/equilibriumsolvers/MinMax.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.agents.twoplayer.singlestage.equilibriumplayer.equilibriumsolvers; 2 | 3 | import burlap.behavior.stochasticgames.agents.twoplayer.singlestage.equilibriumplayer.BimatrixEquilibriumSolver; 4 | import burlap.behavior.stochasticgames.solvers.MinMaxSolver; 5 | 6 | 7 | /** 8 | * Finds the MinMax equilibrium using linear programming and returns the appropriate strategy. Note that 9 | * if the game is not zero sum, the resulting strategy will be computed as if it were, by setting the opponent's payoff 10 | * matrix to the negation of the querying player's.
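* <p>Concretely, only the querying player's matrix is consulted: {@code computeRowStrategy} passes {@code rowPayoff} to {@code MinMaxSolver.getRowPlayersStrategy} and ignores {@code colPayoff}, which is what yields the zero-sum treatment described above.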
11 | * @author James MacGlashan 12 | * 13 | */ 14 | public class MinMax extends BimatrixEquilibriumSolver { 15 | 16 | @Override 17 | public double[] computeRowStrategy(double[][] rowPayoff, 18 | double[][] colPayoff) { 19 | return MinMaxSolver.getRowPlayersStrategy(rowPayoff); 20 | } 21 | 22 | @Override 23 | public double[] computeColStrategy(double[][] rowPayoff, 24 | double[][] colPayoff) { 25 | return MinMaxSolver.getColPlayersStrategy(colPayoff); 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/agents/twoplayer/singlestage/equilibriumplayer/equilibriumsolvers/Utilitarian.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.agents.twoplayer.singlestage.equilibriumplayer.equilibriumsolvers; 2 | 3 | import burlap.behavior.stochasticgames.agents.twoplayer.singlestage.equilibriumplayer.BimatrixEquilibriumSolver; 4 | import burlap.behavior.stochasticgames.solvers.GeneralBimatrixSolverTools; 5 | 6 | 7 | /** 8 | * Finds the maximum utilitarian value joint action and returns a deterministic strategy respecting it. The utilitarian value 9 | * is the sum of the two players' payoffs for a cell. If there are multiple maximums, the first is always used. 10 | * @author James MacGlashan 11 | * 12 | */ 13 | public class Utilitarian extends BimatrixEquilibriumSolver { 14 | 15 | @Override 16 | public double[] computeRowStrategy(double[][] rowPayoff, 17 | double[][] colPayoff) { 18 | 19 | double max = Double.NEGATIVE_INFINITY; 20 | int maxInd = -1; 21 | for(int i = 0; i < rowPayoff.length; i++){ 22 | for(int j = 0; j < rowPayoff[i].length; j++){ 23 | double sumPay = rowPayoff[i][j] + colPayoff[i][j]; //utilitarian value: sum of both players' payoffs 24 | if(sumPay > max){ 25 | max = sumPay; 26 | maxInd = i; 27 | } 28 | } 29 | } 30 | 31 | double [] strat = GeneralBimatrixSolverTools.zero1Array(maxInd, rowPayoff.length); 32 | 33 | return strat; 34 | } 35 | 36 | @Override 37 | public double[] computeColStrategy(double[][] rowPayoff, 38 | double[][] colPayoff) { 39 | 40 | double max = Double.NEGATIVE_INFINITY; 41 | int maxInd = -1; 42 | for(int i = 0; i < rowPayoff.length; i++){ 43 | for(int j = 0; j < rowPayoff[i].length; j++){ 44 | double sumPay = rowPayoff[i][j] + colPayoff[i][j]; //utilitarian value: sum of both players' payoffs 45 | if(sumPay > max){ 46 | max = sumPay; 47 | maxInd = j; 48 | } 49 | } 50 | } 51 | 52 | double [] strat = GeneralBimatrixSolverTools.zero1Array(maxInd, rowPayoff[0].length); 53 | 54 | return strat; 55 | 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/auxiliary/performance/AgentFactoryAndType.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.auxiliary.performance; 2 | 3 | import burlap.oomdp.stochasticgames.AgentFactory; 4 | import burlap.oomdp.stochasticgames.SGAgentType; 5 | 6 | /** 7 | * A pair storing an agent factory and the agent type that the generated agent will join the world as.
8 | * @author James MacGlashan 9 | * 10 | */ 11 | public class AgentFactoryAndType { 12 | public AgentFactory agentFactory; 13 | public SGAgentType at; 14 | 15 | /** 16 | * Initializes. 17 | * @param agentFactory the agent factory 18 | * @param at the agent type the agent will join a world as 19 | */ 20 | public AgentFactoryAndType(AgentFactory agentFactory, SGAgentType at){ 21 | this.agentFactory = agentFactory; 22 | this.at = at; 23 | } 24 | 25 | } 26 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/madynamicprogramming/JAQValue.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.madynamicprogramming; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.stochasticgames.JointAction; 5 | 6 | 7 | /** 8 | * Class for storing Q-value information for a joint action. It is effectively a triple consisting of a state, joint action, and a double for the corresponding q-value. 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public class JAQValue { 13 | public State s; 14 | public JointAction ja; 15 | public double q; 16 | 17 | public JAQValue(State s, JointAction ja, double q){ 18 | this.s = s; 19 | this.ja = ja; 20 | this.q = q; 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/madynamicprogramming/MAQSourcePolicy.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.madynamicprogramming; 2 | 3 | import burlap.behavior.stochasticgames.JointPolicy; 4 | 5 | 6 | /** 7 | * An abstract extension of the JointPolicy class that adds a required interface of being able to set a {@link MultiAgentQSourceProvider}. This extension is useful 8 | * if the joint policy is derived from a set of multi-agent Q-values. 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public abstract class MAQSourcePolicy extends JointPolicy{ 13 | 14 | /** 15 | * Sets the {@link MultiAgentQSourceProvider} that will be used to define this object's joint policy. 16 | * @param provider the {@link MultiAgentQSourceProvider} that will be used to define this object's joint policy. 17 | */ 18 | public abstract void setQSourceProvider(MultiAgentQSourceProvider provider); 19 | } 20 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/madynamicprogramming/MultiAgentQSourceProvider.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.madynamicprogramming; 2 | 3 | 4 | /** 5 | * An interface for an object that can provide the Q-values stored for each agent in a problem. 6 | * @author James MacGlashan 7 | * 8 | */ 9 | public interface MultiAgentQSourceProvider { 10 | 11 | /** 12 | * Returns an object that can provide Q-value sources for each agent. 13 | * @return a {@link AgentQSourceMap} object.
14 | */ 15 | public AgentQSourceMap getQSources(); 16 | } 17 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/madynamicprogramming/SGBackupOperator.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.madynamicprogramming; 2 | 3 | import java.util.Map; 4 | 5 | import burlap.oomdp.core.states.State; 6 | import burlap.oomdp.stochasticgames.SGAgentType; 7 | 8 | 9 | /** 10 | * A stochastic games backup operator to be used in multi-agent Q-learning or value function planning. This operator 11 | * is meant to be applied to a next state; it takes the set of Q-values for that state for all agents and returns 12 | * the backed up value. The classic Bellman MDP approach would be to use a max operator, but in stochastic games, 13 | * different solution concepts require different operators. 14 | * @author James MacGlashan; adapted from code written by Esha Gosh John Meehan and Michalis Michaelidis 15 | * 16 | */ 17 | public interface SGBackupOperator { 18 | 19 | public double performBackup(State s, String forAgent, Map<String, SGAgentType> agentDefinitions, AgentQSourceMap qSourceMap); 20 | 21 | } 22 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/madynamicprogramming/backupOperators/MaxQ.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.madynamicprogramming.backupOperators; 2 | 3 | import java.util.List; 4 | import java.util.Map; 5 | 6 | import burlap.behavior.stochasticgames.madynamicprogramming.AgentQSourceMap; 7 | import burlap.behavior.stochasticgames.madynamicprogramming.QSourceForSingleAgent; 8 | import burlap.behavior.stochasticgames.madynamicprogramming.SGBackupOperator; 9 | import burlap.oomdp.core.states.State; 10 | import burlap.oomdp.stochasticgames.SGAgentType; 11 | import burlap.oomdp.stochasticgames.JointAction; 12 | 13 | 14 | /** 15 | * A classic MDP-style max backup operator in which an agent backs up its max Q-value in the state. 16 | * @author James MacGlashan 17 | * 18 | */ 19 | public class MaxQ implements SGBackupOperator { 20 | 21 | @Override 22 | public double performBackup(State s, String forAgent, Map<String, SGAgentType> agentDefinitions, AgentQSourceMap qSourceMap) { 23 | 24 | List<JointAction> allJAs = JointAction.getAllJointActions(s, agentDefinitions); 25 | 26 | double maxQ = Double.NEGATIVE_INFINITY; 27 | 28 | QSourceForSingleAgent myQs = qSourceMap.agentQSource(forAgent); 29 | 30 | for(JointAction ja : allJAs){ 31 | double q = myQs.getQValueFor(s, ja).q; 32 | maxQ = Math.max(q, maxQ); 33 | } 34 | 35 | 36 | return maxQ; 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/stochasticgames/solvers/MinMaxSolver.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.stochasticgames.solvers; 2 | 3 | import scpsolver.constraints.LinearBiggerThanEqualsConstraint; 4 | import scpsolver.lpsolver.LinearProgramSolver; 5 | import scpsolver.lpsolver.SolverFactory; 6 | import scpsolver.problems.LinearProgram; 7 | 8 | public class MinMaxSolver { 9 | 10 | 11 | 12 | 13 | /** 14 | * Computes the minmax strategy for the row player of the given payoff matrix. 15 | * The entries of the payoff matrix are assumed to be the payouts for the *row* player.
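* <p>For example, for matching pennies, where the row player's payoff matrix is {{1, -1}, {-1, 1}}, the minmax strategy is the uniform mixture (a standard game-theory result, stated here for illustration): <pre>{@code double[] strat = MinMaxSolver.getRowPlayersStrategy(new double[][]{{1, -1}, {-1, 1}}); /* approximately {0.5, 0.5} */ }</pre>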
16 | * @param payoffMatrix payoffs for the row player. 17 | * @return the strategy of the row player. 18 | */ 19 | public static double [] getRowPlayersStrategy(double [][] payoffMatrix){ 20 | double [][] t = GeneralBimatrixSolverTools.transposeMatrix(payoffMatrix); 21 | return getColPlayersStrategy(t); 22 | } 23 | 24 | 25 | 26 | /** 27 | * Computes the minmax strategy for the column player of the given payoff matrix. 28 | * The entries of the payoff matrix are assumed to be the payouts for the *column* player. 29 | * @param payoffMatrix payoffs for column player. 30 | * @return strategy of the column player. 31 | */ 32 | public static double [] getColPlayersStrategy(double [][] payoffMatrix){ 33 | 34 | //get positive matrix (finds the minimum value and adds -min + 1 to all elements) 35 | double [][] G = GeneralBimatrixSolverTools.getPositiveMatrix(payoffMatrix); 36 | 37 | LinearProgram lp = new LinearProgram(GeneralBimatrixSolverTools.constantDoubleArray(1., G[0].length)); 38 | 39 | int cCount = 0; 40 | 41 | //add payoff matrix constraints 42 | for(int i = 0; i < G.length; i++){ 43 | lp.addConstraint(new LinearBiggerThanEqualsConstraint(G[i], 1., "c" + cCount)); 44 | cCount++; 45 | } 46 | 47 | //add lower bound constraints 48 | for(int i = 0; i < G[0].length; i++){ 49 | lp.addConstraint(new LinearBiggerThanEqualsConstraint(GeneralBimatrixSolverTools.zero1Array(i, G[0].length), 0., "c" + cCount)); 50 | cCount++; 51 | } 52 | 53 | //solve it 54 | lp.setMinProblem(true); 55 | LinearProgramSolver solver = SolverFactory.newDefault(); 56 | double[] sol = solver.solve(lp); 57 | 58 | //convert LP solution into probability vector. 59 | double z = 0.; 60 | for(double d : sol){ 61 | z += d; 62 | } 63 | 64 | double v = 1/z; 65 | 66 | for(int i = 0; i < sol.length; i++){ 67 | sol[i] *= v; 68 | } 69 | 70 | 71 | 72 | return sol; 73 | } 74 | 75 | 76 | 77 | 78 | } 79 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/valuefunction/QValue.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.valuefunction; 2 | 3 | import burlap.oomdp.core.AbstractGroundedAction; 4 | import burlap.oomdp.core.states.State; 5 | 6 | 7 | /** 8 | * This class is used to store Q-values. 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public class QValue { 13 | 14 | /** 15 | * The state with which this Q-value is associated. 16 | */ 17 | public State s; 18 | 19 | /** 20 | * The action with which this Q-value is associated 21 | */ 22 | public AbstractGroundedAction a; 23 | 24 | /** 25 | * The numeric Q-value 26 | */ 27 | public double q; 28 | 29 | 30 | 31 | /** 32 | * Creates a Q-value for the given state and action pair with the specified q-value 33 | * @param s the state 34 | * @param a the action 35 | * @param q the initial Q-value 36 | */ 37 | public QValue(State s, AbstractGroundedAction a, double q){ 38 | this.s = s; 39 | this.a = a; 40 | this.q = q; 41 | } 42 | 43 | 44 | /** 45 | * Initializes this Q-value by copying the information from another Q-value. 46 | * @param src the source Q-value from which to copy.
47 | */ 48 | public QValue(QValue src){ 49 | this.s = src.s.copy(); 50 | this.a = src.a.copy(); 51 | this.q = src.q; 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/behavior/valuefunction/ValueFunction.java: -------------------------------------------------------------------------------- 1 | package burlap.behavior.valuefunction; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | /** 6 | * An interface for algorithms that can return the value for states. 7 | * @author James MacGlashan. 8 | */ 9 | public interface ValueFunction { 10 | 11 | /** 12 | * Returns the value function evaluation of the given state. If the value is not stored, then the default value 13 | * specified by the ValueFunctionInitialization object of this class is returned. 14 | * @param s the state to evaluate. 15 | * @return the value function evaluation of the given state. 16 | */ 17 | public double value(State s); 18 | } 19 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/debugtools/DebugFlags.java: -------------------------------------------------------------------------------- 1 | package burlap.debugtools; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | 7 | /** 8 | * A data structure for specifying debug flags that can be accessed and modified from any class 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public class DebugFlags { 13 | 14 | /** 15 | * The flags and their values that are set 16 | */ 17 | private static Map<Integer, Integer> flags; 18 | 19 | /** 20 | * Creates/sets a debug flag 21 | * @param id the flag identifier 22 | * @param v the value of the flag 23 | */ 24 | public static void setFlag(int id, int v){ 25 | if(flags == null){ 26 | flags = new HashMap<Integer, Integer>(); 27 | } 28 | flags.put(id, v); 29 | } 30 | 31 | 32 | /** 33 | * Returns the value for a given flag; 0 if the flag has never been created/set 34 | * @param id the flag identifier 35 | * @return the value of the flag; 0 if the flag has never been created/set 36 | */ 37 | public static int getFlag(int id){ 38 | if(flags == null){ 39 | flags = new HashMap<Integer, Integer>(); 40 | } 41 | Integer v = flags.get(id); 42 | if(v == null){ 43 | flags.put(id, 0); 44 | return 0; 45 | } 46 | return v; 47 | } 48 | 49 | 50 | } 51 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/domain/singleagent/blockdude/BlockDudeTF.java: -------------------------------------------------------------------------------- 1 | package burlap.domain.singleagent.blockdude; 2 | 3 | import burlap.oomdp.core.objects.ObjectInstance; 4 | import burlap.oomdp.core.states.State; 5 | import burlap.oomdp.core.TerminalFunction; 6 | 7 | import java.util.List; 8 | 9 | /** 10 | * A {@link burlap.oomdp.core.TerminalFunction} for {@link burlap.domain.singleagent.blockdude.BlockDude}. Returns true 11 | * when the agent is at an exit. If there are multiple exits, then returns true when the agent is at any exit. 12 | * @author James MacGlashan.
13 | */ 14 | public class BlockDudeTF implements TerminalFunction { 15 | 16 | @Override 17 | public boolean isTerminal(State s) { 18 | 19 | ObjectInstance agent = s.getFirstObjectOfClass(BlockDude.CLASSAGENT); 20 | List<ObjectInstance> exits = s.getObjectsOfClass(BlockDude.CLASSEXIT); 21 | 22 | int ax = agent.getIntValForAttribute(BlockDude.ATTX); 23 | int ay = agent.getIntValForAttribute(BlockDude.ATTY); 24 | 25 | for(ObjectInstance e : exits){ 26 | int ex = e.getIntValForAttribute(BlockDude.ATTX); 27 | if(ex == ax){ 28 | int ey = e.getIntValForAttribute(BlockDude.ATTY); 29 | if(ey == ay){ 30 | return true; 31 | } 32 | } 33 | } 34 | 35 | return false; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/domain/singleagent/frostbite/FrostbiteRF.java: -------------------------------------------------------------------------------- 1 | package burlap.domain.singleagent.frostbite; 2 | 3 | import burlap.oomdp.core.Domain; 4 | import burlap.oomdp.core.objects.ObjectInstance; 5 | import burlap.oomdp.core.PropositionalFunction; 6 | import burlap.oomdp.core.states.State; 7 | import burlap.oomdp.singleagent.GroundedAction; 8 | import burlap.oomdp.singleagent.RewardFunction; 9 | 10 | import java.util.List; 11 | 12 | /** 13 | * @author Phillipe Morere 14 | */ 15 | public class FrostbiteRF implements RewardFunction{ 16 | 17 | public double goalReward = 1000.0; 18 | public double lostReward = -1000.0; 19 | public double activatedPlatformReward = 10.0; 20 | public double defaultReward = -1.0; 21 | private PropositionalFunction onIce; 22 | private PropositionalFunction inWater; 23 | private PropositionalFunction iglooBuilt; 24 | 25 | public FrostbiteRF(Domain domain) { 26 | this.inWater = domain.getPropFunction(FrostbiteDomain.PFINWATER); 27 | this.onIce = domain.getPropFunction(FrostbiteDomain.PFONICE); 28 | this.iglooBuilt = domain.getPropFunction(FrostbiteDomain.PFIGLOOBUILT); 29 | } 30 | 31 | @Override 32 | public double reward(State s, GroundedAction a, State sprime) { 33 | if (inWater.somePFGroundingIsTrue(sprime)) 34 | return lostReward; 35 | if (iglooBuilt.somePFGroundingIsTrue(sprime) && onIce.somePFGroundingIsTrue(s)) 36 | return goalReward; 37 | if (numberPlatformsActive(s) != numberPlatformsActive(sprime)) 38 | return activatedPlatformReward; 39 | return defaultReward; 40 | } 41 | 42 | private int numberPlatformsActive(State s) { 43 | List<ObjectInstance> platforms = s.getObjectsOfClass(FrostbiteDomain.PLATFORMCLASS); 44 | int nb = 0; 45 | for (ObjectInstance platform : platforms) 46 | if (platform.getBooleanValForAttribute(FrostbiteDomain.ACTIVATEDATTNAME)) 47 | nb++; 48 | return nb; 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/domain/singleagent/frostbite/FrostbiteTF.java: -------------------------------------------------------------------------------- 1 | package burlap.domain.singleagent.frostbite; 2 | 3 | import burlap.oomdp.core.Domain; 4 | import burlap.oomdp.core.PropositionalFunction; 5 | import burlap.oomdp.core.states.State; 6 | import burlap.oomdp.core.TerminalFunction; 7 | 8 | /** 9 | * @author Phillipe Morere 10 | */ 11 | public class FrostbiteTF implements TerminalFunction{ 12 | 13 | private PropositionalFunction onIce; 14 | private PropositionalFunction inWater; 15 | private PropositionalFunction iglooBuilt; 16 | 17 | public FrostbiteTF(Domain domain) { 18 | this.inWater = domain.getPropFunction(FrostbiteDomain.PFINWATER); 19 | this.onIce =
domain.getPropFunction(FrostbiteDomain.PFONICE); 20 | this.iglooBuilt = domain.getPropFunction(FrostbiteDomain.PFIGLOOBUILT); 21 | } 22 | 23 | @Override 24 | public boolean isTerminal(State s) { 25 | if (inWater.somePFGroundingIsTrue(s)) 26 | return true; 27 | return iglooBuilt.somePFGroundingIsTrue(s) && onIce.somePFGroundingIsTrue(s); 28 | } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/domain/singleagent/graphdefined/GraphRF.java: -------------------------------------------------------------------------------- 1 | package burlap.domain.singleagent.graphdefined; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.singleagent.GroundedAction; 5 | import burlap.oomdp.singleagent.RewardFunction; 6 | 7 | /** 8 | * An abstract class for more easily defining {@link burlap.oomdp.singleagent.RewardFunction}s for {@link burlap.domain.singleagent.graphdefined.GraphDefinedDomain} 9 | * {@link burlap.oomdp.core.Domain}s. This class implements the standard {@link #reward(burlap.oomdp.core.states.State, burlap.oomdp.singleagent.GroundedAction, burlap.oomdp.core.states.State)} 10 | * method by converting the {@link burlap.oomdp.core.states.State} objects to their graph node integer representation and the {@link burlap.oomdp.singleagent.GroundedAction} to its 11 | * integer representation and then returning the value of {@link #reward(int, int, int)}, which is an abstract method 12 | * that the client must implement. 13 | * @author James MacGlashan. 14 | */ 15 | public abstract class GraphRF implements RewardFunction{ 16 | 17 | @Override 18 | public double reward(State s, GroundedAction a, State sprime) { 19 | int actionId = Integer.parseInt(a.toString().replaceAll(GraphDefinedDomain.BASEACTIONNAME, "")); 20 | return this.reward(GraphDefinedDomain.getNodeId(s), actionId, GraphDefinedDomain.getNodeId(sprime)); 21 | } 22 | 23 | /** 24 | * Returns the reward for taking action a in state node s and transition to state node sprime. 25 | * @param s the previous state node id 26 | * @param a the action id 27 | * @param sprime the next state node id 28 | * @return the received reward for the transition in the graph 29 | */ 30 | public abstract double reward(int s, int a, int sprime); 31 | } 32 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/domain/singleagent/graphdefined/GraphTF.java: -------------------------------------------------------------------------------- 1 | package burlap.domain.singleagent.graphdefined; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.core.TerminalFunction; 5 | 6 | import java.util.HashSet; 7 | import java.util.Set; 8 | 9 | /** 10 | * A {@link burlap.oomdp.core.TerminalFunction} for instances of {@link burlap.domain.singleagent.graphdefined.GraphDefinedDomain}. 11 | * Lets the user specify the integer node ids of the terminal states in the graph. 12 | * @author James MacGlashan. 
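* <p>For example, {@code new GraphTF(3, 5)} marks graph nodes 3 and 5 as terminal; further nodes can be added later with {@code addTerminals}.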
13 | */ 14 | public class GraphTF implements TerminalFunction { 15 | 16 | /** 17 | * The set of node ids in the graph that are terminal states 18 | */ 19 | protected Set<Integer> terminalStates; 20 | 21 | 22 | /** 23 | * Initializes setting all states with the provided integer node ids to be terminal states 24 | * @param nodes the state node ids that are terminal states 25 | */ 26 | public GraphTF(int...nodes){ 27 | this.terminalStates = new HashSet<Integer>(nodes.length); 28 | for(int n : nodes){ 29 | this.terminalStates.add(n); 30 | } 31 | } 32 | 33 | @Override 34 | public boolean isTerminal(State s) { 35 | 36 | int sid = GraphDefinedDomain.getNodeId(s); 37 | return this.terminalStates.contains(sid); 38 | } 39 | 40 | public Set<Integer> getTerminalStates() { 41 | return terminalStates; 42 | } 43 | 44 | public void setTerminalStates(Set<Integer> terminalStates) { 45 | this.terminalStates = terminalStates; 46 | } 47 | 48 | /** 49 | * Adds additional terminal states 50 | * @param nodes the additional state node ids that are to be marked as terminal states 51 | */ 52 | public void addTerminals(int...nodes){ 53 | for(int n : nodes){ 54 | this.terminalStates.add(n); 55 | } 56 | } 57 | 58 | /** 59 | * Removes nodes as being marked as terminal states 60 | * @param nodes the nodes to remove as terminal states 61 | */ 62 | public void removeTerminals(int...nodes){ 63 | for(int n : nodes){ 64 | this.terminalStates.remove(n); 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/domain/singleagent/lunarlander/LunarLanderTF.java: -------------------------------------------------------------------------------- 1 | package burlap.domain.singleagent.lunarlander; 2 | 3 | import burlap.oomdp.core.Domain; 4 | import burlap.oomdp.core.PropositionalFunction; 5 | import burlap.oomdp.core.states.State; 6 | import burlap.oomdp.core.TerminalFunction; 7 | 8 | /** 9 | * A {@link burlap.oomdp.core.TerminalFunction} for the {@link burlap.domain.singleagent.lunarlander.LunarLanderDomain}. 10 | * This class sets all states in which the lunar lander is on a landing pad to be terminal states. 11 | * @author James MacGlashan. 12 | */ 13 | public class LunarLanderTF implements TerminalFunction{ 14 | 15 | private PropositionalFunction onPad; 16 | 17 | /** 18 | * Initializes. 19 | * @param domain a {@link burlap.domain.singleagent.lunarlander.LunarLanderDomain} generated {@link burlap.oomdp.core.Domain} object. 20 | */ 21 | public LunarLanderTF(Domain domain){ 22 | this.onPad = domain.getPropFunction(LunarLanderDomain.PFONPAD); 23 | } 24 | 25 | 26 | @Override 27 | public boolean isTerminal(State s) { 28 | return this.onPad.somePFGroundingIsTrue(s); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/DomainGenerator.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary; 2 | 3 | import burlap.oomdp.core.Domain; 4 | 5 | /** 6 | * This interface provides a simple means for constructing domains, but it is not required to create domains. All domains that 7 | * exist in BURLAP adhere to this interface for constructing domains. 8 | * @author James MacGlashan 9 | */ 10 | public interface DomainGenerator { 11 | 12 | /** 13 | * Returns a newly instantiated Domain object 14 | * @return the newly instantiated Domain object.
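* <p>A typical call site looks like the following sketch, where {@code MyDomainGenerator} is a hypothetical implementation: <pre>{@code DomainGenerator gen = new MyDomainGenerator(); Domain domain = gen.generateDomain(); }</pre>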
15 | */ 16 | public Domain generateDomain(); 17 | 18 | 19 | } 20 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/StateAbstraction.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | 6 | /** 7 | * An interface for taking an input state and returning a simpler abstracted state representation. 8 | * @author James MacGlashan 9 | * 10 | */ 11 | public interface StateAbstraction { 12 | /** 13 | * Returns an abstracted version of state s. State s is not modified in this process. 14 | * @param s the input state to abstract 15 | * @return an abstracted version of state s 16 | */ 17 | public State abstraction(State s); 18 | } 19 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/StateGenerator.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | 6 | /** 7 | * An interface for generating State objects. This may be useful to define for learning in episodic tasks in which 8 | * the initial state is drawn from some distribution. 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public interface StateGenerator { 13 | /** 14 | * Returns a new state object. 15 | * @return a new state object. 16 | */ 17 | public State generateState(); 18 | } 19 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/StateMapping.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | /** 6 | * A state mapping interface that maps one state into another state. Can be useful if mapping one state from one domain into a different domain. 7 | * @author James MacGlashan 8 | * 9 | */ 10 | public interface StateMapping { 11 | State mapState(State s); 12 | } 13 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/common/ConstantStateGenerator.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary.common; 2 | 3 | import burlap.oomdp.auxiliary.StateGenerator; 4 | import burlap.oomdp.core.states.State; 5 | 6 | /** 7 | * This class takes a source state as input and returns copies of it for every call of generateState(). 8 | * @author James MacGlashan 9 | * 10 | */ 11 | public class ConstantStateGenerator implements StateGenerator { 12 | 13 | protected State src; 14 | 15 | /** 16 | * This constructor takes a source state as input and returns copies of it for every call of generateState().
17 | * @param src the source state of which to return copies 18 | */ 19 | public ConstantStateGenerator(State src){ 20 | this.src = src; 21 | } 22 | 23 | @Override 24 | public State generateState() { 25 | return src.copy(); 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/common/GoalConditionTF.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary.common; 2 | 3 | import burlap.oomdp.auxiliary.stateconditiontest.StateConditionTest; 4 | import burlap.oomdp.core.states.State; 5 | import burlap.oomdp.core.TerminalFunction; 6 | 7 | 8 | /** 9 | * Creates a terminal function that indicates terminal states are any states that satisfy a goal condition 10 | * where the goal condition is specified by a {@link burlap.oomdp.auxiliary.stateconditiontest.StateConditionTest} object. 11 | * No other states are set as terminal states. 12 | * @author James MacGlashan 13 | * 14 | */ 15 | public class GoalConditionTF implements TerminalFunction { 16 | 17 | /** 18 | * The state condition test that is used to indicate terminal goal states 19 | */ 20 | StateConditionTest goalCondition; 21 | 22 | public GoalConditionTF(StateConditionTest goalCondition) { 23 | this.goalCondition = goalCondition; 24 | } 25 | 26 | @Override 27 | public boolean isTerminal(State s) { 28 | return this.goalCondition.satisfies(s); 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/common/NullAbstraction.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary.common; 2 | 3 | import burlap.oomdp.auxiliary.StateAbstraction; 4 | import burlap.oomdp.core.states.State; 5 | 6 | 7 | /** 8 | * A StateAbstraction class that does nothing but return a copy of the input state. 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public class NullAbstraction implements StateAbstraction { 13 | 14 | @Override 15 | public State abstraction(State s) { 16 | return s.copy(); 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/common/NullAbstractionNoCopy.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary.common; 2 | 3 | import burlap.oomdp.auxiliary.StateAbstraction; 4 | import burlap.oomdp.core.states.State; 5 | 6 | /** 7 | * A StateAbstraction class that returns the input state without copying it. 8 | * @author James MacGlashan 9 | * 10 | */ 11 | public class NullAbstractionNoCopy implements StateAbstraction{ 12 | 13 | @Override 14 | public State abstraction(State s) { 15 | return s; 16 | } 17 | 18 | 19 | 20 | } 21 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/common/NullTermination.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary.common; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.core.TerminalFunction; 5 | 6 | 7 | /** 8 | * A terminal state function in which no state is considered a terminal state.
9 | * @author James MacGlashan 10 | * 11 | */ 12 | public class NullTermination implements TerminalFunction { 13 | 14 | 15 | @Override 16 | public boolean isTerminal(State s) { 17 | return false; 18 | } 19 | 20 | 21 | } 22 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/common/RandomStartStateGenerator.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary.common; 2 | 3 | import burlap.behavior.singleagent.auxiliary.StateReachability; 4 | import burlap.oomdp.auxiliary.StateGenerator; 5 | import burlap.oomdp.core.states.State; 6 | import burlap.oomdp.singleagent.SADomain; 7 | import burlap.oomdp.statehashing.HashableStateFactory; 8 | import burlap.oomdp.statehashing.SimpleHashableStateFactory; 9 | 10 | import java.util.List; 11 | import java.util.Random; 12 | 13 | 14 | /** 15 | * This class will return a random state from a set of states that are reachable from a source seed state. 16 | * 17 | * @author Stephen Brawner and Mark Ho. Documented by James MacGlashan 18 | * 19 | */ 20 | public class RandomStartStateGenerator implements StateGenerator { 21 | 22 | private List<State> reachableStates; 23 | private Random random; 24 | 25 | /** 26 | * Will discover the reachable states from which to randomly select. Reachable states are found using a {@link burlap.oomdp.statehashing.SimpleHashableStateFactory} with identifier dependence. 27 | * @param domain the domain from which states will be drawn. 28 | * @param seedState the seed state from which the reachable states will be found. 29 | */ 30 | public RandomStartStateGenerator(SADomain domain, State seedState) { 31 | HashableStateFactory hashFactory = new SimpleHashableStateFactory(false); 32 | this.reachableStates = StateReachability.getReachableStates(seedState, domain, hashFactory); 33 | this.random = new Random(); 34 | } 35 | 36 | 37 | /** 38 | * Will discover reachable states from which to randomly select. 39 | * @param domain the domain from which states will be drawn. 40 | * @param seedState the seed state from which the reachable states will be found. 41 | * @param hashFactory the hash factory to use for the reachability analysis. 42 | */ 43 | public RandomStartStateGenerator(SADomain domain, State seedState, HashableStateFactory hashFactory) { 44 | this.reachableStates = StateReachability.getReachableStates(seedState, domain, hashFactory); 45 | this.random = new Random(); 46 | } 47 | 48 | @Override 49 | public State generateState() { 50 | return this.reachableStates.get(this.random.nextInt(this.reachableStates.size())); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/stateconditiontest/SinglePFSCT.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary.stateconditiontest; 2 | 3 | import java.util.List; 4 | 5 | import burlap.oomdp.core.GroundedProp; 6 | import burlap.oomdp.core.PropositionalFunction; 7 | import burlap.oomdp.core.states.State; 8 | 9 | /** 10 | * A state condition class that returns true whenever any grounded version of a specified 11 | * propositional function is true in a state. Useful for specifying goal conditions.
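* <p>Usage sketch (assuming {@code pf} is a goal PropositionalFunction and {@code s} a state): <pre>{@code StateConditionTest goal = new SinglePFSCT(pf); boolean reached = goal.satisfies(s); }</pre>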
12 | * @author James MacGlashan 13 | * 14 | */ 15 | public class SinglePFSCT implements StateConditionTest { 16 | 17 | PropositionalFunction pf; 18 | 19 | /** 20 | * Initializes with the propositional function that is checked for state satisfaction 21 | * @param pf the propositional function to use for satisfaction tests 22 | */ 23 | public SinglePFSCT(PropositionalFunction pf) { 24 | this.pf = pf; 25 | } 26 | 27 | @Override 28 | public boolean satisfies(State s) { 29 | 30 | //List gps = s.getAllGroundedPropsFor(pf); 31 | List<GroundedProp> gps = this.pf.getAllGroundedPropsForState(s); 32 | 33 | for(GroundedProp gp : gps){ 34 | if(gp.isTrue(s)){ 35 | return true; 36 | } 37 | } 38 | 39 | return false; 40 | 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/stateconditiontest/StateConditionTest.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary.stateconditiontest; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | 6 | /** 7 | * An interface for defining classes that check for certain conditions in states. These are useful 8 | * for specifying binary goal conditions for classic search-based planners like A* 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public interface StateConditionTest { 13 | 14 | public boolean satisfies(State s); 15 | 16 | 17 | } 18 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/stateconditiontest/StateConditionTestIterable.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary.stateconditiontest; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | 6 | /** 7 | * An extension of the StateConditionTest that is iterable. 8 | * @author James MacGlashan 9 | * 10 | */ 11 | public interface StateConditionTestIterable extends StateConditionTest, Iterable<State> { 12 | /* 13 | * This method is used to set the state context to enumerate over states. 14 | * This is useful because typically a state test is independent of other state objects 15 | * and calling this method can be used to set the context of those variables over which to enumerate 16 | */ 17 | public void setStateContext(State s); 18 | } 19 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/auxiliary/stateconditiontest/TFGoalCondition.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.auxiliary.stateconditiontest; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.core.TerminalFunction; 5 | 6 | 7 | /** 8 | * A simple StateConditionTest wrapper of TerminalFunction. Deterministic forward search planners search for goal states that are indicated 9 | * by StateConditionTest objects. If a TerminalFunction only terminates in goal states, this class can be used to wrap the terminal function 10 | * to indicate that goal states are any previously defined terminal states. 11 | * @author James MacGlashan 12 | * 13 | */ 14 | public class TFGoalCondition implements StateConditionTest { 15 | 16 | protected TerminalFunction tf; 17 | 18 | /** 19 | * Sets this class to return true on any states that are terminal states as indicated by the TerminalFunction. 20 | * @param tf the TerminalFunction that indicates goal states.
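 * <p>
 * For example (a sketch; {@code tf} here stands for any terminal function that only terminates in goal states):
 * <pre>{@code
 * StateConditionTest goal = new TFGoalCondition(tf);
 * // deterministic planners such as A* can now use goal as their goal condition
 * }</pre>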
21 | */ 22 | public TFGoalCondition(TerminalFunction tf){ 23 | this.tf = tf; 24 | } 25 | 26 | /** 27 | * Returns the {@link burlap.oomdp.core.TerminalFunction} used to specify the goal condition. 28 | * @return the {@link burlap.oomdp.core.TerminalFunction} used to specify the goal condition. 29 | */ 30 | public TerminalFunction getTf() { 31 | return tf; 32 | } 33 | 34 | /** 35 | * Sets the {@link burlap.oomdp.core.TerminalFunction} used to specify the goal condition. 36 | * @param tf the {@link burlap.oomdp.core.TerminalFunction} used to specify the goal condition. 37 | */ 38 | public void setTf(TerminalFunction tf) { 39 | this.tf = tf; 40 | } 41 | 42 | @Override 43 | public boolean satisfies(State s) { 44 | return tf.isTerminal(s); 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/core/AbstractGroundedAction.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.core; 2 | 3 | import burlap.oomdp.singleagent.GroundedAction; 4 | import burlap.oomdp.stochasticgames.JointAction; 5 | 6 | /** 7 | * This is an interface for grounded actions. A grounded action is a reference to an action definition along with the specific parameters with which the action 8 | * is to be applied. Implementations of this interface include the single-agent action grounding ({@link GroundedAction}), an action grounding for a specific agent 9 | * in a stochastic game ({@link burlap.oomdp.stochasticgames.agentactions.GroundedSGAgentAction}), and a joint action in a stochastic game ({@link JointAction}). 10 | * @author James MacGlashan 11 | * 12 | */ 13 | public interface AbstractGroundedAction { 14 | 15 | 16 | /** 17 | * Returns the action name for this grounded action. 18 | * @return the action name for this grounded action. 19 | */ 20 | String actionName(); 21 | 22 | 23 | /** 24 | * Returns a copy of this grounded action. 25 | * @return a copy of this grounded action. 26 | */ 27 | AbstractGroundedAction copy(); 28 | 29 | 30 | /** 31 | * Returns true if this action uses parameters 32 | * @return true if this action uses parameters; false otherwise 33 | */ 34 | boolean isParameterized(); 35 | 36 | 37 | /** 38 | * Initializes the parameter values of this {@link burlap.oomdp.core.AbstractGroundedAction} according 39 | * to the provided string representation of their values.
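 * <p>
 * A hypothetical sketch (the grounded action {@code ga} and the parameter values {@code "block0"} and {@code "block1"} are made up for illustration):
 * <pre>{@code
 * if(ga.isParameterized()){
 *     ga.initParamsWithStringRep(new String[]{"block0", "block1"});
 * }
 * }</pre>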
40 | * @param params an array in which each element is the string representation of one of this {@link burlap.oomdp.core.AbstractGroundedAction}'s values 41 | */ 42 | void initParamsWithStringRep(String [] params); 43 | 44 | 45 | /** 46 | * Returns an array of string representations of this {@link burlap.oomdp.core.AbstractGroundedAction}'s parameters 47 | * @return an array of string representations of this {@link burlap.oomdp.core.AbstractGroundedAction}'s parameters 48 | */ 49 | String [] getParametersAsString(); 50 | 51 | 52 | 53 | } 54 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/core/TerminalFunction.java: -------------------------------------------------------------------------------- 1 | /* Author: James MacGlashan 2 | * Description: 3 | * Abstract class for determining if a state in an OO-MDP domain is a terminal state 4 | * This kind of information is important for episode and goal-oriented MDPs 5 | */ 6 | 7 | 8 | package burlap.oomdp.core; 9 | 10 | 11 | import burlap.oomdp.core.states.State; 12 | 13 | /** 14 | * An interface for defining terminal states of an MDP. 15 | * @author James MacGlashan 16 | * 17 | */ 18 | public interface TerminalFunction { 19 | 20 | public boolean isTerminal(State s); 21 | 22 | } 23 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/core/TransitionProbability.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.core; 2 | 3 | 4 | import burlap.oomdp.core.states.State; 5 | 6 | /** 7 | * Represents the probability of transition to a given state. 8 | * @author James MacGlashan 9 | * 10 | */ 11 | public class TransitionProbability { 12 | 13 | /** 14 | * The state to which the agent may transition. 15 | */ 16 | public State s; 17 | 18 | /** 19 | * the probability of transitioning to state s 20 | */ 21 | public double p; 22 | 23 | public TransitionProbability(State s, double p){ 24 | this.s = s; 25 | this.p = p; 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/core/objects/OOMDPObjectInstance.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.core.objects; 2 | 3 | public abstract class OOMDPObjectInstance implements ObjectInstance{ 4 | 5 | public OOMDPObjectInstance() { 6 | } 7 | 8 | /** 9 | * Returns a string representation of this object including its name and attribute value assignments. 10 | * @return a string representation of this object including its name and attribute value assignments. 11 | */ 12 | public String getObjectDescription(){ 13 | return this.buildObjectDescription(new StringBuilder()).toString(); 14 | } 15 | 16 | /** 17 | * Sets an object's value based on its java.lang type.
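 * <p>
 * A sketch of the type-based dispatch (the attribute names are illustrative assumptions):
 * <pre>{@code
 * obj.setValue("x", 3);        // boxed to java.lang.Integer, routed to the int overload
 * obj.setValue("speed", 0.5);  // boxed to java.lang.Double, routed to the double overload
 * obj.setValue("name", "a0");  // routed to the String overload
 * }</pre>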
18 | */ 19 | public <T> ObjectInstance setValue(String attName, T value) { 20 | String valueClass = value.getClass().getName(); 21 | if(valueClass.equals("boolean") || valueClass.equals("java.lang.Boolean")){ 22 | Boolean b = (Boolean)value; 23 | return this.setValue(attName, (boolean)b); 24 | } 25 | else if(valueClass.equals("double") || valueClass.equals("java.lang.Double")){ 26 | Double d = (Double)value; 27 | return this.setValue(attName, (double)d); 28 | } 29 | else if(valueClass.equals("double[]") || valueClass.equals("[D")){ 30 | return this.setValue(attName, (double[])value); 31 | } 32 | else if(valueClass.equals("int") || valueClass.equals("java.lang.Integer")){ 33 | Integer i = (Integer)value; 34 | return this.setValue(attName, (int)i); 35 | } 36 | else if(valueClass.equals("int[]") || valueClass.equals("[I")){ 37 | return this.setValue(attName, (int[])value); 38 | } 39 | else if(valueClass.equals("java.lang.String")){ 40 | return this.setValue(attName, (String)value); 41 | } 42 | throw new RuntimeException("Unsupported value type " + valueClass); 43 | 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/core/states/ImmutableStateInterface.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.core.states; 2 | 3 | import java.util.List; 4 | 5 | import burlap.oomdp.core.objects.ImmutableObjectInstance; 6 | import burlap.oomdp.core.objects.ObjectInstance; 7 | 8 | import com.google.common.collect.ImmutableList; 9 | 10 | /** 11 | * A state that implements this interface implies that it itself is immutable and the ObjectInstances it uses are immutable. 12 | * An immutable state is one that doesn't allow you to make modifications to its underlying data 13 | * structure, by using the getObject, setValue paradigm. Any changes to a state will result in a copy that reflects 14 | * those changes. The original state will not be modified. 15 | * @author Stephen Brawner 16 | * 17 | */ 18 | public interface ImmutableStateInterface extends State, Iterable { 19 | ImmutableStateInterface replaceAndHash(ImmutableList objects, int code); 20 | ImmutableStateInterface replaceObject(ObjectInstance objectToReplace, ObjectInstance newObject); 21 | ImmutableStateInterface replaceAllObjects(List objectsToRemove, List objectsToAdd); 22 | ImmutableList getImmutableObjects(); 23 | boolean isHashed(); 24 | } 25 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/core/states/OOMDPState.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.core.states; 2 | 3 | import burlap.oomdp.core.objects.ObjectInstance; 4 | 5 | public abstract class OOMDPState implements State { 6 | 7 | 8 | @Override 9 | public String toString(){ 10 | return this.getCompleteStateDescription(); 11 | } 12 | 13 | /** 14 | * Renames the identifier for the object instance currently named originalName with the name newName. 15 | * @param originalName the original name of the object instance to be renamed in this state 16 | * @param newName the new name of the object instance 17 | */ 18 | public State renameObject(String originalName, String newName){ 19 | ObjectInstance o = this.getObject(originalName); 20 | return this.renameObject(o, newName); 21 | } 22 | 23 | /** 24 | * Sets an object's value.
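 * <p>
 * For example (a sketch; the object name {@code "agent0"} and attribute {@code "x"} are assumed):
 * <pre>{@code
 * state.setObjectsValue("agent0", "x", 5);
 * }</pre>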
25 | * @throws RuntimeException if the object doesn't exist, or the attribute name doesn't exist for the object. 26 | */ 27 | public <T> State setObjectsValue(String objectName, String attName, T value) { 28 | ObjectInstance obj = this.getObject(objectName); 29 | if (obj == null) { 30 | throw new RuntimeException("Object " + objectName + " does not exist in this state"); 31 | } 32 | obj.setValue(attName, value); 33 | return this; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/core/values/StringValue.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.core.values; 2 | 3 | import burlap.oomdp.core.Attribute; 4 | 5 | 6 | /** 7 | * This class provides a value for a string. 8 | * @author Greg Yauney (gyauney) 9 | * 10 | */ 11 | public class StringValue extends OOMDPValue implements Value { 12 | private static final String UNSET = ""; 13 | /** 14 | * The string value 15 | */ 16 | protected final String stringVal; 17 | 18 | 19 | /** 20 | * Initializes for a given attribute. The default value will be set to the empty string. 21 | * @param attribute the attribute for which this value is defined 22 | */ 23 | public StringValue(Attribute attribute) { 24 | super(attribute); 25 | this.stringVal = UNSET; 26 | } 27 | 28 | 29 | /** 30 | * Initializes from an existing value. 31 | * @param v the value to copy 32 | */ 33 | public StringValue(StringValue v) { 34 | super(v); 35 | this.stringVal = v.stringVal; 36 | } 37 | 38 | public StringValue(Attribute attribute, String stringVal) { 39 | super(attribute); 40 | this.stringVal = stringVal; 41 | } 42 | 43 | @Override 44 | public Value copy() { 45 | return new StringValue(this); 46 | } 47 | 48 | @Override 49 | public boolean valueHasBeenSet() { 50 | return true; 51 | } 52 | 53 | @Override 54 | public Value setValue(int v) { 55 | return new StringValue(this.attribute, Integer.toString(v)); 56 | } 57 | 58 | @Override 59 | public Value setValue(double v) { 60 | return new StringValue(this.attribute, Double.toString(v)); 61 | } 62 | 63 | @Override 64 | public Value setValue(String v) { 65 | return new StringValue(this.attribute, v); 66 | } 67 | 68 | @Override 69 | public StringBuilder buildStringVal(StringBuilder builder) { 70 | return builder.append(this.stringVal); 71 | } 72 | 73 | @Override 74 | public boolean equals(Object obj){ 75 | if (this == obj) { 76 | return true; 77 | } 78 | if(!(obj instanceof StringValue)){ 79 | return false; 80 | } 81 | 82 | StringValue o = (StringValue)obj; 83 | 84 | if(!o.attribute.equals(attribute)){ 85 | return false; 86 | } 87 | 88 | return this.stringVal.equals(o.stringVal); 89 | 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/core/values/UnsetValueException.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.core.values; 2 | 3 | /** 4 | * A class for indicating that an OO-MDP object instance value is unset.
5 | * @author James MacGlashan 6 | * 7 | */ 8 | public class UnsetValueException extends RuntimeException { 9 | 10 | private static final long serialVersionUID = 1L; 11 | 12 | public UnsetValueException(){ 13 | super("OO-MDP Object Instance Value is Unset"); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/legacy/StateParser.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.legacy; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | /** 6 | * This interface is used for converting states to parsable string representations and parsing those string representations back into states. 7 | * Although there is a domain-universal string parser implementation of this interface, it is very verbose, and file size and readability 8 | * may be improved by creating a domain-specific state parser. 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public interface StateParser { 13 | 14 | /** 15 | * Converts state s into a parsable string representation. 16 | * @param s the state to convert 17 | * @return a parsable string representation of state s. 18 | */ 19 | public String stateToString(State s); 20 | 21 | /** 22 | * Converts a string into a State object assuming the string representation was produced using this state parser. 23 | * @param str a string representation of a state 24 | * @return the state object that corresponds to the string representation. 25 | */ 26 | public State stringToState(String str); 27 | 28 | 29 | } 30 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/ActionObserver.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | public interface ActionObserver { 6 | public void actionEvent(State s, GroundedAction ga, State sp); 7 | } 8 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/RewardFunction.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | 6 | /** 7 | * Defines the reward function for a task. 8 | * @author James MacGlashan 9 | * 10 | */ 11 | public interface RewardFunction { 12 | 13 | /** 14 | * Returns the reward received when action a is executed in state s and the agent transitions to state sprime. 15 | * @param s the state in which the action was executed 16 | * @param a the action executed 17 | * @param sprime the state to which the agent transitioned 18 | * @return the reward received when action a is executed in state s and the agent transitions to state sprime.
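 * <p>
 * A minimal implementing sketch (a constant step cost, comparable to {@link burlap.oomdp.singleagent.common.UniformCostRF}):
 * <pre>{@code
 * RewardFunction rf = new RewardFunction() {
 *     @Override
 *     public double reward(State s, GroundedAction a, State sprime) {
 *         return -1.; // every transition costs 1
 *     }
 * };
 * }</pre>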
19 | */ 20 | public abstract double reward(State s, GroundedAction a, State sprime); 21 | 22 | } 23 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/common/NullAction.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.common; 2 | 3 | import burlap.oomdp.core.Domain; 4 | import burlap.oomdp.core.states.State; 5 | import burlap.oomdp.core.TransitionProbability; 6 | import burlap.oomdp.singleagent.Action; 7 | import burlap.oomdp.singleagent.FullActionModel; 8 | import burlap.oomdp.singleagent.GroundedAction; 9 | 10 | import java.util.List; 11 | 12 | 13 | /** 14 | * @author James 15 | * This action is an action that does nothing. 16 | * It may be useful for making references to actions that do not have domain associations 17 | * or if a domain needs a no-op action 18 | * 19 | */ 20 | public class NullAction extends SimpleAction.SimpleDeterministicAction implements FullActionModel { 21 | 22 | 23 | public NullAction(String name){ 24 | this.name = name; 25 | this.domain = null; 26 | } 27 | 28 | public NullAction(String name, Domain domain){ 29 | super(name, domain); 30 | } 31 | 32 | 33 | @Override 34 | protected State performActionHelper(State st, GroundedAction groundedAction) { 35 | return st; 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/common/NullRewardFunction.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.common; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.singleagent.GroundedAction; 5 | import burlap.oomdp.singleagent.RewardFunction; 6 | 7 | /** 8 | * This class defines a reward function that always returns 0 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public class NullRewardFunction implements RewardFunction { 13 | 14 | @Override 15 | public double reward(State s, GroundedAction a, State sprime) { 16 | return 0; 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/common/SimpleGroundedAction.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.common; 2 | 3 | import burlap.oomdp.singleagent.Action; 4 | import burlap.oomdp.singleagent.GroundedAction; 5 | 6 | /** 7 | * A {@link burlap.oomdp.singleagent.GroundedAction} implementation for actions that have no parameters. 8 | * @author James MacGlashan. 
9 | */ 10 | public class SimpleGroundedAction extends GroundedAction{ 11 | 12 | public SimpleGroundedAction(Action action) { 13 | super(action); 14 | } 15 | 16 | @Override 17 | public void initParamsWithStringRep(String[] params) { 18 | //do nothing 19 | } 20 | 21 | @Override 22 | public String[] getParametersAsString() { 23 | return new String[0]; 24 | } 25 | 26 | @Override 27 | public GroundedAction copy() { 28 | return new SimpleGroundedAction(this.action); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/common/SingleGoalPFRF.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.common; 2 | 3 | import burlap.oomdp.core.PropositionalFunction; 4 | import burlap.oomdp.core.states.State; 5 | import burlap.oomdp.singleagent.GroundedAction; 6 | import burlap.oomdp.singleagent.RewardFunction; 7 | 8 | 9 | /** 10 | * This class defines a reward function that returns a goal reward when any grounded form of a propositional 11 | * function is true in the resulting state and a default non-goal reward otherwise. 12 | * @author James MacGlashan 13 | * 14 | */ 15 | public class SingleGoalPFRF implements RewardFunction { 16 | 17 | PropositionalFunction pf; 18 | double goalReward; 19 | double nonGoalReward; 20 | 21 | 22 | 23 | /** 24 | * Initializes the reward function to return 1 when any grounded form of pf is true in the resulting 25 | * state. 26 | * @param pf the propositional function that must have a true grounded version for the goal reward to be returned. 27 | */ 28 | public SingleGoalPFRF(PropositionalFunction pf){ 29 | this.pf = pf; 30 | this.goalReward = 1.; 31 | this.nonGoalReward = 0.; 32 | } 33 | 34 | 35 | /** 36 | * Initializes the reward function to return the specified goal reward when any grounded form of pf is true in the resulting 37 | * state and the specified non-goal reward otherwise. 38 | * @param pf the propositional function that must have a true grounded version for the goal reward to be returned. 39 | * @param goalReward the goal reward value to be returned 40 | * @param nonGoalReward the non goal reward value to be returned. 41 | */ 42 | public SingleGoalPFRF(PropositionalFunction pf, double goalReward, double nonGoalReward){ 43 | this.pf = pf; 44 | this.goalReward = goalReward; 45 | this.nonGoalReward = nonGoalReward; 46 | } 47 | 48 | 49 | @Override 50 | public double reward(State s, GroundedAction a, State sprime) { 51 | 52 | if(this.pf.somePFGroundingIsTrue(sprime)){ 53 | return goalReward; 54 | } 55 | return nonGoalReward; 56 | 57 | 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/common/UniformCostRF.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.common; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.singleagent.GroundedAction; 5 | import burlap.oomdp.singleagent.RewardFunction; 6 | 7 | 8 | /** 9 | * Defines a reward function that always returns -1.
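 * <p>
 * Pairing this with a goal-indicating terminal function makes maximizing return equivalent to minimizing the number of steps, e.g. (a sketch; {@code goalCondition} is an assumed {@link burlap.oomdp.auxiliary.stateconditiontest.StateConditionTest}):
 * <pre>{@code
 * RewardFunction rf = new UniformCostRF();
 * TerminalFunction tf = new GoalConditionTF(goalCondition);
 * }</pre>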
10 | * @author James MacGlashan 11 | * 12 | */ 13 | public class UniformCostRF implements RewardFunction { 14 | 15 | 16 | public UniformCostRF(){ 17 | 18 | } 19 | 20 | @Override 21 | public double reward(State s, GroundedAction a, State sprime) { 22 | return -1; 23 | } 24 | 25 | } 26 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/environment/EnvironmentObserver.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.environment; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.singleagent.GroundedAction; 5 | 6 | /** 7 | * A class that is told of interactions in an environment. This is typically called from an {@link burlap.oomdp.singleagent.environment.EnvironmentServer} 8 | * which intercepts the environment interactions. 9 | * @author James MacGlashan. 10 | */ 11 | public interface EnvironmentObserver { 12 | 13 | /** 14 | * This method is called when an {@link burlap.oomdp.singleagent.environment.Environment} receives an action to execute, but before the 15 | * {@link burlap.oomdp.singleagent.environment.Environment} has completed execution. 16 | * @param o the current {@link burlap.oomdp.singleagent.environment.Environment} observation in which the action begins execution. 17 | * @param action the {@link burlap.oomdp.singleagent.GroundedAction} which will be executed in the {@link burlap.oomdp.singleagent.environment.Environment}. 18 | */ 19 | void observeEnvironmentActionInitiation(State o, GroundedAction action); 20 | 21 | /** 22 | * This method is called every time an {@link burlap.oomdp.singleagent.environment.Environment} is interacted with. 23 | * @param eo the resulting {@link burlap.oomdp.singleagent.environment.EnvironmentOutcome} 24 | */ 25 | void observeEnvironmentInteraction(EnvironmentOutcome eo); 26 | 27 | /** 28 | * This method is called every time an {@link burlap.oomdp.singleagent.environment.Environment} is reset (has the {@link Environment#resetEnvironment()} method called). 29 | * @param resetEnvironment the {@link burlap.oomdp.singleagent.environment.Environment} that was reset. 30 | */ 31 | void observeEnvironmentReset(Environment resetEnvironment); 32 | } 33 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/environment/EnvironmentOutcome.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.environment; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.singleagent.GroundedAction; 5 | 6 | /** 7 | * A class for specifying the outcome of executing an action in an {@link burlap.oomdp.singleagent.environment.Environment}. 8 | * The class consists of the previous environment observation (as a {@link burlap.oomdp.core.states.State}) in which the action was taken; 9 | * the action taken (as a {@link burlap.oomdp.singleagent.GroundedAction}); the next environment observation (also a {@link burlap.oomdp.core.states.State}) 10 | * following the action; the reward received from the environment; and whether the new state of the environment is a 11 | * terminal state. 12 | * @author James MacGlashan. 13 | */ 14 | public class EnvironmentOutcome { 15 | 16 | /** 17 | * The previous environment observation (as a {@link burlap.oomdp.core.states.State}) when the action was taken.
18 | */ 19 | public State o; 20 | 21 | /** 22 | * The action taken in the environment 23 | */ 24 | public GroundedAction a; 25 | 26 | /** 27 | * The next environment observation (as a {@link burlap.oomdp.core.states.State}) following the action's execution. 28 | */ 29 | public State op; 30 | 31 | /** 32 | * The reward received 33 | */ 34 | public double r; 35 | 36 | /** 37 | * Whether the next state to which the environment transitioned is a terminal state (true if so, false otherwise) 38 | */ 39 | public boolean terminated; 40 | 41 | 42 | /** 43 | * Initializes. 44 | * @param o The previous state of the environment when the action was taken. 45 | * @param a The action taken in the environment 46 | * @param op The next state to which the environment transitioned 47 | * @param r The reward received 48 | * @param terminated Whether the next state to which the environment transitioned is a terminal state (true if so, false otherwise) 49 | */ 50 | public EnvironmentOutcome(State o, GroundedAction a, State op, double r, boolean terminated) { 51 | this.o = o; 52 | this.a = a; 53 | this.op = op; 54 | this.r = r; 55 | this.terminated = terminated; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/environment/StateSettableEnvironment.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.environment; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | /** 6 | * An interface to be used with {@link burlap.oomdp.singleagent.environment.Environment} instances that allows 7 | * the environment to have its state set to a client-specified state. 8 | * @author James MacGlashan. 9 | */ 10 | public interface StateSettableEnvironment extends Environment{ 11 | 12 | /** 13 | * Sets the current state of the environment to the specified state. 14 | * @param s the state to which this {@link burlap.oomdp.singleagent.environment.Environment} will be set. 15 | */ 16 | void setCurStateTo(State s); 17 | } 18 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/environment/TaskSettableEnvironment.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.environment; 2 | 3 | import burlap.oomdp.core.TerminalFunction; 4 | import burlap.oomdp.singleagent.RewardFunction; 5 | 6 | /** 7 | * An {@link burlap.oomdp.singleagent.environment.Environment} interface extension that allows the {@link burlap.oomdp.singleagent.RewardFunction} 8 | * and {@link burlap.oomdp.core.TerminalFunction} to be set and accessed. 9 | * @author James MacGlashan. 10 | */ 11 | public interface TaskSettableEnvironment extends Environment{ 12 | 13 | /** 14 | * Sets the {@link burlap.oomdp.singleagent.RewardFunction} of this {@link burlap.oomdp.singleagent.environment.Environment} to 15 | * the specified reward function. 16 | * @param rf the new {@link burlap.oomdp.singleagent.RewardFunction} of the {@link burlap.oomdp.singleagent.environment.Environment}. 17 | */ 18 | void setRf(RewardFunction rf); 19 | 20 | /** 21 | * Sets the {@link burlap.oomdp.core.TerminalFunction} of this {@link burlap.oomdp.singleagent.environment.Environment} to 22 | * the specified terminal function. 23 | * @param tf the new {@link burlap.oomdp.core.TerminalFunction} of the {@link burlap.oomdp.singleagent.environment.Environment}.
24 | */ 25 | void setTf(TerminalFunction tf); 26 | 27 | /** 28 | * Returns the {@link burlap.oomdp.singleagent.RewardFunction} this {@link burlap.oomdp.singleagent.environment.Environment} uses 29 | * to determine rewards. 30 | * @return a {@link burlap.oomdp.singleagent.RewardFunction} 31 | */ 32 | RewardFunction getRf(); 33 | 34 | /** 35 | * Returns the {@link burlap.oomdp.core.TerminalFunction} this {@link burlap.oomdp.singleagent.environment.Environment} uses 36 | * to determine terminal states 37 | * @return a {@link burlap.oomdp.core.TerminalFunction} 38 | */ 39 | TerminalFunction getTf(); 40 | 41 | } 42 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/explorer/SpecialExplorerAction.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.explorer; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | 6 | /** 7 | * An interface for defining special non-domain actions to take in a visual explorer. 8 | * @author James MacGlashan 9 | * 10 | */ 11 | public interface SpecialExplorerAction { 12 | public State applySpecialAction(State curState); 13 | } 14 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/explorer/StateResetSpecialAction.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.explorer; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.singleagent.environment.Environment; 5 | 6 | 7 | /** 8 | * A special non-domain action that causes a {@link burlap.oomdp.singleagent.explorer.VisualExplorer}'s environment to be reset with the {@link burlap.oomdp.singleagent.environment.Environment#resetEnvironment()} 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public class StateResetSpecialAction implements SpecialExplorerAction { 13 | 14 | Environment env; 15 | 16 | /** 17 | * Initializes. 18 | * @param env the {@link burlap.oomdp.singleagent.environment.Environment} which will be reset by the {@link #applySpecialAction(burlap.oomdp.core.states.State)} method. 19 | */ 20 | public StateResetSpecialAction(Environment env){ 21 | this.env = env; 22 | } 23 | 24 | 25 | @Override 26 | public State applySpecialAction(State curState) { 27 | this.env.resetEnvironment(); 28 | return this.env.getCurrentObservation(); 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/pomdp/beliefstate/BeliefState.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.pomdp.beliefstate; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.singleagent.GroundedAction; 5 | 6 | /** 7 | * An interface for defining a belief state, which is a probability distribution over MDP states. This interface 8 | * does not require enumerating all states, because it is possible to have a belief state over an infinite number of MDP 9 | * states. 
However, it does require that the probability density function be returnable ({@link #belief(burlap.oomdp.core.states.State)}), 10 | * to be able to sample an MDP state from the belief distribution {@link #sampleStateFromBelief()}, 11 | * and a mechanism to update the belief state with respect to some observation and action {@link #getUpdatedBeliefState(burlap.oomdp.core.states.State, burlap.oomdp.singleagent.GroundedAction)}. 12 | * 13 | * @author James MacGlashan and Nakul Gopalan 14 | */ 15 | public interface BeliefState extends State { 16 | 17 | 18 | /** 19 | * Returns the probability density/mass for the input MDP state. 20 | * @param s the input MDP state defined by a {@link burlap.oomdp.core.states.State} instance. 21 | * @return the probability density/mass of the input MDP state in this belief distribution. 22 | */ 23 | double belief(State s); 24 | 25 | /** 26 | * Samples an MDP state from this belief distribution. 27 | * @return an MDP state defined by a {@link burlap.oomdp.core.states.State} instance. 28 | */ 29 | State sampleStateFromBelief(); 30 | 31 | /** 32 | * Computes a new belief distribution using this BeliefState as the prior and conditioned on the given POMDP observation 33 | * and action taken. 34 | * @param observation the conditioned POMDP observation defined by a {@link burlap.oomdp.core.states.State} instance. 35 | * @param ga the conditioned action selection in the previous time step. 36 | * @return the new belief state distribution represented by a new {@link BeliefState} instance. 37 | */ 38 | BeliefState getUpdatedBeliefState(State observation, GroundedAction ga); 39 | 40 | 41 | } 42 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/pomdp/beliefstate/DenseBeliefVector.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.pomdp.beliefstate; 2 | 3 | /** 4 | * An interface to be used in conjunction with {@link burlap.oomdp.singleagent.pomdp.beliefstate.BeliefState} instances 5 | * for belief states that can generate a dense belief vector representation. 6 | * @author James MacGlashan. 7 | */ 8 | public interface DenseBeliefVector extends EnumerableBeliefState{ 9 | 10 | /** 11 | * Returns a dense belief vector representation of this belief state. 12 | * @return a double array specifying the dense belief vector representation. 13 | */ 14 | double [] getBeliefVector(); 15 | 16 | /** 17 | * Sets this belief state to the provided dense belief vector. If the belief vector dimensionality does not match 18 | * this object's dimensionality then a runtime exception will be thrown. 19 | * @param b the belief vector to set this belief state to. 20 | */ 21 | public void setBeliefVector(double [] b); 22 | } 23 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/pomdp/beliefstate/EnumerableBeliefState.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.pomdp.beliefstate; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | import java.util.List; 6 | 7 | /** 8 | * An interface to be used by {@link BeliefState} implementations that also can enumerate 9 | * the set of states that have probability mass.
The probability mass of a state is specified by the 10 | * {@link burlap.oomdp.singleagent.pomdp.beliefstate.EnumerableBeliefState.StateBelief} class which is a pair 11 | * consisting of an MDP state defined by a {@link burlap.oomdp.core.states.State} instance, and its probability mass, defined by 12 | * a double. 13 | * @author James MacGlashan. 14 | */ 15 | public interface EnumerableBeliefState { 16 | 17 | /** 18 | * Returns the states, and their probability mass, that have non-zero probability mass. States that are not 19 | * included in the returned list are assumed to have probability mass zero. 20 | * @return a {@link java.util.List} of {@link burlap.oomdp.singleagent.pomdp.beliefstate.EnumerableBeliefState.StateBelief} objects specifying the enumerated probability mass function. 21 | */ 22 | List<StateBelief> getStatesAndBeliefsWithNonZeroProbability(); 23 | 24 | 25 | /** 26 | * A class for specifying the probability mass of an MDP state in a {@link BeliefState}. 27 | */ 28 | public static class StateBelief{ 29 | 30 | /** 31 | * The MDP state defined by a {@link burlap.oomdp.core.states.State} instance. 32 | */ 33 | public State s; 34 | 35 | /** 36 | * The probability mass of the MDP state. 37 | */ 38 | public double belief; 39 | 40 | 41 | /** 42 | * Initializes 43 | * @param s the MDP state defined by a {@link burlap.oomdp.core.states.State} instance. 44 | * @param belief the probability mass of the state. 45 | */ 46 | public StateBelief(State s, double belief){ 47 | this.s = s; 48 | this.belief = belief; 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/singleagent/pomdp/beliefstate/tabular/HashableTabularBeliefStateFactory.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.singleagent.pomdp.beliefstate.tabular; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.statehashing.HashableState; 5 | import burlap.oomdp.statehashing.HashableStateFactory; 6 | import org.apache.commons.lang3.builder.HashCodeBuilder; 7 | 8 | import java.util.Map; 9 | 10 | /** 11 | * A {@link burlap.oomdp.statehashing.HashableStateFactory} for {@link burlap.oomdp.singleagent.pomdp.beliefstate.tabular.TabularBeliefState} instances. 12 | * @author James MacGlashan.
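 * <p>
 * Usage sketch ({@code beliefState} is an assumed {@link TabularBeliefState} instance):
 * <pre>{@code
 * HashableStateFactory hf = new HashableTabularBeliefStateFactory();
 * HashableState hs = hf.hashState(beliefState); // throws a RuntimeException for non-tabular belief states
 * }</pre>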
13 | */ 14 | public class HashableTabularBeliefStateFactory implements HashableStateFactory{ 15 | 16 | @Override 17 | public HashableState hashState(State s) { 18 | 19 | if(!(s instanceof TabularBeliefState)){ 20 | throw new RuntimeException("Cannot generate HashableState for input state, because it is a " + s.getClass().getName() + " instance and HashableTabularBeliefStateFactory only hashes TabularBeliefState instances."); 21 | } 22 | 23 | return new HashableTabularBeliefState(s); 24 | } 25 | 26 | @Override 27 | public boolean objectIdentifierIndependent() { 28 | return true; 29 | } 30 | 31 | 32 | public static class HashableTabularBeliefState extends HashableState{ 33 | 34 | public HashableTabularBeliefState(State s) { 35 | super(s); 36 | } 37 | 38 | @Override 39 | public int hashCode() { 40 | 41 | HashCodeBuilder builder = new HashCodeBuilder(17, 31); 42 | for(Map.Entry e : ((TabularBeliefState)this.s).beliefValues.entrySet()){ 43 | int entryHash = 31 * e.getKey().hashCode() + e.getValue().hashCode(); 44 | builder.append(entryHash); 45 | } 46 | 47 | return builder.toHashCode(); 48 | } 49 | 50 | @Override 51 | public boolean equals(Object obj) { 52 | 53 | if(!(obj instanceof HashableTabularBeliefState)){ 54 | return false; 55 | } 56 | 57 | return this.s.equals(((HashableTabularBeliefState) obj).s); 58 | } 59 | 60 | @Override 61 | public State copy() { 62 | return new HashableTabularBeliefState(s); 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/statehashing/HashableObjectFactory.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.statehashing; 2 | 3 | import burlap.oomdp.core.objects.ObjectInstance; 4 | 5 | public interface HashableObjectFactory { 6 | 7 | HashableObject hashObject(ObjectInstance object); 8 | HashableValueFactory getValueHashFactory(); 9 | } 10 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/statehashing/HashableStateFactory.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.statehashing; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | 6 | /** 7 | * This interface is to be used by classes that can produce {@link HashableState} objects 8 | * that provide a hash values for {@link burlap.oomdp.core.states.State} objects. This is useful for tabular 9 | * methods that make use of {@link java.util.HashSet}s or {@link java.util.HashMap}s for fast retrieval. 10 | * @author James MacGlashan 11 | * 12 | */ 13 | public interface HashableStateFactory { 14 | 15 | /** 16 | * Turns {@link burlap.oomdp.core.states.State} s into a {@link burlap.oomdp.statehashing.HashableState} 17 | * @param s the input {@link burlap.oomdp.core.states.State} to transform. 18 | * @return a {@link burlap.oomdp.statehashing.HashableState}. 19 | */ 20 | HashableState hashState(State s); 21 | 22 | /** 23 | * Returns true if the {@link burlap.oomdp.statehashing.HashableState} objects returned are object identifier independent; false if they are dependent. 24 | * @return true if the {@link burlap.oomdp.statehashing.HashableState} objects returned are object identifier independent; false if they are dependent. 
25 | */ 26 | boolean objectIdentifierIndependent(); 27 | 28 | } 29 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/statehashing/HashableValue.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.statehashing; 2 | 3 | import burlap.oomdp.core.values.Value; 4 | 5 | public class HashableValue { 6 | private final HashableValueFactory hashingFactory; 7 | private final Value value; 8 | private final int hashCode; 9 | 10 | public HashableValue(Value value, HashableValueFactory hashingFactory, int hashCode) { 11 | this.value = value; 12 | this.hashingFactory = hashingFactory; 13 | this.hashCode = hashCode; 14 | } 15 | 16 | public HashableValueFactory getHashFactory() { 17 | return this.hashingFactory; 18 | } 19 | 20 | public Value getValue() { 21 | return this.value; 22 | } 23 | 24 | @Override 25 | public int hashCode() { 26 | return this.hashCode; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/statehashing/HashableValueFactory.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.statehashing; 2 | 3 | import burlap.oomdp.core.values.Value; 4 | 5 | public interface HashableValueFactory { 6 | 7 | HashableValue hashValue(Value value); 8 | } 9 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/stateserialization/SerializableStateFactory.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.stateserialization; 2 | 3 | import burlap.oomdp.core.states.State; 4 | 5 | /** 6 | * A factory interface for generating {@link burlap.oomdp.stateserialization.SerializableState} instances from an input 7 | * {@link burlap.oomdp.core.states.State} by using the {@link #serialize(burlap.oomdp.core.states.State)} method. This 8 | * interface requires a {@link #getGeneratedClass()} method that reports the specific {@link burlap.oomdp.stateserialization.SerializableState} 9 | * implementation that is returned so that complex classes that have {@link burlap.oomdp.core.states.State} instances 10 | * to be turned into a {@link burlap.oomdp.stateserialization.SerializableState} can map them to the appropriate class type. 11 | * @author James MacGlashan. 12 | */ 13 | public interface SerializableStateFactory { 14 | 15 | /** 16 | * Takes a {@link burlap.oomdp.core.states.State} and turns it into a {@link burlap.oomdp.stateserialization.SerializableState}. 17 | * @param s the input {@link burlap.oomdp.core.states.State} to convert. 18 | * @return a {@link burlap.oomdp.stateserialization.SerializableState} 19 | */ 20 | SerializableState serialize(State s); 21 | 22 | /** 23 | * Returns the {@link burlap.oomdp.stateserialization.SerializableState} implementation that is generated by this factory. 24 | * @return the {@link java.lang.Class} of the {@link burlap.oomdp.stateserialization.SerializableState} implementation that is generated by this factory. 
25 | */ 26 | Class getGeneratedClass(); 27 | } 28 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/stateserialization/simple/SimpleSerializableState.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.stateserialization.simple; 2 | 3 | import burlap.oomdp.core.Domain; 4 | import burlap.oomdp.core.objects.ObjectInstance; 5 | import burlap.oomdp.core.states.MutableState; 6 | import burlap.oomdp.core.states.State; 7 | import burlap.oomdp.stateserialization.SerializableState; 8 | 9 | import java.util.ArrayList; 10 | import java.util.List; 11 | 12 | /** 13 | * A {@link burlap.oomdp.stateserialization.SerializableState} representation that reads all {@link burlap.oomdp.core.objects.ObjectInstance} and 14 | * {@link burlap.oomdp.core.values.Value} objects stored in a {@link burlap.oomdp.core.states.State} and represents their information 15 | * with {@link burlap.oomdp.stateserialization.simple.SimpleSerializedObjectInstance} and {@link burlap.oomdp.stateserialization.simple.SimpleSerializedValue} 16 | * instances. Deserialized {@link burlap.oomdp.core.states.State} objects are {@link burlap.oomdp.core.states.MutableState} instances. 17 | * @author James MacGlashan. 18 | */ 19 | public class SimpleSerializableState extends SerializableState { 20 | 21 | public List objects; 22 | 23 | public SimpleSerializableState(){ 24 | 25 | } 26 | 27 | public SimpleSerializableState(State s) { 28 | super(s); 29 | } 30 | 31 | @Override 32 | public void serialize(State s) { 33 | List objects = s.getAllObjects(); 34 | this.objects = new ArrayList(objects.size()); 35 | for(ObjectInstance o : objects){ 36 | this.objects.add(new SimpleSerializedObjectInstance(o)); 37 | } 38 | } 39 | 40 | @Override 41 | public State deserialize(Domain domain) { 42 | State s = new MutableState(); 43 | for(SimpleSerializedObjectInstance o : this.objects){ 44 | s.addObject(o.deserialize(domain)); 45 | } 46 | return s; 47 | } 48 | 49 | 50 | } 51 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/stateserialization/simple/SimpleSerializableStateFactory.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.stateserialization.simple; 2 | 3 | import burlap.oomdp.core.states.State; 4 | import burlap.oomdp.stateserialization.SerializableState; 5 | import burlap.oomdp.stateserialization.SerializableStateFactory; 6 | 7 | /** 8 | * A {@link burlap.oomdp.stateserialization.SerializableStateFactory} for {@link burlap.oomdp.stateserialization.simple.SimpleSerializableState} instances. 9 | * @author James MacGlashan. 
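 * <p>
 * Round-trip sketch ({@code s} and {@code domain} are assumed to be an existing state and its domain):
 * <pre>{@code
 * SerializableStateFactory f = new SimpleSerializableStateFactory();
 * SerializableState ss = f.serialize(s);
 * State restored = ss.deserialize(domain); // a MutableState rebuilt from the serialized data
 * }</pre>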
10 | */ 11 | public class SimpleSerializableStateFactory implements SerializableStateFactory { 12 | @Override 13 | public SerializableState serialize(State s) { 14 | return new SimpleSerializableState(s); 15 | } 16 | 17 | @Override 18 | public Class getGeneratedClass() { 19 | return SimpleSerializableState.class; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/stateserialization/simple/SimpleSerializedObjectInstance.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.stateserialization.simple; 2 | 3 | import burlap.oomdp.core.Domain; 4 | import burlap.oomdp.core.objects.MutableObjectInstance; 5 | import burlap.oomdp.core.objects.ObjectInstance; 6 | import burlap.oomdp.core.values.Value; 7 | 8 | import java.io.Serializable; 9 | import java.util.ArrayList; 10 | import java.util.List; 11 | 12 | /** 13 | * A serializable representation of {@link burlap.oomdp.core.objects.ObjectInstance} objects. 14 | * Deserialization produces {@link burlap.oomdp.core.objects.MutableObjectInstance} objects. 15 | * @author James MacGlashan. 16 | */ 17 | public class SimpleSerializedObjectInstance implements Serializable{ 18 | 19 | public String name; 20 | public String object_class; 21 | public List values; 22 | 23 | public SimpleSerializedObjectInstance() { 24 | } 25 | 26 | /** 27 | * Initializes by representing the input {@link burlap.oomdp.core.objects.ObjectInstance}. 28 | * @param o the {@link burlap.oomdp.core.objects.ObjectInstance} to represent. 29 | */ 30 | public SimpleSerializedObjectInstance(ObjectInstance o){ 31 | this.object_class = o.getClassName(); 32 | this.name = o.getName(); 33 | List values = o.getValues(); 34 | this.values = new ArrayList(values.size()); 35 | for(Value v : values){ 36 | this.values.add(new SimpleSerializedValue(v)); 37 | } 38 | } 39 | 40 | /** 41 | * Turns this representation into an actual {@link burlap.oomdp.core.objects.ObjectInstance} whose class and attributes 42 | * are associated with the input {@link burlap.oomdp.core.Domain} 43 | * @param domain the {@link burlap.oomdp.core.Domain} to which the returned {@link burlap.oomdp.core.objects.ObjectInstance} {@link burlap.oomdp.core.ObjectClass} and {@link burlap.oomdp.core.Attribute} refers. 44 | * @return a {@link burlap.oomdp.core.objects.MutableObjectInstance} 45 | */ 46 | public ObjectInstance deserialize(Domain domain){ 47 | MutableObjectInstance o = new MutableObjectInstance(domain.getObjectClass(this.object_class), this.name); 48 | for(SimpleSerializedValue v : this.values){ 49 | o.setValue(v.attribute, v.value); 50 | } 51 | return o; 52 | } 53 | 54 | 55 | } 56 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/stateserialization/simple/SimpleSerializedValue.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.stateserialization.simple; 2 | 3 | import burlap.oomdp.core.values.Value; 4 | 5 | import java.io.Serializable; 6 | 7 | /** 8 | * A serializable representation of {@link burlap.oomdp.core.values.Value} objects. 9 | * @author James MacGlashan. 
10 | */ 11 | public class SimpleSerializedValue implements Serializable{ 12 | public String attribute; 13 | public String value; 14 | 15 | public SimpleSerializedValue(){ 16 | 17 | } 18 | 19 | /** 20 | * Creates a serializable representation for the given {@link burlap.oomdp.core.values.Value} 21 | * @param oomdpValue the {@link burlap.oomdp.core.values.Value} this object will represent. 22 | */ 23 | public SimpleSerializedValue(Value oomdpValue){ 24 | this.attribute = oomdpValue.attName(); 25 | this.value = oomdpValue.getStringVal(); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/stochasticgames/AgentFactory.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.stochasticgames; 2 | 3 | /** 4 | * An interface for generating agents 5 | * @author James MacGlashan 6 | * 7 | */ 8 | public interface AgentFactory { 9 | /** 10 | * Returns a new agent instance. 11 | * @return a new agent instance. 12 | */ 13 | public SGAgent generateAgent(); 14 | } 15 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/stochasticgames/InvalidActionException.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.stochasticgames; 2 | 3 | /** 4 | * An exception for indicating that an invalid action was requested in a stochastic game. Created by cayle on 5/22/15. 5 | */ 6 | public class InvalidActionException extends RuntimeException { 7 | } 8 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/stochasticgames/JointReward.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.stochasticgames; 2 | 3 | import java.util.Map; 4 | 5 | import burlap.oomdp.core.states.State; 6 | 7 | /** 8 | * This interface defines the method needed to return the reward received by each agent. 9 | * @author James MacGlashan 10 | * 11 | */ 12 | public interface JointReward { 13 | 14 | /** 15 | * Returns the reward received by each agent specified in the joint action. The returned 16 | * result is a Map from agent names to the reward that they received. 17 | * @param s the state in which the joint action was taken. 18 | * @param ja the joint action taken. 19 | * @param sp the resulting state from taking the joint action 20 | * @return a Map from agent names to the reward that they received. 21 | */ 22 | public Map<String, Double> reward(State s, JointAction ja, State sp); 23 | } 24 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/stochasticgames/SGAgentType.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.stochasticgames; 2 | 3 | import java.util.List; 4 | 5 | import burlap.oomdp.core.ObjectClass; 6 | import burlap.oomdp.stochasticgames.agentactions.SGAgentAction; 7 | 8 | 9 | /** 10 | * This class specifies the type of agent a stochastic games agent can be. Different agent types may have different actions they can execute 11 | * and may also have different observable properties to other agents, which is indicated by the ObjectClass that represents their world state.
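 * <p>
 * Construction sketch (the object class name {@code "agent"} and the {@code domain.getAgentActions()} accessor are illustrative assumptions):
 * <pre>{@code
 * SGAgentType type = new SGAgentType("agent", domain.getObjectClass("agent"), domain.getAgentActions());
 * }</pre>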
12 | * @author James MacGlashan 13 | * 14 | */ 15 | public class SGAgentType { 16 | 17 | public String typeName; 18 | public ObjectClass oclass; 19 | public List<SGAgentAction> actions; 20 | 21 | 22 | /** 23 | * Creates a new agent type with a given name, object class describing the agent's world state, and actions available to the agent. 24 | * @param typeName the type name 25 | * @param oclass the object class that represents the agent's world state information 26 | * @param actionsAvailableToType the available actions that this agent can take in the world. 27 | */ 28 | public SGAgentType(String typeName, ObjectClass oclass, List<SGAgentAction> actionsAvailableToType){ 29 | this.typeName = typeName; 30 | this.oclass = oclass; 31 | this.actions = actionsAvailableToType; 32 | } 33 | 34 | 35 | @Override 36 | public int hashCode(){ 37 | return typeName.hashCode(); 38 | } 39 | 40 | @Override 41 | public boolean equals(Object o){ 42 | if(!(o instanceof SGAgentType)){ 43 | return false; 44 | } 45 | 46 | return ((SGAgentType)o).typeName.equals(typeName); 47 | 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/stochasticgames/SGStateGenerator.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.stochasticgames; 2 | 3 | import java.util.List; 4 | 5 | import burlap.oomdp.core.objects.ObjectInstance; 6 | import burlap.oomdp.core.states.State; 7 | import burlap.oomdp.core.objects.MutableObjectInstance; 8 | 9 | 10 | /** 11 | * An abstract class defining the interface and common mechanism for generating State objects specifically for stochastic games domains. 12 | * Unlike the similar {@link burlap.oomdp.auxiliary.StateGenerator} class, this class requires a list of agents that will be in the world 13 | * and will create an ObjectInstance for each agent that belongs to the OO-MDP object class specified by each agent's {@link SGAgentType}. 14 | * @author James MacGlashan 15 | * 16 | */ 17 | public abstract class SGStateGenerator { 18 | 19 | /** 20 | * Generates a new state with the given agents in it. 21 | * @param agents the agents that should be in the state. 22 | * @return a new state instance. 23 | */ 24 | public abstract State generateState(List<SGAgent> agents); 25 | 26 | /** 27 | * Creates an object instance belonging to the object class specified in the agent's {@link SGAgentType} data member. 28 | * The returned object instance will have the name of the agent. 29 | * @param a the agent for which to create an OO-MDP state object instance 30 | * @return an object instance for this agent. 31 | */ 32 | protected ObjectInstance getAgentObjectInstance(SGAgent a){ 33 | return new MutableObjectInstance(a.agentType.oclass, a.worldAgentName); 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /Assignment4/src/burlap/oomdp/stochasticgames/WorldGenerator.java: -------------------------------------------------------------------------------- 1 | package burlap.oomdp.stochasticgames; 2 | 3 | 4 | /** 5 | * An interface for generating {@link World} instances. 6 | * @author James MacGlashan 7 | * 8 | */ 9 | public interface WorldGenerator { 10 | /** 11 | * Generates a new {@link World} instance. 12 | * @return a new {@link World} instance.
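 * <p>
 * An anonymous-class sketch ({@code makeConfiguredWorld()} is an assumed helper, not part of this interface):
 * <pre>{@code
 * WorldGenerator wg = new WorldGenerator() {
 *     @Override
 *     public World generateWorld() {
 *         return makeConfiguredWorld(); // build a fresh World for each experiment run
 *     }
 * };
 * }</pre>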
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/SGAgentType.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames;

import java.util.List;

import burlap.oomdp.core.ObjectClass;
import burlap.oomdp.stochasticgames.agentactions.SGAgentAction;


/**
 * This class specifies the type of agent a stochastic games agent can be. Different agent types may have different actions they can execute
 * and may also have different observable properties to other agents, which is indicated by the ObjectClass that represents their world state.
 * @author James MacGlashan
 *
 */
public class SGAgentType {

	public String typeName;
	public ObjectClass oclass;
	public List<SGAgentAction> actions;


	/**
	 * Creates a new agent type with a given name, object class describing the agent's world state, and actions available to the agent.
	 * @param typeName the type name
	 * @param oclass the object class that represents the agent's world state information
	 * @param actionsAvailableToType the available actions that this agent can take in the world
	 */
	public SGAgentType(String typeName, ObjectClass oclass, List<SGAgentAction> actionsAvailableToType){
		this.typeName = typeName;
		this.oclass = oclass;
		this.actions = actionsAvailableToType;
	}


	@Override
	public int hashCode(){
		return typeName.hashCode();
	}

	@Override
	public boolean equals(Object o){
		if(!(o instanceof SGAgentType)){
			return false;
		}

		return ((SGAgentType)o).typeName.equals(typeName);
	}

}
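A construction sketch (an illustration, not a repository file; it assumes an SGDomain named domain whose agents are represented by an OO-MDP class called "agent", and that SGDomain exposes its registered agent actions via getAgentActions() — both names should be checked against this BURLAP version):

	// Sketch: one symmetric agent type shared by all players.
	SGAgentType playerType = new SGAgentType(
			"player",
			domain.getObjectClass("agent"),
			domain.getAgentActions());

Note that equals() and hashCode() compare only typeName, so two SGAgentType instances with the same name are treated as interchangeable even if their action lists differ.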
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/SGStateGenerator.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames;

import java.util.List;

import burlap.oomdp.core.objects.ObjectInstance;
import burlap.oomdp.core.states.State;
import burlap.oomdp.core.objects.MutableObjectInstance;


/**
 * An abstract class defining the interface and common mechanism for generating State objects specifically for stochastic games domains.
 * Unlike the similar {@link burlap.oomdp.auxiliary.StateGenerator} class, this class requires a list of agents that will be in the world
 * and will create an ObjectInstance for each agent that belongs to the OO-MDP object class specified by each agent's {@link SGAgentType}.
 * @author James MacGlashan
 *
 */
public abstract class SGStateGenerator {

	/**
	 * Generates a new state with the given agents in it.
	 * @param agents the agents that should be in the state.
	 * @return a new state instance.
	 */
	public abstract State generateState(List<SGAgent> agents);

	/**
	 * Creates an object instance belonging to the object class specified in the agent's {@link SGAgentType} data member.
	 * The returned object instance will have the name of the agent.
	 * @param a the agent for which to create an OO-MDP state object instance
	 * @return an object instance for this agent.
	 */
	protected ObjectInstance getAgentObjectInstance(SGAgent a){
		return new MutableObjectInstance(a.agentType.oclass, a.worldAgentName);
	}

}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/WorldGenerator.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames;


/**
 * An interface for generating {@link World} instances.
 * @author James MacGlashan
 *
 */
public interface WorldGenerator {
	/**
	 * Generates a new {@link World} instance.
	 * @return a new {@link World} instance.
	 */
	public World generateWorld();
}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/WorldObserver.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames;

import java.util.Map;

import burlap.oomdp.core.states.State;

/**
 * An interface for defining {@link burlap.oomdp.stochasticgames.World} observers. Observers
 * are told when a game starts and in what state, what each interaction in the world was as it happens, and
 * what the final state of the world is when a game ends.
 */
public interface WorldObserver {

	/**
	 * This method is called whenever a new game in a world is starting.
	 * @param s the state in which the world is starting.
	 */
	public void gameStarting(State s);

	/**
	 * This method is called whenever an interaction in the world occurs.
	 * @param s the previous state of the world
	 * @param ja the joint action taken in the world
	 * @param reward the joint reward received by the agents
	 * @param sp the next state of the world
	 */
	public void observe(State s, JointAction ja, Map<String, Double> reward, State sp);

	/**
	 * This method is called whenever a game in a world ends.
	 * @param s the final state of the world when it ends.
	 */
	public void gameEnding(State s);
}
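Because every callback receives complete states, an observer can log or score games without touching agent internals. A minimal console logger (an illustration, not a repository file; registering it on a World is assumed to go through an addWorldObserver-style method, which should be verified against this BURLAP version):

	import java.util.Map;

	import burlap.oomdp.core.states.State;
	import burlap.oomdp.stochasticgames.JointAction;
	import burlap.oomdp.stochasticgames.WorldObserver;

	// Sketch: print each transition of a game to standard out.
	public class ConsoleWorldObserver implements WorldObserver {

		@Override
		public void gameStarting(State s) {
			System.out.println("Game starting.");
		}

		@Override
		public void observe(State s, JointAction ja, Map<String, Double> reward, State sp) {
			System.out.println("Joint action: " + ja.toString() + "; rewards: " + reward);
		}

		@Override
		public void gameEnding(State s) {
			System.out.println("Game over.");
		}
	}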
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/agentactions/SimpleGroundedSGAgentAction.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames.agentactions;

/**
 * A {@link burlap.oomdp.stochasticgames.agentactions.GroundedSGAgentAction} implementation for actions that
 * are parameter-less.
 * @author James MacGlashan.
 */
public class SimpleGroundedSGAgentAction extends GroundedSGAgentAction{

	public SimpleGroundedSGAgentAction(String actingAgent, SGAgentAction a) {
		super(actingAgent, a);
	}

	@Override
	public GroundedSGAgentAction copy() {
		return new SimpleGroundedSGAgentAction(this.actingAgent, this.action);
	}

	@Override
	public void initParamsWithStringRep(String[] params) {
		//do nothing
	}

	@Override
	public String[] getParametersAsString() {
		return new String[0];
	}
}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/agentactions/SimpleSGAgentAction.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames.agentactions;

import burlap.oomdp.core.states.State;
import burlap.oomdp.stochasticgames.SGDomain;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;


/**
 * This {@link burlap.oomdp.stochasticgames.agentactions.SGAgentAction} definition defines a parameter-less agent action
 * that can be executed in every state. This is a useful action definition for symmetric games.
 * @author James MacGlashan
 *
 */
public class SimpleSGAgentAction extends SGAgentAction {

	/**
	 * Initializes this single action to be for the given domain and with the given name. This action
	 * is automatically added to the given domain.
	 * @param d the domain to which this action belongs
	 * @param name the name of this action
	 */
	public SimpleSGAgentAction(SGDomain d, String name) {
		super(d, name);
	}


	@Override
	public boolean applicableInState(State s, GroundedSGAgentAction gsa) {
		return true;
	}

	@Override
	public boolean isParameterized() {
		return false;
	}

	@Override
	public GroundedSGAgentAction getAssociatedGroundedAction(String actingAgent) {
		return new SimpleGroundedSGAgentAction(actingAgent, this);
	}

	@Override
	public List<GroundedSGAgentAction> getAllApplicableGroundedActions(State s, String actingAgent) {
		GroundedSGAgentAction gaa = this.getAssociatedGroundedAction(actingAgent);
		return this.applicableInState(s, gaa) ? Arrays.asList(gaa) : new ArrayList<GroundedSGAgentAction>(0);
	}
}
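For a one-shot or repeated matrix game, each strategy becomes one parameter-less action shared by all players. A construction sketch (an illustration, not a repository file; the action names are hypothetical and an existing SGDomain named domain is assumed):

	// Sketch: prisoner's-dilemma-style strategies as parameter-less symmetric actions.
	// Constructing them is enough: the constructor registers each action with the domain.
	SGAgentAction cooperate = new SimpleSGAgentAction(domain, "cooperate");
	SGAgentAction defect = new SimpleSGAgentAction(domain, "defect");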
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/common/AgentFactoryWithSubjectiveReward.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames.common;

import burlap.oomdp.stochasticgames.SGAgent;
import burlap.oomdp.stochasticgames.AgentFactory;
import burlap.oomdp.stochasticgames.JointReward;

/**
 * An agent generating factory that will produce an agent that uses an internal subjective reward function.
 * This can be useful for agents that use reward shaping. The base agent is first generated using
 * a different {@link burlap.oomdp.stochasticgames.AgentFactory}, and the agent returned by
 * that provided factory has its internal reward function set to the one specified for use
 * in this factory. The agent is then returned by this factory.
 * @author James MacGlashan
 *
 */
public class AgentFactoryWithSubjectiveReward implements AgentFactory {

	protected AgentFactory baseFactory;
	protected JointReward internalReward;


	/**
	 * Initializes the factory.
	 * @param baseFactory the base factory for generating an agent.
	 * @param internalReward the internal reward function to set the agent to use.
	 */
	public AgentFactoryWithSubjectiveReward(AgentFactory baseFactory, JointReward internalReward) {
		this.baseFactory = baseFactory;
		this.internalReward = internalReward;
	}

	@Override
	public SGAgent generateAgent() {
		SGAgent a = baseFactory.generateAgent();
		a.setInternalRewardFunction(internalReward);
		return a;
	}

}
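Usage is a plain decorator wrap: hand it an existing factory plus the shaped reward, then use the wrapped factory wherever a factory is expected. A sketch (an illustration, not a repository file; baseFactory and shapedReward are hypothetical names for any AgentFactory and JointReward already in hand):

	// Sketch: decorate an existing factory so every generated agent
	// learns from a shaped, subjective reward instead of the world's objective one.
	AgentFactory shapedFactory =
			new AgentFactoryWithSubjectiveReward(baseFactory, shapedReward);
	SGAgent agent = shapedFactory.generateAgent(); // already wired to shapedReward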
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/common/ConstantSGStateGenerator.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames.common;

import java.util.List;

import burlap.datastructures.HashedAggregator;
import burlap.oomdp.core.objects.ObjectInstance;
import burlap.oomdp.core.states.State;
import burlap.oomdp.stochasticgames.SGAgent;
import burlap.oomdp.stochasticgames.SGStateGenerator;


/**
 * A stochastic games state generator that always returns the same base state, which is specified via the constructor. The
 * provided source state does *not* need to worry about the object name of OO-MDP objects corresponding to agent states.
 * This generator will automatically reassign the relevant OO-MDP object names to the names of each agent by querying the agent type
 * and agent name in the list of agents provided to the {@link #generateState(List)} method. This reassignment is done
 * each time the {@link #generateState(List)} method is called on a copy of the originally provided state.
 * @author James MacGlashan
 *
 */
public class ConstantSGStateGenerator extends SGStateGenerator {

	/**
	 * The source state that will be copied and returned by the {@link #generateState(List)} method.
	 */
	protected State srcState;


	/**
	 * Initializes.
	 * @param srcState The source state that will be copied and returned by the {@link #generateState(List)} method.
	 */
	public ConstantSGStateGenerator(State srcState){
		this.srcState = srcState;
	}

	@Override
	public State generateState(List<SGAgent> agents) {

		State s = this.srcState.copy();
		HashedAggregator<String> counts = new HashedAggregator<String>();

		for(SGAgent a : agents){
			String agentClassName = a.getAgentType().oclass.name;
			int index = (int) counts.v(agentClassName);
			List<ObjectInstance> possibleAgentObjects = s.getObjectsOfClass(agentClassName);
			if(possibleAgentObjects.size() <= index){
				throw new RuntimeException("Error: Constant state used by ConstantSGStateGenerator does not have enough OO-MDP objects for agents defined by class: " + agentClassName);
			}
			ObjectInstance agentObject = possibleAgentObjects.get(index);
			s.renameObject(agentObject, a.getAgentName());

			counts.add(agentClassName, 1.);
		}

		return s;
	}

}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/common/StaticRepeatedGameActionModel.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames.common;

import java.util.ArrayList;
import java.util.List;

import burlap.oomdp.core.states.State;
import burlap.oomdp.core.TransitionProbability;
import burlap.oomdp.stochasticgames.JointAction;
import burlap.oomdp.stochasticgames.JointActionModel;


/**
 * This action model can be used to take a single stage game, and cause it to repeat itself.
 * This is achieved by simply having the same state returned after each joint action.
 * @author James MacGlashan
 *
 */
public class StaticRepeatedGameActionModel extends JointActionModel {

	public StaticRepeatedGameActionModel() {
		//nothing to do
	}

	@Override
	public List<TransitionProbability> transitionProbsFor(State s, JointAction ja) {
		List<TransitionProbability> res = new ArrayList<TransitionProbability>();
		TransitionProbability tp = new TransitionProbability(s.copy(), 1.);
		res.add(tp);

		return res;
	}

	@Override
	protected State actionHelper(State s, JointAction ja) {
		//do nothing, the state simply repeats itself
		return s;
	}

}
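Together with ConstantSGStateGenerator above, this model supplies all the dynamics a repeated matrix game needs: the generator pins the start state and the model loops every joint action back to it, so only the joint reward distinguishes the strategies. A wiring sketch (an illustration, not a repository file; variable names are hypothetical, and it assumes this BURLAP version's World constructor takes the domain, joint action model, joint reward, terminal function, and state generator in that order, and that NullTermination is its no-terminal-state TerminalFunction — verify both before use):

	// Sketch: a repeated game world that always restarts from `startState`
	// and never changes state between rounds.
	World world = new World(
			domain,                                    // an existing SGDomain
			new StaticRepeatedGameActionModel(),       // state repeats after every joint action
			payoffFunction,                            // a JointReward encoding the game matrix
			new NullTermination(),                     // repeated games have no terminal states
			new ConstantSGStateGenerator(startState)); // same start state every game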
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/explorers/HardStateResetSpecialAction.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames.explorers;

import burlap.oomdp.auxiliary.StateGenerator;
import burlap.oomdp.auxiliary.common.ConstantStateGenerator;
import burlap.oomdp.core.states.State;
import burlap.oomdp.singleagent.explorer.SpecialExplorerAction;

/**
 * @author James MacGlashan.
 */
public class HardStateResetSpecialAction implements SpecialExplorerAction {

	StateGenerator stateGenerator;

	/**
	 * Initializes which base state to reset to
	 * @param s the state to reset to when this action is executed
	 */
	public HardStateResetSpecialAction(State s){
		this.stateGenerator = new ConstantStateGenerator(s);
	}

	/**
	 * Initializes with a state generator to draw from on reset
	 * @param stateGenerator the state generator to draw from.
	 */
	public HardStateResetSpecialAction(StateGenerator stateGenerator){
		this.stateGenerator = stateGenerator;
	}

	/**
	 * Sets the base state to reset to
	 * @param s the state to reset to when this action is executed
	 */
	public void setBase(State s){
		this.stateGenerator = new ConstantStateGenerator(s);
	}

	/**
	 * Sets the state generator to draw from on reset
	 * @param stateGenerator the state generator to draw from on reset
	 */
	public void setBaseStateGenerator(StateGenerator stateGenerator) {
		this.stateGenerator = stateGenerator;
	}

	@Override
	public State applySpecialAction(State curState) {
		return this.stateGenerator.generateState();
	}
}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/tournament/MatchEntry.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames.tournament;

import burlap.oomdp.stochasticgames.SGAgentType;

/**
 * This class indicates which player in a tournament is to play in a match and what {@link burlap.oomdp.stochasticgames.SGAgentType} role they will play.
 * @author James MacGlashan
 *
 */
public class MatchEntry {

	public SGAgentType agentType;
	public int agentId;

	/**
	 * Initializes the MatchEntry
	 * @param at the {@link burlap.oomdp.stochasticgames.SGAgentType} the agent will play as
	 * @param ai the index of this agent in the tournament
	 */
	public MatchEntry(SGAgentType at, int ai){
		this.agentType = at;
		this.agentId = ai;
	}

}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/tournament/MatchSelector.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames.tournament;

import java.util.List;

/**
 * An interface for defining how matches in a tournament will be determined
 * @author James MacGlashan
 *
 */
public interface MatchSelector {
	/**
	 * Returns the next match information, which is a list of {@link MatchEntry} objects
	 * @return the next match information, which is a list of {@link MatchEntry} objects
	 */
	public List<MatchEntry> getNextMatch();

	/**
	 * Resets the match selections and causes the {@link #getNextMatch()} method to start from the beginning of matches
	 */
	public void resetMatchSelections();
}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/stochasticgames/tournament/common/AllPairWiseSameTypeMS.java:
--------------------------------------------------------------------------------
package burlap.oomdp.stochasticgames.tournament.common;

import java.util.ArrayList;
import java.util.List;

import burlap.oomdp.stochasticgames.SGAgentType;
import burlap.oomdp.stochasticgames.tournament.MatchEntry;
import burlap.oomdp.stochasticgames.tournament.MatchSelector;


/**
 * This class defines a MatchSelector that plays all pairwise matches of agents in a round robin. It sets
 * all agents to play as the same {@link burlap.oomdp.stochasticgames.SGAgentType} and therefore is only valid in symmetric games.
 * @author James MacGlashan
 *
 */
public class AllPairWiseSameTypeMS implements MatchSelector {

	protected int n;
	protected SGAgentType at;

	protected int p0;
	protected int p1;


	/**
	 * Initializes the selector
	 * @param at the {@link burlap.oomdp.stochasticgames.SGAgentType} that all agents will play as
	 * @param n the number of agents in the tournament
	 */
	public AllPairWiseSameTypeMS(SGAgentType at, int n){
		this.n = n;
		this.at = at;

		p0 = 0;
		p1 = 1;
	}

	@Override
	public List<MatchEntry> getNextMatch() {

		if(p0 >= n-1){
			return null; //no more matches
		}

		MatchEntry me0 = new MatchEntry(at, p0);
		MatchEntry me1 = new MatchEntry(at, p1);

		List<MatchEntry> match = new ArrayList<MatchEntry>();
		match.add(me0);
		match.add(me1);

		p1++;
		if(p1 >= n){
			p0++;
			p1 = p0+1;
		}

		return match;
	}

	@Override
	public void resetMatchSelections() {
		p0 = 0;
		p1 = 1;
	}

}
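The selector walks the index pairs (0,1), (0,2), ..., (n-2,n-1), so a tournament of n agents yields n(n-1)/2 matches. A driver sketch showing the usual consume-until-null loop (an illustration, not a repository file; playerType is an SGAgentType as constructed earlier):

	// Sketch: enumerate every round-robin pairing; 4 agents -> 6 matches.
	MatchSelector selector = new AllPairWiseSameTypeMS(playerType, 4);
	List<MatchEntry> match;
	while((match = selector.getNextMatch()) != null){
		System.out.println("Match: agent " + match.get(0).agentId
				+ " vs agent " + match.get(1).agentId);
	}
	selector.resetMatchSelections(); // rewind to reuse the same schedule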
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/visualizer/ObjectPainter.java:
--------------------------------------------------------------------------------
package burlap.oomdp.visualizer;

import java.awt.Graphics2D;

import burlap.oomdp.core.objects.ObjectInstance;
import burlap.oomdp.core.states.State;


/**
 * An interface for defining painters that can render object instances to a graphics context.
 * @author James MacGlashan
 *
 */
public interface ObjectPainter {

	/**
	 * Paints object instance ob to graphics context g2
	 * @param g2 graphics context to which the object should be painted
	 * @param s the state of the object to be painted
	 * @param ob the instantiated object to be painted
	 * @param cWidth width of the canvas
	 * @param cHeight height of the canvas
	 */
	public void paintObject(Graphics2D g2, State s, ObjectInstance ob, float cWidth, float cHeight);

}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/visualizer/RenderLayer.java:
--------------------------------------------------------------------------------
package burlap.oomdp.visualizer;

import java.awt.Graphics2D;

/**
 * A RenderLayer is a 2 dimensional layer that paints to a provided 2D graphics context. The {@link MultiLayerRenderer} can take
 * a list of these objects and will paint them sequentially to the same 2D graphics context. This allows different kinds
 * of renderers that display different kinds of information to be layered on top of each other.
 * @author James MacGlashan
 *
 */
public interface RenderLayer {
	public void render(Graphics2D g2, float width, float height);
}
--------------------------------------------------------------------------------
/Assignment4/src/burlap/oomdp/visualizer/StaticPainter.java:
--------------------------------------------------------------------------------
package burlap.oomdp.visualizer;

import java.awt.Graphics2D;

import burlap.oomdp.core.states.State;



/**
 * This class paints general properties of a state/domain that may not be represented
 * by any specific object instance data. For instance, the GridWorld class
 * may have walls that need to be painted, but the walls are part of the transition
 * dynamics of the domain and not captured in the object instance value assignments.
 * @author James MacGlashan
 *
 */
public interface StaticPainter {

	/**
	 * Paints general state information not represented by object instances to graphics context g2
	 * @param g2 graphics context to which the static data should be painted
	 * @param s the state to be painted
	 * @param cWidth the width of the canvas
	 * @param cHeight the height of the canvas
	 */
	public void paint(Graphics2D g2, State s, float cWidth, float cHeight);

}
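Painters receive the canvas size in pixels, so implementations scale domain coordinates by cWidth and cHeight themselves. A minimal StaticPainter sketch that just clears the canvas to a background color (an illustration, not a repository file):

	import java.awt.Color;
	import java.awt.Graphics2D;
	import java.awt.geom.Rectangle2D;

	import burlap.oomdp.core.states.State;
	import burlap.oomdp.visualizer.StaticPainter;

	// Sketch: fill the whole canvas with a background color, independent of the
	// state; a real painter would draw walls, grid lines, etc. here instead.
	public class BackgroundPainter implements StaticPainter {

		@Override
		public void paint(Graphics2D g2, State s, float cWidth, float cHeight) {
			g2.setColor(Color.WHITE);
			g2.fill(new Rectangle2D.Float(0f, 0f, cWidth, cHeight));
		}
	}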
--------------------------------------------------------------------------------
/Assignment4/src/burlap/tutorials/hgw/HelloGridWorld.java:
--------------------------------------------------------------------------------
package burlap.tutorials.hgw;


import burlap.domain.singleagent.gridworld.GridWorldDomain;
import burlap.domain.singleagent.gridworld.GridWorldVisualizer;
import burlap.oomdp.core.Domain;
import burlap.oomdp.core.states.State;
import burlap.oomdp.singleagent.explorer.VisualExplorer;
import burlap.oomdp.visualizer.Visualizer;

public class HelloGridWorld {

	public static void main(String[] args) {

		GridWorldDomain gw = new GridWorldDomain(11,11); //11x11 grid world
		gw.setMapToFourRooms(); //four rooms layout
		gw.setProbSucceedTransitionDynamics(0.8); //stochastic transitions with 0.8 success rate
		Domain domain = gw.generateDomain(); //generate the grid world domain

		//setup initial state
		State s = GridWorldDomain.getOneAgentOneLocationState(domain);
		GridWorldDomain.setAgent(s, 0, 0);
		GridWorldDomain.setLocation(s, 0, 10, 10);

		//create visualizer and explorer
		Visualizer v = GridWorldVisualizer.getVisualizer(gw.getMap());
		VisualExplorer exp = new VisualExplorer(domain, v, s);

		//set control keys to use w-s-a-d
		exp.addKeyAction("w", GridWorldDomain.ACTIONNORTH);
		exp.addKeyAction("s", GridWorldDomain.ACTIONSOUTH);
		exp.addKeyAction("a", GridWorldDomain.ACTIONWEST);
		exp.addKeyAction("d", GridWorldDomain.ACTIONEAST);

		exp.initGUI();

	}
}
--------------------------------------------------------------------------------
/Assignment4/ycai87-analysis.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danielcy715/CS7641-Machine-Learning/06fd5c35398e95903173c94ea2214ad08e05d040/Assignment4/ycai87-analysis.pdf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# CS7641-Machine-Learning

This is the assignment repository for Georgia Tech CS7641 Machine Learning.


Assignment 1 Phishing Website and Letter Recognition using Supervised Learning

Assignment 2 Study on Randomized Optimization

Assignment 3 Study on Unsupervised Learning

Assignment 4 Study of Markov Decision Process Problems using Reinforcement Learning


Disclaimer: Directly copying and using the code for any of the course projects is forbidden. This is a violation of the GA Tech Honor Code.
You are welcome to use it as a reference, and I would appreciate any comments.

Non-GATech students can access the lectures on Udacity.
--------------------------------------------------------------------------------