├── .gitattributes ├── .gitignore ├── analysis.py ├── autograder.py ├── crawler.py ├── environment.py ├── featureExtractors.py ├── game.py ├── ghostAgents.py ├── grading.py ├── graphicsCrawlerDisplay.py ├── graphicsDisplay.py ├── graphicsGridworldDisplay.py ├── graphicsUtils.py ├── gridworld.py ├── keyboardAgents.py ├── layout.py ├── layouts ├── capsuleClassic.lay ├── contestClassic.lay ├── mediumClassic.lay ├── mediumGrid.lay ├── minimaxClassic.lay ├── openClassic.lay ├── originalClassic.lay ├── smallClassic.lay ├── smallGrid.lay ├── testClassic.lay ├── trappedClassic.lay └── trickyClassic.lay ├── learningAgents.py ├── mdp.py ├── pacman.py ├── pacmanAgents.py ├── projectParams.py ├── qlearningAgents.py ├── reinforcementTestClasses.py ├── testClasses.py ├── testParser.py ├── test_cases ├── CONFIG ├── q1 │ ├── 1-tinygrid.solution │ ├── 1-tinygrid.test │ ├── 2-tinygrid-noisy.solution │ ├── 2-tinygrid-noisy.test │ ├── 3-bridge.solution │ ├── 3-bridge.test │ ├── 4-discountgrid.solution │ ├── 4-discountgrid.test │ └── CONFIG ├── q2 │ ├── 1-bridge-grid.solution │ ├── 1-bridge-grid.test │ └── CONFIG ├── q3 │ ├── 1-question-3.1.solution │ ├── 1-question-3.1.test │ ├── 2-question-3.2.solution │ ├── 2-question-3.2.test │ ├── 3-question-3.3.solution │ ├── 3-question-3.3.test │ ├── 4-question-3.4.solution │ ├── 4-question-3.4.test │ ├── 5-question-3.5.solution │ ├── 5-question-3.5.test │ └── CONFIG ├── q4 │ ├── 1-tinygrid.solution │ ├── 1-tinygrid.test │ ├── 2-tinygrid-noisy.solution │ ├── 2-tinygrid-noisy.test │ ├── 3-bridge.solution │ ├── 3-bridge.test │ ├── 4-discountgrid.solution │ ├── 4-discountgrid.test │ └── CONFIG ├── q5 │ ├── 1-tinygrid.solution │ ├── 1-tinygrid.test │ ├── 2-tinygrid-noisy.solution │ ├── 2-tinygrid-noisy.test │ ├── 3-bridge.solution │ ├── 3-bridge.test │ ├── 4-discountgrid.solution │ ├── 4-discountgrid.test │ └── CONFIG ├── q6 │ ├── CONFIG │ ├── grade-agent.solution │ └── grade-agent.test ├── q7 │ ├── CONFIG │ ├── grade-agent.solution │ └── 
grade-agent.test └── q8 │ ├── 1-tinygrid.solution │ ├── 1-tinygrid.test │ ├── 2-tinygrid-noisy.solution │ ├── 2-tinygrid-noisy.test │ ├── 3-bridge.solution │ ├── 3-bridge.test │ ├── 4-discountgrid.solution │ ├── 4-discountgrid.test │ └── CONFIG ├── textDisplay.py ├── textGridworldDisplay.py ├── util.py └── valueIterationAgents.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | *.sln merge=union 7 | *.csproj merge=union 8 | *.vbproj merge=union 9 | *.fsproj merge=union 10 | *.dbproj merge=union 11 | 12 | # Standard to msysgit 13 | *.doc diff=astextplain 14 | *.DOC diff=astextplain 15 | *.docx diff=astextplain 16 | *.DOCX diff=astextplain 17 | *.dot diff=astextplain 18 | *.DOT diff=astextplain 19 | *.pdf diff=astextplain 20 | *.PDF diff=astextplain 21 | *.rtf diff=astextplain 22 | *.RTF diff=astextplain 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ################# 2 | ## Eclipse 3 | ################# 4 | 5 | *.pydevproject 6 | .project 7 | .metadata 8 | bin/ 9 | tmp/ 10 | *.tmp 11 | *.bak 12 | *.swp 13 | *~.nib 14 | local.properties 15 | .classpath 16 | .settings/ 17 | .loadpath 18 | 19 | # External tool builders 20 | .externalToolBuilders/ 21 | 22 | # Locally stored "Eclipse launch configurations" 23 | *.launch 24 | 25 | # CDT-specific 26 | .cproject 27 | 28 | # PDT-specific 29 | .buildpath 30 | 31 | 32 | ################# 33 | ## Visual Studio 34 | ################# 35 | 36 | ## Ignore Visual Studio temporary files, build results, and 37 | ## files generated by popular Visual Studio add-ons. 
38 | 39 | # User-specific files 40 | *.suo 41 | *.user 42 | *.sln.docstates 43 | 44 | # Build results 45 | [Dd]ebug/ 46 | [Rr]elease/ 47 | *_i.c 48 | *_p.c 49 | *.ilk 50 | *.meta 51 | *.obj 52 | *.pch 53 | *.pdb 54 | *.pgc 55 | *.pgd 56 | *.rsp 57 | *.sbr 58 | *.tlb 59 | *.tli 60 | *.tlh 61 | *.tmp 62 | *.vspscc 63 | .builds 64 | *.dotCover 65 | 66 | ## TODO: If you have NuGet Package Restore enabled, uncomment this 67 | #packages/ 68 | 69 | # Visual C++ cache files 70 | ipch/ 71 | *.aps 72 | *.ncb 73 | *.opensdf 74 | *.sdf 75 | 76 | # Visual Studio profiler 77 | *.psess 78 | *.vsp 79 | 80 | # ReSharper is a .NET coding add-in 81 | _ReSharper* 82 | 83 | # Installshield output folder 84 | [Ee]xpress 85 | 86 | # DocProject is a documentation generator add-in 87 | DocProject/buildhelp/ 88 | DocProject/Help/*.HxT 89 | DocProject/Help/*.HxC 90 | DocProject/Help/*.hhc 91 | DocProject/Help/*.hhk 92 | DocProject/Help/*.hhp 93 | DocProject/Help/Html2 94 | DocProject/Help/html 95 | 96 | # Click-Once directory 97 | publish 98 | 99 | # Others 100 | [Bb]in 101 | [Oo]bj 102 | sql 103 | TestResults 104 | *.Cache 105 | ClientBin 106 | stylecop.* 107 | ~$* 108 | *.dbmdl 109 | Generated_Code #added for RIA/Silverlight projects 110 | 111 | # Backup & report files from converting an old project file to a newer 112 | # Visual Studio version. 
Backup files are not needed, because we have git ;-) 113 | _UpgradeReport_Files/ 114 | Backup*/ 115 | UpgradeLog*.XML 116 | 117 | 118 | 119 | ############ 120 | ## Windows 121 | ############ 122 | 123 | # Windows image file caches 124 | Thumbs.db 125 | 126 | # Folder config file 127 | Desktop.ini 128 | 129 | 130 | ############# 131 | ## Python 132 | ############# 133 | 134 | *.py[co] 135 | 136 | # Packages 137 | *.egg 138 | *.egg-info 139 | dist 140 | build 141 | eggs 142 | parts 143 | bin 144 | var 145 | sdist 146 | develop-eggs 147 | .installed.cfg 148 | 149 | # Installer logs 150 | pip-log.txt 151 | 152 | # Unit test / coverage reports 153 | .coverage 154 | .tox 155 | 156 | #Translations 157 | *.mo 158 | 159 | #Mr Developer 160 | .mr.developer.cfg 161 | 162 | # Mac crap 163 | .DS_Store 164 | -------------------------------------------------------------------------------- /analysis.py: -------------------------------------------------------------------------------- 1 | # analysis.py 2 | # ----------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | ###################### 12 | # ANALYSIS QUESTIONS # 13 | ###################### 14 | 15 | # Set the given parameters to obtain the specified policies through 16 | # value iteration. 
# Answers to the MDP / RL analysis questions.
# Each question* function returns the parameter settings (discount, noise,
# living reward, ...) that produce the policy the question asks for, or the
# string 'NOT POSSIBLE' when no setting can produce it.

def question2():
    """Parameters that make crossing the bridge optimal (deterministic moves)."""
    answerDiscount = 0.9
    # Zero noise makes the environment deterministic, so the bridge is safe.
    answerNoise = 0
    return answerDiscount, answerNoise

def question3a():
    """Prefer the close exit (+1), risking the cliff."""
    answerDiscount = .1
    answerNoise = 0
    answerLivingReward = -1
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'

def question3b():
    """Prefer the close exit (+1), but avoiding the cliff."""
    answerDiscount = .3
    answerNoise = .2
    answerLivingReward = -1
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'

def question3c():
    """Prefer the distant exit (+10), risking the cliff."""
    answerDiscount = 1
    answerNoise = 0
    answerLivingReward = -1
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'

def question3d():
    """Prefer the distant exit (+10), avoiding the cliff."""
    answerDiscount = .9
    answerNoise = .4
    answerLivingReward = -1
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'

def question3e():
    """Avoid both exits and the cliff (episode never terminates usefully)."""
    answerDiscount = 0
    answerNoise = 0
    answerLivingReward = 0
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'

def question6():
    """Epsilon/learning-rate for the bridge crossing: no setting works."""
    # Kept to document the expected answer format when a setting does exist.
    answerEpsilon = None
    answerLearningRate = None
    return 'NOT POSSIBLE'
    # If not possible, return 'NOT POSSIBLE'

if __name__ == '__main__':
    # print(...) with a single argument is valid in both Python 2 and 3.
    print('Answers to analysis questions:')
    import analysis
    for q in [q for q in dir(analysis) if q.startswith('question')]:
        response = getattr(analysis, q)()
        print('  Question %s:\t%s' % (q, str(response)))

# ---------------------------------------------------------------------
# /autograder.py:
# ---------------------------------------------------------------------
# autograder.py
# -------------
# Licensing Information: Please do not distribute or publish solutions to this
# project. You are free to use and extend these projects for educational
# purposes.
# The Pacman AI projects were developed at UC Berkeley, primarily by
# John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and Pieter
# Abbeel in Spring 2013.
# For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html

# imports from python standard library
import grading
import imp
import optparse
import os
import re
import sys
import projectParams
import random
from util import FixedRandom
# Seed the global RNG deterministically so autograder runs are reproducible.
random.setstate(FixedRandom().random.getstate())

# register arguments and set default values
def readCommand(argv):
    """Parse autograder command-line arguments; return the options object."""
    parser = optparse.OptionParser(description = 'Run public tests on student code')
    parser.set_defaults(generateSolutions=False, edxOutput=False, muteOutput=False, printTestCase=False)
    parser.add_option('--test-directory',
                      dest = 'testRoot',
                      default = 'test_cases',
                      help = 'Root test directory which contains subdirectories corresponding to each question')
    parser.add_option('--student-code',
                      dest = 'studentCode',
                      default = projectParams.STUDENT_CODE_DEFAULT,
                      help = 'comma separated list of student code files')
    parser.add_option('--code-directory',
                      dest = 'codeRoot',
                      default = "",
                      help = 'Root directory containing the student and testClass code')
    parser.add_option('--test-case-code',
                      dest = 'testCaseCode',
                      default = projectParams.PROJECT_TEST_CLASSES,
                      help = 'class containing testClass classes for this project')
    parser.add_option('--generate-solutions',
                      dest = 'generateSolutions',
                      action = 'store_true',
                      help = 'Write solutions generated to .solution file')
    parser.add_option('--edx-output',
                      dest = 'edxOutput',
                      action = 'store_true',
                      help = 'Generate edX output files')
    parser.add_option('--mute',
                      dest = 'muteOutput',
                      action = 'store_true',
                      help = 'Mute output from executing tests')
    parser.add_option('--print-tests', '-p',
                      dest = 'printTestCase',
                      action = 'store_true',
                      help = 'Print each test case before running them.')
    parser.add_option('--test', '-t',
                      dest = 'runTest',
                      default = None,
                      help = 'Run one particular test.  Relative to test root.')
    parser.add_option('--question', '-q',
                      dest = 'gradeQuestion',
                      default = None,
                      help = 'Grade one particular question.')
    (options, args) = parser.parse_args(argv)
    return options


# confirm we should author solution files
def confirmGenerate():
    """Ask for interactive confirmation before overwriting solution files.

    Exits the process when the user answers 'no'; loops until a clear
    'yes'/'no' answer is given.
    """
    # print(...) with a single argument is valid in both Python 2 and 3.
    print('WARNING: this action will overwrite any solution files.')
    print('Are you sure you want to proceed? (yes/no)')
    while True:
        ans = sys.stdin.readline().strip()
        if ans == 'yes':
            break
        elif ans == 'no':
            sys.exit(0)
        else:
            print('please answer either "yes" or "no"')


# TODO: Fix this so that tracebacks work correctly
# Looking at source of the traceback module, presuming it works
# the same as the interpreter's, it uses co_filename.  This is,
# however, a readonly attribute.
def setModuleName(module, filename):
    """Tag every function and class in `module` with __file__ = filename.

    Intended to make error output attributable to the student's file.
    """
    functionType = type(confirmGenerate)
    classType = type(optparse.Option)

    for i in dir(module):
        o = getattr(module, i)
        # Skip objects that already carry a __file__ attribute.
        if hasattr(o, '__file__'): continue

        if type(o) == functionType:
            setattr(o, '__file__', filename)
        elif type(o) == classType:
            setattr(o, '__file__', filename)
            # TODO: assign member __file__'s?
102 | #print i, type(o) 103 | 104 | 105 | #from cStringIO import StringIO 106 | 107 | def loadModuleString(moduleSource): 108 | # Below broken, imp doesn't believe its being passed a file: 109 | # ValueError: load_module arg#2 should be a file or None 110 | # 111 | #f = StringIO(moduleCodeDict[k]) 112 | #tmp = imp.load_module(k, f, k, (".py", "r", imp.PY_SOURCE)) 113 | tmp = imp.new_module(k) 114 | exec moduleCodeDict[k] in tmp.__dict__ 115 | setModuleName(tmp, k) 116 | return tmp 117 | 118 | import py_compile 119 | 120 | def loadModuleFile(moduleName, filePath): 121 | with open(filePath, 'r') as f: 122 | return imp.load_module(moduleName, f, "%s.py" % moduleName, (".py", "r", imp.PY_SOURCE)) 123 | 124 | 125 | def readFile(path, root=""): 126 | "Read file from disk at specified path and return as string" 127 | with open(os.path.join(root, path), 'r') as handle: 128 | return handle.read() 129 | 130 | 131 | ####################################################################### 132 | # Error Hint Map 133 | ####################################################################### 134 | 135 | # TODO: use these 136 | ERROR_HINT_MAP = { 137 | 'q1': { 138 | "": """ 139 | We noticed that your project threw an IndexError on q1. 140 | While many things may cause this, it may have been from 141 | assuming a certain number of successors from a state space 142 | or assuming a certain number of actions available from a given 143 | state. Try making your code more general (no hardcoded indices) 144 | and submit again! 145 | """ 146 | }, 147 | 'q3': { 148 | "": """ 149 | We noticed that your project threw an AttributeError on q3. 150 | While many things may cause this, it may have been from assuming 151 | a certain size or structure to the state space. For example, if you have 152 | a line of code assuming that the state is (x, y) and we run your code 153 | on a state space with (x, y, z), this error could be thrown. Try 154 | making your code more general and submit again! 
155 | 156 | """ 157 | } 158 | } 159 | 160 | import pprint 161 | 162 | def splitStrings(d): 163 | d2 = dict(d) 164 | for k in d: 165 | if k[0:2] == "__": 166 | del d2[k] 167 | continue 168 | if d2[k].find("\n") >= 0: 169 | d2[k] = d2[k].split("\n") 170 | return d2 171 | 172 | 173 | def printTest(testDict, solutionDict): 174 | pp = pprint.PrettyPrinter(indent=4) 175 | print "Test case:" 176 | for line in testDict["__raw_lines__"]: 177 | print " |", line 178 | print "Solution:" 179 | for line in solutionDict["__raw_lines__"]: 180 | print " |", line 181 | 182 | 183 | def runTest(testName, moduleDict, printTestCase=False): 184 | import testParser 185 | import testClasses 186 | for module in moduleDict: 187 | setattr(sys.modules[__name__], module, moduleDict[module]) 188 | 189 | # This is a hack, will break if tests check question without testing for None 190 | question = None 191 | 192 | testDict = testParser.TestParser(testName + ".test").parse() 193 | solutionDict = testParser.TestParser(testName + ".solution").parse() 194 | test_out_file = os.path.join('%s.test_output' % testName) 195 | testDict['test_out_file'] = test_out_file 196 | testClass = getattr(projectTestClasses, testDict['class']) 197 | testCase = testClass(question, testDict) 198 | 199 | if printTestCase: 200 | printTest(testDict, solutionDict) 201 | 202 | # This is a fragile hack to create a stub grades object 203 | grades = grading.Grades(projectParams.PROJECT_NAME, [(None,0)]) 204 | testCase.execute(grades, moduleDict, solutionDict) 205 | 206 | # evaluate student code 207 | def evaluate(generateSolutions, testRoot, moduleDict, exceptionMap=ERROR_HINT_MAP, edxOutput=False, muteOutput=False, 208 | printTestCase=False, questionToGrade=None): 209 | # imports of testbench code. 
note that the testClasses import must follow 210 | # the import of student code due to dependencies 211 | import testParser 212 | import testClasses 213 | for module in moduleDict: 214 | setattr(sys.modules[__name__], module, moduleDict[module]) 215 | 216 | problemDict = testParser.TestParser(os.path.join(testRoot, 'CONFIG')).parse() 217 | 218 | # iterate through and run tests 219 | if 'order' in problemDict: 220 | test_subdirs = problemDict['order'].split() 221 | else: 222 | test_subdirs = sorted(os.listdir(testRoot)) 223 | questions = [] 224 | questionDicts = {} 225 | for q in test_subdirs: 226 | subdir_path = os.path.join(testRoot, q) 227 | if not os.path.isdir(subdir_path) or q[0] == '.': 228 | continue 229 | 230 | if questionToGrade != None and q != questionToGrade: 231 | continue 232 | 233 | # create a question object 234 | questionDict = testParser.TestParser(os.path.join(subdir_path, 'CONFIG')).parse() 235 | questionClass = getattr(testClasses, questionDict['class']) 236 | question = questionClass(questionDict) 237 | questionDicts[q] = questionDict 238 | 239 | # load test cases into question 240 | tests = filter(lambda t: re.match('[^#~.].*\.test\Z', t), os.listdir(subdir_path)) 241 | tests = map(lambda t: re.match('(.*)\.test\Z', t).group(1), tests) 242 | for t in sorted(tests): 243 | test_file = os.path.join(subdir_path, '%s.test' % t) 244 | solution_file = os.path.join(subdir_path, '%s.solution' % t) 245 | test_out_file = os.path.join(subdir_path, '%s.test_output' % t) 246 | testDict = testParser.TestParser(test_file).parse() 247 | if testDict.get("disabled", "false").lower() == "true": 248 | continue 249 | testDict['test_out_file'] = test_out_file 250 | testClass = getattr(projectTestClasses, testDict['class']) 251 | testCase = testClass(question, testDict) 252 | def makefun(testCase, solution_file): 253 | if generateSolutions: 254 | # write solution file to disk 255 | return lambda grades: testCase.writeSolution(moduleDict, solution_file) 256 | else: 
257 | # read in solution dictionary and pass as an argument 258 | testDict = testParser.TestParser(test_file).parse() 259 | solutionDict = testParser.TestParser(solution_file).parse() 260 | if printTestCase: 261 | return lambda grades: printTest(testDict, solutionDict) or testCase.execute(grades, moduleDict, solutionDict) 262 | else: 263 | return lambda grades: testCase.execute(grades, moduleDict, solutionDict) 264 | question.addTestCase(testCase, makefun(testCase, solution_file)) 265 | 266 | # Note extra function is necessary for scoping reasons 267 | def makefun(question): 268 | return lambda grades: question.execute(grades) 269 | setattr(sys.modules[__name__], q, makefun(question)) 270 | questions.append((q, question.getMaxPoints())) 271 | 272 | grades = grading.Grades(projectParams.PROJECT_NAME, questions, edxOutput=edxOutput, muteOutput=muteOutput) 273 | if questionToGrade == None: 274 | for q in questionDicts: 275 | for prereq in questionDicts[q].get('depends', '').split(): 276 | grades.addPrereq(q, prereq) 277 | 278 | grades.grade(sys.modules[__name__]) 279 | return grades.points 280 | 281 | 282 | if __name__ == '__main__': 283 | options = readCommand(sys.argv) 284 | if options.generateSolutions: 285 | confirmGenerate() 286 | codePaths = options.studentCode.split(',') 287 | # moduleCodeDict = {} 288 | # for cp in codePaths: 289 | # moduleName = re.match('.*?([^/]*)\.py', cp).group(1) 290 | # moduleCodeDict[moduleName] = readFile(cp, root=options.codeRoot) 291 | # moduleCodeDict['projectTestClasses'] = readFile(options.testCaseCode, root=options.codeRoot) 292 | # moduleDict = loadModuleDict(moduleCodeDict) 293 | 294 | moduleDict = {} 295 | for cp in codePaths: 296 | moduleName = re.match('.*?([^/]*)\.py', cp).group(1) 297 | moduleDict[moduleName] = loadModuleFile(moduleName, os.path.join(options.codeRoot, cp)) 298 | moduleName = re.match('.*?([^/]*)\.py', options.testCaseCode).group(1) 299 | moduleDict['projectTestClasses'] = loadModuleFile(moduleName, 
os.path.join(options.codeRoot, options.testCaseCode)) 300 | 301 | 302 | if options.runTest != None: 303 | runTest(options.runTest, moduleDict, printTestCase=options.printTestCase) 304 | else: 305 | evaluate(options.generateSolutions, options.testRoot, moduleDict, 306 | edxOutput=options.edxOutput, muteOutput=options.muteOutput, printTestCase=options.printTestCase, 307 | questionToGrade=options.gradeQuestion) 308 | -------------------------------------------------------------------------------- /environment.py: -------------------------------------------------------------------------------- 1 | # environment.py 2 | # -------------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | #!/usr/bin/python 12 | 13 | class Environment: 14 | 15 | def getCurrentState(self): 16 | """ 17 | Returns the current state of enviornment 18 | """ 19 | abstract 20 | 21 | def getPossibleActions(self, state): 22 | """ 23 | Returns possible actions the agent 24 | can take in the given state. Can 25 | return the empty list if we are in 26 | a terminal state. 27 | """ 28 | abstract 29 | 30 | def doAction(self, action): 31 | """ 32 | Performs the given action in the current 33 | environment state and updates the enviornment. 34 | 35 | Returns a (reward, nextState) pair 36 | """ 37 | abstract 38 | 39 | def reset(self): 40 | """ 41 | Resets the current state to the start state 42 | """ 43 | abstract 44 | 45 | def isTerminal(self): 46 | """ 47 | Has the enviornment entered a terminal 48 | state? 
This means there are no successors 49 | """ 50 | state = self.getCurrentState() 51 | actions = self.getPossibleActions(state) 52 | return len(actions) == 0 53 | -------------------------------------------------------------------------------- /featureExtractors.py: -------------------------------------------------------------------------------- 1 | # featureExtractors.py 2 | # -------------------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | "Feature extractors for Pacman game states" 12 | 13 | from game import Directions, Actions 14 | import util 15 | 16 | class FeatureExtractor: 17 | def getFeatures(self, state, action): 18 | """ 19 | Returns a dict from features to counts 20 | Usually, the count will just be 1.0 for 21 | indicator functions. 
22 | """ 23 | util.raiseNotDefined() 24 | 25 | class IdentityExtractor(FeatureExtractor): 26 | def getFeatures(self, state, action): 27 | feats = util.Counter() 28 | feats[(state,action)] = 1.0 29 | return feats 30 | 31 | def closestFood(pos, food, walls): 32 | """ 33 | closestFood -- this is similar to the function that we have 34 | worked on in the search project; here its all in one place 35 | """ 36 | fringe = [(pos[0], pos[1], 0)] 37 | expanded = set() 38 | while fringe: 39 | pos_x, pos_y, dist = fringe.pop(0) 40 | if (pos_x, pos_y) in expanded: 41 | continue 42 | expanded.add((pos_x, pos_y)) 43 | # if we find a food at this location then exit 44 | if food[pos_x][pos_y]: 45 | return dist 46 | # otherwise spread out from the location to its neighbours 47 | nbrs = Actions.getLegalNeighbors((pos_x, pos_y), walls) 48 | for nbr_x, nbr_y in nbrs: 49 | fringe.append((nbr_x, nbr_y, dist+1)) 50 | # no food found 51 | return None 52 | 53 | class SimpleExtractor(FeatureExtractor): 54 | """ 55 | Returns simple features for a basic reflex Pacman: 56 | - whether food will be eaten 57 | - how far away the next food is 58 | - whether a ghost collision is imminent 59 | - whether a ghost is one step away 60 | """ 61 | 62 | def getFeatures(self, state, action): 63 | # extract the grid of food and wall locations and get the ghost locations 64 | food = state.getFood() 65 | walls = state.getWalls() 66 | ghosts = state.getGhostPositions() 67 | 68 | features = util.Counter() 69 | 70 | features["bias"] = 1.0 71 | 72 | # compute the location of pacman after he takes the action 73 | x, y = state.getPacmanPosition() 74 | dx, dy = Actions.directionToVector(action) 75 | next_x, next_y = int(x + dx), int(y + dy) 76 | 77 | # count the number of ghosts 1-step away 78 | features["#-of-ghosts-1-step-away"] = sum((next_x, next_y) in Actions.getLegalNeighbors(g, walls) for g in ghosts) 79 | 80 | # if there is no danger of ghosts then add the food feature 81 | if not 
features["#-of-ghosts-1-step-away"] and food[next_x][next_y]: 82 | features["eats-food"] = 1.0 83 | 84 | dist = closestFood((next_x, next_y), food, walls) 85 | if dist is not None: 86 | # make the distance a number less than one otherwise the update 87 | # will diverge wildly 88 | features["closest-food"] = float(dist) / (walls.width * walls.height) 89 | features.divideAll(10.0) 90 | return features 91 | -------------------------------------------------------------------------------- /ghostAgents.py: -------------------------------------------------------------------------------- 1 | # ghostAgents.py 2 | # -------------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | from game import Agent 12 | from game import Actions 13 | from game import Directions 14 | import random 15 | from util import manhattanDistance 16 | import util 17 | 18 | class GhostAgent( Agent ): 19 | def __init__( self, index ): 20 | self.index = index 21 | 22 | def getAction( self, state ): 23 | dist = self.getDistribution(state) 24 | if len(dist) == 0: 25 | return Directions.STOP 26 | else: 27 | return util.chooseFromDistribution( dist ) 28 | 29 | def getDistribution(self, state): 30 | "Returns a Counter encoding a distribution over actions from the provided state." 31 | util.raiseNotDefined() 32 | 33 | class RandomGhost( GhostAgent ): 34 | "A ghost that chooses a legal action uniformly at random." 
35 | def getDistribution( self, state ): 36 | dist = util.Counter() 37 | for a in state.getLegalActions( self.index ): dist[a] = 1.0 38 | dist.normalize() 39 | return dist 40 | 41 | class DirectionalGhost( GhostAgent ): 42 | "A ghost that prefers to rush Pacman, or flee when scared." 43 | def __init__( self, index, prob_attack=0.8, prob_scaredFlee=0.8 ): 44 | self.index = index 45 | self.prob_attack = prob_attack 46 | self.prob_scaredFlee = prob_scaredFlee 47 | 48 | def getDistribution( self, state ): 49 | # Read variables from state 50 | ghostState = state.getGhostState( self.index ) 51 | legalActions = state.getLegalActions( self.index ) 52 | pos = state.getGhostPosition( self.index ) 53 | isScared = ghostState.scaredTimer > 0 54 | 55 | speed = 1 56 | if isScared: speed = 0.5 57 | 58 | actionVectors = [Actions.directionToVector( a, speed ) for a in legalActions] 59 | newPositions = [( pos[0]+a[0], pos[1]+a[1] ) for a in actionVectors] 60 | pacmanPosition = state.getPacmanPosition() 61 | 62 | # Select best actions given the state 63 | distancesToPacman = [manhattanDistance( pos, pacmanPosition ) for pos in newPositions] 64 | if isScared: 65 | bestScore = max( distancesToPacman ) 66 | bestProb = self.prob_scaredFlee 67 | else: 68 | bestScore = min( distancesToPacman ) 69 | bestProb = self.prob_attack 70 | bestActions = [action for action, distance in zip( legalActions, distancesToPacman ) if distance == bestScore] 71 | 72 | # Construct distribution 73 | dist = util.Counter() 74 | for a in bestActions: dist[a] = bestProb / len(bestActions) 75 | for a in legalActions: dist[a] += ( 1-bestProb ) / len(legalActions) 76 | dist.normalize() 77 | return dist 78 | -------------------------------------------------------------------------------- /grading.py: -------------------------------------------------------------------------------- 1 | # grading.py 2 | # ---------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. 
You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | "Common code for autograders" 12 | 13 | import cgi 14 | import time 15 | import sys 16 | import traceback 17 | import pdb 18 | from collections import defaultdict 19 | import util 20 | 21 | class Grades: 22 | "A data structure for project grades, along with formatting code to display them" 23 | def __init__(self, projectName, questionsAndMaxesList, edxOutput=False, muteOutput=False): 24 | """ 25 | Defines the grading scheme for a project 26 | projectName: project name 27 | questionsAndMaxesDict: a list of (question name, max points per question) 28 | """ 29 | self.questions = [el[0] for el in questionsAndMaxesList] 30 | self.maxes = dict(questionsAndMaxesList) 31 | self.points = Counter() 32 | self.messages = dict([(q, []) for q in self.questions]) 33 | self.project = projectName 34 | self.start = time.localtime()[1:6] 35 | self.sane = True # Sanity checks 36 | self.currentQuestion = None # Which question we're grading 37 | self.edxOutput = edxOutput 38 | self.mute = muteOutput 39 | self.prereqs = defaultdict(set) 40 | 41 | #print 'Autograder transcript for %s' % self.project 42 | print 'Starting on %d-%d at %d:%02d:%02d' % self.start 43 | 44 | def addPrereq(self, question, prereq): 45 | self.prereqs[question].add(prereq) 46 | 47 | def grade(self, gradingModule, exceptionMap = {}): 48 | """ 49 | Grades each question 50 | gradingModule: the module with all the grading functions (pass in with sys.modules[__name__]) 51 | """ 52 | 53 | completedQuestions = set([]) 54 | for q in self.questions: 55 | print '\nQuestion %s' % q 56 | print '=' * (9 + 
len(q)) 57 | print 58 | self.currentQuestion = q 59 | 60 | incompleted = self.prereqs[q].difference(completedQuestions) 61 | if len(incompleted) > 0: 62 | prereq = incompleted.pop() 63 | print \ 64 | """*** NOTE: Make sure to complete Question %s before working on Question %s, 65 | *** because Question %s builds upon your answer for Question %s. 66 | """ % (prereq, q, q, prereq) 67 | continue 68 | 69 | if self.mute: util.mutePrint() 70 | try: 71 | util.TimeoutFunction(getattr(gradingModule, q),300)(self) # Call the question's function 72 | #TimeoutFunction(getattr(gradingModule, q),1200)(self) # Call the question's function 73 | except Exception, inst: 74 | self.addExceptionMessage(q, inst, traceback) 75 | self.addErrorHints(exceptionMap, inst, q[1]) 76 | except: 77 | self.fail('FAIL: Terminated with a string exception.') 78 | finally: 79 | if self.mute: util.unmutePrint() 80 | 81 | if self.points[q] >= self.maxes[q]: 82 | completedQuestions.add(q) 83 | 84 | print '\n### Question %s: %d/%d ###\n' % (q, self.points[q], self.maxes[q]) 85 | 86 | 87 | print '\nFinished at %d:%02d:%02d' % time.localtime()[3:6] 88 | print "\nProvisional grades\n==================" 89 | 90 | for q in self.questions: 91 | print 'Question %s: %d/%d' % (q, self.points[q], self.maxes[q]) 92 | print '------------------' 93 | print 'Total: %d/%d' % (self.points.totalCount(), sum(self.maxes.values())) 94 | print """ 95 | Your grades are NOT yet registered. To register your grades you must 96 | submit your files to the edX website. The grades obtained through the 97 | edX website are your final grades unless your submission was not in 98 | the spirit of the course, such as if your submission simply hardcoded 99 | the answers to the tests. We will screen for this after the deadline. 
100 | 101 | *If you worked with a partner, you must both submit separately.* 102 | """ 103 | 104 | if self.edxOutput: 105 | self.produceOutput() 106 | 107 | def addExceptionMessage(self, q, inst, traceback): 108 | """ 109 | Method to format the exception message, this is more complicated because 110 | we need to cgi.escape the traceback but wrap the exception in a
 tag
111 |     """
112 |     self.fail('FAIL: Exception raised: %s' % inst)
113 |     self.addMessage('')
114 |     for line in traceback.format_exc().split('\n'):
115 |         self.addMessage(line)
116 | 
117 |   def addErrorHints(self, exceptionMap, errorInstance, questionNum):
118 |     typeOf = str(type(errorInstance))
119 |     questionName = 'q' + questionNum
120 |     errorHint = ''
121 | 
122 |     # question specific error hints
123 |     if exceptionMap.get(questionName):
124 |       questionMap = exceptionMap.get(questionName)
125 |       if (questionMap.get(typeOf)):
126 |         errorHint = questionMap.get(typeOf)
127 |     # fall back to general error messages if a question specific
128 |     # one does not exist
129 |     if (exceptionMap.get(typeOf)):
130 |       errorHint = exceptionMap.get(typeOf)
131 | 
132 |     # dont include the HTML if we have no error hint
133 |     if not errorHint:
134 |       return ''
135 | 
136 |     for line in errorHint.split('\n'):
137 |       self.addMessage(line)
138 | 
139 |   def produceOutput(self):
140 |     edxOutput = open('edx_response.html', 'w')
141 |     edxOutput.write("
") 142 | 143 | # first sum 144 | total_possible = sum(self.maxes.values()) 145 | total_score = sum(self.points.values()) 146 | checkOrX = '' 147 | if (total_score >= total_possible): 148 | checkOrX = '' 149 | header = """ 150 |

151 | Total score ({total_score} / {total_possible}) 152 |

153 | """.format(total_score = total_score, 154 | total_possible = total_possible, 155 | checkOrX = checkOrX 156 | ) 157 | edxOutput.write(header) 158 | 159 | for q in self.questions: 160 | if len(q) == 2: 161 | name = q[1] 162 | else: 163 | name = q 164 | checkOrX = '' 165 | if (self.points[q] == self.maxes[q]): 166 | checkOrX = '' 167 | #messages = '\n
\n'.join(self.messages[q]) 168 | messages = "
%s
" % '\n'.join(self.messages[q]) 169 | output = """ 170 |
171 |
172 |
173 | Question {q} ({points}/{max}) {checkOrX} 174 |
175 |
176 | {messages} 177 |
178 |
179 |
180 | """.format(q = name, 181 | max = self.maxes[q], 182 | messages = messages, 183 | checkOrX = checkOrX, 184 | points = self.points[q] 185 | ) 186 | # print "*** output for Question %s " % q[1] 187 | # print output 188 | edxOutput.write(output) 189 | edxOutput.write("
") 190 | edxOutput.close() 191 | edxOutput = open('edx_grade', 'w') 192 | edxOutput.write(str(self.points.totalCount())) 193 | edxOutput.close() 194 | 195 | def fail(self, message, raw=False): 196 | "Sets sanity check bit to false and outputs a message" 197 | self.sane = False 198 | self.assignZeroCredit() 199 | self.addMessage(message, raw) 200 | 201 | def assignZeroCredit(self): 202 | self.points[self.currentQuestion] = 0 203 | 204 | def addPoints(self, amt): 205 | self.points[self.currentQuestion] += amt 206 | 207 | def deductPoints(self, amt): 208 | self.points[self.currentQuestion] -= amt 209 | 210 | def assignFullCredit(self, message="", raw=False): 211 | self.points[self.currentQuestion] = self.maxes[self.currentQuestion] 212 | if message != "": 213 | self.addMessage(message, raw) 214 | 215 | def addMessage(self, message, raw=False): 216 | if not raw: 217 | # We assume raw messages, formatted for HTML, are printed separately 218 | if self.mute: util.unmutePrint() 219 | print '*** ' + message 220 | if self.mute: util.mutePrint() 221 | message = cgi.escape(message) 222 | self.messages[self.currentQuestion].append(message) 223 | 224 | def addMessageToEmail(self, message): 225 | print "WARNING**** addMessageToEmail is deprecated %s" % message 226 | for line in message.split('\n'): 227 | pass 228 | #print '%%% ' + line + ' %%%' 229 | #self.messages[self.currentQuestion].append(line) 230 | 231 | 232 | 233 | 234 | 235 | class Counter(dict): 236 | """ 237 | Dict with default 0 238 | """ 239 | def __getitem__(self, idx): 240 | try: 241 | return dict.__getitem__(self, idx) 242 | except KeyError: 243 | return 0 244 | 245 | def totalCount(self): 246 | """ 247 | Returns the sum of counts for all keys. 
248 | """ 249 | return sum(self.values()) 250 | 251 | -------------------------------------------------------------------------------- /graphicsCrawlerDisplay.py: -------------------------------------------------------------------------------- 1 | # graphicsCrawlerDisplay.py 2 | # ------------------------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | # graphicsCrawlerDisplay.py 12 | # ------------------------- 13 | # Licensing Information: Please do not distribute or publish solutions to this 14 | # project. You are free to use and extend these projects for educational 15 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 16 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 17 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 18 | # Abbeel in Spring 2013. 
19 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 20 | 21 | import Tkinter 22 | import qlearningAgents 23 | import time 24 | import threading 25 | import sys 26 | import crawler 27 | #import pendulum 28 | import math 29 | from math import pi as PI 30 | 31 | robotType = 'crawler' 32 | 33 | class Application: 34 | 35 | def sigmoid(self, x): 36 | return 1.0 / (1.0 + 2.0 ** (-x)) 37 | 38 | def incrementSpeed(self, inc): 39 | self.tickTime *= inc 40 | # self.epsilon = min(1.0, self.epsilon) 41 | # self.epsilon = max(0.0,self.epsilon) 42 | # self.learner.setSpeed(self.epsilon) 43 | self.speed_label['text'] = 'Step Delay: %.5f' % (self.tickTime) 44 | 45 | def incrementEpsilon(self, inc): 46 | self.ep += inc 47 | self.epsilon = self.sigmoid(self.ep) 48 | self.learner.setEpsilon(self.epsilon) 49 | self.epsilon_label['text'] = 'Epsilon: %.3f' % (self.epsilon) 50 | 51 | def incrementGamma(self, inc): 52 | self.ga += inc 53 | self.gamma = self.sigmoid(self.ga) 54 | self.learner.setDiscount(self.gamma) 55 | self.gamma_label['text'] = 'Discount: %.3f' % (self.gamma) 56 | 57 | def incrementAlpha(self, inc): 58 | self.al += inc 59 | self.alpha = self.sigmoid(self.al) 60 | self.learner.setLearningRate(self.alpha) 61 | self.alpha_label['text'] = 'Learning Rate: %.3f' % (self.alpha) 62 | 63 | def __initGUI(self, win): 64 | ## Window ## 65 | self.win = win 66 | 67 | ## Initialize Frame ## 68 | win.grid() 69 | self.dec = -.5 70 | self.inc = .5 71 | self.tickTime = 0.1 72 | 73 | ## Epsilon Button + Label ## 74 | self.setupSpeedButtonAndLabel(win) 75 | 76 | self.setupEpsilonButtonAndLabel(win) 77 | 78 | ## Gamma Button + Label ## 79 | self.setUpGammaButtonAndLabel(win) 80 | 81 | ## Alpha Button + Label ## 82 | self.setupAlphaButtonAndLabel(win) 83 | 84 | ## Exit Button ## 85 | #self.exit_button = Tkinter.Button(win,text='Quit', command=self.exit) 86 | #self.exit_button.grid(row=0, column=9) 87 | 88 | ## Simulation Buttons ## 89 | # 
self.setupSimulationButtons(win) 90 | 91 | ## Canvas ## 92 | self.canvas = Tkinter.Canvas(root, height=200, width=1000) 93 | self.canvas.grid(row=2,columnspan=10) 94 | 95 | def setupAlphaButtonAndLabel(self, win): 96 | self.alpha_minus = Tkinter.Button(win, 97 | text="-",command=(lambda: self.incrementAlpha(self.dec))) 98 | self.alpha_minus.grid(row=1, column=3, padx=10) 99 | 100 | self.alpha = self.sigmoid(self.al) 101 | self.alpha_label = Tkinter.Label(win, text='Learning Rate: %.3f' % (self.alpha)) 102 | self.alpha_label.grid(row=1, column=4) 103 | 104 | self.alpha_plus = Tkinter.Button(win, 105 | text="+",command=(lambda: self.incrementAlpha(self.inc))) 106 | self.alpha_plus.grid(row=1, column=5, padx=10) 107 | 108 | def setUpGammaButtonAndLabel(self, win): 109 | self.gamma_minus = Tkinter.Button(win, 110 | text="-",command=(lambda: self.incrementGamma(self.dec))) 111 | self.gamma_minus.grid(row=1, column=0, padx=10) 112 | 113 | self.gamma = self.sigmoid(self.ga) 114 | self.gamma_label = Tkinter.Label(win, text='Discount: %.3f' % (self.gamma)) 115 | self.gamma_label.grid(row=1, column=1) 116 | 117 | self.gamma_plus = Tkinter.Button(win, 118 | text="+",command=(lambda: self.incrementGamma(self.inc))) 119 | self.gamma_plus.grid(row=1, column=2, padx=10) 120 | 121 | def setupEpsilonButtonAndLabel(self, win): 122 | self.epsilon_minus = Tkinter.Button(win, 123 | text="-",command=(lambda: self.incrementEpsilon(self.dec))) 124 | self.epsilon_minus.grid(row=0, column=3) 125 | 126 | self.epsilon = self.sigmoid(self.ep) 127 | self.epsilon_label = Tkinter.Label(win, text='Epsilon: %.3f' % (self.epsilon)) 128 | self.epsilon_label.grid(row=0, column=4) 129 | 130 | self.epsilon_plus = Tkinter.Button(win, 131 | text="+",command=(lambda: self.incrementEpsilon(self.inc))) 132 | self.epsilon_plus.grid(row=0, column=5) 133 | 134 | def setupSpeedButtonAndLabel(self, win): 135 | self.speed_minus = Tkinter.Button(win, 136 | text="-",command=(lambda: self.incrementSpeed(.5))) 137 | 
self.speed_minus.grid(row=0, column=0) 138 | 139 | self.speed_label = Tkinter.Label(win, text='Step Delay: %.5f' % (self.tickTime)) 140 | self.speed_label.grid(row=0, column=1) 141 | 142 | self.speed_plus = Tkinter.Button(win, 143 | text="+",command=(lambda: self.incrementSpeed(2))) 144 | self.speed_plus.grid(row=0, column=2) 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | def skip5kSteps(self): 153 | self.stepsToSkip = 5000 154 | 155 | def __init__(self, win): 156 | 157 | self.ep = 0 158 | self.ga = 2 159 | self.al = 2 160 | self.stepCount = 0 161 | ## Init Gui 162 | 163 | self.__initGUI(win) 164 | 165 | # Init environment 166 | if robotType == 'crawler': 167 | self.robot = crawler.CrawlingRobot(self.canvas) 168 | self.robotEnvironment = crawler.CrawlingRobotEnvironment(self.robot) 169 | elif robotType == 'pendulum': 170 | self.robot = pendulum.PendulumRobot(self.canvas) 171 | self.robotEnvironment = \ 172 | pendulum.PendulumRobotEnvironment(self.robot) 173 | else: 174 | raise "Unknown RobotType" 175 | 176 | # Init Agent 177 | simulationFn = lambda agent: \ 178 | simulation.SimulationEnvironment(self.robotEnvironment,agent) 179 | actionFn = lambda state: \ 180 | self.robotEnvironment.getPossibleActions(state) 181 | self.learner = qlearningAgents.QLearningAgent(actionFn=actionFn) 182 | 183 | self.learner.setEpsilon(self.epsilon) 184 | self.learner.setLearningRate(self.alpha) 185 | self.learner.setDiscount(self.gamma) 186 | 187 | # Start GUI 188 | self.running = True 189 | self.stopped = False 190 | self.stepsToSkip = 0 191 | self.thread = threading.Thread(target=self.run) 192 | self.thread.start() 193 | 194 | 195 | def exit(self): 196 | self.running = False 197 | for i in range(5): 198 | if not self.stopped: 199 | time.sleep(0.1) 200 | try: 201 | self.win.destroy() 202 | except: 203 | pass 204 | sys.exit(0) 205 | 206 | def step(self): 207 | 208 | self.stepCount += 1 209 | 210 | state = self.robotEnvironment.getCurrentState() 211 | actions = 
self.robotEnvironment.getPossibleActions(state) 212 | if len(actions) == 0.0: 213 | self.robotEnvironment.reset() 214 | state = self.robotEnvironment.getCurrentState() 215 | actions = self.robotEnvironment.getPossibleActions(state) 216 | print 'Reset!' 217 | action = self.learner.getAction(state) 218 | if action == None: 219 | raise 'None action returned: Code Not Complete' 220 | nextState, reward = self.robotEnvironment.doAction(action) 221 | self.learner.observeTransition(state, action, nextState, reward) 222 | 223 | def animatePolicy(self): 224 | if robotType != 'pendulum': 225 | raise 'Only pendulum can animatePolicy' 226 | 227 | 228 | totWidth = self.canvas.winfo_reqwidth() 229 | totHeight = self.canvas.winfo_reqheight() 230 | 231 | length = 0.48 * min(totWidth, totHeight) 232 | x,y = totWidth-length-30, length+10 233 | 234 | 235 | 236 | angleMin, angleMax = self.robot.getMinAndMaxAngle() 237 | velMin, velMax = self.robot.getMinAndMaxAngleVelocity() 238 | 239 | if not 'animatePolicyBox' in dir(self): 240 | self.canvas.create_line(x,y,x+length,y) 241 | self.canvas.create_line(x+length,y,x+length,y-length) 242 | self.canvas.create_line(x+length,y-length,x,y-length) 243 | self.canvas.create_line(x,y-length,x,y) 244 | self.animatePolicyBox = 1 245 | self.canvas.create_text(x+length/2,y+10,text='angle') 246 | self.canvas.create_text(x-30,y-length/2,text='velocity') 247 | self.canvas.create_text(x-60,y-length/4,text='Blue = kickLeft') 248 | self.canvas.create_text(x-60,y-length/4+20,text='Red = kickRight') 249 | self.canvas.create_text(x-60,y-length/4+40,text='White = doNothing') 250 | 251 | 252 | 253 | angleDelta = (angleMax-angleMin) / 100 254 | velDelta = (velMax-velMin) / 100 255 | for i in range(100): 256 | angle = angleMin + i * angleDelta 257 | 258 | for j in range(100): 259 | vel = velMin + j * velDelta 260 | state = self.robotEnvironment.getState(angle,vel) 261 | max, argMax = None, None 262 | if not self.learner.seenState(state): 263 | argMax = 'unseen' 
264 | else: 265 | for action in ('kickLeft','kickRight','doNothing'): 266 | qVal = self.learner.getQValue(state, action) 267 | if max == None or qVal > max: 268 | max, argMax = qVal, action 269 | if argMax != 'unseen': 270 | if argMax == 'kickLeft': 271 | color = 'blue' 272 | elif argMax == 'kickRight': 273 | color = 'red' 274 | elif argMax == 'doNothing': 275 | color = 'white' 276 | dx = length / 100.0 277 | dy = length / 100.0 278 | x0, y0 = x+i*dx, y-j*dy 279 | self.canvas.create_rectangle(x0,y0,x0+dx,y0+dy,fill=color) 280 | 281 | 282 | 283 | 284 | def run(self): 285 | self.stepCount = 0 286 | self.learner.startEpisode() 287 | while True: 288 | minSleep = .01 289 | tm = max(minSleep, self.tickTime) 290 | time.sleep(tm) 291 | self.stepsToSkip = int(tm / self.tickTime) - 1 292 | 293 | if not self.running: 294 | self.stopped = True 295 | return 296 | for i in range(self.stepsToSkip): 297 | self.step() 298 | self.stepsToSkip = 0 299 | self.step() 300 | # self.robot.draw() 301 | self.learner.stopEpisode() 302 | 303 | def start(self): 304 | self.win.mainloop() 305 | 306 | 307 | 308 | 309 | 310 | def run(): 311 | global root 312 | root = Tkinter.Tk() 313 | root.title( 'Crawler GUI' ) 314 | root.resizable( 0, 0 ) 315 | 316 | # root.mainloop() 317 | 318 | 319 | app = Application(root) 320 | def update_gui(): 321 | app.robot.draw(app.stepCount, app.tickTime) 322 | root.after(10, update_gui) 323 | update_gui() 324 | 325 | root.protocol( 'WM_DELETE_WINDOW', app.exit) 326 | try: 327 | app.start() 328 | except: 329 | app.exit() 330 | -------------------------------------------------------------------------------- /graphicsGridworldDisplay.py: -------------------------------------------------------------------------------- 1 | # graphicsGridworldDisplay.py 2 | # --------------------------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. 
The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | import util 12 | from graphicsUtils import * 13 | 14 | class GraphicsGridworldDisplay: 15 | 16 | def __init__(self, gridworld, size=120, speed=1.0): 17 | self.gridworld = gridworld 18 | self.size = size 19 | self.speed = speed 20 | 21 | def start(self): 22 | setup(self.gridworld, size=self.size) 23 | 24 | def pause(self): 25 | wait_for_keys() 26 | 27 | def displayValues(self, agent, currentState = None, message = 'Agent Values'): 28 | values = util.Counter() 29 | policy = {} 30 | states = self.gridworld.getStates() 31 | for state in states: 32 | values[state] = agent.getValue(state) 33 | policy[state] = agent.getPolicy(state) 34 | drawValues(self.gridworld, values, policy, currentState, message) 35 | sleep(0.05 / self.speed) 36 | 37 | def displayNullValues(self, currentState = None, message = ''): 38 | values = util.Counter() 39 | #policy = {} 40 | states = self.gridworld.getStates() 41 | for state in states: 42 | values[state] = 0.0 43 | #policy[state] = agent.getPolicy(state) 44 | drawNullValues(self.gridworld, currentState,'') 45 | # drawValues(self.gridworld, values, policy, currentState, message) 46 | sleep(0.05 / self.speed) 47 | 48 | def displayQValues(self, agent, currentState = None, message = 'Agent Q-Values'): 49 | qValues = util.Counter() 50 | states = self.gridworld.getStates() 51 | for state in states: 52 | for action in self.gridworld.getPossibleActions(state): 53 | qValues[(state, action)] = agent.getQValue(state, action) 54 | drawQValues(self.gridworld, qValues, currentState, message) 55 | sleep(0.05 / self.speed) 56 | 57 | BACKGROUND_COLOR = formatColor(0,0,0) 58 | EDGE_COLOR = formatColor(1,1,1) 59 | 
OBSTACLE_COLOR = formatColor(0.5,0.5,0.5) 60 | TEXT_COLOR = formatColor(1,1,1) 61 | MUTED_TEXT_COLOR = formatColor(0.7,0.7,0.7) 62 | LOCATION_COLOR = formatColor(0,0,1) 63 | 64 | WINDOW_SIZE = -1 65 | GRID_SIZE = -1 66 | GRID_HEIGHT = -1 67 | MARGIN = -1 68 | 69 | def setup(gridworld, title = "Gridworld Display", size = 120): 70 | global GRID_SIZE, MARGIN, SCREEN_WIDTH, SCREEN_HEIGHT, GRID_HEIGHT 71 | grid = gridworld.grid 72 | WINDOW_SIZE = size 73 | GRID_SIZE = size 74 | GRID_HEIGHT = grid.height 75 | MARGIN = GRID_SIZE * 0.75 76 | screen_width = (grid.width - 1) * GRID_SIZE + MARGIN * 2 77 | screen_height = (grid.height - 0.5) * GRID_SIZE + MARGIN * 2 78 | 79 | begin_graphics(screen_width, 80 | screen_height, 81 | BACKGROUND_COLOR, title=title) 82 | 83 | def drawNullValues(gridworld, currentState = None, message = ''): 84 | grid = gridworld.grid 85 | blank() 86 | for x in range(grid.width): 87 | for y in range(grid.height): 88 | state = (x, y) 89 | gridType = grid[x][y] 90 | isExit = (str(gridType) != gridType) 91 | isCurrent = (currentState == state) 92 | if gridType == '#': 93 | drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent) 94 | else: 95 | drawNullSquare(gridworld.grid, x, y, False, isExit, isCurrent) 96 | pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8)) 97 | text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c") 98 | 99 | 100 | def drawValues(gridworld, values, policy, currentState = None, message = 'State Values'): 101 | grid = gridworld.grid 102 | blank() 103 | valueList = [values[state] for state in gridworld.getStates()] + [0.0] 104 | minValue = min(valueList) 105 | maxValue = max(valueList) 106 | for x in range(grid.width): 107 | for y in range(grid.height): 108 | state = (x, y) 109 | gridType = grid[x][y] 110 | isExit = (str(gridType) != gridType) 111 | isCurrent = (currentState == state) 112 | if gridType == '#': 113 | drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent) 114 | else: 115 | value = values[state] 116 | 
action = None 117 | if policy != None and state in policy: 118 | action = policy[state] 119 | actions = gridworld.getPossibleActions(state) 120 | if action not in actions and 'exit' in actions: 121 | action = 'exit' 122 | valString = '%.2f' % value 123 | drawSquare(x, y, value, minValue, maxValue, valString, action, False, isExit, isCurrent) 124 | pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8)) 125 | text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c") 126 | 127 | def drawQValues(gridworld, qValues, currentState = None, message = 'State-Action Q-Values'): 128 | grid = gridworld.grid 129 | blank() 130 | stateCrossActions = [[(state, action) for action in gridworld.getPossibleActions(state)] for state in gridworld.getStates()] 131 | qStates = reduce(lambda x,y: x+y, stateCrossActions, []) 132 | qValueList = [qValues[(state, action)] for state, action in qStates] + [0.0] 133 | minValue = min(qValueList) 134 | maxValue = max(qValueList) 135 | for x in range(grid.width): 136 | for y in range(grid.height): 137 | state = (x, y) 138 | gridType = grid[x][y] 139 | isExit = (str(gridType) != gridType) 140 | isCurrent = (currentState == state) 141 | actions = gridworld.getPossibleActions(state) 142 | if actions == None or len(actions) == 0: 143 | actions = [None] 144 | bestQ = max([qValues[(state, action)] for action in actions]) 145 | bestActions = [action for action in actions if qValues[(state, action)] == bestQ] 146 | 147 | q = util.Counter() 148 | valStrings = {} 149 | for action in actions: 150 | v = qValues[(state, action)] 151 | q[action] += v 152 | valStrings[action] = '%.2f' % v 153 | if gridType == '#': 154 | drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent) 155 | elif isExit: 156 | action = 'exit' 157 | value = q[action] 158 | valString = '%.2f' % value 159 | drawSquare(x, y, value, minValue, maxValue, valString, action, False, isExit, isCurrent) 160 | else: 161 | drawSquareQ(x, y, q, minValue, maxValue, valStrings, actions, isCurrent) 162 | 
pos = to_screen(((grid.width - 1.0) / 2.0, - 0.8)) 163 | text( pos, TEXT_COLOR, message, "Courier", -32, "bold", "c") 164 | 165 | 166 | def blank(): 167 | clear_screen() 168 | 169 | def drawNullSquare(grid,x, y, isObstacle, isTerminal, isCurrent): 170 | 171 | square_color = getColor(0, -1, 1) 172 | 173 | if isObstacle: 174 | square_color = OBSTACLE_COLOR 175 | 176 | (screen_x, screen_y) = to_screen((x, y)) 177 | square( (screen_x, screen_y), 178 | 0.5* GRID_SIZE, 179 | color = square_color, 180 | filled = 1, 181 | width = 1) 182 | 183 | square( (screen_x, screen_y), 184 | 0.5* GRID_SIZE, 185 | color = EDGE_COLOR, 186 | filled = 0, 187 | width = 3) 188 | 189 | if isTerminal and not isObstacle: 190 | square( (screen_x, screen_y), 191 | 0.4* GRID_SIZE, 192 | color = EDGE_COLOR, 193 | filled = 0, 194 | width = 2) 195 | text( (screen_x, screen_y), 196 | TEXT_COLOR, 197 | str(grid[x][y]), 198 | "Courier", -24, "bold", "c") 199 | 200 | 201 | text_color = TEXT_COLOR 202 | 203 | if not isObstacle and isCurrent: 204 | circle( (screen_x, screen_y), 0.1*GRID_SIZE, LOCATION_COLOR, fillColor=LOCATION_COLOR ) 205 | 206 | # if not isObstacle: 207 | # text( (screen_x, screen_y), text_color, valStr, "Courier", 24, "bold", "c") 208 | 209 | def drawSquare(x, y, val, min, max, valStr, action, isObstacle, isTerminal, isCurrent): 210 | 211 | square_color = getColor(val, min, max) 212 | 213 | if isObstacle: 214 | square_color = OBSTACLE_COLOR 215 | 216 | (screen_x, screen_y) = to_screen((x, y)) 217 | square( (screen_x, screen_y), 218 | 0.5* GRID_SIZE, 219 | color = square_color, 220 | filled = 1, 221 | width = 1) 222 | square( (screen_x, screen_y), 223 | 0.5* GRID_SIZE, 224 | color = EDGE_COLOR, 225 | filled = 0, 226 | width = 3) 227 | if isTerminal and not isObstacle: 228 | square( (screen_x, screen_y), 229 | 0.4* GRID_SIZE, 230 | color = EDGE_COLOR, 231 | filled = 0, 232 | width = 2) 233 | 234 | 235 | if action == 'north': 236 | polygon( [(screen_x, screen_y - 0.45*GRID_SIZE), 
(screen_x+0.05*GRID_SIZE, screen_y-0.40*GRID_SIZE), (screen_x-0.05*GRID_SIZE, screen_y-0.40*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False) 237 | if action == 'south': 238 | polygon( [(screen_x, screen_y + 0.45*GRID_SIZE), (screen_x+0.05*GRID_SIZE, screen_y+0.40*GRID_SIZE), (screen_x-0.05*GRID_SIZE, screen_y+0.40*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False) 239 | if action == 'west': 240 | polygon( [(screen_x-0.45*GRID_SIZE, screen_y), (screen_x-0.4*GRID_SIZE, screen_y+0.05*GRID_SIZE), (screen_x-0.4*GRID_SIZE, screen_y-0.05*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False) 241 | if action == 'east': 242 | polygon( [(screen_x+0.45*GRID_SIZE, screen_y), (screen_x+0.4*GRID_SIZE, screen_y+0.05*GRID_SIZE), (screen_x+0.4*GRID_SIZE, screen_y-0.05*GRID_SIZE)], EDGE_COLOR, filled = 1, smoothed = False) 243 | 244 | 245 | text_color = TEXT_COLOR 246 | 247 | if not isObstacle and isCurrent: 248 | circle( (screen_x, screen_y), 0.1*GRID_SIZE, outlineColor=LOCATION_COLOR, fillColor=LOCATION_COLOR ) 249 | 250 | if not isObstacle: 251 | text( (screen_x, screen_y), text_color, valStr, "Courier", -30, "bold", "c") 252 | 253 | 254 | def drawSquareQ(x, y, qVals, minVal, maxVal, valStrs, bestActions, isCurrent): 255 | 256 | (screen_x, screen_y) = to_screen((x, y)) 257 | 258 | center = (screen_x, screen_y) 259 | nw = (screen_x-0.5*GRID_SIZE, screen_y-0.5*GRID_SIZE) 260 | ne = (screen_x+0.5*GRID_SIZE, screen_y-0.5*GRID_SIZE) 261 | se = (screen_x+0.5*GRID_SIZE, screen_y+0.5*GRID_SIZE) 262 | sw = (screen_x-0.5*GRID_SIZE, screen_y+0.5*GRID_SIZE) 263 | n = (screen_x, screen_y-0.5*GRID_SIZE+5) 264 | s = (screen_x, screen_y+0.5*GRID_SIZE-5) 265 | w = (screen_x-0.5*GRID_SIZE+5, screen_y) 266 | e = (screen_x+0.5*GRID_SIZE-5, screen_y) 267 | 268 | actions = qVals.keys() 269 | for action in actions: 270 | 271 | wedge_color = getColor(qVals[action], minVal, maxVal) 272 | 273 | if action == 'north': 274 | polygon( (center, nw, ne), wedge_color, filled = 1, smoothed = False) 275 
| #text(n, text_color, valStr, "Courier", 8, "bold", "n") 276 | if action == 'south': 277 | polygon( (center, sw, se), wedge_color, filled = 1, smoothed = False) 278 | #text(s, text_color, valStr, "Courier", 8, "bold", "s") 279 | if action == 'east': 280 | polygon( (center, ne, se), wedge_color, filled = 1, smoothed = False) 281 | #text(e, text_color, valStr, "Courier", 8, "bold", "e") 282 | if action == 'west': 283 | polygon( (center, nw, sw), wedge_color, filled = 1, smoothed = False) 284 | #text(w, text_color, valStr, "Courier", 8, "bold", "w") 285 | 286 | square( (screen_x, screen_y), 287 | 0.5* GRID_SIZE, 288 | color = EDGE_COLOR, 289 | filled = 0, 290 | width = 3) 291 | line(ne, sw, color = EDGE_COLOR) 292 | line(nw, se, color = EDGE_COLOR) 293 | 294 | if isCurrent: 295 | circle( (screen_x, screen_y), 0.1*GRID_SIZE, LOCATION_COLOR, fillColor=LOCATION_COLOR ) 296 | 297 | for action in actions: 298 | text_color = TEXT_COLOR 299 | if qVals[action] < max(qVals.values()): text_color = MUTED_TEXT_COLOR 300 | valStr = "" 301 | if action in valStrs: 302 | valStr = valStrs[action] 303 | h = -20 304 | if action == 'north': 305 | #polygon( (center, nw, ne), wedge_color, filled = 1, smooth = 0) 306 | text(n, text_color, valStr, "Courier", h, "bold", "n") 307 | if action == 'south': 308 | #polygon( (center, sw, se), wedge_color, filled = 1, smooth = 0) 309 | text(s, text_color, valStr, "Courier", h, "bold", "s") 310 | if action == 'east': 311 | #polygon( (center, ne, se), wedge_color, filled = 1, smooth = 0) 312 | text(e, text_color, valStr, "Courier", h, "bold", "e") 313 | if action == 'west': 314 | #polygon( (center, nw, sw), wedge_color, filled = 1, smooth = 0) 315 | text(w, text_color, valStr, "Courier", h, "bold", "w") 316 | 317 | 318 | def getColor(val, minVal, max): 319 | r, g = 0.0, 0.0 320 | if val < 0 and minVal < 0: 321 | r = val * 0.65 / minVal 322 | if val > 0 and max > 0: 323 | g = val * 0.65 / max 324 | return formatColor(r,g,0.0) 325 | 326 | 327 | def 
square(pos, size, color, filled, width): 328 | x, y = pos 329 | dx, dy = size, size 330 | return polygon([(x - dx, y - dy), (x - dx, y + dy), (x + dx, y + dy), (x + dx, y - dy)], outlineColor=color, fillColor=color, filled=filled, width=width, smoothed=False) 331 | 332 | 333 | def to_screen(point): 334 | ( gamex, gamey ) = point 335 | x = gamex*GRID_SIZE + MARGIN 336 | y = (GRID_HEIGHT - gamey - 1)*GRID_SIZE + MARGIN 337 | return ( x, y ) 338 | 339 | def to_grid(point): 340 | (x, y) = point 341 | x = int ((y - MARGIN + GRID_SIZE * 0.5) / GRID_SIZE) 342 | y = int ((x - MARGIN + GRID_SIZE * 0.5) / GRID_SIZE) 343 | print point, "-->", (x, y) 344 | return (x, y) 345 | -------------------------------------------------------------------------------- /graphicsUtils.py: -------------------------------------------------------------------------------- 1 | # graphicsUtils.py 2 | # ---------------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 
9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | import sys 12 | import math 13 | import random 14 | import string 15 | import time 16 | import types 17 | import Tkinter 18 | 19 | _Windows = sys.platform == 'win32' # True if on Win95/98/NT 20 | 21 | _root_window = None # The root window for graphics output 22 | _canvas = None # The canvas which holds graphics 23 | _canvas_xs = None # Size of canvas object 24 | _canvas_ys = None 25 | _canvas_x = None # Current position on canvas 26 | _canvas_y = None 27 | _canvas_col = None # Current colour (set to black below) 28 | _canvas_tsize = 12 29 | _canvas_tserifs = 0 30 | 31 | def formatColor(r, g, b): 32 | return '#%02x%02x%02x' % (int(r * 255), int(g * 255), int(b * 255)) 33 | 34 | def colorToVector(color): 35 | return map(lambda x: int(x, 16) / 256.0, [color[1:3], color[3:5], color[5:7]]) 36 | 37 | if _Windows: 38 | _canvas_tfonts = ['times new roman', 'lucida console'] 39 | else: 40 | _canvas_tfonts = ['times', 'lucidasans-24'] 41 | pass # XXX need defaults here 42 | 43 | def sleep(secs): 44 | global _root_window 45 | if _root_window == None: 46 | time.sleep(secs) 47 | else: 48 | _root_window.update_idletasks() 49 | _root_window.after(int(1000 * secs), _root_window.quit) 50 | _root_window.mainloop() 51 | 52 | def begin_graphics(width=640, height=480, color=formatColor(0, 0, 0), title=None): 53 | 54 | global _root_window, _canvas, _canvas_x, _canvas_y, _canvas_xs, _canvas_ys, _bg_color 55 | 56 | # Check for duplicate call 57 | if _root_window is not None: 58 | # Lose the window. 
59 | _root_window.destroy() 60 | 61 | # Save the canvas size parameters 62 | _canvas_xs, _canvas_ys = width - 1, height - 1 63 | _canvas_x, _canvas_y = 0, _canvas_ys 64 | _bg_color = color 65 | 66 | # Create the root window 67 | _root_window = Tkinter.Tk() 68 | _root_window.protocol('WM_DELETE_WINDOW', _destroy_window) 69 | _root_window.title(title or 'Graphics Window') 70 | _root_window.resizable(0, 0) 71 | 72 | # Create the canvas object 73 | try: 74 | _canvas = Tkinter.Canvas(_root_window, width=width, height=height) 75 | _canvas.pack() 76 | draw_background() 77 | _canvas.update() 78 | except: 79 | _root_window = None 80 | raise 81 | 82 | # Bind to key-down and key-up events 83 | _root_window.bind( "", _keypress ) 84 | _root_window.bind( "", _keyrelease ) 85 | _root_window.bind( "", _clear_keys ) 86 | _root_window.bind( "", _clear_keys ) 87 | _root_window.bind( "", _leftclick ) 88 | _root_window.bind( "", _rightclick ) 89 | _root_window.bind( "", _rightclick ) 90 | _root_window.bind( "", _ctrl_leftclick) 91 | _clear_keys() 92 | 93 | _leftclick_loc = None 94 | _rightclick_loc = None 95 | _ctrl_leftclick_loc = None 96 | 97 | def _leftclick(event): 98 | global _leftclick_loc 99 | _leftclick_loc = (event.x, event.y) 100 | 101 | def _rightclick(event): 102 | global _rightclick_loc 103 | _rightclick_loc = (event.x, event.y) 104 | 105 | def _ctrl_leftclick(event): 106 | global _ctrl_leftclick_loc 107 | _ctrl_leftclick_loc = (event.x, event.y) 108 | 109 | def wait_for_click(): 110 | while True: 111 | global _leftclick_loc 112 | global _rightclick_loc 113 | global _ctrl_leftclick_loc 114 | if _leftclick_loc != None: 115 | val = _leftclick_loc 116 | _leftclick_loc = None 117 | return val, 'left' 118 | if _rightclick_loc != None: 119 | val = _rightclick_loc 120 | _rightclick_loc = None 121 | return val, 'right' 122 | if _ctrl_leftclick_loc != None: 123 | val = _ctrl_leftclick_loc 124 | _ctrl_leftclick_loc = None 125 | return val, 'ctrl_left' 126 | sleep(0.05) 127 | 128 | 
def draw_background(): 129 | corners = [(0,0), (0, _canvas_ys), (_canvas_xs, _canvas_ys), (_canvas_xs, 0)] 130 | polygon(corners, _bg_color, fillColor=_bg_color, filled=True, smoothed=False) 131 | 132 | def _destroy_window(event=None): 133 | sys.exit(0) 134 | # global _root_window 135 | # _root_window.destroy() 136 | # _root_window = None 137 | #print "DESTROY" 138 | 139 | def end_graphics(): 140 | global _root_window, _canvas, _mouse_enabled 141 | try: 142 | try: 143 | sleep(1) 144 | if _root_window != None: 145 | _root_window.destroy() 146 | except SystemExit, e: 147 | print 'Ending graphics raised an exception:', e 148 | finally: 149 | _root_window = None 150 | _canvas = None 151 | _mouse_enabled = 0 152 | _clear_keys() 153 | 154 | def clear_screen(background=None): 155 | global _canvas_x, _canvas_y 156 | _canvas.delete('all') 157 | draw_background() 158 | _canvas_x, _canvas_y = 0, _canvas_ys 159 | 160 | def polygon(coords, outlineColor, fillColor=None, filled=1, smoothed=1, behind=0, width=1): 161 | c = [] 162 | for coord in coords: 163 | c.append(coord[0]) 164 | c.append(coord[1]) 165 | if fillColor == None: fillColor = outlineColor 166 | if filled == 0: fillColor = "" 167 | poly = _canvas.create_polygon(c, outline=outlineColor, fill=fillColor, smooth=smoothed, width=width) 168 | if behind > 0: 169 | _canvas.tag_lower(poly, behind) # Higher should be more visible 170 | return poly 171 | 172 | def square(pos, r, color, filled=1, behind=0): 173 | x, y = pos 174 | coords = [(x - r, y - r), (x + r, y - r), (x + r, y + r), (x - r, y + r)] 175 | return polygon(coords, color, color, filled, 0, behind=behind) 176 | 177 | def circle(pos, r, outlineColor, fillColor, endpoints=None, style='pieslice', width=2): 178 | x, y = pos 179 | x0, x1 = x - r - 1, x + r 180 | y0, y1 = y - r - 1, y + r 181 | if endpoints == None: 182 | e = [0, 359] 183 | else: 184 | e = list(endpoints) 185 | while e[0] > e[1]: e[1] = e[1] + 360 186 | 187 | return _canvas.create_arc(x0, y0, x1, y1, 
outline=outlineColor, fill=fillColor, 188 | extent=e[1] - e[0], start=e[0], style=style, width=width) 189 | 190 | def image(pos, file="../../blueghost.gif"): 191 | x, y = pos 192 | # img = PhotoImage(file=file) 193 | return _canvas.create_image(x, y, image = Tkinter.PhotoImage(file=file), anchor = Tkinter.NW) 194 | 195 | 196 | def refresh(): 197 | _canvas.update_idletasks() 198 | 199 | def moveCircle(id, pos, r, endpoints=None): 200 | global _canvas_x, _canvas_y 201 | 202 | x, y = pos 203 | # x0, x1 = x - r, x + r + 1 204 | # y0, y1 = y - r, y + r + 1 205 | x0, x1 = x - r - 1, x + r 206 | y0, y1 = y - r - 1, y + r 207 | if endpoints == None: 208 | e = [0, 359] 209 | else: 210 | e = list(endpoints) 211 | while e[0] > e[1]: e[1] = e[1] + 360 212 | 213 | edit(id, ('start', e[0]), ('extent', e[1] - e[0])) 214 | move_to(id, x0, y0) 215 | 216 | def edit(id, *args): 217 | _canvas.itemconfigure(id, **dict(args)) 218 | 219 | def text(pos, color, contents, font='Helvetica', size=12, style='normal', anchor="nw"): 220 | global _canvas_x, _canvas_y 221 | x, y = pos 222 | font = (font, str(size), style) 223 | return _canvas.create_text(x, y, fill=color, text=contents, font=font, anchor=anchor) 224 | 225 | def changeText(id, newText, font=None, size=12, style='normal'): 226 | _canvas.itemconfigure(id, text=newText) 227 | if font != None: 228 | _canvas.itemconfigure(id, font=(font, '-%d' % size, style)) 229 | 230 | def changeColor(id, newColor): 231 | _canvas.itemconfigure(id, fill=newColor) 232 | 233 | def line(here, there, color=formatColor(0, 0, 0), width=2): 234 | x0, y0 = here[0], here[1] 235 | x1, y1 = there[0], there[1] 236 | return _canvas.create_line(x0, y0, x1, y1, fill=color, width=width) 237 | 238 | ############################################################################## 239 | ### Keypress handling ######################################################## 240 | ############################################################################## 241 | 242 | # We bind to 
# key-down and key-up events.

_keysdown = {}
_keyswaiting = {}
# This holds an unprocessed key release.  We delay key releases by up to
# one call to keys_pressed() to get round a problem with auto repeat.
_got_release = None

def _keypress(event):
    global _got_release
    _keysdown[event.keysym] = 1
    _keyswaiting[event.keysym] = 1
    _got_release = None

def _keyrelease(event):
    global _got_release
    try:
        del _keysdown[event.keysym]
    except:
        pass
    _got_release = 1

def remap_arrows(event):
    # TURN ARROW PRESSES INTO LETTERS (SHOULD BE IN KEYBOARD AGENT)
    if event.char in ['a', 's', 'd', 'w']:
        return
    # Two keycodes per arrow: Windows vs. X11.
    arrows = {37: 'a', 101: 'a',   # LEFT
              38: 'w', 99:  'w',   # UP
              39: 'd', 102: 'd',   # RIGHT
              40: 's', 104: 's'}   # DOWN
    if event.keycode in arrows:
        event.char = arrows[event.keycode]

def _clear_keys(event=None):
    global _keysdown, _got_release, _keyswaiting
    _keysdown = {}
    _keyswaiting = {}
    _got_release = None

def keys_pressed(d_o_e=Tkinter.tkinter.dooneevent,
                 d_w=Tkinter.tkinter.DONT_WAIT):
    """Pump pending Tk events and return the keys currently held down."""
    d_o_e(d_w)
    if _got_release:
        # Process the delayed release before reporting.
        d_o_e(d_w)
    return _keysdown.keys()

def keys_waiting():
    """Return (and consume) the keys pressed since the previous call."""
    global _keyswaiting
    keys = _keyswaiting.keys()
    _keyswaiting = {}
    return keys

# Block for a list of keys...
300 | 301 | def wait_for_keys(): 302 | keys = [] 303 | while keys == []: 304 | keys = keys_pressed() 305 | sleep(0.05) 306 | return keys 307 | 308 | def remove_from_screen(x, 309 | d_o_e=Tkinter.tkinter.dooneevent, 310 | d_w=Tkinter.tkinter.DONT_WAIT): 311 | _canvas.delete(x) 312 | d_o_e(d_w) 313 | 314 | def _adjust_coords(coord_list, x, y): 315 | for i in range(0, len(coord_list), 2): 316 | coord_list[i] = coord_list[i] + x 317 | coord_list[i + 1] = coord_list[i + 1] + y 318 | return coord_list 319 | 320 | def move_to(object, x, y=None, 321 | d_o_e=Tkinter.tkinter.dooneevent, 322 | d_w=Tkinter.tkinter.DONT_WAIT): 323 | if y is None: 324 | try: x, y = x 325 | except: raise 'incomprehensible coordinates' 326 | 327 | horiz = True 328 | newCoords = [] 329 | current_x, current_y = _canvas.coords(object)[0:2] # first point 330 | for coord in _canvas.coords(object): 331 | if horiz: 332 | inc = x - current_x 333 | else: 334 | inc = y - current_y 335 | horiz = not horiz 336 | 337 | newCoords.append(coord + inc) 338 | 339 | _canvas.coords(object, *newCoords) 340 | d_o_e(d_w) 341 | 342 | def move_by(object, x, y=None, 343 | d_o_e=Tkinter.tkinter.dooneevent, 344 | d_w=Tkinter.tkinter.DONT_WAIT, lift=False): 345 | if y is None: 346 | try: x, y = x 347 | except: raise Exception, 'incomprehensible coordinates' 348 | 349 | horiz = True 350 | newCoords = [] 351 | for coord in _canvas.coords(object): 352 | if horiz: 353 | inc = x 354 | else: 355 | inc = y 356 | horiz = not horiz 357 | 358 | newCoords.append(coord + inc) 359 | 360 | _canvas.coords(object, *newCoords) 361 | d_o_e(d_w) 362 | if lift: 363 | _canvas.tag_raise(object) 364 | 365 | def writePostscript(filename): 366 | "Writes the current canvas to a postscript file." 
367 | psfile = file(filename, 'w') 368 | psfile.write(_canvas.postscript(pageanchor='sw', 369 | y='0.c', 370 | x='0.c')) 371 | psfile.close() 372 | 373 | ghost_shape = [ 374 | (0, - 0.5), 375 | (0.25, - 0.75), 376 | (0.5, - 0.5), 377 | (0.75, - 0.75), 378 | (0.75, 0.5), 379 | (0.5, 0.75), 380 | (- 0.5, 0.75), 381 | (- 0.75, 0.5), 382 | (- 0.75, - 0.75), 383 | (- 0.5, - 0.5), 384 | (- 0.25, - 0.75) 385 | ] 386 | 387 | if __name__ == '__main__': 388 | begin_graphics() 389 | clear_screen() 390 | ghost_shape = [(x * 10 + 20, y * 10 + 20) for x, y in ghost_shape] 391 | g = polygon(ghost_shape, formatColor(1, 1, 1)) 392 | move_to(g, (50, 50)) 393 | circle((150, 150), 20, formatColor(0.7, 0.3, 0.0), endpoints=[15, - 15]) 394 | sleep(2) 395 | -------------------------------------------------------------------------------- /keyboardAgents.py: -------------------------------------------------------------------------------- 1 | # keyboardAgents.py 2 | # ----------------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | from game import Agent 12 | from game import Directions 13 | import random 14 | 15 | class KeyboardAgent(Agent): 16 | """ 17 | An agent controlled by the keyboard. 18 | """ 19 | # NOTE: Arrow keys also work. 
20 | WEST_KEY = 'a' 21 | EAST_KEY = 'd' 22 | NORTH_KEY = 'w' 23 | SOUTH_KEY = 's' 24 | STOP_KEY = 'q' 25 | 26 | def __init__( self, index = 0 ): 27 | 28 | self.lastMove = Directions.STOP 29 | self.index = index 30 | self.keys = [] 31 | 32 | def getAction( self, state): 33 | from graphicsUtils import keys_waiting 34 | from graphicsUtils import keys_pressed 35 | keys = keys_waiting() + keys_pressed() 36 | if keys != []: 37 | self.keys = keys 38 | 39 | legal = state.getLegalActions(self.index) 40 | move = self.getMove(legal) 41 | 42 | if move == Directions.STOP: 43 | # Try to move in the same direction as before 44 | if self.lastMove in legal: 45 | move = self.lastMove 46 | 47 | if (self.STOP_KEY in self.keys) and Directions.STOP in legal: move = Directions.STOP 48 | 49 | if move not in legal: 50 | move = random.choice(legal) 51 | 52 | self.lastMove = move 53 | return move 54 | 55 | def getMove(self, legal): 56 | move = Directions.STOP 57 | if (self.WEST_KEY in self.keys or 'Left' in self.keys) and Directions.WEST in legal: move = Directions.WEST 58 | if (self.EAST_KEY in self.keys or 'Right' in self.keys) and Directions.EAST in legal: move = Directions.EAST 59 | if (self.NORTH_KEY in self.keys or 'Up' in self.keys) and Directions.NORTH in legal: move = Directions.NORTH 60 | if (self.SOUTH_KEY in self.keys or 'Down' in self.keys) and Directions.SOUTH in legal: move = Directions.SOUTH 61 | return move 62 | 63 | class KeyboardAgent2(KeyboardAgent): 64 | """ 65 | A second agent controlled by the keyboard. 66 | """ 67 | # NOTE: Arrow keys also work. 
68 | WEST_KEY = 'j' 69 | EAST_KEY = "l" 70 | NORTH_KEY = 'i' 71 | SOUTH_KEY = 'k' 72 | STOP_KEY = 'u' 73 | 74 | def getMove(self, legal): 75 | move = Directions.STOP 76 | if (self.WEST_KEY in self.keys) and Directions.WEST in legal: move = Directions.WEST 77 | if (self.EAST_KEY in self.keys) and Directions.EAST in legal: move = Directions.EAST 78 | if (self.NORTH_KEY in self.keys) and Directions.NORTH in legal: move = Directions.NORTH 79 | if (self.SOUTH_KEY in self.keys) and Directions.SOUTH in legal: move = Directions.SOUTH 80 | return move 81 | -------------------------------------------------------------------------------- /layout.py: -------------------------------------------------------------------------------- 1 | # layout.py 2 | # --------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | from util import manhattanDistance 12 | from game import Grid 13 | import os 14 | import random 15 | 16 | VISIBILITY_MATRIX_CACHE = {} 17 | 18 | class Layout: 19 | """ 20 | A Layout manages the static information about the game board. 
21 | """ 22 | 23 | def __init__(self, layoutText): 24 | self.width = len(layoutText[0]) 25 | self.height= len(layoutText) 26 | self.walls = Grid(self.width, self.height, False) 27 | self.food = Grid(self.width, self.height, False) 28 | self.capsules = [] 29 | self.agentPositions = [] 30 | self.numGhosts = 0 31 | self.processLayoutText(layoutText) 32 | self.layoutText = layoutText 33 | # self.initializeVisibilityMatrix() 34 | 35 | def getNumGhosts(self): 36 | return self.numGhosts 37 | 38 | def initializeVisibilityMatrix(self): 39 | global VISIBILITY_MATRIX_CACHE 40 | if reduce(str.__add__, self.layoutText) not in VISIBILITY_MATRIX_CACHE: 41 | from game import Directions 42 | vecs = [(-0.5,0), (0.5,0),(0,-0.5),(0,0.5)] 43 | dirs = [Directions.NORTH, Directions.SOUTH, Directions.WEST, Directions.EAST] 44 | vis = Grid(self.width, self.height, {Directions.NORTH:set(), Directions.SOUTH:set(), Directions.EAST:set(), Directions.WEST:set(), Directions.STOP:set()}) 45 | for x in range(self.width): 46 | for y in range(self.height): 47 | if self.walls[x][y] == False: 48 | for vec, direction in zip(vecs, dirs): 49 | dx, dy = vec 50 | nextx, nexty = x + dx, y + dy 51 | while (nextx + nexty) != int(nextx) + int(nexty) or not self.walls[int(nextx)][int(nexty)] : 52 | vis[x][y][direction].add((nextx, nexty)) 53 | nextx, nexty = x + dx, y + dy 54 | self.visibility = vis 55 | VISIBILITY_MATRIX_CACHE[reduce(str.__add__, self.layoutText)] = vis 56 | else: 57 | self.visibility = VISIBILITY_MATRIX_CACHE[reduce(str.__add__, self.layoutText)] 58 | 59 | def isWall(self, pos): 60 | x, col = pos 61 | return self.walls[x][col] 62 | 63 | def getRandomLegalPosition(self): 64 | x = random.choice(range(self.width)) 65 | y = random.choice(range(self.height)) 66 | while self.isWall( (x, y) ): 67 | x = random.choice(range(self.width)) 68 | y = random.choice(range(self.height)) 69 | return (x,y) 70 | 71 | def getRandomCorner(self): 72 | poses = [(1,1), (1, self.height - 2), (self.width - 2, 1), 
(self.width - 2, self.height - 2)] 73 | return random.choice(poses) 74 | 75 | def getFurthestCorner(self, pacPos): 76 | poses = [(1,1), (1, self.height - 2), (self.width - 2, 1), (self.width - 2, self.height - 2)] 77 | dist, pos = max([(manhattanDistance(p, pacPos), p) for p in poses]) 78 | return pos 79 | 80 | def isVisibleFrom(self, ghostPos, pacPos, pacDirection): 81 | row, col = [int(x) for x in pacPos] 82 | return ghostPos in self.visibility[row][col][pacDirection] 83 | 84 | def __str__(self): 85 | return "\n".join(self.layoutText) 86 | 87 | def deepCopy(self): 88 | return Layout(self.layoutText[:]) 89 | 90 | def processLayoutText(self, layoutText): 91 | """ 92 | Coordinates are flipped from the input format to the (x,y) convention here 93 | 94 | The shape of the maze. Each character 95 | represents a different type of object. 96 | % - Wall 97 | . - Food 98 | o - Capsule 99 | G - Ghost 100 | P - Pacman 101 | Other characters are ignored. 102 | """ 103 | maxY = self.height - 1 104 | for y in range(self.height): 105 | for x in range(self.width): 106 | layoutChar = layoutText[maxY - y][x] 107 | self.processLayoutChar(x, y, layoutChar) 108 | self.agentPositions.sort() 109 | self.agentPositions = [ ( i == 0, pos) for i, pos in self.agentPositions] 110 | 111 | def processLayoutChar(self, x, y, layoutChar): 112 | if layoutChar == '%': 113 | self.walls[x][y] = True 114 | elif layoutChar == '.': 115 | self.food[x][y] = True 116 | elif layoutChar == 'o': 117 | self.capsules.append((x, y)) 118 | elif layoutChar == 'P': 119 | self.agentPositions.append( (0, (x, y) ) ) 120 | elif layoutChar in ['G']: 121 | self.agentPositions.append( (1, (x, y) ) ) 122 | self.numGhosts += 1 123 | elif layoutChar in ['1', '2', '3', '4']: 124 | self.agentPositions.append( (int(layoutChar), (x,y))) 125 | self.numGhosts += 1 126 | def getLayout(name, back = 2): 127 | if name.endswith('.lay'): 128 | layout = tryToLoad('layouts/' + name) 129 | if layout == None: layout = tryToLoad(name) 130 | 
else: 131 | layout = tryToLoad('layouts/' + name + '.lay') 132 | if layout == None: layout = tryToLoad(name + '.lay') 133 | if layout == None and back >= 0: 134 | curdir = os.path.abspath('.') 135 | os.chdir('..') 136 | layout = getLayout(name, back -1) 137 | os.chdir(curdir) 138 | return layout 139 | 140 | def tryToLoad(fullname): 141 | if(not os.path.exists(fullname)): return None 142 | f = open(fullname) 143 | try: return Layout([line.strip() for line in f]) 144 | finally: f.close() 145 | -------------------------------------------------------------------------------- /layouts/capsuleClassic.lay: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%% 2 | %G. G ....% 3 | %.% % %%%%%% %.%%.% 4 | %.%o% % o% %.o%.% 5 | %.%%%.% %%% %..%.% 6 | %..... P %..%G% 7 | %%%%%%%%%%%%%%%%%%%% 8 | -------------------------------------------------------------------------------- /layouts/contestClassic.lay: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%% 2 | %o...%........%...o% 3 | %.%%.%.%%..%%.%.%%.% 4 | %...... G GG%......% 5 | %.%.%%.%% %%%.%%.%.% 6 | %.%....% ooo%.%..%.% 7 | %.%.%%.% %% %.%.%%.% 8 | %o%......P....%....% 9 | %%%%%%%%%%%%%%%%%%%% 10 | -------------------------------------------------------------------------------- /layouts/mediumClassic.lay: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%% 2 | %o...%........%....% 3 | %.%%.%.%%%%%%.%.%%.% 4 | %.%..............%.% 5 | %.%.%%.%% %%.%%.%.% 6 | %......%G G%......% 7 | %.%.%%.%%%%%%.%%.%.% 8 | %.%..............%.% 9 | %.%%.%.%%%%%%.%.%%.% 10 | %....%...P....%...o% 11 | %%%%%%%%%%%%%%%%%%%% 12 | -------------------------------------------------------------------------------- /layouts/mediumGrid.lay: -------------------------------------------------------------------------------- 1 | %%%%%%%% 2 | %P % 3 | % .% . % 4 | % % % 5 | % .% . 
% 6 | % G% 7 | %%%%%%%% 8 | -------------------------------------------------------------------------------- /layouts/minimaxClassic.lay: -------------------------------------------------------------------------------- 1 | %%%%%%%%% 2 | %.P G% 3 | % %.%G%%% 4 | %G %%% 5 | %%%%%%%%% 6 | -------------------------------------------------------------------------------- /layouts/openClassic.lay: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%%%%%%% 2 | %.. P .... .... % 3 | %.. ... ... ... ... % 4 | %.. ... ... ... ... % 5 | %.. .... .... G % 6 | %.. ... ... ... ... % 7 | %.. ... ... ... ... % 8 | %.. .... .... o% 9 | %%%%%%%%%%%%%%%%%%%%%%%%% 10 | -------------------------------------------------------------------------------- /layouts/originalClassic.lay: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | %............%%............% 3 | %.%%%%.%%%%%.%%.%%%%%.%%%%.% 4 | %o%%%%.%%%%%.%%.%%%%%.%%%%o% 5 | %.%%%%.%%%%%.%%.%%%%%.%%%%.% 6 | %..........................% 7 | %.%%%%.%%.%%%%%%%%.%%.%%%%.% 8 | %.%%%%.%%.%%%%%%%%.%%.%%%%.% 9 | %......%%....%%....%%......% 10 | %%%%%%.%%%%% %% %%%%%.%%%%%% 11 | %%%%%%.%%%%% %% %%%%%.%%%%%% 12 | %%%%%%.% %.%%%%%% 13 | %%%%%%.% %%%% %%%% %.%%%%%% 14 | % . %G GG G% . % 15 | %%%%%%.% %%%%%%%%%% %.%%%%%% 16 | %%%%%%.% %.%%%%%% 17 | %%%%%%.% %%%%%%%%%% %.%%%%%% 18 | %............%%............% 19 | %.%%%%.%%%%%.%%.%%%%%.%%%%.% 20 | %.%%%%.%%%%%.%%.%%%%%.%%%%.% 21 | %o..%%....... 
.......%%..o% 22 | %%%.%%.%%.%%%%%%%%.%%.%%.%%% 23 | %%%.%%.%%.%%%%%%%%.%%.%%.%%% 24 | %......%%....%%....%%......% 25 | %.%%%%%%%%%%.%%.%%%%%%%%%%.% 26 | %.............P............% 27 | %%%%%%%%%%%%%%%%%%%%%%%%%%%% 28 | -------------------------------------------------------------------------------- /layouts/smallClassic.lay: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%% 2 | %......%G G%......% 3 | %.%%...%% %%...%%.% 4 | %.%o.%........%.o%.% 5 | %.%%.%.%%%%%%.%.%%.% 6 | %........P.........% 7 | %%%%%%%%%%%%%%%%%%%% 8 | -------------------------------------------------------------------------------- /layouts/smallGrid.lay: -------------------------------------------------------------------------------- 1 | %%%%%%% 2 | % P % 3 | % %%% % 4 | % %. % 5 | % %%% % 6 | %. G % 7 | %%%%%%% 8 | -------------------------------------------------------------------------------- /layouts/testClassic.lay: -------------------------------------------------------------------------------- 1 | %%%%% 2 | % . % 3 | %.G.% 4 | % . % 5 | %. .% 6 | % % 7 | % .% 8 | % % 9 | %P .% 10 | %%%%% 11 | -------------------------------------------------------------------------------- /layouts/trappedClassic.lay: -------------------------------------------------------------------------------- 1 | %%%%%%%% 2 | % P G% 3 | %G%%%%%% 4 | %.... % 5 | %%%%%%%% 6 | -------------------------------------------------------------------------------- /layouts/trickyClassic.lay: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%% 2 | %o...%........%...o% 3 | %.%%.%.%%..%%.%.%%.% 4 | %.%.....%..%.....%.% 5 | %.%.%%.%% %%.%%.%.% 6 | %...... 
GGGG%.%....% 7 | %.%....%%%%%%.%..%.% 8 | %.%....% oo%.%..%.% 9 | %.%....% %%%%.%..%.% 10 | %.%...........%..%.% 11 | %.%%.%.%%%%%%.%.%%.% 12 | %o...%...P....%...o% 13 | %%%%%%%%%%%%%%%%%%%% 14 | -------------------------------------------------------------------------------- /learningAgents.py: -------------------------------------------------------------------------------- 1 | # learningAgents.py 2 | # ----------------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | from game import Directions, Agent, Actions 12 | 13 | import random,util,time 14 | 15 | class ValueEstimationAgent(Agent): 16 | """ 17 | Abstract agent which assigns values to (state,action) 18 | Q-Values for an environment. As well as a value to a 19 | state and a policy given respectively by, 20 | 21 | V(s) = max_{a in actions} Q(s,a) 22 | policy(s) = arg_max_{a in actions} Q(s,a) 23 | 24 | Both ValueIterationAgent and QLearningAgent inherit 25 | from this agent. While a ValueIterationAgent has 26 | a model of the environment via a MarkovDecisionProcess 27 | (see mdp.py) that is used to estimate Q-Values before 28 | ever actually acting, the QLearningAgent estimates 29 | Q-Values while acting in the environment. 30 | """ 31 | 32 | def __init__(self, alpha=1.0, epsilon=0.05, gamma=0.8, numTraining = 10): 33 | """ 34 | Sets options, which can be passed in via the Pacman command line using -a alpha=0.5,... 
35 | alpha - learning rate 36 | epsilon - exploration rate 37 | gamma - discount factor 38 | numTraining - number of training episodes, i.e. no learning after these many episodes 39 | """ 40 | self.alpha = float(alpha) 41 | self.epsilon = float(epsilon) 42 | self.discount = float(gamma) 43 | self.numTraining = int(numTraining) 44 | 45 | #################################### 46 | # Override These Functions # 47 | #################################### 48 | def getQValue(self, state, action): 49 | """ 50 | Should return Q(state,action) 51 | """ 52 | util.raiseNotDefined() 53 | 54 | def getValue(self, state): 55 | """ 56 | What is the value of this state under the best action? 57 | Concretely, this is given by 58 | 59 | V(s) = max_{a in actions} Q(s,a) 60 | """ 61 | util.raiseNotDefined() 62 | 63 | def getPolicy(self, state): 64 | """ 65 | What is the best action to take in the state. Note that because 66 | we might want to explore, this might not coincide with getAction 67 | Concretely, this is given by 68 | 69 | policy(s) = arg_max_{a in actions} Q(s,a) 70 | 71 | If many actions achieve the maximal Q-value, 72 | it doesn't matter which is selected. 73 | """ 74 | util.raiseNotDefined() 75 | 76 | def getAction(self, state): 77 | """ 78 | state: can call state.getLegalActions() 79 | Choose an action and return it. 80 | """ 81 | util.raiseNotDefined() 82 | 83 | class ReinforcementAgent(ValueEstimationAgent): 84 | """ 85 | Abstract Reinforcemnt Agent: A ValueEstimationAgent 86 | which estimates Q-Values (as well as policies) from experience 87 | rather than a model 88 | 89 | What you need to know: 90 | - The environment will call 91 | observeTransition(state,action,nextState,deltaReward), 92 | which will call update(state, action, nextState, deltaReward) 93 | which you should override. 
94 | - Use self.getLegalActions(state) to know which actions 95 | are available in a state 96 | """ 97 | #################################### 98 | # Override These Functions # 99 | #################################### 100 | 101 | def update(self, state, action, nextState, reward): 102 | """ 103 | This class will call this function, which you write, after 104 | observing a transition and reward 105 | """ 106 | util.raiseNotDefined() 107 | 108 | #################################### 109 | # Read These Functions # 110 | #################################### 111 | 112 | def getLegalActions(self,state): 113 | """ 114 | Get the actions available for a given 115 | state. This is what you should use to 116 | obtain legal actions for a state 117 | """ 118 | return self.actionFn(state) 119 | 120 | def observeTransition(self, state,action,nextState,deltaReward): 121 | """ 122 | Called by environment to inform agent that a transition has 123 | been observed. This will result in a call to self.update 124 | on the same arguments 125 | 126 | NOTE: Do *not* override or call this function 127 | """ 128 | self.episodeRewards += deltaReward 129 | self.update(state,action,nextState,deltaReward) 130 | 131 | def startEpisode(self): 132 | """ 133 | Called by environment when new episode is starting 134 | """ 135 | self.lastState = None 136 | self.lastAction = None 137 | self.episodeRewards = 0.0 138 | 139 | def stopEpisode(self): 140 | """ 141 | Called by environment when episode is done 142 | """ 143 | if self.episodesSoFar < self.numTraining: 144 | self.accumTrainRewards += self.episodeRewards 145 | else: 146 | self.accumTestRewards += self.episodeRewards 147 | self.episodesSoFar += 1 148 | if self.episodesSoFar >= self.numTraining: 149 | # Take off the training wheels 150 | self.epsilon = 0.0 # no exploration 151 | self.alpha = 0.0 # no learning 152 | 153 | def isInTraining(self): 154 | return self.episodesSoFar < self.numTraining 155 | 156 | def isInTesting(self): 157 | return not 
self.isInTraining() 158 | 159 | def __init__(self, actionFn = None, numTraining=100, epsilon=0.5, alpha=0.5, gamma=1): 160 | """ 161 | actionFn: Function which takes a state and returns the list of legal actions 162 | 163 | alpha - learning rate 164 | epsilon - exploration rate 165 | gamma - discount factor 166 | numTraining - number of training episodes, i.e. no learning after these many episodes 167 | """ 168 | if actionFn == None: 169 | actionFn = lambda state: state.getLegalActions() 170 | self.actionFn = actionFn 171 | self.episodesSoFar = 0 172 | self.accumTrainRewards = 0.0 173 | self.accumTestRewards = 0.0 174 | self.numTraining = int(numTraining) 175 | self.epsilon = float(epsilon) 176 | self.alpha = float(alpha) 177 | self.discount = float(gamma) 178 | 179 | ################################ 180 | # Controls needed for Crawler # 181 | ################################ 182 | def setEpsilon(self, epsilon): 183 | self.epsilon = epsilon 184 | 185 | def setLearningRate(self, alpha): 186 | self.alpha = alpha 187 | 188 | def setDiscount(self, discount): 189 | self.discount = discount 190 | 191 | def doAction(self,state,action): 192 | """ 193 | Called by inherited class when 194 | an action is taken in a state 195 | """ 196 | self.lastState = state 197 | self.lastAction = action 198 | 199 | ################### 200 | # Pacman Specific # 201 | ################### 202 | def observationFunction(self, state): 203 | """ 204 | This is where we ended up after our last action. 
205 | The simulation should somehow ensure this is called 206 | """ 207 | if not self.lastState is None: 208 | reward = state.getScore() - self.lastState.getScore() 209 | self.observeTransition(self.lastState, self.lastAction, state, reward) 210 | return state 211 | 212 | def registerInitialState(self, state): 213 | self.startEpisode() 214 | if self.episodesSoFar == 0: 215 | print 'Beginning %d episodes of Training' % (self.numTraining) 216 | 217 | def final(self, state): 218 | """ 219 | Called by Pacman game at the terminal state 220 | """ 221 | deltaReward = state.getScore() - self.lastState.getScore() 222 | self.observeTransition(self.lastState, self.lastAction, state, deltaReward) 223 | self.stopEpisode() 224 | 225 | # Make sure we have this var 226 | if not 'episodeStartTime' in self.__dict__: 227 | self.episodeStartTime = time.time() 228 | if not 'lastWindowAccumRewards' in self.__dict__: 229 | self.lastWindowAccumRewards = 0.0 230 | self.lastWindowAccumRewards += state.getScore() 231 | 232 | NUM_EPS_UPDATE = 100 233 | if self.episodesSoFar % NUM_EPS_UPDATE == 0: 234 | print 'Reinforcement Learning Status:' 235 | windowAvg = self.lastWindowAccumRewards / float(NUM_EPS_UPDATE) 236 | if self.episodesSoFar <= self.numTraining: 237 | trainAvg = self.accumTrainRewards / float(self.episodesSoFar) 238 | print '\tCompleted %d out of %d training episodes' % ( 239 | self.episodesSoFar,self.numTraining) 240 | print '\tAverage Rewards over all training: %.2f' % ( 241 | trainAvg) 242 | else: 243 | testAvg = float(self.accumTestRewards) / (self.episodesSoFar - self.numTraining) 244 | print '\tCompleted %d test episodes' % (self.episodesSoFar - self.numTraining) 245 | print '\tAverage Rewards over testing: %.2f' % testAvg 246 | print '\tAverage Rewards for last %d episodes: %.2f' % ( 247 | NUM_EPS_UPDATE,windowAvg) 248 | print '\tEpisode took %.2f seconds' % (time.time() - self.episodeStartTime) 249 | self.lastWindowAccumRewards = 0.0 250 | self.episodeStartTime = 
# mdp.py
# ------
# Licensing Information: Please do not distribute or publish solutions to this
# project. You are free to use and extend these projects for educational
# purposes. The Pacman AI projects were developed at UC Berkeley, primarily by
# John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and Pieter
# Abbeel in Spring 2013.
# For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html

import random

class MarkovDecisionProcess:
    """
    Abstract interface for a Markov decision process (MDP).

    Concrete subclasses supply the state space, actions, transition model
    and rewards.  Each method body here is the bare name ``abstract``, a
    deliberate trick: calling an un-overridden method raises NameError.
    """

    def getStates(self):
        """
        Return a list of every state in the MDP.

        Enumerating the full state space is generally only feasible for
        small MDPs.
        """
        abstract

    def getStartState(self):
        """
        Return the MDP's start state.
        """
        abstract

    def getPossibleActions(self, state):
        """
        Return the list of actions available from 'state'.
        """
        abstract

    def getTransitionStatesAndProbs(self, state, action):
        """
        Return a list of (nextState, prob) pairs: the states reachable
        from 'state' by taking 'action', each paired with its transition
        probability.

        Note that in Q-learning, and reinforcement learning in general,
        we neither know these probabilities nor model them directly.
        """
        abstract

    def getReward(self, state, action, nextState):
        """
        Return the reward for the (state, action, nextState) transition.

        Not available in the reinforcement-learning setting.
        """
        abstract

    def isTerminal(self, state):
        """
        Return True if 'state' is terminal.  By convention a terminal
        state has zero future reward; it may have no actions, or
        equivalently a single zero-reward 'pass' self-loop — the two
        formulations are equivalent.
        """
        abstract
# For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html

from pacman import Directions
from game import Agent
import random
import game
import util

class LeftTurnAgent(game.Agent):
    "An agent that turns left at every opportunity"

    def getAction(self, state):
        """
        Prefer turning left; otherwise go straight, then right, then
        reverse; STOP only when nothing else is legal.
        """
        legal = state.getLegalPacmanActions()
        current = state.getPacmanState().configuration.direction
        if current == Directions.STOP: current = Directions.NORTH
        left = Directions.LEFT[current]
        if left in legal: return left
        if current in legal: return current
        if Directions.RIGHT[current] in legal: return Directions.RIGHT[current]
        # LEFT of LEFT is the reverse direction
        if Directions.LEFT[left] in legal: return Directions.LEFT[left]
        return Directions.STOP

class GreedyAgent(Agent):
    "Picks the action whose immediate successor state evaluates highest."

    def __init__(self, evalFn="scoreEvaluation"):
        # evalFn names a module-level evaluation function (state -> number).
        self.evaluationFunction = util.lookup(evalFn, globals())
        # Fixed anti-idiom: compare to None with 'is not', not '!='.
        assert self.evaluationFunction is not None

    def getAction(self, state):
        # Generate candidate actions; never voluntarily stop.
        legal = state.getLegalPacmanActions()
        if Directions.STOP in legal: legal.remove(Directions.STOP)

        # Evaluate each *successor* state.  The loop variable is named
        # 'nextState' instead of the original 'state', which shadowed the
        # method parameter (same behavior, far less error-prone).
        successors = [(state.generateSuccessor(0, action), action) for action in legal]
        scored = [(self.evaluationFunction(nextState), action) for nextState, action in successors]
        bestScore = max(scored)[0]
        # Break score ties uniformly at random.
        bestActions = [action for score, action in scored if score == bestScore]
        return random.choice(bestActions)

def scoreEvaluation(state):
    "Evaluation function: the state's current game score."
    return state.getScore()
The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | STUDENT_CODE_DEFAULT = 'analysis.py,qlearningAgents.py,valueIterationAgents.py' 12 | PROJECT_TEST_CLASSES = 'reinforcementTestClasses.py' 13 | PROJECT_NAME = 'Project 3: Reinforcement learning' 14 | -------------------------------------------------------------------------------- /qlearningAgents.py: -------------------------------------------------------------------------------- 1 | # qlearningAgents.py 2 | # ------------------ 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 
# For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html

from game import *
from learningAgents import ReinforcementAgent
from featureExtractors import *
import random,util,math

class QLearningAgent(ReinforcementAgent):
    """
    Q-Learning Agent

    Functions you should fill in:
      - computeValueFromQValues
      - computeActionFromQValues
      - getQValue
      - getAction
      - update

    Instance variables you have access to
      - self.epsilon (exploration prob)
      - self.alpha (learning rate)
      - self.discount (discount rate)

    Functions you should use
      - self.getLegalActions(state)
        which returns legal actions for a state
    """
    def __init__(self, **args):
        # Q-values live in a Counter so unseen (state, action) pairs
        # default to 0.0 without explicit initialization.
        ReinforcementAgent.__init__(self, **args)
        self.q_values = util.Counter()

    def getQValue(self, state, action):
        """
        Returns Q(state,action): 0.0 if we have never seen the pair
        (Counter default), the learned Q-node value otherwise.
        """
        return self.q_values[(state, action)]

    def computeValueFromQValues(self, state):
        """
        Returns max_action Q(state,action) over legal actions, or 0.0
        when there are no legal actions (the terminal state).
        """
        legal = self.getLegalActions(state)
        if not legal:
            return 0.0
        return max(self.getQValue(state, action) for action in legal)

    def computeActionFromQValues(self, state):
        """
        Returns the best legal action in 'state', or None when there
        are no legal actions (the terminal state).
        """
        best_action = None
        best_value = None
        for action in self.getLegalActions(state):
            value = self.getQValue(state, action)
            # Strict '>' keeps the first action on ties, matching the
            # original tie-breaking behavior.
            if best_action is None or value > best_value:
                best_action, best_value = action, value
        return best_action

    def getAction(self, state):
        """
        Epsilon-greedy action selection: with probability self.epsilon
        take a random legal action, otherwise the greedy policy action.
        Returns None when there are no legal actions (terminal state).

        HINT: You might want to use util.flipCoin(prob)
        HINT: To pick randomly from a list, use random.choice(list)
        """
        legalActions = self.getLegalActions(state)
        # BUG FIX: the contract requires None at terminal states;
        # previously random.choice([]) raised IndexError whenever the
        # epsilon coin flip came up True at a terminal state.
        if not legalActions:
            return None
        if util.flipCoin(self.epsilon):
            return random.choice(legalActions)
        return self.computeActionFromQValues(state)

    def update(self, state, action, nextState, reward):
        """
        Observe a state => action => nextState, reward transition and
        apply the Q-learning update:

            Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (r + gamma * V(s'))

        V(s') is computeValueFromQValues(nextState), which already
        returns 0.0 at terminal states, so the old special case for
        "no legal next actions" is subsumed.

        NOTE: the framework calls this on your behalf;
        never call it yourself.
        """
        sample = reward + self.discount * self.computeValueFromQValues(nextState)
        old_q = self.getQValue(state, action)
        self.q_values[(state, action)] = (1 - self.alpha) * old_q + self.alpha * sample

    def getPolicy(self, state):
        return self.computeActionFromQValues(state)

    def getValue(self, state):
        return self.computeValueFromQValues(state)
class ApproximateQAgent(PacmanQAgent):
    """
    ApproximateQLearningAgent

    Represents Q-values as a linear function of features:
        Q(s,a) = w * featureVector(s,a)   (* is the dot product)

    You should only have to overwrite getQValue and update.
    All other QLearningAgent functions should work as is.
    """
    def __init__(self, extractor='IdentityExtractor', **args):
        # Feature extractor is looked up by name so it can be chosen
        # from the command line.
        self.featExtractor = util.lookup(extractor, globals())()
        PacmanQAgent.__init__(self, **args)
        # Weights default to 0.0 for unseen features.
        # (Removed the unused 'self.weight = 0' leftover.)
        self.weights = util.Counter()

    def getWeights(self):
        return self.weights

    def getQValue(self, state, action):
        """
        Returns Q(state,action) = w * featureVector
        where * is the dotProduct operator.
        """
        features = self.featExtractor.getFeatures(state, action)
        return sum(self.weights[feature] * features[feature] for feature in features)

    def update(self, state, action, nextState, reward):
        """
        Gradient-style weight update for every active feature:

            w_i <- w_i + alpha * difference * f_i(s,a)
            difference = (r + gamma * max_a' Q(s',a')) - Q(s,a)

        BUG FIX: 'difference' was previously recomputed inside the
        per-feature loop *after* earlier weights had been modified, so
        later features saw a partially updated Q(s,a).  It must be
        computed exactly once from the pre-update weights.
        """
        features = self.featExtractor.getFeatures(state, action)
        nextActions = self.getLegalActions(nextState)
        if not nextActions:
            # Terminal next state: no future value.
            nextValue = 0.0
        else:
            nextValue = max(self.getQValue(nextState, nextAction) for nextAction in nextActions)
        difference = reward + self.discount * nextValue - self.getQValue(state, action)
        for feature in features:
            self.weights[feature] = self.weights[feature] + self.alpha * difference * features[feature]

    def final(self, state):
        "Called at the end of each game."
        # call the super-class final method
        PacmanQAgent.final(self, state)

        # did we finish training?
        if self.episodesSoFar == self.numTraining:
            # you might want to print your weights here for debugging
            pass
# Class which models a question in a project.  Note that questions have a
# maximum number of points they are worth, and are composed of a series of
# test cases.
class Question(object):

    def raiseNotDefined(self):
        # Abort the run when a subclass forgets to override an abstract method.
        print 'Method not implemented: %s' % inspect.stack()[1][3]
        sys.exit(1)

    def __init__(self, questionDict):
        # questionDict comes from the CONFIG/test parser; values are strings.
        self.maxPoints = int(questionDict['max_points'])
        self.testCases = []

    def getMaxPoints(self):
        return self.maxPoints

    # Note that 'thunk' must be a function which accepts a single argument,
    # namely a 'grading' object.
    def addTestCase(self, testCase, thunk):
        self.testCases.append((testCase, thunk))

    def execute(self, grades):
        # Subclasses define how test results translate into points.
        self.raiseNotDefined()

# Question in which all test cases must be passed in order to receive credit.
class PassAllTestsQuestion(Question):

    def execute(self, grades):
        # TODO: is this the right way to use grades? The autograder doesn't seem to use it.
        testsFailed = False
        grades.assignZeroCredit()
        # Run *every* test (no short-circuit) so all messages get reported.
        for _, f in self.testCases:
            if not f(grades):
                testsFailed = True
        if testsFailed:
            grades.fail("Tests failed.")
        else:
            grades.assignFullCredit()


# Question in which partial credit is given for test cases with a ``points''
# property.  All other tests are mandatory and must be passed.
class HackedPartialCreditQuestion(Question):

    def execute(self, grades):
        # TODO: is this the right way to use grades? The autograder doesn't seem to use it.
        grades.assignZeroCredit()

        points = 0
        passed = True
        for testCase, f in self.testCases:
            testResult = f(grades)
            if "points" in testCase.testDict:
                # Optional test: worth its declared points when it passes.
                if testResult: points += float(testCase.testDict["points"])
            else:
                # Mandatory test: must pass for any credit at all.
                passed = passed and testResult

        ## FIXME: Below terrible hack to match q3's logic
        # Full optional points but a failed mandatory test => zero credit.
        if int(points) == self.maxPoints and not passed:
            grades.assignZeroCredit()
        else:
            grades.addPoints(int(points))


class Q6PartialCreditQuestion(Question):
    """Fails any test which returns False, otherwise doesn't affect the grades object.
    Partial credit tests will add the required points."""

    def execute(self, grades):
        grades.assignZeroCredit()

        # Run every test; a single failure zeroes the question.
        results = []
        for _, f in self.testCases:
            results.append(f(grades))
        if False in results:
            grades.assignZeroCredit()

class PartialCreditQuestion(Question):
    """Fails any test which returns False, otherwise doesn't affect the grades object.
    Partial credit tests will add the required points."""

    def execute(self, grades):
        grades.assignZeroCredit()

        # Unlike Q6PartialCreditQuestion, stop at the *first* failing test.
        for _, f in self.testCases:
            if not f(grades):
                grades.assignZeroCredit()
                grades.fail("Tests failed.")
                return False



class NumberPassedQuestion(Question):
    """Grade is the number of test cases passed."""

    def execute(self, grades):
        grades.addPoints([f(grades) for _, f in self.testCases].count(True))





# Template modeling a generic test case.
class TestCase(object):

    def raiseNotDefined(self):
        # Abort the run when a subclass forgets to override an abstract method.
        print 'Method not implemented: %s' % inspect.stack()[1][3]
        sys.exit(1)

    def getPath(self):
        return self.path

    def __init__(self, question, testDict):
        self.question = question
        # testDict is the parsed .test file (see testParser.TestParser).
        self.testDict = testDict
        self.path = testDict['path']
        # Accumulated output lines, flushed by testPass/testFail/testPartial.
        self.messages = []

    def __str__(self):
        self.raiseNotDefined()

    def execute(self, grades, moduleDict, solutionDict):
        # Run the test against student code; return True on pass.
        self.raiseNotDefined()

    def writeSolution(self, moduleDict, filePath):
        # Regenerate the .solution file from reference code.
        self.raiseNotDefined()
        return True

    # Tests should call the following messages for grading
    # to ensure a uniform format for test output.
    #
    # TODO: this is hairy, but we need to fix grading.py's interface
    # to get a nice hierarchical project - question - test structure,
    # then these should be moved into Question proper.
    def testPass(self, grades):
        grades.addMessage('PASS: %s' % (self.path,))
        for line in self.messages:
            grades.addMessage(' %s' % (line,))
        return True

    def testFail(self, grades):
        grades.addMessage('FAIL: %s' % (self.path,))
        for line in self.messages:
            grades.addMessage(' %s' % (line,))
        return False

    # This should really be question level?
    #
    def testPartial(self, grades, points, maxPoints):
        grades.addPoints(points)
        # Points beyond maxPoints are reported separately as extra credit.
        extraCredit = max(0, points - maxPoints)
        regularCredit = points - extraCredit

        grades.addMessage('%s: %s (%s of %s points)' % ("PASS" if points >= maxPoints else "FAIL", self.path, regularCredit, maxPoints))
        if extraCredit > 0:
            grades.addMessage('EXTRA CREDIT: %s points' % (extraCredit,))

        for line in self.messages:
            grades.addMessage(' %s' % (line,))

        return True

    def addMessage(self, message):
        # Accept multi-line messages; store one list entry per line.
        self.messages.extend(message.split('\n'))
# For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html

import re
import sys

class TestParser(object):
    """
    Parses a .test file into a dict of properties.

    Recognized forms:
        key: "value"            -> one-line property
        key: \"\"\" ... \"\"\"  -> multi-line property
    Blank/comment-only lines are preserved via the '__emit__' entry so the
    file can be regenerated faithfully.
    """

    def __init__(self, path):
        # save the path to the test file
        self.path = path

    def removeComments(self, rawlines):
        # Drop any portion of each line following a '#' symbol, then rejoin.
        stripped = []
        for line in rawlines:
            comment_start = line.find('#')
            if comment_start == -1:
                stripped.append(line)
            else:
                stripped.append(line[:comment_start])
        return '\n'.join(stripped)

    def parse(self):
        # Read the test case and strip comments; keep the raw lines around
        # both for '__raw_lines__' and for verbatim multi-line bodies.
        test = {}
        with open(self.path) as handle:
            raw_lines = handle.read().split('\n')

        comment_free = self.removeComments(raw_lines)
        test['__raw_lines__'] = raw_lines
        test['path'] = self.path
        test['__emit__'] = []
        lines = comment_free.split('\n')
        idx = 0
        # Consume one property (or blank line) per iteration.
        while idx < len(lines):
            line = lines[idx]
            # Blank (or comment-only) line: emit verbatim.
            if re.match(r'\A\s*\Z', line):
                test['__emit__'].append(("raw", raw_lines[idx]))
                idx += 1
                continue
            # One-line property: key: "value"
            one_line = re.match(r'\A([^"]*?):\s*"([^"]*)"\s*\Z', line)
            if one_line:
                test[one_line.group(1)] = one_line.group(2)
                test['__emit__'].append(("oneline", one_line.group(1)))
                idx += 1
                continue
            # Multi-line property: key: """ ... """
            multi_start = re.match(r'\A([^"]*?):\s*"""\s*\Z', line)
            if multi_start:
                body = []
                idx += 1
                while not re.match(r'\A\s*"""\s*\Z', lines[idx]):
                    body.append(raw_lines[idx])
                    idx += 1
                test[multi_start.group(1)] = '\n'.join(body)
                test['__emit__'].append(("multiline", multi_start.group(1)))
                idx += 1
                continue
            print('error parsing test file: %s' % self.path)
            sys.exit(1)
        return test
"""\n%s\n"""\n' % (data, testDict[data])) 80 | else: 81 | raise Exception("Bad __emit__") -------------------------------------------------------------------------------- /test_cases/CONFIG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightninglu10/pacman-reinforcementlearning/2b4b89cbec2bdcfb274cc92dbcf53801e0450b77/test_cases/CONFIG -------------------------------------------------------------------------------- /test_cases/q1/1-tinygrid.solution: -------------------------------------------------------------------------------- 1 | values_k_0: """ 2 | 0.0000 3 | 0.0000 4 | 0.0000 5 | """ 6 | 7 | q_values_k_0_action_north: """ 8 | illegal 9 | 0.0000 10 | illegal 11 | """ 12 | 13 | q_values_k_0_action_east: """ 14 | illegal 15 | 0.0000 16 | illegal 17 | """ 18 | 19 | q_values_k_0_action_exit: """ 20 | -10.0000 21 | illegal 22 | 10.0000 23 | """ 24 | 25 | q_values_k_0_action_south: """ 26 | illegal 27 | 0.0000 28 | illegal 29 | """ 30 | 31 | q_values_k_0_action_west: """ 32 | illegal 33 | 0.0000 34 | illegal 35 | """ 36 | 37 | values_k_1: """ 38 | -10.0000 39 | 0.0000 40 | 10.0000 41 | """ 42 | 43 | q_values_k_1_action_north: """ 44 | illegal 45 | -5.0000 46 | illegal 47 | """ 48 | 49 | q_values_k_1_action_east: """ 50 | illegal 51 | 0.0000 52 | illegal 53 | """ 54 | 55 | q_values_k_1_action_exit: """ 56 | -10.0000 57 | illegal 58 | 10.0000 59 | """ 60 | 61 | q_values_k_1_action_south: """ 62 | illegal 63 | 5.0000 64 | illegal 65 | """ 66 | 67 | q_values_k_1_action_west: """ 68 | illegal 69 | 0.0000 70 | illegal 71 | """ 72 | 73 | values_k_2: """ 74 | -10.0000 75 | 5.0000 76 | 10.0000 77 | """ 78 | 79 | q_values_k_2_action_north: """ 80 | illegal 81 | -5.0000 82 | illegal 83 | """ 84 | 85 | q_values_k_2_action_east: """ 86 | illegal 87 | 2.5000 88 | illegal 89 | """ 90 | 91 | q_values_k_2_action_exit: """ 92 | -10.0000 93 | illegal 94 | 10.0000 95 | """ 96 | 97 | q_values_k_2_action_south: """ 98 
| illegal 99 | 5.0000 100 | illegal 101 | """ 102 | 103 | q_values_k_2_action_west: """ 104 | illegal 105 | 2.5000 106 | illegal 107 | """ 108 | 109 | values_k_3: """ 110 | -10.0000 111 | 5.0000 112 | 10.0000 113 | """ 114 | 115 | q_values_k_3_action_north: """ 116 | illegal 117 | -5.0000 118 | illegal 119 | """ 120 | 121 | q_values_k_3_action_east: """ 122 | illegal 123 | 2.5000 124 | illegal 125 | """ 126 | 127 | q_values_k_3_action_exit: """ 128 | -10.0000 129 | illegal 130 | 10.0000 131 | """ 132 | 133 | q_values_k_3_action_south: """ 134 | illegal 135 | 5.0000 136 | illegal 137 | """ 138 | 139 | q_values_k_3_action_west: """ 140 | illegal 141 | 2.5000 142 | illegal 143 | """ 144 | 145 | values_k_4: """ 146 | -10.0000 147 | 5.0000 148 | 10.0000 149 | """ 150 | 151 | q_values_k_4_action_north: """ 152 | illegal 153 | -5.0000 154 | illegal 155 | """ 156 | 157 | q_values_k_4_action_east: """ 158 | illegal 159 | 2.5000 160 | illegal 161 | """ 162 | 163 | q_values_k_4_action_exit: """ 164 | -10.0000 165 | illegal 166 | 10.0000 167 | """ 168 | 169 | q_values_k_4_action_south: """ 170 | illegal 171 | 5.0000 172 | illegal 173 | """ 174 | 175 | q_values_k_4_action_west: """ 176 | illegal 177 | 2.5000 178 | illegal 179 | """ 180 | 181 | values_k_5: """ 182 | -10.0000 183 | 5.0000 184 | 10.0000 185 | """ 186 | 187 | q_values_k_5_action_north: """ 188 | illegal 189 | -5.0000 190 | illegal 191 | """ 192 | 193 | q_values_k_5_action_east: """ 194 | illegal 195 | 2.5000 196 | illegal 197 | """ 198 | 199 | q_values_k_5_action_exit: """ 200 | -10.0000 201 | illegal 202 | 10.0000 203 | """ 204 | 205 | q_values_k_5_action_south: """ 206 | illegal 207 | 5.0000 208 | illegal 209 | """ 210 | 211 | q_values_k_5_action_west: """ 212 | illegal 213 | 2.5000 214 | illegal 215 | """ 216 | 217 | values_k_6: """ 218 | -10.0000 219 | 5.0000 220 | 10.0000 221 | """ 222 | 223 | q_values_k_6_action_north: """ 224 | illegal 225 | -5.0000 226 | illegal 227 | """ 228 | 229 | 
q_values_k_6_action_east: """ 230 | illegal 231 | 2.5000 232 | illegal 233 | """ 234 | 235 | q_values_k_6_action_exit: """ 236 | -10.0000 237 | illegal 238 | 10.0000 239 | """ 240 | 241 | q_values_k_6_action_south: """ 242 | illegal 243 | 5.0000 244 | illegal 245 | """ 246 | 247 | q_values_k_6_action_west: """ 248 | illegal 249 | 2.5000 250 | illegal 251 | """ 252 | 253 | values_k_7: """ 254 | -10.0000 255 | 5.0000 256 | 10.0000 257 | """ 258 | 259 | q_values_k_7_action_north: """ 260 | illegal 261 | -5.0000 262 | illegal 263 | """ 264 | 265 | q_values_k_7_action_east: """ 266 | illegal 267 | 2.5000 268 | illegal 269 | """ 270 | 271 | q_values_k_7_action_exit: """ 272 | -10.0000 273 | illegal 274 | 10.0000 275 | """ 276 | 277 | q_values_k_7_action_south: """ 278 | illegal 279 | 5.0000 280 | illegal 281 | """ 282 | 283 | q_values_k_7_action_west: """ 284 | illegal 285 | 2.5000 286 | illegal 287 | """ 288 | 289 | values_k_8: """ 290 | -10.0000 291 | 5.0000 292 | 10.0000 293 | """ 294 | 295 | q_values_k_8_action_north: """ 296 | illegal 297 | -5.0000 298 | illegal 299 | """ 300 | 301 | q_values_k_8_action_east: """ 302 | illegal 303 | 2.5000 304 | illegal 305 | """ 306 | 307 | q_values_k_8_action_exit: """ 308 | -10.0000 309 | illegal 310 | 10.0000 311 | """ 312 | 313 | q_values_k_8_action_south: """ 314 | illegal 315 | 5.0000 316 | illegal 317 | """ 318 | 319 | q_values_k_8_action_west: """ 320 | illegal 321 | 2.5000 322 | illegal 323 | """ 324 | 325 | values_k_9: """ 326 | -10.0000 327 | 5.0000 328 | 10.0000 329 | """ 330 | 331 | q_values_k_9_action_north: """ 332 | illegal 333 | -5.0000 334 | illegal 335 | """ 336 | 337 | q_values_k_9_action_east: """ 338 | illegal 339 | 2.5000 340 | illegal 341 | """ 342 | 343 | q_values_k_9_action_exit: """ 344 | -10.0000 345 | illegal 346 | 10.0000 347 | """ 348 | 349 | q_values_k_9_action_south: """ 350 | illegal 351 | 5.0000 352 | illegal 353 | """ 354 | 355 | q_values_k_9_action_west: """ 356 | illegal 357 | 2.5000 358 | 
illegal 359 | """ 360 | 361 | values_k_100: """ 362 | -10.0000 363 | 5.0000 364 | 10.0000 365 | """ 366 | 367 | q_values_k_100_action_north: """ 368 | illegal 369 | -5.0000 370 | illegal 371 | """ 372 | 373 | q_values_k_100_action_east: """ 374 | illegal 375 | 2.5000 376 | illegal 377 | """ 378 | 379 | q_values_k_100_action_exit: """ 380 | -10.0000 381 | illegal 382 | 10.0000 383 | """ 384 | 385 | q_values_k_100_action_south: """ 386 | illegal 387 | 5.0000 388 | illegal 389 | """ 390 | 391 | q_values_k_100_action_west: """ 392 | illegal 393 | 2.5000 394 | illegal 395 | """ 396 | 397 | policy: """ 398 | exit 399 | south 400 | exit 401 | """ 402 | 403 | actions: """ 404 | north 405 | east 406 | exit 407 | south 408 | west 409 | """ 410 | 411 | -------------------------------------------------------------------------------- /test_cases/q1/1-tinygrid.test: -------------------------------------------------------------------------------- 1 | class: "ValueIterationTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 11 | S 12 | 10 13 | """ 14 | discount: "0.5" 15 | noise: "0.0" 16 | livingReward: "0.0" 17 | epsilon: "0.5" 18 | learningRate: "0.1" 19 | numExperiences: "100" 20 | valueIterations: "100" 21 | iterations: "10000" 22 | 23 | -------------------------------------------------------------------------------- /test_cases/q1/2-tinygrid-noisy.solution: -------------------------------------------------------------------------------- 1 | values_k_0: """ 2 | 0.0000 3 | 0.0000 4 | 0.0000 5 | """ 6 | 7 | q_values_k_0_action_north: """ 8 | illegal 9 | 0.0000 10 | illegal 11 | """ 12 | 13 | q_values_k_0_action_east: """ 14 | illegal 15 | 0.0000 16 | illegal 17 | """ 18 | 19 | q_values_k_0_action_exit: """ 20 | -10.0000 21 | illegal 22 | 10.0000 23 | """ 24 | 25 | q_values_k_0_action_south: """ 26 | illegal 27 | 0.0000 28 | illegal 29 | """ 30 | 
31 | q_values_k_0_action_west: """ 32 | illegal 33 | 0.0000 34 | illegal 35 | """ 36 | 37 | values_k_1: """ 38 | -10.0000 39 | 0.0000 40 | 10.0000 41 | """ 42 | 43 | q_values_k_1_action_north: """ 44 | illegal 45 | -5.6250 46 | illegal 47 | """ 48 | 49 | q_values_k_1_action_east: """ 50 | illegal 51 | 0.0000 52 | illegal 53 | """ 54 | 55 | q_values_k_1_action_exit: """ 56 | -10.0000 57 | illegal 58 | 10.0000 59 | """ 60 | 61 | q_values_k_1_action_south: """ 62 | illegal 63 | 5.6250 64 | illegal 65 | """ 66 | 67 | q_values_k_1_action_west: """ 68 | illegal 69 | 0.0000 70 | illegal 71 | """ 72 | 73 | values_k_2: """ 74 | -10.0000 75 | 5.6250 76 | 10.0000 77 | """ 78 | 79 | q_values_k_2_action_north: """ 80 | illegal 81 | -4.5703 82 | illegal 83 | """ 84 | 85 | q_values_k_2_action_east: """ 86 | illegal 87 | 3.1641 88 | illegal 89 | """ 90 | 91 | q_values_k_2_action_exit: """ 92 | -10.0000 93 | illegal 94 | 10.0000 95 | """ 96 | 97 | q_values_k_2_action_south: """ 98 | illegal 99 | 6.6797 100 | illegal 101 | """ 102 | 103 | q_values_k_2_action_west: """ 104 | illegal 105 | 3.1641 106 | illegal 107 | """ 108 | 109 | values_k_3: """ 110 | -10.0000 111 | 6.6797 112 | 10.0000 113 | """ 114 | 115 | q_values_k_3_action_north: """ 116 | illegal 117 | -4.3726 118 | illegal 119 | """ 120 | 121 | q_values_k_3_action_east: """ 122 | illegal 123 | 3.7573 124 | illegal 125 | """ 126 | 127 | q_values_k_3_action_exit: """ 128 | -10.0000 129 | illegal 130 | 10.0000 131 | """ 132 | 133 | q_values_k_3_action_south: """ 134 | illegal 135 | 6.8774 136 | illegal 137 | """ 138 | 139 | q_values_k_3_action_west: """ 140 | illegal 141 | 3.7573 142 | illegal 143 | """ 144 | 145 | values_k_4: """ 146 | -10.0000 147 | 6.8774 148 | 10.0000 149 | """ 150 | 151 | q_values_k_4_action_north: """ 152 | illegal 153 | -4.3355 154 | illegal 155 | """ 156 | 157 | q_values_k_4_action_east: """ 158 | illegal 159 | 3.8686 160 | illegal 161 | """ 162 | 163 | q_values_k_4_action_exit: """ 164 | -10.0000 165 | 
illegal 166 | 10.0000 167 | """ 168 | 169 | q_values_k_4_action_south: """ 170 | illegal 171 | 6.9145 172 | illegal 173 | """ 174 | 175 | q_values_k_4_action_west: """ 176 | illegal 177 | 3.8686 178 | illegal 179 | """ 180 | 181 | values_k_5: """ 182 | -10.0000 183 | 6.9145 184 | 10.0000 185 | """ 186 | 187 | q_values_k_5_action_north: """ 188 | illegal 189 | -4.3285 190 | illegal 191 | """ 192 | 193 | q_values_k_5_action_east: """ 194 | illegal 195 | 3.8894 196 | illegal 197 | """ 198 | 199 | q_values_k_5_action_exit: """ 200 | -10.0000 201 | illegal 202 | 10.0000 203 | """ 204 | 205 | q_values_k_5_action_south: """ 206 | illegal 207 | 6.9215 208 | illegal 209 | """ 210 | 211 | q_values_k_5_action_west: """ 212 | illegal 213 | 3.8894 214 | illegal 215 | """ 216 | 217 | values_k_6: """ 218 | -10.0000 219 | 6.9215 220 | 10.0000 221 | """ 222 | 223 | q_values_k_6_action_north: """ 224 | illegal 225 | -4.3272 226 | illegal 227 | """ 228 | 229 | q_values_k_6_action_east: """ 230 | illegal 231 | 3.8933 232 | illegal 233 | """ 234 | 235 | q_values_k_6_action_exit: """ 236 | -10.0000 237 | illegal 238 | 10.0000 239 | """ 240 | 241 | q_values_k_6_action_south: """ 242 | illegal 243 | 6.9228 244 | illegal 245 | """ 246 | 247 | q_values_k_6_action_west: """ 248 | illegal 249 | 3.8933 250 | illegal 251 | """ 252 | 253 | values_k_7: """ 254 | -10.0000 255 | 6.9228 256 | 10.0000 257 | """ 258 | 259 | q_values_k_7_action_north: """ 260 | illegal 261 | -4.3270 262 | illegal 263 | """ 264 | 265 | q_values_k_7_action_east: """ 266 | illegal 267 | 3.8941 268 | illegal 269 | """ 270 | 271 | q_values_k_7_action_exit: """ 272 | -10.0000 273 | illegal 274 | 10.0000 275 | """ 276 | 277 | q_values_k_7_action_south: """ 278 | illegal 279 | 6.9230 280 | illegal 281 | """ 282 | 283 | q_values_k_7_action_west: """ 284 | illegal 285 | 3.8941 286 | illegal 287 | """ 288 | 289 | values_k_8: """ 290 | -10.0000 291 | 6.9230 292 | 10.0000 293 | """ 294 | 295 | q_values_k_8_action_north: """ 296 | 
illegal 297 | -4.3269 298 | illegal 299 | """ 300 | 301 | q_values_k_8_action_east: """ 302 | illegal 303 | 3.8942 304 | illegal 305 | """ 306 | 307 | q_values_k_8_action_exit: """ 308 | -10.0000 309 | illegal 310 | 10.0000 311 | """ 312 | 313 | q_values_k_8_action_south: """ 314 | illegal 315 | 6.9231 316 | illegal 317 | """ 318 | 319 | q_values_k_8_action_west: """ 320 | illegal 321 | 3.8942 322 | illegal 323 | """ 324 | 325 | values_k_9: """ 326 | -10.0000 327 | 6.9231 328 | 10.0000 329 | """ 330 | 331 | q_values_k_9_action_north: """ 332 | illegal 333 | -4.3269 334 | illegal 335 | """ 336 | 337 | q_values_k_9_action_east: """ 338 | illegal 339 | 3.8942 340 | illegal 341 | """ 342 | 343 | q_values_k_9_action_exit: """ 344 | -10.0000 345 | illegal 346 | 10.0000 347 | """ 348 | 349 | q_values_k_9_action_south: """ 350 | illegal 351 | 6.9231 352 | illegal 353 | """ 354 | 355 | q_values_k_9_action_west: """ 356 | illegal 357 | 3.8942 358 | illegal 359 | """ 360 | 361 | values_k_100: """ 362 | -10.0000 363 | 6.9231 364 | 10.0000 365 | """ 366 | 367 | q_values_k_100_action_north: """ 368 | illegal 369 | -4.3269 370 | illegal 371 | """ 372 | 373 | q_values_k_100_action_east: """ 374 | illegal 375 | 3.8942 376 | illegal 377 | """ 378 | 379 | q_values_k_100_action_exit: """ 380 | -10.0000 381 | illegal 382 | 10.0000 383 | """ 384 | 385 | q_values_k_100_action_south: """ 386 | illegal 387 | 6.9231 388 | illegal 389 | """ 390 | 391 | q_values_k_100_action_west: """ 392 | illegal 393 | 3.8942 394 | illegal 395 | """ 396 | 397 | policy: """ 398 | exit 399 | south 400 | exit 401 | """ 402 | 403 | actions: """ 404 | north 405 | east 406 | exit 407 | south 408 | west 409 | """ 410 | 411 | -------------------------------------------------------------------------------- /test_cases/q1/2-tinygrid-noisy.test: -------------------------------------------------------------------------------- 1 | class: "ValueIterationTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # 
numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 11 | S 12 | 10 13 | """ 14 | discount: "0.75" 15 | noise: "0.25" 16 | livingReward: "0.0" 17 | epsilon: "0.5" 18 | learningRate: "0.1" 19 | numExperiences: "100" 20 | valueIterations: "100" 21 | iterations: "10000" 22 | 23 | -------------------------------------------------------------------------------- /test_cases/q1/3-bridge.test: -------------------------------------------------------------------------------- 1 | class: "ValueIterationTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | # 10 # 11 | -100 _ -100 12 | -100 _ -100 13 | -100 _ -100 14 | -100 _ -100 15 | -100 S -100 16 | # 1 # 17 | """ 18 | gridName: "bridgeGrid" 19 | discount: "0.85" 20 | noise: "0.1" 21 | livingReward: "0.0" 22 | epsilon: "0.5" 23 | learningRate: "0.1" 24 | numExperiences: "500" 25 | valueIterations: "100" 26 | iterations: "10000" 27 | 28 | -------------------------------------------------------------------------------- /test_cases/q1/4-discountgrid.test: -------------------------------------------------------------------------------- 1 | class: "ValueIterationTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 _ 10 _ _ 11 | -10 _ # _ _ 12 | -10 _ 1 _ _ 13 | -10 _ # # _ 14 | -10 S _ _ _ 15 | """ 16 | discount: "0.9" 17 | noise: "0.2" 18 | livingReward: "0.0" 19 | epsilon: "0.2" 20 | learningRate: "0.1" 21 | numExperiences: "3000" 22 | valueIterations: "100" 23 | iterations: "10000" 24 | 25 | -------------------------------------------------------------------------------- /test_cases/q1/CONFIG: -------------------------------------------------------------------------------- 1 | max_points: "6" 2 | class: 
"PassAllTestsQuestion" -------------------------------------------------------------------------------- /test_cases/q2/1-bridge-grid.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q2/1-bridge-grid.test. 2 | # File intentionally blank. 3 | -------------------------------------------------------------------------------- /test_cases/q2/1-bridge-grid.test: -------------------------------------------------------------------------------- 1 | class: "GridPolicyTest" 2 | 3 | # Function in module in analysis that returns (discount, noise) 4 | parameterFn: "question2" 5 | question2: "true" 6 | 7 | # GridWorld specification 8 | # _ is empty space 9 | # numbers are terminal states with that value 10 | # # is a wall 11 | # S is a start state 12 | # 13 | grid: """ 14 | # -100 -100 -100 -100 -100 # 15 | 1 S _ _ _ _ 10 16 | # -100 -100 -100 -100 -100 # 17 | """ 18 | gridName: "bridgeGrid" 19 | 20 | # Policy specification 21 | # _ policy choice not checked 22 | # N, E, S, W policy action must be north, east, south, west 23 | # 24 | policy: """ 25 | _ _ _ _ _ _ _ 26 | _ E _ _ _ _ _ 27 | _ _ _ _ _ _ _ 28 | """ 29 | 30 | -------------------------------------------------------------------------------- /test_cases/q2/CONFIG: -------------------------------------------------------------------------------- 1 | max_points: "1" 2 | class: "PassAllTestsQuestion" 3 | -------------------------------------------------------------------------------- /test_cases/q3/1-question-3.1.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q3/1-question-3.1.test. 2 | # File intentionally blank. 
3 | -------------------------------------------------------------------------------- /test_cases/q3/1-question-3.1.test: -------------------------------------------------------------------------------- 1 | class: "GridPolicyTest" 2 | 3 | # Function in module in analysis that returns (discount, noise) 4 | parameterFn: "question3a" 5 | 6 | # GridWorld specification 7 | # _ is empty space 8 | # numbers are terminal states with that value 9 | # # is a wall 10 | # S is a start state 11 | # 12 | grid: """ 13 | _ _ _ _ _ 14 | _ # _ _ _ 15 | _ # 1 # 10 16 | S _ _ _ _ 17 | -10 -10 -10 -10 -10 18 | """ 19 | gridName: "discountGrid" 20 | 21 | # Policy specification 22 | # _ policy choice not checked 23 | # N, E, S, W policy action must be north, east, south, west 24 | # 25 | policy: """ 26 | _ _ _ _ _ 27 | _ _ _ _ _ 28 | _ _ _ _ _ 29 | E E N _ _ 30 | _ _ _ _ _ 31 | """ 32 | -------------------------------------------------------------------------------- /test_cases/q3/2-question-3.2.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q3/2-question-3.2.test. 2 | # File intentionally blank. 
3 | -------------------------------------------------------------------------------- /test_cases/q3/2-question-3.2.test: -------------------------------------------------------------------------------- 1 | class: "GridPolicyTest" 2 | 3 | # Function in module in analysis that returns (discount, noise) 4 | parameterFn: "question3b" 5 | 6 | # GridWorld specification 7 | # _ is empty space 8 | # numbers are terminal states with that value 9 | # # is a wall 10 | # S is a start state 11 | # 12 | grid: """ 13 | _ _ _ _ _ 14 | _ # _ _ _ 15 | _ # 1 # 10 16 | S _ _ _ _ 17 | -10 -10 -10 -10 -10 18 | """ 19 | gridName: "discountGrid" 20 | 21 | # Policy specification 22 | # _ policy choice not checked 23 | # N, E, S, W policy action must be north, east, south, west 24 | # 25 | policy: """ 26 | E E S _ _ 27 | N _ S _ _ 28 | N _ _ _ _ 29 | N _ _ _ _ 30 | _ _ _ _ _ 31 | """ 32 | -------------------------------------------------------------------------------- /test_cases/q3/3-question-3.3.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q3/3-question-3.3.test. 2 | # File intentionally blank. 
3 | -------------------------------------------------------------------------------- /test_cases/q3/3-question-3.3.test: -------------------------------------------------------------------------------- 1 | class: "GridPolicyTest" 2 | 3 | # Function in module in analysis that returns (discount, noise) 4 | parameterFn: "question3c" 5 | 6 | # GridWorld specification 7 | # _ is empty space 8 | # numbers are terminal states with that value 9 | # # is a wall 10 | # S is a start state 11 | # 12 | grid: """ 13 | _ _ _ _ _ 14 | _ # _ _ _ 15 | _ # 1 # 10 16 | S _ _ _ _ 17 | -10 -10 -10 -10 -10 18 | """ 19 | gridName: "discountGrid" 20 | 21 | # Policy specification 22 | # _ policy choice not checked 23 | # N, E, S, W policy action must be north, east, south, west 24 | # 25 | policy: """ 26 | _ _ _ _ _ 27 | _ _ _ _ _ 28 | _ _ _ _ _ 29 | E E E E N 30 | _ _ _ _ _ 31 | """ 32 | -------------------------------------------------------------------------------- /test_cases/q3/4-question-3.4.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q3/4-question-3.4.test. 2 | # File intentionally blank. 
3 | -------------------------------------------------------------------------------- /test_cases/q3/4-question-3.4.test: -------------------------------------------------------------------------------- 1 | class: "GridPolicyTest" 2 | 3 | # Function in module in analysis that returns (discount, noise) 4 | parameterFn: "question3d" 5 | 6 | # GridWorld specification 7 | # _ is empty space 8 | # numbers are terminal states with that value 9 | # # is a wall 10 | # S is a start state 11 | # 12 | grid: """ 13 | _ _ _ _ _ 14 | _ # _ _ _ 15 | _ # 1 # 10 16 | S _ _ _ _ 17 | -10 -10 -10 -10 -10 18 | """ 19 | gridName: "discountGrid" 20 | 21 | # Policy specification 22 | # _ policy choice not checked 23 | # N, E, S, W policy action must be north, east, south, west 24 | # 25 | policy: """ 26 | _ _ _ _ _ 27 | _ _ _ _ _ 28 | _ _ _ _ _ 29 | N _ _ _ _ 30 | _ _ _ _ _ 31 | """ 32 | 33 | # State the most probable path must visit 34 | # (x,y) for a particular location; (0,0) is bottom left 35 | # TERMINAL_STATE for the terminal state 36 | pathVisits: "(4,2)" 37 | -------------------------------------------------------------------------------- /test_cases/q3/5-question-3.5.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q3/5-question-3.5.test. 2 | # File intentionally blank. 
3 | -------------------------------------------------------------------------------- /test_cases/q3/5-question-3.5.test: -------------------------------------------------------------------------------- 1 | class: "GridPolicyTest" 2 | 3 | # Function in module in analysis that returns (discount, noise) 4 | parameterFn: "question3e" 5 | 6 | # GridWorld specification 7 | # _ is empty space 8 | # numbers are terminal states with that value 9 | # # is a wall 10 | # S is a start state 11 | # 12 | grid: """ 13 | _ _ _ _ _ 14 | _ # _ _ _ 15 | _ # 1 # 10 16 | S _ _ _ _ 17 | -10 -10 -10 -10 -10 18 | """ 19 | gridName: "discountGrid" 20 | 21 | # Policy specification 22 | # _ policy choice not checked 23 | # N, E, S, W policy action must be north, east, south, west 24 | # 25 | policy: """ 26 | _ _ _ _ _ 27 | _ _ _ _ _ 28 | _ _ _ _ _ 29 | _ _ _ _ _ 30 | _ _ _ _ _ 31 | """ 32 | 33 | # State the most probable path must not visit 34 | # (x,y) for a particular location; (0,0) is bottom left 35 | # TERMINAL_STATE for the terminal state 36 | pathNotVisits: "TERMINAL_STATE" 37 | -------------------------------------------------------------------------------- /test_cases/q3/CONFIG: -------------------------------------------------------------------------------- 1 | max_points: "5" 2 | class: "NumberPassedQuestion" 3 | -------------------------------------------------------------------------------- /test_cases/q4/1-tinygrid.solution: -------------------------------------------------------------------------------- 1 | q_values_k_0_action_north: """ 2 | illegal 3 | 0.0000 4 | illegal 5 | """ 6 | 7 | q_values_k_0_action_east: """ 8 | illegal 9 | 0.0000 10 | illegal 11 | """ 12 | 13 | q_values_k_0_action_exit: """ 14 | 0.0000 15 | illegal 16 | 0.0000 17 | """ 18 | 19 | q_values_k_0_action_south: """ 20 | illegal 21 | 0.0000 22 | illegal 23 | """ 24 | 25 | q_values_k_0_action_west: """ 26 | illegal 27 | 0.0000 28 | illegal 29 | """ 30 | 31 | q_values_k_1_action_north: """ 32 | illegal 33 | 
0.0000 34 | illegal 35 | """ 36 | 37 | q_values_k_1_action_east: """ 38 | illegal 39 | 0.0000 40 | illegal 41 | """ 42 | 43 | q_values_k_1_action_exit: """ 44 | 0.0000 45 | illegal 46 | 1.0000 47 | """ 48 | 49 | q_values_k_1_action_south: """ 50 | illegal 51 | 0.0000 52 | illegal 53 | """ 54 | 55 | q_values_k_1_action_west: """ 56 | illegal 57 | 0.0000 58 | illegal 59 | """ 60 | 61 | q_values_k_2_action_north: """ 62 | illegal 63 | 0.0000 64 | illegal 65 | """ 66 | 67 | q_values_k_2_action_east: """ 68 | illegal 69 | 0.0000 70 | illegal 71 | """ 72 | 73 | q_values_k_2_action_exit: """ 74 | 0.0000 75 | illegal 76 | 1.0000 77 | """ 78 | 79 | q_values_k_2_action_south: """ 80 | illegal 81 | 0.0000 82 | illegal 83 | """ 84 | 85 | q_values_k_2_action_west: """ 86 | illegal 87 | 0.0000 88 | illegal 89 | """ 90 | 91 | q_values_k_3_action_north: """ 92 | illegal 93 | 0.0000 94 | illegal 95 | """ 96 | 97 | q_values_k_3_action_east: """ 98 | illegal 99 | 0.0000 100 | illegal 101 | """ 102 | 103 | q_values_k_3_action_exit: """ 104 | 0.0000 105 | illegal 106 | 1.9000 107 | """ 108 | 109 | q_values_k_3_action_south: """ 110 | illegal 111 | 0.0000 112 | illegal 113 | """ 114 | 115 | q_values_k_3_action_west: """ 116 | illegal 117 | 0.0000 118 | illegal 119 | """ 120 | 121 | q_values_k_4_action_north: """ 122 | illegal 123 | 0.0000 124 | illegal 125 | """ 126 | 127 | q_values_k_4_action_east: """ 128 | illegal 129 | 0.0000 130 | illegal 131 | """ 132 | 133 | q_values_k_4_action_exit: """ 134 | 0.0000 135 | illegal 136 | 2.7100 137 | """ 138 | 139 | q_values_k_4_action_south: """ 140 | illegal 141 | 0.0000 142 | illegal 143 | """ 144 | 145 | q_values_k_4_action_west: """ 146 | illegal 147 | 0.0000 148 | illegal 149 | """ 150 | 151 | q_values_k_5_action_north: """ 152 | illegal 153 | 0.0000 154 | illegal 155 | """ 156 | 157 | q_values_k_5_action_east: """ 158 | illegal 159 | 0.0000 160 | illegal 161 | """ 162 | 163 | q_values_k_5_action_exit: """ 164 | -1.0000 165 | illegal 166 | 
2.7100 167 | """ 168 | 169 | q_values_k_5_action_south: """ 170 | illegal 171 | 0.0000 172 | illegal 173 | """ 174 | 175 | q_values_k_5_action_west: """ 176 | illegal 177 | 0.0000 178 | illegal 179 | """ 180 | 181 | q_values_k_6_action_north: """ 182 | illegal 183 | 0.0000 184 | illegal 185 | """ 186 | 187 | q_values_k_6_action_east: """ 188 | illegal 189 | 0.0000 190 | illegal 191 | """ 192 | 193 | q_values_k_6_action_exit: """ 194 | -1.0000 195 | illegal 196 | 3.4390 197 | """ 198 | 199 | q_values_k_6_action_south: """ 200 | illegal 201 | 0.0000 202 | illegal 203 | """ 204 | 205 | q_values_k_6_action_west: """ 206 | illegal 207 | 0.0000 208 | illegal 209 | """ 210 | 211 | q_values_k_7_action_north: """ 212 | illegal 213 | 0.0000 214 | illegal 215 | """ 216 | 217 | q_values_k_7_action_east: """ 218 | illegal 219 | 0.0000 220 | illegal 221 | """ 222 | 223 | q_values_k_7_action_exit: """ 224 | -1.0000 225 | illegal 226 | 3.4390 227 | """ 228 | 229 | q_values_k_7_action_south: """ 230 | illegal 231 | 0.1720 232 | illegal 233 | """ 234 | 235 | q_values_k_7_action_west: """ 236 | illegal 237 | 0.0000 238 | illegal 239 | """ 240 | 241 | q_values_k_8_action_north: """ 242 | illegal 243 | 0.0000 244 | illegal 245 | """ 246 | 247 | q_values_k_8_action_east: """ 248 | illegal 249 | 0.0000 250 | illegal 251 | """ 252 | 253 | q_values_k_8_action_exit: """ 254 | -1.0000 255 | illegal 256 | 4.0951 257 | """ 258 | 259 | q_values_k_8_action_south: """ 260 | illegal 261 | 0.1720 262 | illegal 263 | """ 264 | 265 | q_values_k_8_action_west: """ 266 | illegal 267 | 0.0000 268 | illegal 269 | """ 270 | 271 | q_values_k_9_action_north: """ 272 | illegal 273 | 0.0000 274 | illegal 275 | """ 276 | 277 | q_values_k_9_action_east: """ 278 | illegal 279 | 0.0000 280 | illegal 281 | """ 282 | 283 | q_values_k_9_action_exit: """ 284 | -1.0000 285 | illegal 286 | 4.6856 287 | """ 288 | 289 | q_values_k_9_action_south: """ 290 | illegal 291 | 0.1720 292 | illegal 293 | """ 294 | 295 | 
q_values_k_9_action_west: """ 296 | illegal 297 | 0.0000 298 | illegal 299 | """ 300 | 301 | q_values_k_100_action_north: """ 302 | illegal 303 | -0.4534 304 | illegal 305 | """ 306 | 307 | q_values_k_100_action_east: """ 308 | illegal 309 | 0.4063 310 | illegal 311 | """ 312 | 313 | q_values_k_100_action_exit: """ 314 | -9.4767 315 | illegal 316 | 9.8175 317 | """ 318 | 319 | q_values_k_100_action_south: """ 320 | illegal 321 | 2.1267 322 | illegal 323 | """ 324 | 325 | q_values_k_100_action_west: """ 326 | illegal 327 | 0.3919 328 | illegal 329 | """ 330 | 331 | values: """ 332 | -9.4767 333 | 2.1267 334 | 9.8175 335 | """ 336 | 337 | policy: """ 338 | exit 339 | south 340 | exit 341 | """ 342 | 343 | -------------------------------------------------------------------------------- /test_cases/q4/1-tinygrid.test: -------------------------------------------------------------------------------- 1 | class: "QLearningTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 11 | S 12 | 10 13 | """ 14 | discount: "0.5" 15 | noise: "0.0" 16 | livingReward: "0.0" 17 | epsilon: "0.5" 18 | learningRate: "0.1" 19 | numExperiences: "100" 20 | valueIterations: "100" 21 | iterations: "10000" 22 | 23 | -------------------------------------------------------------------------------- /test_cases/q4/2-tinygrid-noisy.solution: -------------------------------------------------------------------------------- 1 | q_values_k_0_action_north: """ 2 | illegal 3 | 0.0000 4 | illegal 5 | """ 6 | 7 | q_values_k_0_action_east: """ 8 | illegal 9 | 0.0000 10 | illegal 11 | """ 12 | 13 | q_values_k_0_action_exit: """ 14 | 0.0000 15 | illegal 16 | 0.0000 17 | """ 18 | 19 | q_values_k_0_action_south: """ 20 | illegal 21 | 0.0000 22 | illegal 23 | """ 24 | 25 | q_values_k_0_action_west: """ 26 | illegal 27 | 0.0000 28 | illegal 29 | """ 30 | 31 | q_values_k_1_action_north: 
""" 32 | illegal 33 | 0.0000 34 | illegal 35 | """ 36 | 37 | q_values_k_1_action_east: """ 38 | illegal 39 | 0.0000 40 | illegal 41 | """ 42 | 43 | q_values_k_1_action_exit: """ 44 | 0.0000 45 | illegal 46 | 1.0000 47 | """ 48 | 49 | q_values_k_1_action_south: """ 50 | illegal 51 | 0.0000 52 | illegal 53 | """ 54 | 55 | q_values_k_1_action_west: """ 56 | illegal 57 | 0.0000 58 | illegal 59 | """ 60 | 61 | q_values_k_2_action_north: """ 62 | illegal 63 | 0.0000 64 | illegal 65 | """ 66 | 67 | q_values_k_2_action_east: """ 68 | illegal 69 | 0.0000 70 | illegal 71 | """ 72 | 73 | q_values_k_2_action_exit: """ 74 | 0.0000 75 | illegal 76 | 1.0000 77 | """ 78 | 79 | q_values_k_2_action_south: """ 80 | illegal 81 | 0.0000 82 | illegal 83 | """ 84 | 85 | q_values_k_2_action_west: """ 86 | illegal 87 | 0.0000 88 | illegal 89 | """ 90 | 91 | q_values_k_3_action_north: """ 92 | illegal 93 | 0.0000 94 | illegal 95 | """ 96 | 97 | q_values_k_3_action_east: """ 98 | illegal 99 | 0.0000 100 | illegal 101 | """ 102 | 103 | q_values_k_3_action_exit: """ 104 | 0.0000 105 | illegal 106 | 1.9000 107 | """ 108 | 109 | q_values_k_3_action_south: """ 110 | illegal 111 | 0.0000 112 | illegal 113 | """ 114 | 115 | q_values_k_3_action_west: """ 116 | illegal 117 | 0.0000 118 | illegal 119 | """ 120 | 121 | q_values_k_4_action_north: """ 122 | illegal 123 | 0.0000 124 | illegal 125 | """ 126 | 127 | q_values_k_4_action_east: """ 128 | illegal 129 | 0.0000 130 | illegal 131 | """ 132 | 133 | q_values_k_4_action_exit: """ 134 | 0.0000 135 | illegal 136 | 2.7100 137 | """ 138 | 139 | q_values_k_4_action_south: """ 140 | illegal 141 | 0.0000 142 | illegal 143 | """ 144 | 145 | q_values_k_4_action_west: """ 146 | illegal 147 | 0.0000 148 | illegal 149 | """ 150 | 151 | q_values_k_5_action_north: """ 152 | illegal 153 | 0.0000 154 | illegal 155 | """ 156 | 157 | q_values_k_5_action_east: """ 158 | illegal 159 | 0.0000 160 | illegal 161 | """ 162 | 163 | q_values_k_5_action_exit: """ 164 | -1.0000 
165 | illegal 166 | 2.7100 167 | """ 168 | 169 | q_values_k_5_action_south: """ 170 | illegal 171 | 0.0000 172 | illegal 173 | """ 174 | 175 | q_values_k_5_action_west: """ 176 | illegal 177 | 0.0000 178 | illegal 179 | """ 180 | 181 | q_values_k_6_action_north: """ 182 | illegal 183 | 0.0000 184 | illegal 185 | """ 186 | 187 | q_values_k_6_action_east: """ 188 | illegal 189 | 0.0000 190 | illegal 191 | """ 192 | 193 | q_values_k_6_action_exit: """ 194 | -1.0000 195 | illegal 196 | 3.4390 197 | """ 198 | 199 | q_values_k_6_action_south: """ 200 | illegal 201 | 0.0000 202 | illegal 203 | """ 204 | 205 | q_values_k_6_action_west: """ 206 | illegal 207 | 0.0000 208 | illegal 209 | """ 210 | 211 | q_values_k_7_action_north: """ 212 | illegal 213 | 0.0000 214 | illegal 215 | """ 216 | 217 | q_values_k_7_action_east: """ 218 | illegal 219 | 0.0000 220 | illegal 221 | """ 222 | 223 | q_values_k_7_action_exit: """ 224 | -1.0000 225 | illegal 226 | 3.4390 227 | """ 228 | 229 | q_values_k_7_action_south: """ 230 | illegal 231 | 0.2579 232 | illegal 233 | """ 234 | 235 | q_values_k_7_action_west: """ 236 | illegal 237 | 0.0000 238 | illegal 239 | """ 240 | 241 | q_values_k_8_action_north: """ 242 | illegal 243 | 0.0000 244 | illegal 245 | """ 246 | 247 | q_values_k_8_action_east: """ 248 | illegal 249 | 0.0000 250 | illegal 251 | """ 252 | 253 | q_values_k_8_action_exit: """ 254 | -1.0000 255 | illegal 256 | 4.0951 257 | """ 258 | 259 | q_values_k_8_action_south: """ 260 | illegal 261 | 0.2579 262 | illegal 263 | """ 264 | 265 | q_values_k_8_action_west: """ 266 | illegal 267 | 0.0000 268 | illegal 269 | """ 270 | 271 | q_values_k_9_action_north: """ 272 | illegal 273 | 0.0000 274 | illegal 275 | """ 276 | 277 | q_values_k_9_action_east: """ 278 | illegal 279 | 0.0000 280 | illegal 281 | """ 282 | 283 | q_values_k_9_action_exit: """ 284 | -1.0000 285 | illegal 286 | 4.6856 287 | """ 288 | 289 | q_values_k_9_action_south: """ 290 | illegal 291 | 0.2579 292 | illegal 293 | """ 
294 | 295 | q_values_k_9_action_west: """ 296 | illegal 297 | 0.0000 298 | illegal 299 | """ 300 | 301 | q_values_k_100_action_north: """ 302 | illegal 303 | -0.6670 304 | illegal 305 | """ 306 | 307 | q_values_k_100_action_east: """ 308 | illegal 309 | 0.9499 310 | illegal 311 | """ 312 | 313 | q_values_k_100_action_exit: """ 314 | -9.4767 315 | illegal 316 | 9.8175 317 | """ 318 | 319 | q_values_k_100_action_south: """ 320 | illegal 321 | 3.2562 322 | illegal 323 | """ 324 | 325 | q_values_k_100_action_west: """ 326 | illegal 327 | 0.8236 328 | illegal 329 | """ 330 | 331 | values: """ 332 | -9.4767 333 | 3.2562 334 | 9.8175 335 | """ 336 | 337 | policy: """ 338 | exit 339 | south 340 | exit 341 | """ 342 | 343 | -------------------------------------------------------------------------------- /test_cases/q4/2-tinygrid-noisy.test: -------------------------------------------------------------------------------- 1 | class: "QLearningTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 11 | S 12 | 10 13 | """ 14 | discount: "0.75" 15 | noise: "0.25" 16 | livingReward: "0.0" 17 | epsilon: "0.5" 18 | learningRate: "0.1" 19 | numExperiences: "100" 20 | valueIterations: "100" 21 | iterations: "10000" 22 | 23 | -------------------------------------------------------------------------------- /test_cases/q4/3-bridge.test: -------------------------------------------------------------------------------- 1 | class: "QLearningTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | # 10 # 11 | -100 _ -100 12 | -100 _ -100 13 | -100 _ -100 14 | -100 _ -100 15 | -100 S -100 16 | # 1 # 17 | """ 18 | gridName: "bridgeGrid" 19 | discount: "0.85" 20 | noise: "0.1" 21 | livingReward: "0.0" 22 | epsilon: "0.5" 23 | learningRate: "0.1" 
24 | numExperiences: "500" 25 | valueIterations: "100" 26 | iterations: "10000" 27 | 28 | -------------------------------------------------------------------------------- /test_cases/q4/4-discountgrid.test: -------------------------------------------------------------------------------- 1 | class: "QLearningTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 _ 10 _ _ 11 | -10 _ # _ _ 12 | -10 _ 1 _ _ 13 | -10 _ # # _ 14 | -10 S _ _ _ 15 | """ 16 | discount: "0.9" 17 | noise: "0.2" 18 | livingReward: "0.0" 19 | epsilon: "0.2" 20 | learningRate: "0.1" 21 | numExperiences: "3000" 22 | valueIterations: "100" 23 | iterations: "10000" 24 | 25 | -------------------------------------------------------------------------------- /test_cases/q4/CONFIG: -------------------------------------------------------------------------------- 1 | max_points: "5" 2 | class: "PassAllTestsQuestion" 3 | -------------------------------------------------------------------------------- /test_cases/q5/1-tinygrid.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q5/1-tinygrid.test. 2 | # File intentionally blank. 
3 | -------------------------------------------------------------------------------- /test_cases/q5/1-tinygrid.test: -------------------------------------------------------------------------------- 1 | class: "EpsilonGreedyTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 11 | S 12 | 10 13 | """ 14 | discount: "0.5" 15 | noise: "0.0" 16 | livingReward: "0.0" 17 | epsilon: "0.5" 18 | learningRate: "0.1" 19 | numExperiences: "100" 20 | valueIterations: "100" 21 | iterations: "10000" 22 | 23 | -------------------------------------------------------------------------------- /test_cases/q5/2-tinygrid-noisy.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q5/2-tinygrid-noisy.test. 2 | # File intentionally blank. 3 | -------------------------------------------------------------------------------- /test_cases/q5/2-tinygrid-noisy.test: -------------------------------------------------------------------------------- 1 | class: "EpsilonGreedyTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 11 | S 12 | 10 13 | """ 14 | discount: "0.75" 15 | noise: "0.25" 16 | livingReward: "0.0" 17 | epsilon: "0.5" 18 | learningRate: "0.1" 19 | numExperiences: "100" 20 | valueIterations: "100" 21 | iterations: "10000" 22 | 23 | -------------------------------------------------------------------------------- /test_cases/q5/3-bridge.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q5/3-bridge.test. 2 | # File intentionally blank. 
3 | -------------------------------------------------------------------------------- /test_cases/q5/3-bridge.test: -------------------------------------------------------------------------------- 1 | class: "EpsilonGreedyTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | # 10 # 11 | -100 _ -100 12 | -100 _ -100 13 | -100 _ -100 14 | -100 _ -100 15 | -100 S -100 16 | # 1 # 17 | """ 18 | gridName: "bridgeGrid" 19 | discount: "0.85" 20 | noise: "0.1" 21 | livingReward: "0.0" 22 | epsilon: "0.5" 23 | learningRate: "0.1" 24 | numExperiences: "500" 25 | valueIterations: "100" 26 | iterations: "10000" 27 | 28 | -------------------------------------------------------------------------------- /test_cases/q5/4-discountgrid.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q5/4-discountgrid.test. 2 | # File intentionally blank. 
3 | -------------------------------------------------------------------------------- /test_cases/q5/4-discountgrid.test: -------------------------------------------------------------------------------- 1 | class: "EpsilonGreedyTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 _ 10 _ _ 11 | -10 _ # _ _ 12 | -10 _ 1 _ _ 13 | -10 _ # # _ 14 | -10 S _ _ _ 15 | """ 16 | discount: "0.9" 17 | noise: "0.2" 18 | livingReward: "0.0" 19 | epsilon: "0.2" 20 | learningRate: "0.1" 21 | numExperiences: "3000" 22 | valueIterations: "100" 23 | iterations: "10000" 24 | 25 | -------------------------------------------------------------------------------- /test_cases/q5/CONFIG: -------------------------------------------------------------------------------- 1 | max_points: "3" 2 | class: "PassAllTestsQuestion" 3 | -------------------------------------------------------------------------------- /test_cases/q6/CONFIG: -------------------------------------------------------------------------------- 1 | max_points: "1" 2 | class: "PassAllTestsQuestion" 3 | -------------------------------------------------------------------------------- /test_cases/q6/grade-agent.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q6/grade-agent.test. 2 | # File intentionally blank. 
3 | -------------------------------------------------------------------------------- /test_cases/q6/grade-agent.test: -------------------------------------------------------------------------------- 1 | class: "Question6Test" 2 | 3 | -------------------------------------------------------------------------------- /test_cases/q7/CONFIG: -------------------------------------------------------------------------------- 1 | max_points: "1" 2 | class: "PassAllTestsQuestion" 3 | -------------------------------------------------------------------------------- /test_cases/q7/grade-agent.solution: -------------------------------------------------------------------------------- 1 | # This is the solution file for test_cases/q7/grade-agent.test. 2 | # File intentionally blank. 3 | -------------------------------------------------------------------------------- /test_cases/q7/grade-agent.test: -------------------------------------------------------------------------------- 1 | class: "EvalAgentTest" 2 | 3 | # 100 test games after 2000 training games 4 | pacmanParams: "-p PacmanQAgent -x 2000 -n 2100 -l smallGrid -q -f --fixRandomSeed" 5 | 6 | winsThresholds: "70" 7 | -------------------------------------------------------------------------------- /test_cases/q8/1-tinygrid.solution: -------------------------------------------------------------------------------- 1 | weights_k_0: """ 2 | {((0, 0), 'exit'): 0, 3 | ((0, 1), 'east'): 0, 4 | ((0, 1), 'north'): 0, 5 | ((0, 1), 'south'): 0, 6 | ((0, 1), 'west'): 0, 7 | ((0, 2), 'exit'): 0} 8 | """ 9 | 10 | q_values_k_0_action_north: """ 11 | illegal 12 | 0.0000 13 | illegal 14 | """ 15 | 16 | q_values_k_0_action_east: """ 17 | illegal 18 | 0.0000 19 | illegal 20 | """ 21 | 22 | q_values_k_0_action_exit: """ 23 | 0.0000 24 | illegal 25 | 0.0000 26 | """ 27 | 28 | q_values_k_0_action_south: """ 29 | illegal 30 | 0.0000 31 | illegal 32 | """ 33 | 34 | q_values_k_0_action_west: """ 35 | illegal 36 | 0.0000 37 | illegal 38 | """ 39 | 40 | 
weights_k_1: """ 41 | {((0, 0), 'exit'): 1.0, 42 | ((0, 1), 'east'): 0, 43 | ((0, 1), 'north'): 0, 44 | ((0, 1), 'south'): 0, 45 | ((0, 1), 'west'): 0, 46 | ((0, 2), 'exit'): 0} 47 | """ 48 | 49 | q_values_k_1_action_north: """ 50 | illegal 51 | 0.0000 52 | illegal 53 | """ 54 | 55 | q_values_k_1_action_east: """ 56 | illegal 57 | 0.0000 58 | illegal 59 | """ 60 | 61 | q_values_k_1_action_exit: """ 62 | 0.0000 63 | illegal 64 | 1.0000 65 | """ 66 | 67 | q_values_k_1_action_south: """ 68 | illegal 69 | 0.0000 70 | illegal 71 | """ 72 | 73 | q_values_k_1_action_west: """ 74 | illegal 75 | 0.0000 76 | illegal 77 | """ 78 | 79 | weights_k_2: """ 80 | {((0, 0), 'exit'): 1.0, 81 | ((0, 1), 'east'): 0, 82 | ((0, 1), 'north'): 0, 83 | ((0, 1), 'south'): 0.0, 84 | ((0, 1), 'west'): 0, 85 | ((0, 2), 'exit'): 0} 86 | """ 87 | 88 | q_values_k_2_action_north: """ 89 | illegal 90 | 0.0000 91 | illegal 92 | """ 93 | 94 | q_values_k_2_action_east: """ 95 | illegal 96 | 0.0000 97 | illegal 98 | """ 99 | 100 | q_values_k_2_action_exit: """ 101 | 0.0000 102 | illegal 103 | 1.0000 104 | """ 105 | 106 | q_values_k_2_action_south: """ 107 | illegal 108 | 0.0000 109 | illegal 110 | """ 111 | 112 | q_values_k_2_action_west: """ 113 | illegal 114 | 0.0000 115 | illegal 116 | """ 117 | 118 | weights_k_3: """ 119 | {((0, 0), 'exit'): 1.8999999999999999, 120 | ((0, 1), 'east'): 0, 121 | ((0, 1), 'north'): 0, 122 | ((0, 1), 'south'): 0.0, 123 | ((0, 1), 'west'): 0, 124 | ((0, 2), 'exit'): 0} 125 | """ 126 | 127 | q_values_k_3_action_north: """ 128 | illegal 129 | 0.0000 130 | illegal 131 | """ 132 | 133 | q_values_k_3_action_east: """ 134 | illegal 135 | 0.0000 136 | illegal 137 | """ 138 | 139 | q_values_k_3_action_exit: """ 140 | 0.0000 141 | illegal 142 | 1.9000 143 | """ 144 | 145 | q_values_k_3_action_south: """ 146 | illegal 147 | 0.0000 148 | illegal 149 | """ 150 | 151 | q_values_k_3_action_west: """ 152 | illegal 153 | 0.0000 154 | illegal 155 | """ 156 | 157 | weights_k_4: """ 158 | 
{((0, 0), 'exit'): 2.71, 159 | ((0, 1), 'east'): 0, 160 | ((0, 1), 'north'): 0, 161 | ((0, 1), 'south'): 0.0, 162 | ((0, 1), 'west'): 0, 163 | ((0, 2), 'exit'): 0} 164 | """ 165 | 166 | q_values_k_4_action_north: """ 167 | illegal 168 | 0.0000 169 | illegal 170 | """ 171 | 172 | q_values_k_4_action_east: """ 173 | illegal 174 | 0.0000 175 | illegal 176 | """ 177 | 178 | q_values_k_4_action_exit: """ 179 | 0.0000 180 | illegal 181 | 2.7100 182 | """ 183 | 184 | q_values_k_4_action_south: """ 185 | illegal 186 | 0.0000 187 | illegal 188 | """ 189 | 190 | q_values_k_4_action_west: """ 191 | illegal 192 | 0.0000 193 | illegal 194 | """ 195 | 196 | weights_k_5: """ 197 | {((0, 0), 'exit'): 2.71, 198 | ((0, 1), 'east'): 0, 199 | ((0, 1), 'north'): 0, 200 | ((0, 1), 'south'): 0.0, 201 | ((0, 1), 'west'): 0, 202 | ((0, 2), 'exit'): -1.0} 203 | """ 204 | 205 | q_values_k_5_action_north: """ 206 | illegal 207 | 0.0000 208 | illegal 209 | """ 210 | 211 | q_values_k_5_action_east: """ 212 | illegal 213 | 0.0000 214 | illegal 215 | """ 216 | 217 | q_values_k_5_action_exit: """ 218 | -1.0000 219 | illegal 220 | 2.7100 221 | """ 222 | 223 | q_values_k_5_action_south: """ 224 | illegal 225 | 0.0000 226 | illegal 227 | """ 228 | 229 | q_values_k_5_action_west: """ 230 | illegal 231 | 0.0000 232 | illegal 233 | """ 234 | 235 | weights_k_6: """ 236 | {((0, 0), 'exit'): 3.4390000000000001, 237 | ((0, 1), 'east'): 0, 238 | ((0, 1), 'north'): 0, 239 | ((0, 1), 'south'): 0.0, 240 | ((0, 1), 'west'): 0, 241 | ((0, 2), 'exit'): -1.0} 242 | """ 243 | 244 | q_values_k_6_action_north: """ 245 | illegal 246 | 0.0000 247 | illegal 248 | """ 249 | 250 | q_values_k_6_action_east: """ 251 | illegal 252 | 0.0000 253 | illegal 254 | """ 255 | 256 | q_values_k_6_action_exit: """ 257 | -1.0000 258 | illegal 259 | 3.4390 260 | """ 261 | 262 | q_values_k_6_action_south: """ 263 | illegal 264 | 0.0000 265 | illegal 266 | """ 267 | 268 | q_values_k_6_action_west: """ 269 | illegal 270 | 0.0000 271 | 
illegal 272 | """ 273 | 274 | weights_k_7: """ 275 | {((0, 0), 'exit'): 3.4390000000000001, 276 | ((0, 1), 'east'): 0, 277 | ((0, 1), 'north'): 0, 278 | ((0, 1), 'south'): 0.17195000000000002, 279 | ((0, 1), 'west'): 0, 280 | ((0, 2), 'exit'): -1.0} 281 | """ 282 | 283 | q_values_k_7_action_north: """ 284 | illegal 285 | 0.0000 286 | illegal 287 | """ 288 | 289 | q_values_k_7_action_east: """ 290 | illegal 291 | 0.0000 292 | illegal 293 | """ 294 | 295 | q_values_k_7_action_exit: """ 296 | -1.0000 297 | illegal 298 | 3.4390 299 | """ 300 | 301 | q_values_k_7_action_south: """ 302 | illegal 303 | 0.1720 304 | illegal 305 | """ 306 | 307 | q_values_k_7_action_west: """ 308 | illegal 309 | 0.0000 310 | illegal 311 | """ 312 | 313 | weights_k_8: """ 314 | {((0, 0), 'exit'): 4.0951000000000004, 315 | ((0, 1), 'east'): 0, 316 | ((0, 1), 'north'): 0, 317 | ((0, 1), 'south'): 0.17195000000000002, 318 | ((0, 1), 'west'): 0, 319 | ((0, 2), 'exit'): -1.0} 320 | """ 321 | 322 | q_values_k_8_action_north: """ 323 | illegal 324 | 0.0000 325 | illegal 326 | """ 327 | 328 | q_values_k_8_action_east: """ 329 | illegal 330 | 0.0000 331 | illegal 332 | """ 333 | 334 | q_values_k_8_action_exit: """ 335 | -1.0000 336 | illegal 337 | 4.0951 338 | """ 339 | 340 | q_values_k_8_action_south: """ 341 | illegal 342 | 0.1720 343 | illegal 344 | """ 345 | 346 | q_values_k_8_action_west: """ 347 | illegal 348 | 0.0000 349 | illegal 350 | """ 351 | 352 | weights_k_9: """ 353 | {((0, 0), 'exit'): 4.6855900000000004, 354 | ((0, 1), 'east'): 0, 355 | ((0, 1), 'north'): 0, 356 | ((0, 1), 'south'): 0.17195000000000002, 357 | ((0, 1), 'west'): 0, 358 | ((0, 2), 'exit'): -1.0} 359 | """ 360 | 361 | q_values_k_9_action_north: """ 362 | illegal 363 | 0.0000 364 | illegal 365 | """ 366 | 367 | q_values_k_9_action_east: """ 368 | illegal 369 | 0.0000 370 | illegal 371 | """ 372 | 373 | q_values_k_9_action_exit: """ 374 | -1.0000 375 | illegal 376 | 4.6856 377 | """ 378 | 379 | q_values_k_9_action_south: 
""" 380 | illegal 381 | 0.1720 382 | illegal 383 | """ 384 | 385 | q_values_k_9_action_west: """ 386 | illegal 387 | 0.0000 388 | illegal 389 | """ 390 | 391 | weights_k_100: """ 392 | {((0, 0), 'exit'): 9.8175199636859922, 393 | ((0, 1), 'east'): 0.40629236674335106, 394 | ((0, 1), 'north'): -0.45341857899847993, 395 | ((0, 1), 'south'): 2.126721095524319, 396 | ((0, 1), 'west'): 0.39193283364906867, 397 | ((0, 2), 'exit'): -9.4766523669726386} 398 | """ 399 | 400 | q_values_k_100_action_north: """ 401 | illegal 402 | -0.4534 403 | illegal 404 | """ 405 | 406 | q_values_k_100_action_east: """ 407 | illegal 408 | 0.4063 409 | illegal 410 | """ 411 | 412 | q_values_k_100_action_exit: """ 413 | -9.4767 414 | illegal 415 | 9.8175 416 | """ 417 | 418 | q_values_k_100_action_south: """ 419 | illegal 420 | 2.1267 421 | illegal 422 | """ 423 | 424 | q_values_k_100_action_west: """ 425 | illegal 426 | 0.3919 427 | illegal 428 | """ 429 | 430 | -------------------------------------------------------------------------------- /test_cases/q8/1-tinygrid.test: -------------------------------------------------------------------------------- 1 | class: "ApproximateQLearningTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 11 | S 12 | 10 13 | """ 14 | discount: "0.5" 15 | noise: "0.0" 16 | livingReward: "0.0" 17 | epsilon: "0.5" 18 | learningRate: "0.1" 19 | numExperiences: "100" 20 | valueIterations: "100" 21 | iterations: "10000" 22 | 23 | -------------------------------------------------------------------------------- /test_cases/q8/2-tinygrid-noisy.solution: -------------------------------------------------------------------------------- 1 | weights_k_0: """ 2 | {((0, 0), 'exit'): 0, 3 | ((0, 1), 'east'): 0, 4 | ((0, 1), 'north'): 0, 5 | ((0, 1), 'south'): 0, 6 | ((0, 1), 'west'): 0, 7 | ((0, 2), 'exit'): 0} 8 | """ 9 | 10 | 
q_values_k_0_action_north: """ 11 | illegal 12 | 0.0000 13 | illegal 14 | """ 15 | 16 | q_values_k_0_action_east: """ 17 | illegal 18 | 0.0000 19 | illegal 20 | """ 21 | 22 | q_values_k_0_action_exit: """ 23 | 0.0000 24 | illegal 25 | 0.0000 26 | """ 27 | 28 | q_values_k_0_action_south: """ 29 | illegal 30 | 0.0000 31 | illegal 32 | """ 33 | 34 | q_values_k_0_action_west: """ 35 | illegal 36 | 0.0000 37 | illegal 38 | """ 39 | 40 | weights_k_1: """ 41 | {((0, 0), 'exit'): 1.0, 42 | ((0, 1), 'east'): 0, 43 | ((0, 1), 'north'): 0, 44 | ((0, 1), 'south'): 0, 45 | ((0, 1), 'west'): 0, 46 | ((0, 2), 'exit'): 0} 47 | """ 48 | 49 | q_values_k_1_action_north: """ 50 | illegal 51 | 0.0000 52 | illegal 53 | """ 54 | 55 | q_values_k_1_action_east: """ 56 | illegal 57 | 0.0000 58 | illegal 59 | """ 60 | 61 | q_values_k_1_action_exit: """ 62 | 0.0000 63 | illegal 64 | 1.0000 65 | """ 66 | 67 | q_values_k_1_action_south: """ 68 | illegal 69 | 0.0000 70 | illegal 71 | """ 72 | 73 | q_values_k_1_action_west: """ 74 | illegal 75 | 0.0000 76 | illegal 77 | """ 78 | 79 | weights_k_2: """ 80 | {((0, 0), 'exit'): 1.0, 81 | ((0, 1), 'east'): 0, 82 | ((0, 1), 'north'): 0, 83 | ((0, 1), 'south'): 0.0, 84 | ((0, 1), 'west'): 0, 85 | ((0, 2), 'exit'): 0} 86 | """ 87 | 88 | q_values_k_2_action_north: """ 89 | illegal 90 | 0.0000 91 | illegal 92 | """ 93 | 94 | q_values_k_2_action_east: """ 95 | illegal 96 | 0.0000 97 | illegal 98 | """ 99 | 100 | q_values_k_2_action_exit: """ 101 | 0.0000 102 | illegal 103 | 1.0000 104 | """ 105 | 106 | q_values_k_2_action_south: """ 107 | illegal 108 | 0.0000 109 | illegal 110 | """ 111 | 112 | q_values_k_2_action_west: """ 113 | illegal 114 | 0.0000 115 | illegal 116 | """ 117 | 118 | weights_k_3: """ 119 | {((0, 0), 'exit'): 1.8999999999999999, 120 | ((0, 1), 'east'): 0, 121 | ((0, 1), 'north'): 0, 122 | ((0, 1), 'south'): 0.0, 123 | ((0, 1), 'west'): 0, 124 | ((0, 2), 'exit'): 0} 125 | """ 126 | 127 | q_values_k_3_action_north: """ 128 | illegal 129 | 
0.0000 130 | illegal 131 | """ 132 | 133 | q_values_k_3_action_east: """ 134 | illegal 135 | 0.0000 136 | illegal 137 | """ 138 | 139 | q_values_k_3_action_exit: """ 140 | 0.0000 141 | illegal 142 | 1.9000 143 | """ 144 | 145 | q_values_k_3_action_south: """ 146 | illegal 147 | 0.0000 148 | illegal 149 | """ 150 | 151 | q_values_k_3_action_west: """ 152 | illegal 153 | 0.0000 154 | illegal 155 | """ 156 | 157 | weights_k_4: """ 158 | {((0, 0), 'exit'): 2.71, 159 | ((0, 1), 'east'): 0, 160 | ((0, 1), 'north'): 0, 161 | ((0, 1), 'south'): 0.0, 162 | ((0, 1), 'west'): 0, 163 | ((0, 2), 'exit'): 0} 164 | """ 165 | 166 | q_values_k_4_action_north: """ 167 | illegal 168 | 0.0000 169 | illegal 170 | """ 171 | 172 | q_values_k_4_action_east: """ 173 | illegal 174 | 0.0000 175 | illegal 176 | """ 177 | 178 | q_values_k_4_action_exit: """ 179 | 0.0000 180 | illegal 181 | 2.7100 182 | """ 183 | 184 | q_values_k_4_action_south: """ 185 | illegal 186 | 0.0000 187 | illegal 188 | """ 189 | 190 | q_values_k_4_action_west: """ 191 | illegal 192 | 0.0000 193 | illegal 194 | """ 195 | 196 | weights_k_5: """ 197 | {((0, 0), 'exit'): 2.71, 198 | ((0, 1), 'east'): 0, 199 | ((0, 1), 'north'): 0, 200 | ((0, 1), 'south'): 0.0, 201 | ((0, 1), 'west'): 0, 202 | ((0, 2), 'exit'): -1.0} 203 | """ 204 | 205 | q_values_k_5_action_north: """ 206 | illegal 207 | 0.0000 208 | illegal 209 | """ 210 | 211 | q_values_k_5_action_east: """ 212 | illegal 213 | 0.0000 214 | illegal 215 | """ 216 | 217 | q_values_k_5_action_exit: """ 218 | -1.0000 219 | illegal 220 | 2.7100 221 | """ 222 | 223 | q_values_k_5_action_south: """ 224 | illegal 225 | 0.0000 226 | illegal 227 | """ 228 | 229 | q_values_k_5_action_west: """ 230 | illegal 231 | 0.0000 232 | illegal 233 | """ 234 | 235 | weights_k_6: """ 236 | {((0, 0), 'exit'): 3.4390000000000001, 237 | ((0, 1), 'east'): 0, 238 | ((0, 1), 'north'): 0, 239 | ((0, 1), 'south'): 0.0, 240 | ((0, 1), 'west'): 0, 241 | ((0, 2), 'exit'): -1.0} 242 | """ 243 | 244 | 
q_values_k_6_action_north: """ 245 | illegal 246 | 0.0000 247 | illegal 248 | """ 249 | 250 | q_values_k_6_action_east: """ 251 | illegal 252 | 0.0000 253 | illegal 254 | """ 255 | 256 | q_values_k_6_action_exit: """ 257 | -1.0000 258 | illegal 259 | 3.4390 260 | """ 261 | 262 | q_values_k_6_action_south: """ 263 | illegal 264 | 0.0000 265 | illegal 266 | """ 267 | 268 | q_values_k_6_action_west: """ 269 | illegal 270 | 0.0000 271 | illegal 272 | """ 273 | 274 | weights_k_7: """ 275 | {((0, 0), 'exit'): 3.4390000000000001, 276 | ((0, 1), 'east'): 0, 277 | ((0, 1), 'north'): 0, 278 | ((0, 1), 'south'): 0.25792500000000002, 279 | ((0, 1), 'west'): 0, 280 | ((0, 2), 'exit'): -1.0} 281 | """ 282 | 283 | q_values_k_7_action_north: """ 284 | illegal 285 | 0.0000 286 | illegal 287 | """ 288 | 289 | q_values_k_7_action_east: """ 290 | illegal 291 | 0.0000 292 | illegal 293 | """ 294 | 295 | q_values_k_7_action_exit: """ 296 | -1.0000 297 | illegal 298 | 3.4390 299 | """ 300 | 301 | q_values_k_7_action_south: """ 302 | illegal 303 | 0.2579 304 | illegal 305 | """ 306 | 307 | q_values_k_7_action_west: """ 308 | illegal 309 | 0.0000 310 | illegal 311 | """ 312 | 313 | weights_k_8: """ 314 | {((0, 0), 'exit'): 4.0951000000000004, 315 | ((0, 1), 'east'): 0, 316 | ((0, 1), 'north'): 0, 317 | ((0, 1), 'south'): 0.25792500000000002, 318 | ((0, 1), 'west'): 0, 319 | ((0, 2), 'exit'): -1.0} 320 | """ 321 | 322 | q_values_k_8_action_north: """ 323 | illegal 324 | 0.0000 325 | illegal 326 | """ 327 | 328 | q_values_k_8_action_east: """ 329 | illegal 330 | 0.0000 331 | illegal 332 | """ 333 | 334 | q_values_k_8_action_exit: """ 335 | -1.0000 336 | illegal 337 | 4.0951 338 | """ 339 | 340 | q_values_k_8_action_south: """ 341 | illegal 342 | 0.2579 343 | illegal 344 | """ 345 | 346 | q_values_k_8_action_west: """ 347 | illegal 348 | 0.0000 349 | illegal 350 | """ 351 | 352 | weights_k_9: """ 353 | {((0, 0), 'exit'): 4.6855900000000004, 354 | ((0, 1), 'east'): 0, 355 | ((0, 1), 'north'): 
0, 356 | ((0, 1), 'south'): 0.25792500000000002, 357 | ((0, 1), 'west'): 0, 358 | ((0, 2), 'exit'): -1.0} 359 | """ 360 | 361 | q_values_k_9_action_north: """ 362 | illegal 363 | 0.0000 364 | illegal 365 | """ 366 | 367 | q_values_k_9_action_east: """ 368 | illegal 369 | 0.0000 370 | illegal 371 | """ 372 | 373 | q_values_k_9_action_exit: """ 374 | -1.0000 375 | illegal 376 | 4.6856 377 | """ 378 | 379 | q_values_k_9_action_south: """ 380 | illegal 381 | 0.2579 382 | illegal 383 | """ 384 | 385 | q_values_k_9_action_west: """ 386 | illegal 387 | 0.0000 388 | illegal 389 | """ 390 | 391 | weights_k_100: """ 392 | {((0, 0), 'exit'): 9.8175199636859922, 393 | ((0, 1), 'east'): 0.94989681048235752, 394 | ((0, 1), 'north'): -0.66699795412272, 395 | ((0, 1), 'south'): 3.256207905310105, 396 | ((0, 1), 'west'): 0.82362807350146272, 397 | ((0, 2), 'exit'): -9.4766523669726386} 398 | """ 399 | 400 | q_values_k_100_action_north: """ 401 | illegal 402 | -0.6670 403 | illegal 404 | """ 405 | 406 | q_values_k_100_action_east: """ 407 | illegal 408 | 0.9499 409 | illegal 410 | """ 411 | 412 | q_values_k_100_action_exit: """ 413 | -9.4767 414 | illegal 415 | 9.8175 416 | """ 417 | 418 | q_values_k_100_action_south: """ 419 | illegal 420 | 3.2562 421 | illegal 422 | """ 423 | 424 | q_values_k_100_action_west: """ 425 | illegal 426 | 0.8236 427 | illegal 428 | """ 429 | 430 | -------------------------------------------------------------------------------- /test_cases/q8/2-tinygrid-noisy.test: -------------------------------------------------------------------------------- 1 | class: "ApproximateQLearningTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 11 | S 12 | 10 13 | """ 14 | discount: "0.75" 15 | noise: "0.25" 16 | livingReward: "0.0" 17 | epsilon: "0.5" 18 | learningRate: "0.1" 19 | numExperiences: "100" 20 | valueIterations: "100" 21 | 
iterations: "10000" 22 | 23 | -------------------------------------------------------------------------------- /test_cases/q8/3-bridge.test: -------------------------------------------------------------------------------- 1 | class: "ApproximateQLearningTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | # 10 # 11 | -100 _ -100 12 | -100 _ -100 13 | -100 _ -100 14 | -100 _ -100 15 | -100 S -100 16 | # 1 # 17 | """ 18 | gridName: "bridgeGrid" 19 | discount: "0.85" 20 | noise: "0.1" 21 | livingReward: "0.0" 22 | epsilon: "0.5" 23 | learningRate: "0.1" 24 | numExperiences: "500" 25 | valueIterations: "100" 26 | iterations: "10000" 27 | 28 | -------------------------------------------------------------------------------- /test_cases/q8/4-discountgrid.test: -------------------------------------------------------------------------------- 1 | class: "ApproximateQLearningTest" 2 | 3 | # GridWorld specification 4 | # _ is empty space 5 | # numbers are terminal states with that value 6 | # # is a wall 7 | # S is a start state 8 | # 9 | grid: """ 10 | -10 _ 10 _ _ 11 | -10 _ # _ _ 12 | -10 _ 1 _ _ 13 | -10 _ # # _ 14 | -10 S _ _ _ 15 | """ 16 | discount: "0.9" 17 | noise: "0.2" 18 | livingReward: "0.0" 19 | epsilon: "0.2" 20 | learningRate: "0.1" 21 | numExperiences: "3000" 22 | valueIterations: "100" 23 | iterations: "10000" 24 | 25 | -------------------------------------------------------------------------------- /test_cases/q8/CONFIG: -------------------------------------------------------------------------------- 1 | max_points: "3" 2 | class: "PassAllTestsQuestion" 3 | -------------------------------------------------------------------------------- /textDisplay.py: -------------------------------------------------------------------------------- 1 | # textDisplay.py 2 | # -------------- 3 | # Licensing Information: Please do not distribute or 
publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | import pacman, time 12 | 13 | DRAW_EVERY = 1 14 | SLEEP_TIME = 0 # This can be overwritten by __init__ 15 | DISPLAY_MOVES = False 16 | QUIET = False # Supresses output 17 | 18 | class NullGraphics: 19 | def initialize(self, state, isBlue = False): 20 | pass 21 | 22 | def update(self, state): 23 | pass 24 | 25 | def pause(self): 26 | time.sleep(SLEEP_TIME) 27 | 28 | def draw(self, state): 29 | print state 30 | 31 | def finish(self): 32 | pass 33 | 34 | class PacmanGraphics: 35 | def __init__(self, speed=None): 36 | if speed != None: 37 | global SLEEP_TIME 38 | SLEEP_TIME = speed 39 | 40 | def initialize(self, state, isBlue = False): 41 | self.draw(state) 42 | self.pause() 43 | self.turn = 0 44 | self.agentCounter = 0 45 | 46 | def update(self, state): 47 | numAgents = len(state.agentStates) 48 | self.agentCounter = (self.agentCounter + 1) % numAgents 49 | if self.agentCounter == 0: 50 | self.turn += 1 51 | if DISPLAY_MOVES: 52 | ghosts = [pacman.nearestPoint(state.getGhostPosition(i)) for i in range(1, numAgents)] 53 | print "%4d) P: %-8s" % (self.turn, str(pacman.nearestPoint(state.getPacmanPosition()))),'| Score: %-5d' % state.score,'| Ghosts:', ghosts 54 | if self.turn % DRAW_EVERY == 0: 55 | self.draw(state) 56 | self.pause() 57 | if state._win or state._lose: 58 | self.draw(state) 59 | 60 | def pause(self): 61 | time.sleep(SLEEP_TIME) 62 | 63 | def draw(self, state): 64 | print state 65 | 66 | def finish(self): 67 | pass 68 | 
-------------------------------------------------------------------------------- /textGridworldDisplay.py: -------------------------------------------------------------------------------- 1 | # textGridworldDisplay.py 2 | # ----------------------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | import util 12 | 13 | class TextGridworldDisplay: 14 | 15 | def __init__(self, gridworld): 16 | self.gridworld = gridworld 17 | 18 | def start(self): 19 | pass 20 | 21 | def pause(self): 22 | pass 23 | 24 | def displayValues(self, agent, currentState = None, message = None): 25 | if message != None: 26 | print message 27 | values = util.Counter() 28 | policy = {} 29 | states = self.gridworld.getStates() 30 | for state in states: 31 | values[state] = agent.getValue(state) 32 | policy[state] = agent.getPolicy(state) 33 | prettyPrintValues(self.gridworld, values, policy, currentState) 34 | 35 | def displayNullValues(self, agent, currentState = None, message = None): 36 | if message != None: print message 37 | prettyPrintNullValues(self.gridworld, currentState) 38 | 39 | def displayQValues(self, agent, currentState = None, message = None): 40 | if message != None: print message 41 | qValues = util.Counter() 42 | states = self.gridworld.getStates() 43 | for state in states: 44 | for action in self.gridworld.getPossibleActions(state): 45 | qValues[(state, action)] = agent.getQValue(state, action) 46 | prettyPrintQValues(self.gridworld, qValues, currentState) 47 | 48 | 49 | def prettyPrintValues(gridWorld, values, 
policy=None, currentState = None): 50 | grid = gridWorld.grid 51 | maxLen = 11 52 | newRows = [] 53 | for y in range(grid.height): 54 | newRow = [] 55 | for x in range(grid.width): 56 | state = (x, y) 57 | value = values[state] 58 | action = None 59 | if policy != None and state in policy: 60 | action = policy[state] 61 | actions = gridWorld.getPossibleActions(state) 62 | if action not in actions and 'exit' in actions: 63 | action = 'exit' 64 | valString = None 65 | if action == 'exit': 66 | valString = border('%.2f' % value) 67 | else: 68 | valString = '\n\n%.2f\n\n' % value 69 | valString += ' '*maxLen 70 | if grid[x][y] == 'S': 71 | valString = '\n\nS: %.2f\n\n' % value 72 | valString += ' '*maxLen 73 | if grid[x][y] == '#': 74 | valString = '\n#####\n#####\n#####\n' 75 | valString += ' '*maxLen 76 | pieces = [valString] 77 | text = ("\n".join(pieces)).split('\n') 78 | if currentState == state: 79 | l = len(text[1]) 80 | if l == 0: 81 | text[1] = '*' 82 | else: 83 | text[1] = "|" + ' ' * int((l-1)/2-1) + '*' + ' ' * int((l)/2-1) + "|" 84 | if action == 'east': 85 | text[2] = ' ' + text[2] + ' >' 86 | elif action == 'west': 87 | text[2] = '< ' + text[2] + ' ' 88 | elif action == 'north': 89 | text[0] = ' ' * int(maxLen/2) + '^' +' ' * int(maxLen/2) 90 | elif action == 'south': 91 | text[4] = ' ' * int(maxLen/2) + 'v' +' ' * int(maxLen/2) 92 | newCell = "\n".join(text) 93 | newRow.append(newCell) 94 | newRows.append(newRow) 95 | numCols = grid.width 96 | for rowNum, row in enumerate(newRows): 97 | row.insert(0,"\n\n"+str(rowNum)) 98 | newRows.reverse() 99 | colLabels = [str(colNum) for colNum in range(numCols)] 100 | colLabels.insert(0,' ') 101 | finalRows = [colLabels] + newRows 102 | print indent(finalRows,separateRows=True,delim='|', prefix='|',postfix='|', justify='center',hasHeader=True) 103 | 104 | 105 | def prettyPrintNullValues(gridWorld, currentState = None): 106 | grid = gridWorld.grid 107 | maxLen = 11 108 | newRows = [] 109 | for y in 
range(grid.height): 110 | newRow = [] 111 | for x in range(grid.width): 112 | state = (x, y) 113 | 114 | # value = values[state] 115 | 116 | action = None 117 | # if policy != None and state in policy: 118 | # action = policy[state] 119 | # 120 | actions = gridWorld.getPossibleActions(state) 121 | 122 | if action not in actions and 'exit' in actions: 123 | action = 'exit' 124 | 125 | valString = None 126 | # if action == 'exit': 127 | # valString = border('%.2f' % value) 128 | # else: 129 | # valString = '\n\n%.2f\n\n' % value 130 | # valString += ' '*maxLen 131 | 132 | if grid[x][y] == 'S': 133 | valString = '\n\nS\n\n' 134 | valString += ' '*maxLen 135 | elif grid[x][y] == '#': 136 | valString = '\n#####\n#####\n#####\n' 137 | valString += ' '*maxLen 138 | elif type(grid[x][y]) == float or type(grid[x][y]) == int: 139 | valString = border('%.2f' % float(grid[x][y])) 140 | else: valString = border(' ') 141 | pieces = [valString] 142 | 143 | text = ("\n".join(pieces)).split('\n') 144 | 145 | if currentState == state: 146 | l = len(text[1]) 147 | if l == 0: 148 | text[1] = '*' 149 | else: 150 | text[1] = "|" + ' ' * int((l-1)/2-1) + '*' + ' ' * int((l)/2-1) + "|" 151 | 152 | if action == 'east': 153 | text[2] = ' ' + text[2] + ' >' 154 | elif action == 'west': 155 | text[2] = '< ' + text[2] + ' ' 156 | elif action == 'north': 157 | text[0] = ' ' * int(maxLen/2) + '^' +' ' * int(maxLen/2) 158 | elif action == 'south': 159 | text[4] = ' ' * int(maxLen/2) + 'v' +' ' * int(maxLen/2) 160 | newCell = "\n".join(text) 161 | newRow.append(newCell) 162 | newRows.append(newRow) 163 | numCols = grid.width 164 | for rowNum, row in enumerate(newRows): 165 | row.insert(0,"\n\n"+str(rowNum)) 166 | newRows.reverse() 167 | colLabels = [str(colNum) for colNum in range(numCols)] 168 | colLabels.insert(0,' ') 169 | finalRows = [colLabels] + newRows 170 | print indent(finalRows,separateRows=True,delim='|', prefix='|',postfix='|', justify='center',hasHeader=True) 171 | 172 | def 
prettyPrintQValues(gridWorld, qValues, currentState=None): 173 | grid = gridWorld.grid 174 | maxLen = 11 175 | newRows = [] 176 | for y in range(grid.height): 177 | newRow = [] 178 | for x in range(grid.width): 179 | state = (x, y) 180 | actions = gridWorld.getPossibleActions(state) 181 | if actions == None or len(actions) == 0: 182 | actions = [None] 183 | bestQ = max([qValues[(state, action)] for action in actions]) 184 | bestActions = [action for action in actions if qValues[(state, action)] == bestQ] 185 | 186 | # display cell 187 | qStrings = dict([(action, "%.2f" % qValues[(state, action)]) for action in actions]) 188 | northString = ('north' in qStrings and qStrings['north']) or ' ' 189 | southString = ('south' in qStrings and qStrings['south']) or ' ' 190 | eastString = ('east' in qStrings and qStrings['east']) or ' ' 191 | westString = ('west' in qStrings and qStrings['west']) or ' ' 192 | exitString = ('exit' in qStrings and qStrings['exit']) or ' ' 193 | 194 | eastLen = len(eastString) 195 | westLen = len(westString) 196 | if eastLen < westLen: 197 | eastString = ' '*(westLen-eastLen)+eastString 198 | if westLen < eastLen: 199 | westString = westString+' '*(eastLen-westLen) 200 | 201 | if 'north' in bestActions: 202 | northString = '/'+northString+'\\' 203 | if 'south' in bestActions: 204 | southString = '\\'+southString+'/' 205 | if 'east' in bestActions: 206 | eastString = ''+eastString+'>' 207 | else: 208 | eastString = ''+eastString+' ' 209 | if 'west' in bestActions: 210 | westString = '<'+westString+'' 211 | else: 212 | westString = ' '+westString+'' 213 | if 'exit' in bestActions: 214 | exitString = '[ '+exitString+' ]' 215 | 216 | 217 | ewString = westString + " " + eastString 218 | if state == currentState: 219 | ewString = westString + " * " + eastString 220 | if state == gridWorld.getStartState(): 221 | ewString = westString + " S " + eastString 222 | if state == currentState and state == gridWorld.getStartState(): 223 | ewString = westString 
+ " S:* " + eastString 224 | 225 | text = [northString, "\n"+exitString, ewString, ' '*maxLen+"\n", southString] 226 | 227 | if grid[x][y] == '#': 228 | text = ['', '\n#####\n#####\n#####', ''] 229 | 230 | newCell = "\n".join(text) 231 | newRow.append(newCell) 232 | newRows.append(newRow) 233 | numCols = grid.width 234 | for rowNum, row in enumerate(newRows): 235 | row.insert(0,"\n\n\n"+str(rowNum)) 236 | newRows.reverse() 237 | colLabels = [str(colNum) for colNum in range(numCols)] 238 | colLabels.insert(0,' ') 239 | finalRows = [colLabels] + newRows 240 | 241 | print indent(finalRows,separateRows=True,delim='|',prefix='|',postfix='|', justify='center',hasHeader=True) 242 | 243 | def border(text): 244 | length = len(text) 245 | pieces = ['-' * (length+2), '|'+' ' * (length+2)+'|', ' | '+text+' | ', '|'+' ' * (length+2)+'|','-' * (length+2)] 246 | return '\n'.join(pieces) 247 | 248 | # INDENTING CODE 249 | 250 | # Indenting code based on a post from George Sakkis 251 | # (http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/267662) 252 | 253 | import cStringIO,operator 254 | 255 | def indent(rows, hasHeader=False, headerChar='-', delim=' | ', justify='left', 256 | separateRows=False, prefix='', postfix='', wrapfunc=lambda x:x): 257 | """Indents a table by column. 258 | - rows: A sequence of sequences of items, one sequence per row. 259 | - hasHeader: True if the first row consists of the columns' names. 260 | - headerChar: Character to be used for the row separator line 261 | (if hasHeader==True or separateRows==True). 262 | - delim: The column delimiter. 263 | - justify: Determines how are data justified in their column. 264 | Valid values are 'left','right' and 'center'. 265 | - separateRows: True if rows are to be separated by a line 266 | of 'headerChar's. 267 | - prefix: A string prepended to each printed row. 268 | - postfix: A string appended to each printed row. 
269 | - wrapfunc: A function f(text) for wrapping text; each element in 270 | the table is first wrapped by this function.""" 271 | # closure for breaking logical rows to physical, using wrapfunc 272 | def rowWrapper(row): 273 | newRows = [wrapfunc(item).split('\n') for item in row] 274 | return [[substr or '' for substr in item] for item in map(None,*newRows)] 275 | # break each logical row into one or more physical ones 276 | logicalRows = [rowWrapper(row) for row in rows] 277 | # columns of physical rows 278 | columns = map(None,*reduce(operator.add,logicalRows)) 279 | # get the maximum of each column by the string length of its items 280 | maxWidths = [max([len(str(item)) for item in column]) for column in columns] 281 | rowSeparator = headerChar * (len(prefix) + len(postfix) + sum(maxWidths) + \ 282 | len(delim)*(len(maxWidths)-1)) 283 | # select the appropriate justify method 284 | justify = {'center':str.center, 'right':str.rjust, 'left':str.ljust}[justify.lower()] 285 | output=cStringIO.StringIO() 286 | if separateRows: print >> output, rowSeparator 287 | for physicalRows in logicalRows: 288 | for row in physicalRows: 289 | print >> output, \ 290 | prefix \ 291 | + delim.join([justify(str(item),width) for (item,width) in zip(row,maxWidths)]) \ 292 | + postfix 293 | if separateRows or hasHeader: print >> output, rowSeparator; hasHeader=False 294 | return output.getvalue() 295 | 296 | import math 297 | def wrap_always(text, width): 298 | """A simple word-wrap function that wraps text on exactly width characters. 
299 | It doesn't split the text in words.""" 300 | return '\n'.join([ text[width*i:width*(i+1)] \ 301 | for i in xrange(int(math.ceil(1.*len(text)/width))) ]) 302 | 303 | 304 | # TEST OF DISPLAY CODE 305 | 306 | if __name__ == '__main__': 307 | import gridworld, util 308 | 309 | grid = gridworld.getCliffGrid3() 310 | print grid.getStates() 311 | 312 | policy = dict([(state,'east') for state in grid.getStates()]) 313 | values = util.Counter(dict([(state,1000.23) for state in grid.getStates()])) 314 | prettyPrintValues(grid, values, policy, currentState = (0,0)) 315 | 316 | stateCrossActions = [[(state, action) for action in grid.getPossibleActions(state)] for state in grid.getStates()] 317 | qStates = reduce(lambda x,y: x+y, stateCrossActions, []) 318 | qValues = util.Counter(dict([((state, action), 10.5) for state, action in qStates])) 319 | qValues = util.Counter(dict([((state, action), 10.5) for state, action in reduce(lambda x,y: x+y, stateCrossActions, [])])) 320 | prettyPrintQValues(grid, qValues, currentState = (0,0)) 321 | -------------------------------------------------------------------------------- /valueIterationAgents.py: -------------------------------------------------------------------------------- 1 | # valueIterationAgents.py 2 | # ----------------------- 3 | # Licensing Information: Please do not distribute or publish solutions to this 4 | # project. You are free to use and extend these projects for educational 5 | # purposes. The Pacman AI projects were developed at UC Berkeley, primarily by 6 | # John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). 7 | # Student side autograding was added by Brad Miller, Nick Hay, and Pieter 8 | # Abbeel in Spring 2013. 
9 | # For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html 10 | 11 | import mdp, util 12 | 13 | from learningAgents import ValueEstimationAgent 14 | 15 | class ValueIterationAgent(ValueEstimationAgent): 16 | """ 17 | * Please read learningAgents.py before reading this.* 18 | 19 | A ValueIterationAgent takes a Markov decision process 20 | (see mdp.py) on initialization and runs value iteration 21 | for a given number of iterations using the supplied 22 | discount factor. 23 | """ 24 | def __init__(self, mdp, discount = 0.9, iterations = 100): 25 | """ 26 | Your value iteration agent should take an mdp on 27 | construction, run the indicated number of iterations 28 | and then act according to the resulting policy. 29 | 30 | Some useful mdp methods you will use: 31 | mdp.getStates() 32 | mdp.getPossibleActions(state) 33 | mdp.getTransitionStatesAndProbs(state, action) 34 | mdp.getReward(state, action, nextState) 35 | mdp.isTerminal(state) 36 | """ 37 | self.mdp = mdp 38 | self.discount = discount 39 | self.iterations = iterations 40 | self.values = util.Counter() # A Counter is a dict with default 0 41 | # Write value iteration code here 42 | vcurr = util.Counter() 43 | for i in xrange(self.iterations): 44 | vcurr = self.values.copy() 45 | for state in self.mdp.getStates(): 46 | all_actions = self.mdp.getPossibleActions(state) 47 | transitions = [] 48 | value_list = [] 49 | if self.mdp.isTerminal(state): 50 | self.values[state] = 0 51 | else: 52 | for action in all_actions: 53 | transitions = self.mdp.getTransitionStatesAndProbs(state, action) 54 | value = 0 55 | for transition in transitions: 56 | value += transition[1]*(self.mdp.getReward(state, action, transition[0]) + self.discount * vcurr[transition[0]]) 57 | value_list.append(value) 58 | self.values[state] = max(value_list) 59 | 60 | 61 | def getValue(self, state): 62 | """ 63 | Return the value of the state (computed in __init__). 
64 | """ 65 | return self.values[state] 66 | 67 | 68 | def computeQValueFromValues(self, state, action): 69 | """ 70 | Compute the Q-value of action in state from the 71 | value function stored in self.values. 72 | """ 73 | value = 0 74 | transitions = self.mdp.getTransitionStatesAndProbs(state, action) 75 | for transition in transitions: 76 | value += transition[1]*(self.mdp.getReward(state, action, transition[0]) + self.discount * self.values[transition[0]]) 77 | return value 78 | 79 | 80 | 81 | def computeActionFromValues(self, state): 82 | """ 83 | The policy is the best action in the given state 84 | according to the values currently stored in self.values. 85 | 86 | You may break ties any way you see fit. Note that if 87 | there are no legal actions, which is the case at the 88 | terminal state, you should return None. 89 | """ 90 | if self.mdp.isTerminal(state): 91 | return None 92 | else: 93 | bestval = -99999999999 94 | bestaction = 0 95 | all_actions = self.mdp.getPossibleActions(state) 96 | for action in all_actions: 97 | transitions = self.mdp.getTransitionStatesAndProbs(state, action) 98 | value = 0 99 | for transition in transitions: 100 | value += transition[1]*(self.mdp.getReward(state, action, transition[0]) + self.discount * self.values[transition[0]]) 101 | if value > bestval: 102 | bestaction = action 103 | bestval = value 104 | return bestaction 105 | 106 | 107 | 108 | def getPolicy(self, state): 109 | return self.computeActionFromValues(state) 110 | 111 | def getAction(self, state): 112 | "Returns the policy at the state (no exploration)." 113 | return self.computeActionFromValues(state) 114 | 115 | def getQValue(self, state, action): 116 | return self.computeQValueFromValues(state, action) 117 | --------------------------------------------------------------------------------